In [1]:
# Import dependencies
from pathlib import Path
from natsort import natsorted, index_natsorted, order_by_index
import natsort
import pandas as pd
import numpy as np
import math
import re

## Read in CSV
### Order of the columns is important. Columns headers can be anything but must be present.
source plate name, source well, destination plate name, destination well, transfer volume (uL)

In [2]:
# Read in CSV file
file_path = Path('smallhitpickfile.csv')
raw_hitpick_df = pd.read_csv(file_path)
raw_hitpick_df

Unnamed: 0,src_plt,src_well,dest_plt,dest_well,volume
0,1000823_1,A01,103832_a,C14,613.303333
1,1000823_2,A01,103832_b,C15,703.705556
2,1000823_3,A10,103832_c,C17,680.0
3,1000823_4,A12,103832_d,C07,683.947778
4,1000823_3,A13,103832_e,C06,611.055556


## Standardize index and column names

In [3]:
# Source Plate Name, Source Well, Destination Plate Name, Destination Well, Transfer Volume (uL)

column1 = raw_hitpick_df.columns[0]
column2 = raw_hitpick_df.columns[1]
column3 = raw_hitpick_df.columns[2]
column4 = raw_hitpick_df.columns[3]
column5 = raw_hitpick_df.columns[4]


hitpick_df = raw_hitpick_df.rename(columns={
    column1:'Source_Plate_Name',
    column2:'Source_Well',
    column3:'Destination_Plate_Name',
    column4:'Destination_Well',
    column5:'Transfer_Volume_(uL)'
})

# Start dataframe at index 1 and rename the column to Transfer
hitpick_df.index = np.arange(1, len(hitpick_df) + 1)
hitpick_df.index.name = 'Transfer'

hitpick_df

Unnamed: 0_level_0,Source_Plate_Name,Source_Well,Destination_Plate_Name,Destination_Well,Transfer_Volume_(uL)
Transfer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1000823_1,A01,103832_a,C14,613.303333
2,1000823_2,A01,103832_b,C15,703.705556
3,1000823_3,A10,103832_c,C17,680.0
4,1000823_4,A12,103832_d,C07,683.947778
5,1000823_3,A13,103832_e,C06,611.055556


## Format CSV

In [4]:
def well_number_to_identifier(number, total_well_count):
    
    # 24-well plate (4 row x 6 col)
    if total_well_count == 24:
        try:
            well_identifier = 'ABCD'[(number - 1) // 6] + str((number - 1) % 6 + 1)
            # print(f'{number} ==> {well_identifier}')
        except IndexError:
            print(f'{number} is an invalid well number. Plate does not have that well.')
    
    
    # 48-well plate (8 row x 6 col)
    if total_well_count == 48:
        try:
            well_identifier = 'ABCDEFGH'[(number - 1) // 6] + str((number - 1) % 6 + 1)
            # print(f'{number} ==> {well_identifier}')
        except IndexError:
            print(f'{number} is an invalid well number. Plate does not have that well.')
    
    
    # 96-well plate (8 row x 12 col)
    if total_well_count == 96:
        try:
            # number = 13
            # well_identifier = 'ABCDEFGH[(12 // 12)] + %02d % ((12 % 12) + 1)
            # well_identifier = B
            well_identifier = 'ABCDEFGH'[(number - 1) // 12] + str((number - 1) % 12 + 1)
            # print(f'{number} ==> {well_identifier}')
            return well_identifier
        except IndexError:
            print(f'{number} is an invalid well number. Plate does not have that well.')
                  
    # 384-well plate (16 rows x 24 col)
    if total_well_count == 384:
        try:
            well_identifier = 'ABCDEFGHIJKLMNOP'[(number - 1) // 24] + str((number - 1) % 24 + 1)
            # print(f'{number} ==> {well_identifier}')
            return well_identifier
        except IndexError:
            print(f'{number} is an invalid well number. Plate does not have that well.')

In [5]:
# Remove leading zeros and format well identifier to be uppercase
# Handle A01 if the user has that value in the column
def remove_leading_zeros(identifier):
    # Only capture the leading zeros until a nonzero digit is found
    regex = "(0+)(?=[1-9])"
    formatted_identifier = re.sub(regex, "", identifier.upper())
    print(formatted_identifier)
    return formatted_identifier
    
remove_leading_zeros('A01')
remove_leading_zeros('A0100')

A1
A100


'A100'

In [6]:
print(hitpick_df['Source_Well'])

Transfer
1    A01
2    A01
3    A10
4    A12
5    A13
Name: Source_Well, dtype: object


In [7]:
# Determine if the wells are formatted numerically (1-96) or by well identifier (e.g. A1 or A01)

if (str(hitpick_df.loc[hitpick_df.index[0], 'Source_Well']).isnumeric() == True):
    
    # Replace the numerical values in hitpick_df
    hitpick_df['Source_Well'] = hitpick_df['Source_Well'].apply(lambda row: well_number_to_identifier(row, 96))
    hitpick_df['Destination_Well'] = hitpick_df['Destination_Well'].apply(lambda row: well_number_to_identifier(row, 96))

# Only check the first character to see if it is a letter
elif (str(hitpick_df.loc[hitpick_df.index[0], 'Source_Well'][0]).isalpha() == True):
    
    
    # Replace the well name values in hitpick_df to ensure that there are no leading zeros
    hitpick_df['Source_Well'] = hitpick_df['Source_Well'].apply(lambda row: remove_leading_zeros(row))
    hitpick_df['Destination_Well'] = hitpick_df['Destination_Well'].apply(lambda row: remove_leading_zeros(row))

A1
A1
A10
A12
A13
C14
C15
C17
C7
C6


In [8]:
# Format all transfer volumes to the tenths place
hitpick_df['Transfer_Volume_(uL)'] = hitpick_df['Transfer_Volume_(uL)'].apply(lambda row: "{:.1f}".format(row))

## Show transformed worklist

In [9]:
# Show transformed dataframe
hitpick_df

Unnamed: 0_level_0,Source_Plate_Name,Source_Well,Destination_Plate_Name,Destination_Well,Transfer_Volume_(uL)
Transfer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1000823_1,A1,103832_a,C14,613.3
2,1000823_2,A1,103832_b,C15,703.7
3,1000823_3,A10,103832_c,C17,680.0
4,1000823_4,A12,103832_d,C7,683.9
5,1000823_3,A13,103832_e,C6,611.1


## Display important worklist information

In [10]:
# Get the smallest and largest transfer volumes
smallest_transfer_vol = hitpick_df['Transfer_Volume_(uL)'].min()
largest_transfer_vol = hitpick_df['Transfer_Volume_(uL)'].max()
print(f'Smallest transfer (uL): {smallest_transfer_vol}')
print(f'Largest transfer (uL): {largest_transfer_vol}')

Smallest transfer (uL): 611.1
Largest transfer (uL): 703.7


In [11]:
# Get the unique values in the source and destination plate names columns
# Add them to a total_labware list

num_source_plates = hitpick_df.Source_Plate_Name.nunique()
source_plate_names = hitpick_df.Source_Plate_Name.unique()
print(f'{num_source_plates} source plates: {source_plate_names}')

num_destination_plates = hitpick_df.Destination_Plate_Name.nunique()
destination_plate_names = hitpick_df.Destination_Plate_Name.unique()
print(f'{num_destination_plates} destination plates: {destination_plate_names}')

num_tips_required = len(hitpick_df.index - 1) # subtract 1 for header row
num_tip_boxes = math.ceil(num_tips_required / 96)
print(f'{num_tip_boxes} tip boxes needed for {num_tips_required} transfers')

4 source plates: ['1000823_1' '1000823_2' '1000823_3' '1000823_4']
5 destination plates: ['103832_a' '103832_b' '103832_c' '103832_d' '103832_e']
1 tip boxes needed for 5 transfers


In [12]:
# Calculate how many decks are required to complete the hitpick...
# since there may be longer hitpicks that require more than 12 deck positions

# Deck layout
# P1    P4    P7    P10
# P2    P5    P8    P11
# P3    P6    P9    P12

total_deck_positions = 12
total_labware_positions = num_source_plates + num_destination_plates + num_tip_boxes
total_available_deck_positions = total_deck_positions - total_labware_positions

# If the total available deck positions is a negative value, that means there is too much labware
print(f'The hitpick requires {total_labware_positions} deck positions')

The hitpick requires 10 deck positions


In [13]:
# Add the labware TYPES (tips, source, destination) to a list and dynamically populate the deck
all_labware_names = []

for source_index in range(num_source_plates):
    all_labware_names.append(source_plate_names[source_index])
    
for dest_index in range(num_destination_plates):
    all_labware_names.append(destination_plate_names[dest_index])
    
for tips_index in range(num_tip_boxes):
    all_labware_names.append('tips_' + str(tips_index + 1))

all_labware_names

['1000823_1',
 '1000823_2',
 '1000823_3',
 '1000823_4',
 '103832_a',
 '103832_b',
 '103832_c',
 '103832_d',
 '103832_e',
 'tips_1']

## Sort labware by most transfers (descending) and then alphanumerically

In [14]:
# Count how many times each source labware appears in the worklist
sorted_source_name_series = hitpick_df.Source_Plate_Name.value_counts().sort_index(ascending=True).sort_values(ascending=False)

# Convert series to a dictionary, where the key = plate name and the value = # transfers out of that plate
sorted_source_name_series.to_dict()

# Sort by most transfers first, then alphanumerically
sorted_source_name_series = natsort.natsorted(sorted_source_name_series.items(), key=lambda x: (-x[1], x[0]))

# Convert the sorted series into a dataframe
sorted_source_names_df = pd.DataFrame(sorted_source_name_series)

# Rename the columns and set the labware name as the index
sorted_source_names_df.rename(columns={0: "Source_Name", 1: "Number_of_Transfers"}, inplace=True)
sorted_source_names_df = sorted_source_names_df.set_index("Source_Name")

# Display the dataframe
sorted_source_names_df.head()

Unnamed: 0_level_0,Number_of_Transfers
Source_Name,Unnamed: 1_level_1
1000823_3,2
1000823_1,1
1000823_2,1
1000823_4,1


In [15]:
# Count how many times each destination labware appears in the worklist
sorted_destination_name_series = hitpick_df.Destination_Plate_Name.value_counts().sort_index(ascending=True).sort_values(ascending=False)

# Convert series to a dictionary, where the key = plate name and the value = # transfers out of that plate
sorted_destination_name_series.to_dict()

# Sort by most transfers first, then alphanumerically
sorted_destination_name_series = natsort.natsorted(sorted_destination_name_series.items(), key=lambda x: (-x[1], x[0]))

# Convert the sorted series into a dataframe
sorted_destination_name_df = pd.DataFrame(sorted_destination_name_series)

# Rename the columns and set the labware name as the index
sorted_destination_name_df.rename(columns={0: "Destination_Name", 1: "Number_of_Transfers"}, inplace=True)
sorted_destination_name_df = sorted_destination_name_df.set_index("Destination_Name")

# Display the dataframe
sorted_destination_name_df.head()

Unnamed: 0_level_0,Number_of_Transfers
Destination_Name,Unnamed: 1_level_1
103832_a,1
103832_b,1
103832_c,1
103832_d,1
103832_e,1


In [16]:
# Count how many times each source well appears in the worklist
sorted_source_well_series = hitpick_df.Source_Well.value_counts().sort_index(ascending=True).sort_values(ascending=False)

# Convert series to a dictionary, where the key = source well identifier and the value = # transfers out of that well
sorted_source_well_series.to_dict()

# Sort by most transfers first, then alphanumerically
sorted_source_well_series = natsort.natsorted(sorted_source_well_series.items(), key=lambda x: (-x[1], x[0]))

# Convert the sorted series into a dataframe
sorted_source_well_df = pd.DataFrame(sorted_source_well_series)

# Rename the columns and set the labware name as the index
sorted_source_well_df.rename(columns={0: "Source_Well_Identifier", 1: "Number_of_Transfers"}, inplace=True)
sorted_source_well_df = sorted_source_well_df.set_index("Source_Well_Identifier")

# Display the dataframe
sorted_source_well_df.head()

Unnamed: 0_level_0,Number_of_Transfers
Source_Well_Identifier,Unnamed: 1_level_1
A1,2
A10,1
A12,1
A13,1


In [17]:
# Count how many times each destination well appears in the worklist
sorted_destination_well_series = hitpick_df.Destination_Well.value_counts().sort_index(ascending=True).sort_values(ascending=False)

# Convert series to a dictionary, where the key = destination well identifier and the value = # transfers out of that well
sorted_destination_well_series.to_dict()

# Sort by most transfers first, then alphanumerically
sorted_destination_well_series = natsort.natsorted(sorted_destination_well_series.items(), key=lambda x: (-x[1], x[0]))

# Convert the sorted series into a dataframe
sorted_destination_well_df = pd.DataFrame(sorted_destination_well_series)

# Rename the columns and set the labware name as the index
sorted_destination_well_df.rename(columns={0: "Destination_Well_Identifier", 1: "Number_of_Transfers"}, inplace=True)
sorted_destination_well_df = sorted_destination_well_df.set_index("Destination_Well_Identifier")

# Display the dataframe
sorted_destination_well_df.head()

Unnamed: 0_level_0,Number_of_Transfers
Destination_Well_Identifier,Unnamed: 1_level_1
C6,1
C7,1
C14,1
C15,1
C17,1


## Split the hitpick worklist into multiple files (if necessary)

In [18]:
# Create a variable to track if the hitpick needs to be split
split_hitpick = False

# If there are not enough deck positions then a new instrument setup is needed ...
# and the hitpick needs to be split into multiple files
additional_instrument_setups_needed = math.ceil(abs(total_available_deck_positions / 12))

# Determine how many deck setups are needed
if total_available_deck_positions < 0:
    print('There has to be ' + str(additional_instrument_setups_needed) + ' additional instrument setup(s)')
    split_hitpick = True


# If the hitpick needs to be split into multiple files, determine how to split them
# CHRONOLOGICALLY, MAXIMIZE DECK SPACE AS MUCH AS POSSIBLE
# can limit based on source, dest, or tip type


# heuristic / greedy algorithm for optimization
# treat source and dest plates the same but sort based off of transfers
# eliminate plates from least used to most used
# however may not be the most optimal with combination of source/dest

# can also do pair-wise

# can also eliminate source first

# or eliminate destination first

In [19]:
# Chronological split (maximize deck space)


In [20]:
# Greedy split (use labware with most transfers first)