In [104]:
import pandas as pd
import itertools 
import logging
from math import ceil

def common_member(a, b):
    a_set = set(a)
    b_set = set(b)
    if (a_set & b_set):
        return True 
    else:
        return False
mlprep_sites = [1,2,3,4,5,6,7,8]
plate_size=96


key = pd.read_csv("data/combo_key_example.csv",header=0,  quoting=3)
source = pd.read_csv("data/source_sheet_example.csv",header=0,  quoting=3)
print(key)
print(source)
all_contained =  all(elem in list(source.pgp) for elem in list(key.pgp))
if all_contained == False:
    raise SystemExit("You have pgp ids in your key file that dont exist in your source plate. Please fix.")
key = pd.merge(left=key, right=source[["pgp", "target","site"]], how="left", on="pgp")
print(key)
source_sites = list(set(key.site))
num_sourcesites= len(source_sites)
mlprep_sites =  [x for x in mlprep_sites if x not in source_sites]
if (num_sourcesites > 2):
     raise ValueError("The software isnt designed to handle more than two sites in the MLprep for source plates.") 
print(mlprep_sites)
# number of unique steps
num_steps= len(set(key.step))
num_pgps= len(set(key.pgp))
num_targets=len(set(key.target))
g = key.groupby(by = 'step')
#list of lists with each sublist all the pgps that can possibly be used in that step
pgp_steps = [list(group[1].pgp) for group in g]
target_steps = [list(group[1].target) for group in g]
pgp_at_multiple_steps=any([common_member(x,y) for i,x in enumerate(pgp_steps) for j,y in enumerate(pgp_steps) if i != j])
if pgp_at_multiple_steps == True:
    raise SystemExit("You have pgp ids at multiple steps. Each pgp should exist at one and only one step. Please fix.")
print(num_steps)
print(num_pgps)
print(num_targets)
pgp_combos = [p for p in itertools.product(*pgp_steps)]
pgp_flat_combos = [proguide for sublist in pgp_combos for proguide in sublist]
target_combos = [p for p in itertools.product(*target_steps)]
target_flat_combos = [target for sublist in target_combos for target in sublist]
num_cellgos = len(pgp_combos)
print(num_cellgos)
wells = [f'{letter}{i}'for letter in ['A','B','C','D','E','F','G','H']  for i in range(1, 13)]
wells_cycle=list(itertools.islice(itertools.cycle(wells), num_cellgos))
# as many wells in target plate as number of cellgos, for each well the number of rows is determined by the number 
# of steps as that is how many transfers have to occur
well_rows = [x for item in wells_cycle for x in itertools.repeat(item, num_steps)]
num_targetsites = ceil(len(wells_cycle)/plate_size)
num_pipetsites = ceil(len(well_rows)/plate_size)
print(num_targetsites)
print(num_pipetsites )
if (( num_sourcesites + num_targetsites  + num_pipetsites) > 8):
    raise SystemExit(f"There are 8 total physical sites on the MLprep you can place plates. You are attempting to use {num_sourcesites + num_targetsites  + num_pipetsites} sites. Please lower the number of cellgorithms you are attempting to create to get below 8 total sites . # SourceSites you are attempting to use: {num_sourcesites}. # TargetSites you are attempting to use: {num_targetsites}. # PipetSites you are attempting to use: {num_pipetsites}")
target_sites = mlprep_sites[:num_targetsites]
mlprep_sites =  [x for x in mlprep_sites if x not in target_sites]
print(num_targetsites)
print(mlprep_sites)
print(len(wells_cycle))
print(len(well_rows))
targetsites_sheet=[item for item in target_sites for i in range(plate_size)]
targetsites_sheet= targetsites_sheet[:len(wells_cycle)]
targetsites_rows = [x for item in targetsites_sheet for x in itertools.repeat(item,num_steps)]
print(targetsites_sheet)
print(len(targetsites_sheet))
print(len(targetsites_rows))
# make skeleton of automation sheet
df = pd.DataFrame(columns=["SourceSite","SourceWell","TargetSite","TargetWell",'pgp','target'])
# make 96 well representation as a list of 96 elements

df['TargetWell'] = well_rows
df['TargetSite'] = targetsites_rows
df['pgp'] = pgp_flat_combos
df['target'] = target_flat_combos
idx = source.reset_index().set_index('pgp').loc[df.pgp, 'index'].values.tolist()
df['SourceWell']=source.well[idx].tolist()
df['SourceSite']=source.site[idx].tolist()
df
df.to_csv('combo_output.txt',sep=',',index=False,quoting=None)
pgp_table = df.pgp.value_counts()
pgp_max=pgp_table.max()
pgp_min=pgp_table.min()
max_pgp_list=pgp_table[pgp_table == pgp_max].index.tolist()
min_pgp_list=pgp_table[pgp_table == pgp_min].index.tolist()
pipets_needed =len(df.index)


with open('manual.log', 'w'):
   pass
logging.basicConfig(filename='manual.log',
                            filemode='w',
                            format='%(message)s',
                            datefmt='%H:%M:%S',
                            level=logging.INFO,force=True)
logging.info(f'Max number of unique cellgorithm steps is {max_steps}')
logging.info(f'Number of proguides used is {num_pgps}')
logging.info(f'Number of unique targets is {num_targets}')
logging.info(f'Number of unique cellgos|unique TargetWells is {num_cellgos}')
logging.info(f'Total number of pipets needed/robotic transfers is {pipets_needed}')
logging.info(f'{max_pgp_list} proguide(s) use the most material. Each proguide(s) requires {pgp_max} transfers and {4  * pgp_max} ul minimum in the source plate (@ 4ul per transfer)')
logging.info(f'{min_pgp_list} proguide(s) use the least material require {pgp_min} transfers and {4 * pgp_min} ul minimum in the source plate')




       pgp  step
0   PGP001     1
1   PGP002     2
2   PGP003     1
3   PGP004     2
4   PGP005     4
5   PGP007     3
6   PGP008     2
7   PGP009     4
8   PGP010     3
9   PGP011     4
10  PGP012     5
11  PGP013     5
12  PGP014     6
   target     pgp well  site
0   MEIS1  PGP001   A1     1
1   MEIS1  PGP002   A2     1
2    TBX2  PGP003   A3     1
3    TBX2  PGP004   A4     1
4   blank  PGP005   A5     1
5    CD19  PGP006   A6     1
6    PAX3  PGP007   A7     1
7    PAX3  PGP008   A8     1
8    EGFR  PGP009   A9     1
9    EGFR  PGP010  A10     1
10   GOOG  PGP011   A1     2
11  PARP1  PGP012  A11     1
12   AAK1  PGP013  A12     1
13  ITGB1  PGP014   B1     1
14   KRAS  PGP015   B2     1
       pgp  step target  site
0   PGP001     1  MEIS1     1
1   PGP002     2  MEIS1     1
2   PGP003     1   TBX2     1
3   PGP004     2   TBX2     1
4   PGP005     4  blank     1
5   PGP007     3   PAX3     1
6   PGP008     2   PAX3     1
7   PGP009     4   EGFR     1
8   PGP010     3   EGFR     

NameError: name 'max_steps' is not defined