In [1]:
import pandas as pd
import os
import glob

Create the experiment metadata table from the `barcode_platemap.csv` files.

In [2]:
# Columns of the exported table, in output order.
col_order = [
    'Batch',
    'Plate_Map_Name',
    'Assay_Plate_Barcode',
    'Type',
    'Anomaly',
    'Density',
    'Polybrene',
]

# Lookup table from plate map name to plate type, one (name, type) pair per row.
type_df = pd.DataFrame(
    [
        ('control', 'Untreated'),
        ('JUMP-Target-2_compound_platemap', 'Target2'),
        ('JUMP-Target-1_compound_platemap', 'Target1'),
        ('bortezomib', 'Bortezomib'),
    ],
    columns=['Plate_Map_Name', 'Type'],
)

# Plates with a known anomaly, keyed by anomaly kind and then by batch.
# Each batch maps either to a list of plate barcodes or to the string 'all',
# which the anomaly-labelling cell applies to every plate of that batch.
anomalous_plates = {
    'dye': {
        'Batch1': [
            'BR00121541', 'BR00121540', 'BR00121438', 'BR00121439',
            'BR00121539', 'BR00121537', 'BR00121538', 'BR00121557',
            'BR00121561', 'BR00121564', 'BR00121566', 'BR00121558',
            'BR00121565', 'BR00121563',
        ],
        # Whole batches flagged as 'dye'.
        **dict.fromkeys(['Batch5', 'Batch6', 'Batch7', 'Batch12'], 'all'),
    },
    'segmentation': {
        'Batch3': ['BR00123613'],
        'Batch9': ['BR00126054'],
    },
    'infection': {
        'Batch4': [
            'BR00123787', 'BR00123786', 'BR00123785',
            'BR00123791', 'BR00123790',
        ],
        'Batch13': ['BR00123539'],
    },
    'other': {
        'Batch2': ['BR00123528A'],
    },
}

# Plates seeded at a non-default density, keyed by density (as a string) and
# then by batch; each batch maps to a single plate barcode.
seeding_density = {
    density: {'Batch12': batch12_plate, 'Batch13': batch13_plate}
    for density, batch12_plate, batch13_plate in [
        ('120', 'BR00126117', 'BR00127146'),
        ('80', 'BR00126116', 'BR00127145'),
        ('50', 'BR00126115', 'BR00127148'),
        ('20', 'BR00126114', 'BR00127147'),
    ]
}

In [3]:
# Read every per-batch barcode_platemap.csv and stack them into one table.
path = os.path.join('..', 'jump-orf-data', 'metadata', 'platemaps', '*', 'barcode_platemap.csv')
# glob returns matches in arbitrary order; sort so the load order is reproducible.
files = sorted(glob.glob(path))
if not files:
    # Fail with the pattern in the message instead of pandas' generic
    # "No objects to concatenate" error.
    raise FileNotFoundError(f'No barcode_platemap.csv files match {path!r}')

df = pd.concat(
    (
        pd.read_csv(platemap_csv).assign(
            # Batch is the last '_'-separated token of the name of the
            # directory holding the csv. Taking it from the parent directory
            # (rather than a fixed position in the path string) keeps this
            # independent of path depth and of the OS path separator.
            Batch=os.path.basename(os.path.dirname(platemap_csv)).split('_')[-1],
            # Defaults; later cells overwrite them for specific plates.
            Anomaly='none',
            Density='100',
        )
        for platemap_csv in files
    ),
    ignore_index=True,
)

In [4]:
# Assign a Type to every plate from its plate map name.
# Dropping any existing Type column first keeps the cell safe to re-run:
# without it a second merge would produce Type_x / Type_y instead of Type.
df = (
    df.drop(columns='Type', errors='ignore')
    .merge(type_df, on='Plate_Map_Name', how='left')
)
# Plate maps not listed in type_df are ORF plates. Fill only the Type column
# so missing values in any other column are not overwritten with 'ORF'.
df['Type'] = df['Type'].fillna('ORF')

In [5]:
# Show how many plates fall under each Type
df['Type'].value_counts()

ORF           237
Target2        22
Untreated      14
Bortezomib      4
Target1         4
Name: Type, dtype: int64

In [6]:
# Label anomalous plates; rows not selected below keep their current Anomaly value.
for anomaly, plates_by_batch in anomalous_plates.items():
    for batch, plates in plates_by_batch.items():
        if plates == "all":
            # The whole batch is affected.
            affected = df["Batch"] == batch
        else:
            # Only the listed barcodes are affected (matched on barcode alone).
            affected = df["Assay_Plate_Barcode"].isin(plates)
        df.loc[affected, "Anomaly"] = anomaly

In [7]:
# Show how many plates carry each kind of Anomaly
df['Anomaly'].value_counts()

none            178
dye              94
infection         6
segmentation      2
other             1
Name: Anomaly, dtype: int64

In [8]:
# Overwrite the Density of each plate listed in seeding_density
for density, plate_by_batch in seeding_density.items():
    for plate in plate_by_batch.values():
        is_plate = df["Assay_Plate_Barcode"] == plate
        df.loc[is_plate, "Density"] = density

In [9]:
# Add Polybrene status: 'Present' for every ORF plate and for two explicitly
# listed plates, 'Absent' for everything else.
orf_polybrene = pd.DataFrame({'Type': 'ORF', 'Polybrene': 'Present'}, index=[0])
# Drop any existing Polybrene column before merging so the cell can be re-run;
# otherwise the merge yields Polybrene_x / Polybrene_y and the column rebuilt
# below would mark every ORF plate as 'Absent'.
df = (
    df.drop(columns='Polybrene', errors='ignore')
    .merge(orf_polybrene, on='Type', how='left')
)
# These two plates are marked 'Present' explicitly, whatever their Type.
df.loc[df.Assay_Plate_Barcode=="BR00121540", "Polybrene"] = "Present"
df.loc[df.Assay_Plate_Barcode=="BR00121439", "Polybrene"] = "Present"
# Fill only the Polybrene column so missing values elsewhere are left alone.
df['Polybrene'] = df['Polybrene'].fillna("Absent")

In [10]:
# Order the rows, renumber the index from 0, and keep only the output columns.
# Note: Batch values are strings, so batches sort lexicographically.
sort_keys = ['Batch', 'Plate_Map_Name', 'Assay_Plate_Barcode']
df = df.sort_values(by=sort_keys).reset_index(drop=True)
df = df[col_order]


In [11]:
# Write the metadata table as a tab-separated file without the index column.
# Create the output directory first so the export does not fail if it is missing.
os.makedirs('output', exist_ok=True)
df.to_csv('output/experiment-metadata.tsv', index=False, sep='\t')