In [1]:
# import libraries

import glob
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os

# User settings: fill in the information in the cell below

In [26]:
# User settings - edit the values in this cell before running the notebook.

## Cell line analysed in this run (written to the 'Cell' column and used in
## the output file names)
SelectedCellamong4CellLine = 'T47D'

## Experiment set (used to build the metadata and output file names)
SetNumber = 'set7'

## Number of cycIF rounds in this experiment
TotalCycleNumber = 2     #cycIF cycle number

## Folder holding the Results-Nuc-*.txt / Results-Cyto-*.txt tables for this
## cell line; it becomes the working directory for the loading step.
ResultsFolder = r"O:\ImStor\sorger\data\Cytell\Kyun\cycIF_analysis_python_v2\set7_4CL_d234_ERcycD\T47D_p6_ERcycD"
os.chdir(ResultsFolder)
print(os.getcwd())

O:\ImStor\sorger\data\Cytell\Kyun\cycIF_analysis_python_v2\set7_4CL_d234_ERcycD\T47D_p6_ERcycD


# Run the analysis

In [27]:
# Load the per-object measurement tables (Results-Nuc-*.txt and
# Results-Cyto-*.txt in the current working directory) and build one summary
# frame per compartment, indexed by CellID.

# Label layout: <Row><Column>_fld<Field>:<ObjNum>-<Unknown>:<Channel>
LabelPattern = r'([A-Z])(\d\d)_fld(\d+):(\d+)-(\d+):(.*)'
# Leading part of Label that identifies one object (well, field, object number).
CellIDPattern = r'([A-Z]\d+_[f][l][d]\d+:\d+)(.*)'
# Columns carried over into the per-compartment summary.
SummaryColumns = ['Row', 'Column', 'Field', 'ObjNum', 'Channel', 'Well',
                  'Label', 'Area', 'Mean']


def _load_results(pattern):
    """Read every tab-delimited file matching `pattern` into a single DataFrame.

    Files are read in sorted order so the row order does not depend on the
    order in which the file system happens to list them.

    Raises:
        FileNotFoundError: if no file matches `pattern` in the working directory.
    """
    paths = sorted(glob.glob(pattern))
    if not paths:
        raise FileNotFoundError(
            "No files matching '{}' found in {}".format(pattern, os.getcwd()))
    return pd.concat([pd.read_table(path, index_col=0) for path in paths])


def _parse_label(raw):
    """Split the Label column of `raw` into Row/Column/Field/ObjNum/Unknown/Channel/Well."""
    label = raw.Label.str.extract(LabelPattern, expand=True)
    label.columns = ['Row', 'Column', 'Field', 'ObjNum', 'Unknown', 'Channel']
    # Build Well while Column is still text (e.g. 'B' + '02'), before the
    # numeric fields are converted to int below.
    label['Well'] = label['Row'] + label['Column']
    for field in ('Column', 'Field', 'ObjNum', 'Unknown'):
        label[field] = label[field].astype(int)
    return label


def _summarise(labelled):
    """Return (summary, cell_ids) for one compartment.

    `summary` keeps the informative columns, adds Content = Area * Mean and is
    indexed by CellID; `cell_ids` holds the two parts the Label was split into.
    """
    summary = labelled.loc[:, SummaryColumns].copy()
    summary['Content'] = labelled['Area'] * labelled['Mean']
    cell_ids = summary.Label.str.extract(CellIDPattern, expand=True)
    cell_ids.columns = ['CellID', '2ndPart']
    summary.index = cell_ids['CellID']
    return summary, cell_ids


## Nuclear measurements
df_Nuc = _load_results('Results-Nuc-*.txt')
label_Nuc = _parse_label(df_Nuc)
# Append the parsed label columns to the data.
dfl_Nuc = pd.concat([label_Nuc, df_Nuc], axis=1)
Nuc_Sum, Nuc_cID = _summarise(dfl_Nuc)

## Cytoplasmic measurements
df_Cyto = _load_results('Results-Cyto-*.txt')
label_Cyto = _parse_label(df_Cyto)
# Append the parsed label columns to the data.
dfl_Cyto = pd.concat([label_Cyto, df_Cyto], axis=1)
Cyto_Sum, Cyto_cID = _summarise(dfl_Cyto)

In [28]:
## Merge dataframes (Nuc_Sum and Cyto_Sum) with the two metadata files
## (per-well layout and per-channel staining).

def _only_match(matches, pattern):
    """Return the single path in `matches`; raise if there are none or several.

    Raises:
        FileNotFoundError: if `matches` does not contain exactly one path.
    """
    if len(matches) != 1:
        raise FileNotFoundError(
            "Expected exactly one file matching '{}' in {}, found {}: {}".format(
                pattern, os.getcwd(), len(matches), matches))
    return matches[0]


def _merge_metadata(summary, well_meta, staining_meta):
    """Attach well and staining metadata to one compartment summary.

    Returns (with_well, merged): the summary after the well-level merge (with
    its 'IDforMerge' key added) and the frame after the staining merge.
    Both merges are outer merges on named columns, so no helper key column is
    added to the result and the two merges can be chained.
    """
    with_well = summary.merge(
        well_meta[['Well', 'Top/Bottom', 'Treatment', 'Time']],
        on='Well', how='outer')
    # Staining differs between plate halves, so the key is "<Top/Bottom>:<Channel>".
    with_well['IDforMerge'] = with_well['Top/Bottom'] + ':' + with_well['Channel']
    merged = with_well.merge(
        staining_meta[['IDforMerge', 'Staining']],
        on='IDforMerge', how='outer')
    return with_well, merged


# The metadata files are read from one level above the results folder.
# Only move up when they are not already visible, so that re-running this cell
# does not climb one more directory each time.
if not glob.glob('metadata_set*_Well.csv'):
    os.chdir('..')
#print(os.getcwd())

# obtain metadata files (Well and Staining)

file1 = glob.glob('metadata_set*_Well.csv')
file2 = glob.glob('metadata_set*_Staining.csv')

MetadataWell = pd.read_csv(_only_match(file1, 'metadata_set*_Well.csv'), index_col=False)
MetadataStaining = pd.read_csv(_only_match(file2, 'metadata_set*_Staining.csv'), index_col=False)

MetadataStaining['IDforMerge'] = MetadataStaining['Top/Bottom'] + ':' + MetadataStaining['Channel']

# Combine Nuc_Sum and Cyto_Sum with metadata

Nuc_temp, Nuc_Merged = _merge_metadata(Nuc_Sum, MetadataWell, MetadataStaining)
Cyto_temp, Cyto_Merged = _merge_metadata(Cyto_Sum, MetadataWell, MetadataStaining)

# add "Compartment" Column
Nuc_Merged['Compartment'] = "Nucleus"
Cyto_Merged['Compartment'] = "Cytoplasm"

# Generate df1, combining Nuc and Cyto info

df1 = pd.concat([Nuc_Merged, Cyto_Merged])

df1['Cell'] = SelectedCellamong4CellLine

## save this first file if you want to check it out
# firstfilename = SelectedCellamong4CellLine + '_' + SetNumber + '.csv'
# df1.to_csv(firstfilename)

# Single cell dataframe

In [29]:
# Design - dataframe of single cell (SCdesign), plus one intensity table per
# channel placed side by side in Group_all.

if TotalCycleNumber < 1:
    raise ValueError(
        "TotalCycleNumber must be at least 1, got {!r}".format(TotalCycleNumber))

# Index df1 by CellID (the leading part of Label), since the merges that built
# df1 do not carry the CellID index through.
df1_cID = df1.Label.str.extract(r'([A-Z]\d+_[f][l][d]\d+:\d+)(.*)',
                                expand=True)
df1_cID.columns = ['CellID', '2ndPart']
df1.index = df1_cID['CellID']

# One design row per cell and compartment, taken from the first-cycle DAPI
# measurement. The explicit copy makes SCdesign an independent frame so that
# adding a column below does not raise SettingWithCopyWarning.
DesignColumns = ['Row', 'Column', 'Field', 'ObjNum', 'Well',
                 'Label', 'Content', 'Top/Bottom', 'Treatment', 'Time',
                 'Compartment', 'IDforMerge', 'Cell']
SCdesign = df1.loc[df1['Channel'] == 'DAPI-0001', DesignColumns].copy()

# make 'CellID' column
SCdesign['CellID'] = SCdesign.index
df1['CellID'] = df1.index

# Channel names are "<fluorophore>-<cycle as 4 digits>", four per cycle.
# At least four cycles are always listed; more are generated when
# TotalCycleNumber asks for them.
Fluorophores = ['DAPI', 'FITC', 'Cy3', 'Cy5']
Channel = pd.Series(['{}-{:04d}'.format(fluor, cycle)
                     for cycle in range(1, max(TotalCycleNumber, 4) + 1)
                     for fluor in Fluorophores])

# Make one dataframe per channel that was actually imaged ...
Groups = [df1.loc[df1['Channel'] == channel_name, ['CellID', 'Mean', 'Compartment']]
          for channel_name in Channel[:4 * TotalCycleNumber]]

# ... and combine them horizontally. Each channel contributes the three
# columns [CellID, Mean, Compartment], in channel order.
# NOTE(review): the side-by-side concat presumably relies on every channel
# listing the same cells in the same order - confirm for new data sets.
Group_all = pd.concat(Groups, axis=1)
   

In [30]:
# Intensity_all : select only intensity values

# Group_all holds [CellID, Mean, Compartment] for each channel in turn, so
# every third column starting at position 1 is a Mean column. Take an explicit
# copy so that renaming and adding columns acts on an independent frame
# instead of a slice of Group_all (avoids SettingWithCopyWarning).
Intensity_all = Group_all.iloc[:, 1::3].copy()

Intensity_all.columns = list(Channel[:4*TotalCycleNumber])
Intensity_all['CellID'] = Intensity_all.index

# Merge SCdesign and Intensity
# Both frames carry a 'CellID' column, so the merged frame has two columns
# with that label; this is kept as-is because later steps select columns by
# position.
df_SC_Merged = pd.concat([SCdesign, Intensity_all], axis=1)


In [31]:
# Place the nuclear and cytoplasmic intensities of each cell side by side.

# Split the merged single-cell table by compartment.
compartment = df_SC_Merged['Compartment']
Group_Nuc = df_SC_Merged[compartment == 'Nucleus']
Group_Cyto = df_SC_Merged[compartment == 'Cytoplasm']

# Nuclear rows keep the design columns followed by the channel intensities;
# cytoplasmic rows keep only the channel intensities followed by CellID.
List_column_Nuc = [
    'Row', 'Column', 'Field', 'ObjNum', 'Well', 'Label', 'Content',
    'Top/Bottom', 'Treatment', 'Time', 'Cell',
    'CellID',
] + list(Channel[:4*TotalCycleNumber])

List_column_Cyto = list(Channel[:4*TotalCycleNumber]) + ['CellID']

Group_Nuc_short = Group_Nuc[List_column_Nuc]
Group_Cyto_short = Group_Cyto[List_column_Cyto]

Group_both = pd.concat([Group_Nuc_short, Group_Cyto_short], axis=1)

# Rows BCD and rows EFG of the plate can carry different antibodies
# (set3-set7), so each half gets its own dataframe.
plate_half = Group_both['Top/Bottom']
df_BCD = Group_both.loc[plate_half == 'BCD']
df_EFG = Group_both.loc[plate_half == 'EFG']

print(os.getcwd())

O:\ImStor\sorger\data\Cytell\Kyun\cycIF_analysis_python_v2\set7_4CL_d234_ERcycD


In [32]:
# assign the antibody name to each fluorophore

fileN = 'metadata_' + SetNumber + '_Staining.csv'
meta_staining = pd.read_csv(fileN)


def _antibody_labels(plate_half, suffix):
    """Return the antibody name + `suffix` for every analysed channel, in channel order.

    Names are looked up by the 'Channel' value of the staining metadata for the
    given plate half, so the result does not depend on the row order of the
    CSV or on the CSV listing more cycles than were analysed.

    Raises:
        KeyError: if an analysed channel has no staining entry for `plate_half`.
    """
    rows = meta_staining[meta_staining['Top/Bottom'] == plate_half]
    antibody = dict(zip(rows['Channel'], rows['Staining']))
    channels = list(Channel[:4*TotalCycleNumber])
    missing = [name for name in channels if name not in antibody]
    if missing:
        raise KeyError(
            "No staining entry in {} for {} channel(s): {}".format(
                fileN, plate_half, missing))
    return [antibody[name] + suffix for name in channels]


# label for BCD and EFG ('_N' = nuclear intensity, '_C' = cytoplasmic intensity)
Label_BCD_1 = _antibody_labels('BCD', '_N')
Label_BCD_2 = _antibody_labels('BCD', '_C')

Label_EFG_1 = _antibody_labels('EFG', '_N')
Label_EFG_2 = _antibody_labels('EFG', '_C')

# Only the channel columns are renamed: the first 13 columns (design columns
# from the nuclear rows) and the last 2 columns (CellID from the cytoplasmic
# rows) keep their names.
# NOTE(review): 13 and 2 presumably reflect 'CellID' appearing twice in the
# merged single-cell table - confirm if the upstream column selection changes.
Label_BCD = list(df_BCD.columns[:13]) + Label_BCD_1 + Label_BCD_2 + list(df_BCD.columns[-2:])
Label_EFG = list(df_EFG.columns[:13]) + Label_EFG_1 + Label_EFG_2 + list(df_EFG.columns[-2:])

df_BCD.columns = Label_BCD
df_EFG.columns = Label_EFG

In [33]:
# Save one CSV per plate half into the current working directory.

# Shared stem: SingleCell_<set>_<cell line>
filename_stem = '_'.join(['SingleCell', SetNumber, SelectedCellamong4CellLine])

filename_BCD = filename_stem + '_BCD.csv'
filename_EFG = filename_stem + '_EFG.csv'

df_BCD.to_csv(filename_BCD)
df_EFG.to_csv(filename_EFG)