In [14]:
import os
import glob
import pandas as pd

In [15]:
# Directory holding the Fiji measurement CSVs (one file per imaged section).
# NOTE: this is an absolute, machine-specific location -- point it at your own
# copy of the data before running.
path = r'/Users/michaelpiacentino/Drive/git/data/smpd3/20180616_CRISPR_TCFLef_sections/csvs/'

# Background ROI names produced by older versions of the Fiji macro.
# NOTE: the macro ('FluorIntensity_2Channel.ijm') has been updated to name all
# background ROIs 'background', so this mapping will be unnecessary with
# freshly collected data.
legacy_background_rois = ['back1a', 'back1b', 'back1c', 'back2a', 'back2b', 'back2c']

# glob returns files in arbitrary order; sort so the row order of full_df
# (and therefore the displayed head) is the same on every run.
csv_files = sorted(glob.glob(os.path.join(path, '*.csv')))
if not csv_files:
    # Fail here with the offending directory instead of letting pd.concat
    # raise its generic "No objects to concatenate" error further down.
    raise ValueError('No .csv files found in {!r}'.format(path))

# Read every file, annotate it, and collect the pieces for a single concat
list_ = []
for file_ in csv_files:
    df = pd.read_csv(file_)
    # Image name is the file name without directory or extension
    df['Image'] = os.path.splitext(os.path.basename(file_))[0]
    # 'Label' is '<Fluor>:<ROI>'
    df['Fluor'], df['ROI'] = zip(*df['Label'].map(lambda x: x.split(':')))
    # 'Image' is '<Target>_<Method>_<Embryo>_<Section>'
    df['Target'], df['Method'], df['Embryo'], df['Section'] = zip(*df['Image'].map(lambda x: x.split('_')))

    # Collapse legacy background ROI names; only the ROI column is touched so
    # no other column can be rewritten by accident
    df['ROI'] = df['ROI'].replace(legacy_background_rois, 'background')
    list_.append(df)

full_df = pd.concat(list_)
full_df.head(13)

Unnamed: 0,Unnamed: 1,Label,Area,Mean,IntDen,RawIntDen,Image,Fluor,ROI,Target,Method,Embryo,Section
0,1,TCFLef:back1a,34.937,13.963,487.825,9467.0,SMPD3_CRISPR_Emb1_sec1,TCFLef,background,SMPD3,CRISPR,Emb1,sec1
1,2,TCFLef:back1b,37.565,13.726,515.599,10006.0,SMPD3_CRISPR_Emb1_sec1,TCFLef,background,SMPD3,CRISPR,Emb1,sec1
2,3,TCFLef:back1c,42.202,13.664,576.661,11191.0,SMPD3_CRISPR_Emb1_sec1,TCFLef,background,SMPD3,CRISPR,Emb1,sec1
3,4,TCFLef:Cntl,7227.2,130.882,945909.299,18356834.0,SMPD3_CRISPR_Emb1_sec1,TCFLef,Cntl,SMPD3,CRISPR,Emb1,sec1
4,5,TCFLef:Expt,7117.289,126.978,903740.336,17538480.0,SMPD3_CRISPR_Emb1_sec1,TCFLef,Expt,SMPD3,CRISPR,Emb1,sec1
5,6,pCIG:back2a,34.937,27.063,945.506,18349.0,SMPD3_CRISPR_Emb1_sec1,pCIG,background,SMPD3,CRISPR,Emb1,sec1
6,7,pCIG:back2b,37.565,26.597,999.096,19389.0,SMPD3_CRISPR_Emb1_sec1,pCIG,background,SMPD3,CRISPR,Emb1,sec1
7,8,pCIG:back2c,42.202,26.916,1135.905,22044.0,SMPD3_CRISPR_Emb1_sec1,pCIG,background,SMPD3,CRISPR,Emb1,sec1
8,9,pCIG:Cntl,7227.2,156.637,1132045.187,21969089.0,SMPD3_CRISPR_Emb1_sec1,pCIG,Cntl,SMPD3,CRISPR,Emb1,sec1
9,10,pCIG:Expt,7117.289,213.474,1519355.135,29485438.0,SMPD3_CRISPR_Emb1_sec1,pCIG,Expt,SMPD3,CRISPR,Emb1,sec1


In [16]:
# Average the measurements within each (Embryo, Fluor, ROI, Section) group.
# The several 'background' rows of a section collapse into a single mean row.
# Columns are selected with a list: selecting several columns from a groupby
# with a bare tuple (['Area', 'Mean', 'IntDen']) is deprecated and raises in
# current pandas.
grouped_means = (
    full_df
    .groupby(['Embryo', 'Fluor', 'ROI', 'Section'])[['Area', 'Mean', 'IntDen']]
    .mean()
)
grouped_means.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Area,Mean,IntDen
Embryo,Fluor,ROI,Section,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Emb1,TCFLef,Cntl,sec1,7227.2,130.882,945909.3
Emb1,TCFLef,Cntl,sec2,6666.101,123.361,822335.5
Emb1,TCFLef,Cntl,sec3,3883.174,283.885,1102374.0
Emb1,TCFLef,Expt,sec1,7117.289,126.978,903740.3
Emb1,TCFLef,Expt,sec2,7322.013,103.656,758971.7
Emb1,TCFLef,Expt,sec3,3481.093,147.838,514636.3
Emb1,TCFLef,background,sec1,38.234667,13.784333,526.695
Emb1,TCFLef,background,sec2,44.469667,14.191,631.5737
Emb1,TCFLef,background,sec3,55.135667,13.598,749.5923
Emb1,pCIG,Cntl,sec1,7227.2,156.637,1132045.0


In [17]:
# CTCF for each ROI:
#     CTCF = ROI IntDen - (background mean * ROI area)

# Slice the averaged measurements by ROI type. Selecting on the 'ROI' level
# drops it, so every slice is indexed by the remaining levels and the
# arithmetic below aligns row-for-row.
background_mean = grouped_means.xs('background', level='ROI')['Mean']
cntl_rows = grouped_means.xs('Cntl', level='ROI')
expt_rows = grouped_means.xs('Expt', level='ROI')

# Background contribution scaled to the area of each ROI
background_corr_cntl = background_mean * cntl_rows['Area']
background_corr_expt = background_mean * expt_rows['Area']

# Integrated densities of the Cntl and Expt ROIs
intdens_cntl = cntl_rows['IntDen']
intdens_expt = expt_rows['IntDen']

# Background-subtracted values, one single-column frame per treatment
sub_cntl = (intdens_cntl - background_corr_cntl).to_frame()
sub_expt = (intdens_expt - background_corr_expt).to_frame()

# Stack both treatments under an outer 'Cntl' / 'Expt' key and label the column
full_ctcf = pd.concat(
    [sub_cntl, sub_expt],
    keys=['Cntl', 'Expt'],
)
full_ctcf.columns = ['CTCF']

In [18]:
# Split the CTCF values by fluorophore channel
ctcf_by_fluor = full_ctcf['CTCF']
ctcf_tcflef = ctcf_by_fluor.xs('TCFLef', level='Fluor')
ctcf_pcig = ctcf_by_fluor.xs('pCIG', level='Fluor')

# TCFLef/pCIG ratio normalizes for electroporation efficiency
electroporation_norm = (ctcf_tcflef / ctcf_pcig).to_frame(name='CTCF')

# Name the three remaining index levels.
# NOTE(review): 'Selection' looks like a typo for 'Section' -- left unchanged
# because code elsewhere may refer to the level by this name; confirm before
# renaming.
electroporation_norm.index.names = ['Treatment', 'Embryo', 'Selection']

In [19]:
# Collapse sections to one mean value per (Treatment, Embryo) so the
# Expt/Cntl ratio is computed per embryo rather than per section
averaged_sections = electroporation_norm.groupby(['Treatment', 'Embryo']).mean()

# Per-embryo CTCF for each treatment
ctcf_by_treatment = averaged_sections['CTCF']
ctcf_cntl = ctcf_by_treatment.xs('Cntl', level='Treatment')
ctcf_expt = ctcf_by_treatment.xs('Expt', level='Treatment')

# Ratio of experimental to control values, as a one-column frame
ratios_sections = (ctcf_expt / ctcf_cntl).to_frame(name='Expt/Cntl CTCF')

In [20]:
# Assemble the per-embryo Cntl value, Expt value and Expt/Cntl ratio into one
# table and write it to 'Results.csv'

# Turn each treatment's values into a labelled one-column frame averaged per embryo
labelled = {}
for column_label, values in (('Cntl CTCF', ctcf_cntl), ('Expt CTCF', ctcf_expt)):
    frame = pd.DataFrame(values)
    frame.columns = [column_label]
    labelled[column_label] = frame.groupby('Embryo').mean()

# Rebind the notebook-level names to the labelled frames
ctcf_cntl = labelled['Cntl CTCF']
ctcf_expt = labelled['Expt CTCF']

# Join the three pieces side by side and save
results = pd.concat(
    [ctcf_cntl, ctcf_expt, ratios_sections],
    axis=1,
    sort=True,
)
results.to_csv('Results.csv')