In [1]:
import numpy as np
import glob
import pandas as pd
import matplotlib.pyplot as plt


def fdr(p_vals):

    from scipy.stats import rankdata
    ranked_p_values = rankdata(p_vals)
    fdr = p_vals * len(p_vals) / ranked_p_values
    fdr[fdr > 1] = 1

    return fdr

## Proliferation and differentiation data

In [2]:
files = glob.glob('../../data/screen_summary/stats/gene_avg/*')
files

['../../data/screen_summary/stats/gene_avg/20211222_screen_log2fold_diffs_tracketchcombined_all_gene_pvalues.csv',
 '../../data/screen_summary/stats/gene_avg/20211222_screen_log2fold_diffs_ECM_gene_pvalues.csv',
 '../../data/screen_summary/stats/gene_avg/20220412_screen_log2fold_diffs_growth_gene_pvalues.csv',
 '../../data/screen_summary/stats/gene_avg/20211222_screen_log2fold_diffs_ECMcollagen_gene_pvalues.csv',
 '../../data/screen_summary/stats/gene_avg/20211222_screen_log2fold_diffs_ECMfibrin_gene_pvalues.csv',
 '../../data/screen_summary/stats/gene_avg/20220412_screen_log2fold_diffs_differentiation_gene_pvalues.csv',
 '../../data/screen_summary/stats/gene_avg/20211222_screen_log2fold_diffs_tracketchcombined_10grad_gene_pvalues.csv',
 '../../data/screen_summary/stats/gene_avg/20211222_screen_log2fold_diffs_tracketchcombined_Nograd_gene_pvalues.csv',
 '../../data/screen_summary/stats/gene_avg/20211222_screen_log2fold_diffs_migration_combined_gene_pvalues.csv']

In [96]:
df = pd.read_csv( '../../data/screen_summary/stats/gene_avg/20220412_screen_log2fold_diffs_growth_gene_pvalues.csv')
df = df[~df.gene.str.contains('CONTROL')]
df['fdr'] = fdr(df['pvalue'])
print('Proliferation, number sig. genes: ',len(df[df['fdr']<=0.05]))

df_neg = df[df.log2fold_diff_mean <0]
print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.05]))
df_pos = df[df.log2fold_diff_mean >0]
print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.05]))

Proliferation, number sig. genes:  2232
Proliferation, number sig. genes, negative fold change:  2170
Proliferation, number sig. genes, positive fold change:  62


In [97]:
df_prol = df

In [198]:
df = pd.read_csv('../../data/screen_summary/stats/gene_avg/20220412_screen_log2fold_diffs_differentiation_gene_pvalues.csv')
df = df[~df.gene.str.contains('CONTROL')]
df['fdr'] = fdr(df['pvalue'])
print('Differentiation, number sig. genes: ',len(df[df['fdr']<=0.05]))

df_neg = df[df.log2fold_diff_mean <0]
print('Differentiation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.05]))
df_pos = df[df.log2fold_diff_mean >0]
print('Differentiation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.05]))


Differentiation, number sig. genes:  1726
Differentiation, number sig. genes, negative fold change:  995
Differentiation, number sig. genes, positive fold change:  731


In [199]:
df[df.gene.str.contains('RAR')]

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
10772,10772,NRARP,0.067504,differentiation,0.6542048,0.913557
13277,13277,RARA,0.408919,differentiation,9.716167e-07,6e-05
13278,13278,RARB,-0.051569,differentiation,0.1068108,0.427469
13279,13279,RARG,-0.002617,differentiation,0.4364749,0.803713
13280,13280,RARRES1,0.034907,differentiation,0.9045335,0.987514
13281,13281,RARRES2,-0.013173,differentiation,0.3354497,0.732786
13282,13282,RARRES3,-0.012591,differentiation,0.3402593,0.737072
13283,13283,RARS,0.006885,differentiation,0.5399233,0.865434
13284,13284,RARS2,0.28277,differentiation,0.0001521552,0.003313


In [200]:
df[df.gene.str.contains('ATIC')]

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
1196,1196,ATIC,0.427558,differentiation,2.91485e-07,2.7e-05


In [79]:
df[df['gene']=='SPI1']

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
15414,15414,SPI1,0.383008,differentiation,0.000306,0.00588


In [99]:
df_comb = pd.merge(df_prol,df, on = 'gene')

In [100]:
df_comb = df_comb[['gene', 'log2fold_diff_mean_x', 'exp_x', 'pvalue_x',
       'fdr_x', 'log2fold_diff_mean_y', 'exp_y', 'pvalue_y',
       'fdr_y']]

df_comb.columns = ['gene', 'log2fold_prolif', 'exp_x', 'pvalue_x',
       'fdr_x', 'log2fold_differ.', 'exp_y', 'pvalue_y',
       'fdr_y']

In [125]:
df_comb[df_comb.gene =='NPRL3']

Unnamed: 0,gene,log2fold_prolif,exp_x,pvalue_x,fdr_x,log2fold_differ.,exp_y,pvalue_y,fdr_y
10730,NPRL3,-0.46493,growth,6.2e-05,0.000876,-0.438434,differentiation,0.000236,0.004753


## Cell Migration data

### All track-etch exps

In [3]:
df = pd.read_csv('../../data/screen_summary/stats/gene_avg/20211222_screen_log2fold_diffs_tracketchcombined_all_gene_pvalues.csv')
df = df[~df.gene.str.contains('CONTROL')]
df['fdr'] = fdr(df['pvalue'])
print('Proliferation, number sig. genes: ',len(df[df['fdr']<=0.05]))

df_neg = df[df.log2fold_diff_mean <0]
print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.05]))
df_pos = df[df.log2fold_diff_mean >0]
print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.05]))

Proliferation, number sig. genes:  482
Proliferation, number sig. genes, negative fold change:  446
Proliferation, number sig. genes, positive fold change:  36


In [6]:
df[df.gene.str.contains('ARHG')].sort_values('fdr')

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
940,940,ARHGAP30,-1.812978,tracketch_all_data,9.716167e-08,0.000097
937,937,ARHGAP27,-0.629082,tracketch_all_data,2.322164e-05,0.002850
982,982,ARHGEF6,0.625246,tracketch_all_data,9.609289e-05,0.007930
959,959,ARHGEF1,-0.462608,tracketch_all_data,6.189198e-04,0.029838
951,951,ARHGAP45,-0.413298,tracketch_all_data,2.136779e-03,0.070720
...,...,...,...,...,...,...
963,963,ARHGEF12,-0.016707,tracketch_all_data,9.316956e-01,0.987226
924,924,ARHGAP11A,-0.013490,tracketch_all_data,9.476055e-01,0.988456
960,960,ARHGEF10,0.004868,tracketch_all_data,9.473169e-01,0.988646
970,970,ARHGEF25,0.000586,tracketch_all_data,9.707556e-01,0.995623


### Chemotaxis (track-etch, 10% FBS gradient)

In [159]:
df_10 = pd.read_csv('../../data/screen_summary/stats/gene_avg/20211222_screen_log2fold_diffs_tracketchcombined_10grad_gene_pvalues.csv')
df_10 = df_10[~df_10.gene.str.contains('CONTROL')]
df_10[['pvalue']] = df_10[['pvalue']].fillna(value=df_10.pvalue.min())
df_10['fdr'] = fdr(df_10['pvalue'])
print('Proliferation, number sig. genes: ',len(df_10[df_10['fdr']<=0.05]))

# df_neg = df[df.log2fold_diff_mean <0]
# print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.05]))
# df_pos = df[df.log2fold_diff_mean >0]
# print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.05]))

Proliferation, number sig. genes:  419


In [50]:
df[df.gene.str.contains('RAR')]

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
10772,10772,NRARP,-0.015613,tracketch_all_10grad_data,0.952991,0.989757
13277,13277,RARA,0.664153,tracketch_all_10grad_data,0.000402,0.02743
13278,13278,RARB,-0.03521,tracketch_all_10grad_data,0.856839,0.96344
13279,13279,RARG,-0.091724,tracketch_all_10grad_data,0.588173,0.870014
13280,13280,RARRES1,-0.07056,tracketch_all_10grad_data,0.68324,0.906478
13281,13281,RARRES2,-0.186926,tracketch_all_10grad_data,0.244478,0.661912
13282,13282,RARRES3,0.040341,tracketch_all_10grad_data,0.775262,0.938919
13283,13283,RARS,0.171882,tracketch_all_10grad_data,0.290355,0.702667
13284,13284,RARS2,-0.554235,tracketch_all_10grad_data,0.000298,0.022473


In [133]:
df[df.gene.str.contains('TSC1')]

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
17145,17145,TSC1,0.139567,tracketch_all_10grad_data,0.383957,0.765969


### Chemokinesis (track-etch, uniform 10% FBS)

In [160]:
df_0 = pd.read_csv('../../data/screen_summary/stats/gene_avg/20211222_screen_log2fold_diffs_tracketchcombined_Nograd_gene_pvalues.csv')
df_0 = df_0[~df_0.gene.str.contains('CONTROL')]
df_0[['pvalue']] = df_0[['pvalue']].fillna(value=df_0.pvalue.min())
df_0['fdr'] = fdr(df_0['pvalue'])
print('Proliferation, number sig. genes: ',len(df_0[df_0['fdr']<=0.25]))

# df_neg = df[df.log2fold_diff_mean <0]
# print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.05]))
# df_pos = df[df.log2fold_diff_mean >0]
# print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.05]))

Proliferation, number sig. genes:  430


In [165]:
chemotaxis_genes = df_10[df_10['fdr']<=0.05][~df_10.gene.isin(df_0[df_0['fdr']<=0.6].gene.unique())]
print(len(chemotaxis_genes))

179


  """Entry point for launching an IPython kernel.


In [168]:
chemotaxis_genes.sort_values('log2fold_diff_mean')[:50]

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
15942,15942,TAF4,-1.008775,tracketch_all_10grad_data,9.716167e-08,0.000102
15086,15086,SMARCE1,-0.985788,tracketch_all_10grad_data,6.801317e-06,0.001607
12769,12769,PRPF38B,-0.980708,tracketch_all_10grad_data,4.858083e-07,0.000193
12596,12596,PPP4R2,-0.945319,tracketch_all_10grad_data,4.459721e-05,0.006042
12101,12101,PIKFYVE,-0.896876,tracketch_all_10grad_data,2.234718e-06,0.000626
4296,4296,DHX30,-0.890527,tracketch_all_10grad_data,7.772934e-07,0.000265
9723,9723,MOB4,-0.888058,tracketch_all_10grad_data,9.716167e-08,0.000102
1220,1220,ATP1A1,-0.871538,tracketch_all_10grad_data,1.029914e-05,0.002139
17790,17790,VPS36,-0.865103,tracketch_all_10grad_data,1.068778e-06,0.000348
13998,13998,RTN4IP1,-0.808387,tracketch_all_10grad_data,8.938874e-06,0.001964


In [167]:
chemotaxis_genes.sort_values('log2fold_diff_mean')[-50:]

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
16432,16432,TM2D2,-0.534338,tracketch_all_10grad_data,0.000539,0.033534
10718,10718,NPLOC4,-0.534255,tracketch_all_10grad_data,0.000521,0.032625
5723,5723,FIGNL1,-0.532703,tracketch_all_10grad_data,0.000732,0.038869
9013,9013,LSM1,-0.532614,tracketch_all_10grad_data,0.000831,0.04242
4632,4632,DUSP3,-0.532091,tracketch_all_10grad_data,0.000828,0.042501
18759,18759,ZNF764,-0.530337,tracketch_all_10grad_data,0.00073,0.03888
16762,16762,TMTC1,-0.529873,tracketch_all_10grad_data,0.000656,0.036909
18889,18889,ZWINT,-0.52961,tracketch_all_10grad_data,0.000578,0.034447
9445,9445,MED16,-0.529519,tracketch_all_10grad_data,0.000569,0.034359
3476,3476,COLEC12,-0.528487,tracketch_all_10grad_data,0.000581,0.034546


In [176]:
df_10[df_10.gene.str.contains('CORO1A')].sort_values('fdr')

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
3523,3523,CORO1A,-0.152704,tracketch_all_10grad_data,0.344937,0.742102


In [188]:
from scipy import stats

print('ECM, Pearson\ncorrelation\n' + r'$\rho$ = '
           + str(np.round(stats.pearsonr(df_10.sort_values('log2fold_diff_mean').log2fold_diff_mean.values,
                                         df_0.sort_values('log2fold_diff_mean').log2fold_diff_mean.values), 5)))
print(pearson_ecm_speed, pearson_screen_3D)

ECM, Pearson
correlation
$\rho$ = [0.99609 0.     ]


NameError: name 'pearson_ecm_speed' is not defined

### Amoeboid, 3D

In [23]:
df = pd.read_csv('../../data/screen_summary/stats/gene_avg/20211222_screen_log2fold_diffs_ECM_gene_pvalues.csv')
df = df[~df.gene.str.contains('CONTROL')]
df['fdr'] = fdr(df['pvalue'])
print('Proliferation, number sig. genes: ',len(df[df['fdr']<=0.05]))

df_neg = df[df.log2fold_diff_mean <0]
print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.05]))
df_pos = df[df.log2fold_diff_mean >0]
print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.05]))

Proliferation, number sig. genes:  2
Proliferation, number sig. genes, negative fold change:  2
Proliferation, number sig. genes, positive fold change:  0


In [142]:
df[df['fdr']<=0.05]

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
5571,5571,FMNL1,-2.192067,ECM_all_data,1.80642e-07,0.003295
8231,8231,LAMTOR2,-1.424793,ECM_all_data,4.516051e-06,0.041193


In [143]:
df[df.gene == 'CORO1A']

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
3402,3402,CORO1A,-0.746941,ECM_all_data,0.007473,1.0


In [24]:
df = pd.read_csv('../../data/screen_summary/stats/gene_avg/20211222_screen_log2fold_diffs_ECM_gene_pvalues.csv')
df = df[~df.gene.str.contains('CONTROL')]
df['fdr'] = fdr(df['pvalue'])
print('Proliferation, number sig. genes: ',len(df[df['fdr']<=0.25]))

df_neg = df[df.log2fold_diff_mean <0]
print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.25]))
df_pos = df[df.log2fold_diff_mean >0]
print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.25]))

Proliferation, number sig. genes:  2
Proliferation, number sig. genes, negative fold change:  2
Proliferation, number sig. genes, positive fold change:  0


In [150]:
df[df['fdr']<=0.5].sort_values('fdr')[:50]

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
5571,5571,FMNL1,-2.192067,ECM_all_data,1.80642e-07,0.003295
8231,8231,LAMTOR2,-1.424793,ECM_all_data,4.516051e-06,0.041193


In [57]:
df[df.gene.str.contains('RAR')]

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
10420,10420,NRARP,-0.09099,ECM_all_data,0.793693,0.989499
12830,12830,RARB,0.186408,ECM_all_data,0.38697,0.971311
12831,12831,RARG,0.174183,ECM_all_data,0.315758,0.967804
12832,12832,RARRES1,0.096121,ECM_all_data,0.542742,0.980515
12833,12833,RARRES2,-0.161548,ECM_all_data,0.536136,0.979346
12834,12834,RARRES3,-0.389926,ECM_all_data,0.122807,0.993074
12835,12835,RARS,0.316832,ECM_all_data,0.081547,1.0
12836,12836,RARS2,-0.337051,ECM_all_data,0.178487,0.99212


### ### Amoeboid, 3D; fibrin data 

In [12]:
df = pd.read_csv('../../data/screen_summary/stats/gene_avg/20211222_screen_log2fold_diffs_ECMfibrin_gene_pvalues.csv')
df = df[~df.gene.str.contains('CONTROL')]
df['fdr'] = fdr(df['pvalue'])
print('Proliferation, number sig. genes: ',len(df[df['fdr']<=0.05]))

df_neg = df[df.log2fold_diff_mean <0]
print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.05]))
df_pos = df[df.log2fold_diff_mean >0]
print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.05]))

Proliferation, number sig. genes:  2
Proliferation, number sig. genes, negative fold change:  2
Proliferation, number sig. genes, positive fold change:  0


In [13]:
df[df['fdr']<=0.05]

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
917,917,ARHGAP30,-2.036042,ECM_fibrin,2.167704e-06,0.019773
5571,5571,FMNL1,-2.333011,ECM_fibrin,5.419261e-07,0.009886


In [15]:
df[df['gene']=='CORO1A']

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
3402,3402,CORO1A,-1.181121,ECM_fibrin,0.001327,0.807166


In [16]:
df = pd.read_csv('../../data/screen_summary/stats/gene_avg/20211222_screen_log2fold_diffs_ECMfibrin_gene_pvalues.csv')
df = df[~df.gene.str.contains('CONTROL')]
df['fdr'] = fdr(df['pvalue'])
print('Proliferation, number sig. genes: ',len(df[df['fdr']<=0.25]))

df_neg = df[df.log2fold_diff_mean <0]
print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.25]))
df_pos = df[df.log2fold_diff_mean >0]
print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.25]))

Proliferation, number sig. genes:  5
Proliferation, number sig. genes, negative fold change:  3
Proliferation, number sig. genes, positive fold change:  2


In [17]:
df[df['fdr']<=0.25]

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
141,141,ACIN1,1.253362,ECM_fibrin,1.047724e-05,0.063712
917,917,ARHGAP30,-2.036042,ECM_fibrin,2.167704e-06,0.019773
4727,4727,EMC4,-2.837661,ECM_fibrin,2.444689e-05,0.111496
5571,5571,FMNL1,-2.333011,ECM_fibrin,5.419261e-07,0.009886
12592,12592,PTPRN2,0.940105,ECM_fibrin,4.552179e-05,0.166091


### ### Amoeboid, 3D; Collagen data 

In [34]:
df = pd.read_csv('../../data/screen_summary/stats/gene_avg/20211222_screen_log2fold_diffs_ECMcollagen_gene_pvalues.csv')
df = df[~df.gene.str.contains('CONTROL')]
df['fdr'] = fdr(df['pvalue'])
print('Proliferation, number sig. genes: ',len(df[df['fdr']<=0.05]))

df_neg = df[df.log2fold_diff_mean <0]
print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.05]))
df_pos = df[df.log2fold_diff_mean >0]
print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.05]))

Proliferation, number sig. genes:  0
Proliferation, number sig. genes, negative fold change:  0
Proliferation, number sig. genes, positive fold change:  0


In [37]:
df = pd.read_csv('../../data/screen_summary/stats/gene_avg/20211222_screen_log2fold_diffs_ECMcollagen_gene_pvalues.csv')
df = df[~df.gene.str.contains('CONTROL')]
df['fdr'] = fdr(df['pvalue'])
print('Proliferation, number sig. genes: ',len(df[df['fdr']<=0.25]))

df_neg = df[df.log2fold_diff_mean <0]
print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.25]))
df_pos = df[df.log2fold_diff_mean >0]
print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.25]))

Proliferation, number sig. genes:  0
Proliferation, number sig. genes, negative fold change:  0
Proliferation, number sig. genes, positive fold change:  0


In [38]:
df[df.gene == 'FMNL1']

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
5571,5571,FMNL1,-2.051123,ECM_collagen,2.2e-05,0.39875


### attempts with 'fibrin_collagengood' files

In [31]:
df = pd.read_csv('../../data/screen_summary/temp/20211222_screen_log2fold_diffs_ECM_fibrin_goodcollagen_gene_pvalues.csv')
df = df[~df.gene.str.contains('CONTROL')]
df['fdr'] = fdr(df['pvalue'])
print('Proliferation, number sig. genes: ',len(df[df['fdr']<=0.05]))

df_neg = df[df.log2fold_diff_mean <0]
print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.05]))
df_pos = df[df.log2fold_diff_mean >0]
print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.05]))

Proliferation, number sig. genes:  0
Proliferation, number sig. genes, negative fold change:  0
Proliferation, number sig. genes, positive fold change:  0


In [32]:
df = pd.read_csv('../../data/screen_summary/temp/20211222_screen_log2fold_diffs_ECM_fibrin_goodcollagen_gene_pvalues.csv')
df = df[~df.gene.str.contains('CONTROL')]
df['fdr'] = fdr(df['pvalue'])
print('Proliferation, number sig. genes: ',len(df[df['fdr']<=0.25]))

df_neg = df[df.log2fold_diff_mean <0]
print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.25]))
df_pos = df[df.log2fold_diff_mean >0]
print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.25]))

Proliferation, number sig. genes:  0
Proliferation, number sig. genes, negative fold change:  0
Proliferation, number sig. genes, positive fold change:  0


In [33]:
df[df.gene == 'FMNL1']

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
5571,5571,FMNL1,-2.051123,ECM_fibrin_goodcollagen,2.2e-05,0.39875


In [30]:
df = pd.read_csv('../../data/screen_summary/temp/20211222_screen_log2fold_diffs_ECM_fibrin_goodcollagen_truncate_gene_pvalues.csv')
df = df[~df.gene.str.contains('CONTROL')]
df['fdr'] = fdr(df['pvalue'])
print('Proliferation, number sig. genes: ',len(df[df['fdr']<=0.05]))

df_neg = df[df.log2fold_diff_mean <0]
print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.05]))
df_pos = df[df.log2fold_diff_mean >0]
print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.05]))

Proliferation, number sig. genes:  0
Proliferation, number sig. genes, negative fold change:  0
Proliferation, number sig. genes, positive fold change:  0


In [26]:
df = pd.read_csv('../../data/screen_summary/temp/20211222_screen_log2fold_diffs_ECM_fibrin_goodcollagen_truncate_gene_pvalues.csv')
df = df[~df.gene.str.contains('CONTROL')]
df['fdr'] = fdr(df['pvalue'])
print('Proliferation, number sig. genes: ',len(df[df['fdr']<=0.25]))

df_neg = df[df.log2fold_diff_mean <0]
print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.25]))
df_pos = df[df.log2fold_diff_mean >0]
print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.25]))

Proliferation, number sig. genes:  0
Proliferation, number sig. genes, negative fold change:  0
Proliferation, number sig. genes, positive fold change:  0


In [27]:
df[df.gene == 'FMNL1']

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
5571,5571,FMNL1,-2.051123,ECM_fibrin_goodcollagen_truncate,2.2e-05,0.39875


### ECM - current, but no renormalizations 

In [59]:
for f in glob.glob('../../data/screen_summary/temp/20211222_screen_log2fold_diffs_ECM*gene_pvalues_2.csv'):
    print(f)
    df = pd.read_csv(f)
    df = df[~df.gene.str.contains('CONTROL')]
    df['fdr'] = fdr(df['pvalue'])
    print('Proliferation, number sig. genes: ',len(df[df['fdr']<=0.05]))

    df_neg = df[df.log2fold_diff_mean <0]
    print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.05]))
    df_pos = df[df.log2fold_diff_mean >0]
    print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.05]))
    break

../../data/screen_summary/temp/20211222_screen_log2fold_diffs_ECM_gene_pvalues_2.csv
Proliferation, number sig. genes:  3
Proliferation, number sig. genes, negative fold change:  3
Proliferation, number sig. genes, positive fold change:  0


In [60]:
df[df['fdr']<=0.05]

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
4915,4915,EMC4,-0.561756,ECM_all_data,9.813329e-06,0.046363
5776,5776,FMNL1,-0.740113,ECM_all_data,9.716167e-08,0.001836
14101,14101,SAMM50,-0.563233,ECM_all_data,7.675772e-06,0.048352


In [61]:
for f in glob.glob('../../data/screen_summary/temp/20211222_screen_log2fold_diffs_ECM*gene_pvalues_2.csv'):
    print(f)
    df = pd.read_csv(f)
    df = df[~df.gene.str.contains('CONTROL')]
    df['fdr'] = fdr(df['pvalue'])
    print('Proliferation, number sig. genes: ',len(df[df['fdr']<=0.25]))

    df_neg = df[df.log2fold_diff_mean <0]
    print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.25]))
    df_pos = df[df.log2fold_diff_mean >0]
    print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.25]))
    break

../../data/screen_summary/temp/20211222_screen_log2fold_diffs_ECM_gene_pvalues_2.csv
Proliferation, number sig. genes:  84
Proliferation, number sig. genes, negative fold change:  79
Proliferation, number sig. genes, positive fold change:  5


In [62]:
df[df['fdr']<=0.25]

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
145,145,ACIN1,0.309520,ECM_all_data,0.000096,0.139266
219,219,ACTR3,-0.411599,ECM_all_data,0.000185,0.145363
220,220,ACTR3B,-0.364446,ECM_all_data,0.000960,0.235529
537,537,ALDH1L2,0.267080,ECM_all_data,0.000432,0.177349
940,940,ARHGAP30,-0.520121,ECM_all_data,0.000005,0.050494
...,...,...,...,...,...,...
17385,17385,UBA5,-0.468375,ECM_all_data,0.000041,0.127613
17724,17724,VCP,-0.374984,ECM_all_data,0.000756,0.219746
17910,17910,WDR59,-0.375710,ECM_all_data,0.000866,0.227403
18078,18078,YEATS4,-0.383225,ECM_all_data,0.000531,0.193009


In [74]:
for f in glob.glob('../../data/screen_summary/temp/20211222_screen_log2fold_diffs_ECM*gene_pvalues_3.csv'):
    print(f)
    df = pd.read_csv(f)
    df = df[~df.gene.str.contains('CONTROL')]
    df['fdr'] = fdr(df['pvalue'])
    print('Proliferation, number sig. genes: ',len(df[df['fdr']<=0.05]))

    df_neg = df[df.log2fold_diff_mean <0]
    print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.05]))
    df_pos = df[df.log2fold_diff_mean >0]
    print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.05]))
#     break

../../data/screen_summary/temp/20211222_screen_log2fold_diffs_ECM_gene_pvalues_3.csv
Proliferation, number sig. genes:  1
Proliferation, number sig. genes, negative fold change:  1
Proliferation, number sig. genes, positive fold change:  0
../../data/screen_summary/temp/20211222_screen_log2fold_diffs_ECMcollagen_gene_pvalues_3.csv
Proliferation, number sig. genes:  0
Proliferation, number sig. genes, negative fold change:  0
Proliferation, number sig. genes, positive fold change:  0
../../data/screen_summary/temp/20211222_screen_log2fold_diffs_ECMfibrin_gene_pvalues_3.csv
Proliferation, number sig. genes:  6
Proliferation, number sig. genes, negative fold change:  6
Proliferation, number sig. genes, positive fold change:  0


In [189]:
for f in glob.glob('../../data/screen_summary/temp/20211222_screen_log2fold_diffs_ECM*gene_pvalues_3.csv'):
    print(f)
    df = pd.read_csv(f)
    df = df[~df.gene.str.contains('CONTROL')]
    df['fdr'] = fdr(df['pvalue'])
    print('Proliferation, number sig. genes: ',len(df[df['fdr']<=0.25]))

    df_neg = df[df.log2fold_diff_mean <0]
    print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.25]))
    df_pos = df[df.log2fold_diff_mean >0]
    print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.25]))
    break

../../data/screen_summary/temp/20211222_screen_log2fold_diffs_ECM_gene_pvalues_3.csv
Proliferation, number sig. genes:  61
Proliferation, number sig. genes, negative fold change:  57
Proliferation, number sig. genes, positive fold change:  4


In [185]:
df[df['fdr']<=0.15]

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
940,940,ARHGAP30,-1.112783,ECM_all_data,1.311683e-05,0.06197
4259,4259,DGCR8,-1.010263,ECM_all_data,2.953715e-05,0.093032
4915,4915,EMC4,-1.280676,ECM_all_data,9.619005e-06,0.09089
5776,5776,FMNL1,-1.943668,ECM_all_data,9.716167e-08,0.001836
8502,8502,LAMTOR2,-1.023215,ECM_all_data,2.691378e-05,0.101723
8880,8880,LPXN,-1.008969,ECM_all_data,3.536685e-05,0.09548
9723,9723,MOB4,-1.076371,ECM_all_data,1.243669e-05,0.078343
10813,10813,NSMCE1,-0.985476,ECM_all_data,6.655574e-05,0.139752
11797,11797,PDCL,-1.206252,ECM_all_data,4.527734e-05,0.106956


In [197]:
df_0[df_0['gene'].str.contains('ACTR')].sort_values('fdr')

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
224,224,ACTR8,0.626145,tracketch_all_NOgrad_data,0.005731,0.250691
223,223,ACTR6,-0.535779,tracketch_all_NOgrad_data,0.008035,0.29483
215,215,ACTR10,0.452368,tracketch_all_NOgrad_data,0.042898,0.547387
216,216,ACTR1A,0.363065,tracketch_all_NOgrad_data,0.089924,0.671955
222,222,ACTR5,0.356984,tracketch_all_NOgrad_data,0.093691,0.678119
220,220,ACTR3B,0.278012,tracketch_all_NOgrad_data,0.183305,0.780905
218,218,ACTR2,-0.258847,tracketch_all_NOgrad_data,0.206282,0.800478
11982,11982,PHACTR3,-0.231258,tracketch_all_NOgrad_data,0.257181,0.841886
219,219,ACTR3,-0.209837,tracketch_all_NOgrad_data,0.308019,0.867892
227,227,ACTRT3,0.167731,tracketch_all_NOgrad_data,0.411789,0.903936


In [196]:
df_10[df_10['gene'].str.contains('ACTR')].sort_values('fdr')

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
218,218,ACTR2,-0.626864,tracketch_all_10grad_data,0.000202,0.017502
11982,11982,PHACTR3,-0.398308,tracketch_all_10grad_data,0.010446,0.174845
226,226,ACTRT2,-0.308506,tracketch_all_10grad_data,0.048911,0.360503
219,219,ACTR3,-0.287505,tracketch_all_10grad_data,0.069062,0.416974
11980,11980,PHACTR1,-0.290224,tracketch_all_10grad_data,0.069588,0.417485
223,223,ACTR6,0.300104,tracketch_all_10grad_data,0.083396,0.446846
227,227,ACTRT3,-0.264231,tracketch_all_10grad_data,0.094151,0.471951
11981,11981,PHACTR2,-0.225409,tracketch_all_10grad_data,0.155186,0.566924
215,215,ACTR10,-0.219957,tracketch_all_10grad_data,0.167964,0.583276
220,220,ACTR3B,0.174583,tracketch_all_10grad_data,0.285031,0.699003


In [186]:
df[df['gene']=='CORO1A']

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
3523,3523,CORO1A,-0.737533,ECM_all_data,0.001478,0.303645


In [195]:
df[df['gene'].str.contains('ACTR')].sort_values('fdr')

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
219,219,ACTR3,-0.805514,ECM_all_data,0.000551,0.217011
220,220,ACTR3B,-0.687666,ECM_all_data,0.002832,0.385079
216,216,ACTR1A,-0.334771,ECM_all_data,0.091862,0.887071
218,218,ACTR2,-0.231186,ECM_all_data,0.202982,0.939953
224,224,ACTR8,-0.207934,ECM_all_data,0.242125,0.944801
11981,11981,PHACTR2,-0.1939,ECM_all_data,0.263076,0.950412
222,222,ACTR5,0.243487,ECM_all_data,0.343491,0.964386
11983,11983,PHACTR4,-0.158409,ECM_all_data,0.333611,0.965776
217,217,ACTR1B,0.229586,ECM_all_data,0.381715,0.973634
11980,11980,PHACTR1,-0.100345,ECM_all_data,0.475807,0.981638


In [194]:
df[df['gene'].str.contains('RAP')].sort_values('fdr')

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
13264,13264,RAP1GDS1,-0.733415,ECM_all_data,0.001534,0.308338
16972,16972,TRAPPC1,-0.604470,ECM_all_data,0.008577,0.595895
13260,13260,RAP1A,-0.526193,ECM_all_data,0.015237,0.702305
16974,16974,TRAPPC11,-0.487711,ECM_all_data,0.024592,0.766909
5991,5991,GABARAP,0.494221,ECM_all_data,0.027228,0.769151
...,...,...,...,...,...,...
5041,5041,ERAP2,0.097965,ECM_all_data,0.831352,0.996568
8592,8592,LDLRAP1,0.024656,ECM_all_data,0.892725,0.997029
898,898,ARAP2,0.078733,ECM_all_data,0.905518,0.998278
13266,13266,RAP2B,0.063656,ECM_all_data,0.966883,0.999899


In [71]:
df[df['fdr']<=0.05]

Unnamed: 0.1,Unnamed: 0,gene,log2fold_diff_mean,exp,pvalue,fdr
940,940,ARHGAP30,-1.684247,ECM_fibrin,1.846072e-06,0.013955
4915,4915,EMC4,-1.939001,ECM_fibrin,1.846072e-06,0.013955
5487,5487,FANCA,-1.457303,ECM_fibrin,7.384287e-06,0.02791
5776,5776,FMNL1,-1.955824,ECM_fibrin,2.91485e-07,0.005508
14101,14101,SAMM50,-1.675016,ECM_fibrin,4.08079e-06,0.01928
15414,15414,SPI1,-1.39796,ECM_fibrin,1.816923e-05,0.049052


### alternative ECMs

In [67]:
glob.glob('../../data/screen_summary/temp/*')

['../../data/screen_summary/temp/20211222_screen_log2fold_diffs_collated_gene_pvalues.csv',
 '../../data/screen_summary/temp/20220516_screen_log2fold_diffs_ECM_truncate_gene_pvalues.csv',
 '../../data/screen_summary/temp/collated_screen_data_sgRNA_allexps_ECM_avgsgCtrl_pvalues_20211222.csv',
 '../../data/screen_summary/temp/20220516_screen_log2fold_diffs_ECM_truncate_sgRNA_pvalues.csv',
 '../../data/screen_summary/temp/collated_screen_data_gene_pvalues_20210830.csv']

In [82]:
for f in ['../../data/screen_summary/temp/20220516_screen_log2fold_diffs_ECM_truncate_gene_pvalues.csv',
    '../../data/screen_summary/temp/collated_screen_data_sgRNA_allexps_ECM_avgsgCtrl_pvalues_20211222.csv',
    '../../data/screen_summary/temp/20220516_screen_log2fold_diffs_ECM_truncate_sgRNA_pvalues.csv',
    '../../data/screen_summary/temp/collated_screen_data_sgRNA_allexps_ECM_avgsgCtrl_pvalues_20211222.csv']:
    print(f)
    
    df = pd.read_csv(f)
    df = df[~df.gene.str.contains('CONTROL')]
#     print(df.head())
    df['fdr'] = fdr(df['pvalue'])
    print('Proliferation, number sig. genes: ',len(df[df['fdr']<=0.05]))

    df_neg = df[df.log2fold_diff_mean <0]
    print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.05]))
    df_pos = df[df.log2fold_diff_mean >0]
    print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.05]))

../../data/screen_summary/temp/20220516_screen_log2fold_diffs_ECM_truncate_gene_pvalues.csv
Proliferation, number sig. genes:  0
Proliferation, number sig. genes, negative fold change:  0
Proliferation, number sig. genes, positive fold change:  0
../../data/screen_summary/temp/collated_screen_data_sgRNA_allexps_ECM_avgsgCtrl_pvalues_20211222.csv
Proliferation, number sig. genes:  0
Proliferation, number sig. genes, negative fold change:  0
Proliferation, number sig. genes, positive fold change:  0
../../data/screen_summary/temp/20220516_screen_log2fold_diffs_ECM_truncate_sgRNA_pvalues.csv
Proliferation, number sig. genes:  0
Proliferation, number sig. genes, negative fold change:  0
Proliferation, number sig. genes, positive fold change:  0
../../data/screen_summary/temp/collated_screen_data_sgRNA_allexps_ECM_avgsgCtrl_pvalues_20211222.csv
Proliferation, number sig. genes:  0
Proliferation, number sig. genes, negative fold change:  0
Proliferation, number sig. genes, positive fold chan

In [152]:
df = pd.read_csv('../../data/screen_summary/temp/20211222_screen_log2fold_diffs_collated_gene_pvalues.csv')

df_col = df[df.exp == 'ECM_collagen']
df_col['fdr'] = fdr(df_col['pvalue'])
print('collagen, number sig. genes: ',len(df_col[df_col['fdr']<=0.05]))

df_fib = df[df.exp == 'ECM_fibrin']
df_fib['fdr'] = fdr(df_fib['pvalue'])
print('fibrin, number sig. genes: ',len(df_fib[df_fib['fdr']<=0.05]))

collagen, number sig. genes:  4
fibrin, number sig. genes:  6


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [6]:
# df_col[df_col['fdr']<=0.15]

In [5]:
# df_fib[df_fib['fdr']<=0.15]

In [157]:
df[df['gene']=='CORO1A']

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,exp,gene,log2fold_diff_mean,pvalue
3523,3523,3523.0,24421.0,24421.0,transwell_2hr_0_bottom,CORO1A,-0.520883,0.221012
24421,24421,24421.0,45319.0,45319.0,transwell_2hr_0_top,CORO1A,-0.054808,0.886391
45319,45319,45319.0,66217.0,66217.0,transwell_2hr_10_bottom,CORO1A,-0.57885,0.169671
66217,66217,66217.0,87115.0,,transwell_2hr_10_top,CORO1A,-0.417573,0.310734
87115,87115,87115.0,108013.0,,transwell_6hr_0_bottom,CORO1A,-0.410718,0.318504
108013,108013,108013.0,,,ECM_collagen,CORO1A,-0.353542,0.132892
128911,128911,128911.0,,,ECM_fibrin,CORO1A,-1.008517,0.002263
149809,149809,149809.0,,,transwell_6hr_0_top,CORO1A,0.067904,0.843406
170707,170707,170707.0,,,transwell_6hr_10_bottom,CORO1A,-0.004569,0.985821
191605,191605,,,,transwell_6hr_10_top,CORO1A,0.251171,0.329999


In [158]:
df[df['gene']=='FMNL1']

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,exp,gene,log2fold_diff_mean,pvalue
5776,5776,5776.0,26674.0,26674.0,transwell_2hr_0_bottom,FMNL1,-2.748083,1.943233e-07
26674,26674,26674.0,47572.0,47572.0,transwell_2hr_0_top,FMNL1,-0.757884,0.07660148
47572,47572,47572.0,68470.0,68470.0,transwell_2hr_10_bottom,FMNL1,-1.267441,0.003893074
68470,68470,68470.0,89368.0,,transwell_2hr_10_top,FMNL1,-1.232013,0.003549219
89368,89368,89368.0,110266.0,,transwell_6hr_0_bottom,FMNL1,-1.742173,7.219112e-05
110266,110266,110266.0,,,ECM_collagen,FMNL1,-0.894591,8.74455e-06
131164,131164,131164.0,,,ECM_fibrin,FMNL1,-2.069689,2.91485e-07
152062,152062,152062.0,,,transwell_6hr_0_top,FMNL1,-0.717497,0.06673238
172960,172960,172960.0,,,transwell_6hr_10_bottom,FMNL1,-0.870277,0.0006252353
193858,193858,,,,transwell_6hr_10_top,FMNL1,-1.308264,7.384287e-06


In [3]:
df = pd.read_csv('../../data/screen_summary/temp/20211222_screen_log2fold_diffs_collated_gene_pvalues.csv')

df_col = df[df.exp == 'ECM_collagen']
df_col['fdr'] = fdr(df_col['pvalue'])
print('collagen, number sig. genes: ',len(df_col[df_col['fdr']<=0.25]))

df_fib = df[df.exp == 'ECM_fibrin']
df_fib['fdr'] = fdr(df_fib['pvalue'])
print('fibrin, number sig. genes: ',len(df_fib[df_fib['fdr']<=0.25]))

collagen, number sig. genes:  41
fibrin, number sig. genes:  74


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [4]:
df[df.gene == 'CORO1A']

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,exp,gene,log2fold_diff_mean,pvalue
3523,3523,3523.0,24421.0,24421.0,transwell_2hr_0_bottom,CORO1A,-0.520883,0.221012
24421,24421,24421.0,45319.0,45319.0,transwell_2hr_0_top,CORO1A,-0.054808,0.886391
45319,45319,45319.0,66217.0,66217.0,transwell_2hr_10_bottom,CORO1A,-0.57885,0.169671
66217,66217,66217.0,87115.0,,transwell_2hr_10_top,CORO1A,-0.417573,0.310734
87115,87115,87115.0,108013.0,,transwell_6hr_0_bottom,CORO1A,-0.410718,0.318504
108013,108013,108013.0,,,ECM_collagen,CORO1A,-0.353542,0.132892
128911,128911,128911.0,,,ECM_fibrin,CORO1A,-1.008517,0.002263
149809,149809,149809.0,,,transwell_6hr_0_top,CORO1A,0.067904,0.843406
170707,170707,170707.0,,,transwell_6hr_10_bottom,CORO1A,-0.004569,0.985821
191605,191605,,,,transwell_6hr_10_top,CORO1A,0.251171,0.329999


In [99]:
df_fib[df_fib['fdr']<=0.25][:60]

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,exp,gene,log2fold_diff_mean,pvalue,fdr
125533,125533,125533.0,,,ECM_fibrin,ACIN1,1.261175,2.137557e-05,0.055838
125925,125925,125925.0,,,ECM_fibrin,ALDH1L2,1.122987,2.837121e-05,0.05929
126328,126328,126328.0,,,ECM_fibrin,ARHGAP30,-1.787405,1.74891e-06,0.012183
126502,126502,126502.0,,,ECM_fibrin,ASCC3,0.893538,0.0006786743,0.208573
126584,126584,126584.0,,,ECM_fibrin,ATIC,-1.246445,0.0001829554,0.123336
126660,126660,126660.0,,,ECM_fibrin,ATP6V1C1,-1.279511,0.0001326257,0.098986
127057,127057,127057.0,,,ECM_fibrin,BTF3,-1.147451,0.0005629547,0.206397
127091,127091,127091.0,,,ECM_fibrin,C10orf111,-1.130926,0.0006685694,0.211693
127156,127156,127156.0,,,ECM_fibrin,C12orf60,-1.41467,0.0001034772,0.083172
127676,127676,127676.0,,,ECM_fibrin,CAP1,-1.182385,0.0005948237,0.200494


In [100]:
df_fib[df_fib.gene == 'CORO1A']

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,exp,gene,log2fold_diff_mean,pvalue,fdr
128911,128911,128911.0,,,ECM_fibrin,CORO1A,-1.008517,0.002263,0.326166


In [128]:
df_col[df_col['fdr']<=0.25]
# df_col[df_col.gene == 'CORO1A']

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,Unnamed: 0.1.1.1.1,Unnamed: 0.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1.1,exp,gene,log2fold_diff_mean,pvalue,sgRNA,fdr


In [109]:
df = pd.read_csv('../../data/screen_summary/temp/collated_screen_data_sgRNA_allexps_ECM_avgsgCtrl_pvalues_20211222.csv')

df = df[~df.gene.str.contains('CONTROL')]
df = df[['exp', 'gene', 'log2fold_diff_mean',
       'pvalue', 'sgRNA']]

df['fdr'] = fdr(df['pvalue'])

In [110]:
df[df.gene == "['CORO1A']"]

Unnamed: 0,exp,gene,log2fold_diff_mean,pvalue,sgRNA,fdr
9342,allexps_ECM,['CORO1A'],-0.652186,0.004024,AGTGATCCCCCAAAAGCCGG,0.146922
10238,allexps_ECM,['CORO1A'],-0.786372,0.006141,ATCTTCAGCGGGCGAGTCCC,0.168231
38030,allexps_ECM,['CORO1A'],-0.604531,0.006852,GGAAGGAGCTGGAGGAGCCG,0.172657


In [121]:
for f in glob.glob('../../data/screen_summary/temp/20220412*'):
    print(f)
    
    df = pd.read_csv(f)
    df = df[~df.gene.str.contains('CONTROL')]
#     print(df.head())
    df['fdr'] = fdr(df['pvalue'])
    print('Proliferation, number sig. genes: ',len(df[df['fdr']<=0.05]))

    df_neg = df[df.log2fold_diff_mean <0]
    print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.05]))
    df_pos = df[df.log2fold_diff_mean >0]
    print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.05]))

../../data/screen_summary/temp/20220412_screen_log2fold_diffs_ECM_gene_means_Colab-pvalues.csv
Proliferation, number sig. genes:  2
Proliferation, number sig. genes, negative fold change:  2
Proliferation, number sig. genes, positive fold change:  0
../../data/screen_summary/temp/20220412_screen_log2fold_diffs_ECM_sgRNA_means_Colab-pvalues.csv
Proliferation, number sig. genes:  0
Proliferation, number sig. genes, negative fold change:  0
Proliferation, number sig. genes, positive fold change:  0


In [43]:
for f in [glob.glob('../../data/screen_summary/temp/20220412*')[0]]:
    print(f)
    
    df = pd.read_csv(f)
    df = df[~df.gene.str.contains('CONTROL')]
#     print(df.head())
    df['fdr'] = fdr(df['pvalue'])
    print('Proliferation, number sig. genes: ',len(df[df['fdr']<=0.25]))

    df_neg = df[df.log2fold_diff_mean <0]
    print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.25]))
    df_pos = df[df.log2fold_diff_mean >0]
    print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.25]))

../../data/screen_summary/temp/20220412_screen_log2fold_diffs_ECM_gene_means_Colab-pvalues.csv
Proliferation, number sig. genes:  14
Proliferation, number sig. genes, negative fold change:  13
Proliferation, number sig. genes, positive fold change:  1


In [44]:
df_neg[df_neg['fdr']<=0.25]

Unnamed: 0.1,Unnamed: 0,exp,gene,log2fold_diff_mean,pvalue,fdr
940,940,ECM_all_data,ARHGAP30,-0.840441,1.185372e-05,0.074671
4915,4915,ECM_all_data,EMC4,-0.935615,1.447709e-05,0.068397
5776,5776,ECM_all_data,FMNL1,-1.48214,9.716167e-08,0.001836
8502,8502,ECM_all_data,LAMTOR2,-0.828128,1.74891e-05,0.066102
8880,8880,ECM_all_data,LPXN,-0.741261,8.210161e-05,0.155156
9723,9723,ECM_all_data,MOB4,-1.013431,8.74455e-07,0.008263
10813,10813,ECM_all_data,NSMCE1,-0.819674,2.516487e-05,0.079261
11797,11797,ECM_all_data,PDCL,-0.929598,6.150334e-05,0.129143
12198,12198,ECM_all_data,PLAGL2,-0.741499,0.0001783888,0.240799
12600,12600,ECM_all_data,PPP6C,-0.911447,8.763983e-05,0.150565


In [127]:
df_pos[df_pos['fdr']<=0.25]

Unnamed: 0.1,Unnamed: 0,exp,gene,log2fold_diff_mean,pvalue,fdr
3041,3041,ECM_all_data,CHAF1B,0.808119,4.5e-05,0.120924


In [129]:
df[df.gene == 'CORO1A']

Unnamed: 0.1,Unnamed: 0,exp,gene,log2fold_diff_mean,pvalue,fdr
3523,3523,ECM_all_data,CORO1A,-0.681029,0.000248,0.260429


In [159]:
for f in glob.glob('../../data/screen_summary/temp/collated_screen_data_gene_ECMcombined_pvalues_20211222.csv'):
    print(f)
    
    df = pd.read_csv(f)
    df = df[~df.gene.str.contains('CONTROL')]
#     print(df.head())
    df['fdr'] = fdr(df['pvalue'])
    print('Proliferation, number sig. genes: ',len(df[df['fdr']<=0.05]))

    df_neg = df[df.log2fold_diff_mean <0]
    print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.05]))
    df_pos = df[df.log2fold_diff_mean >0]
    print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.05]))

../../data/screen_summary/temp/collated_screen_data_gene_ECMcombined_pvalues_20211222.csv
Proliferation, number sig. genes:  2
Proliferation, number sig. genes, negative fold change:  2
Proliferation, number sig. genes, positive fold change:  0


In [160]:
for f in glob.glob('../../data/screen_summary/temp/collated_screen_data_gene_ECMcombined_pvalues_20211222.csv'):
    print(f)
    
    df = pd.read_csv(f)
    df = df[~df.gene.str.contains('CONTROL')]
#     print(df.head())
    df['fdr'] = fdr(df['pvalue'])
    print('Proliferation, number sig. genes: ',len(df[df['fdr']<=0.25]))

    df_neg = df[df.log2fold_diff_mean <0]
    print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.25]))
    df_pos = df[df.log2fold_diff_mean >0]
    print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.25]))

../../data/screen_summary/temp/collated_screen_data_gene_ECMcombined_pvalues_20211222.csv
Proliferation, number sig. genes:  14
Proliferation, number sig. genes, negative fold change:  13
Proliferation, number sig. genes, positive fold change:  1


In [161]:
df[df['fdr']<=0.25]

Unnamed: 0.1,Unnamed: 0,exp,gene,log2fold_diff_mean,pvalue,fdr
940,940,ECM_all_data,ARHGAP30,-0.840441,1.185372e-05,0.074671
3041,3041,ECM_all_data,CHAF1B,0.808119,4.479153e-05,0.120924
4915,4915,ECM_all_data,EMC4,-0.935615,1.447709e-05,0.068397
5776,5776,ECM_all_data,FMNL1,-1.48214,9.716167e-08,0.001836
8502,8502,ECM_all_data,LAMTOR2,-0.828128,1.74891e-05,0.066102
8880,8880,ECM_all_data,LPXN,-0.741261,8.210161e-05,0.155156
9723,9723,ECM_all_data,MOB4,-1.013431,8.74455e-07,0.008263
10813,10813,ECM_all_data,NSMCE1,-0.819674,2.516487e-05,0.079261
11797,11797,ECM_all_data,PDCL,-0.929598,6.150334e-05,0.129143
12198,12198,ECM_all_data,PLAGL2,-0.741499,0.0001783888,0.240799


In [39]:
for f in glob.glob('../../data/screen_summary/temp/collated_screen_data_gene_pvalues_20210830.csv'):
    print(f)
    
    df = pd.read_csv(f)
    df = df[df.exp == 'ECM_fibrin_goodcollagen']
    df = df[~df.gene.str.contains('CONTROL')]
#     print(df.head())
    df['fdr'] = fdr(df['pvalue'])
    print('Proliferation, number sig. genes: ',len(df[df['fdr']<=0.05]))

    df_neg = df[df.log2fold_diff_mean <0]
    print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.05]))
    df_pos = df[df.log2fold_diff_mean >0]
    print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.05]))

../../data/screen_summary/temp/collated_screen_data_gene_pvalues_20210830.csv
Proliferation, number sig. genes:  1
Proliferation, number sig. genes, negative fold change:  1
Proliferation, number sig. genes, positive fold change:  0


In [40]:
df[df['fdr']<=0.05]

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,exp,gene,log2fold_diff_mean,pvalue,fdr
255243,255243,255243.0,,,ECM_fibrin_goodcollagen,FMNL1,-2.093313,9.716167e-08,0.001836


In [41]:
for f in glob.glob('../../data/screen_summary/temp/collated_screen_data_gene_pvalues_20210830.csv'):
    print(f)
    
    df = pd.read_csv(f)
    df = df[df.exp == 'ECM_fibrin_goodcollagen']
    df = df[~df.gene.str.contains('CONTROL')]
#     print(df.head())
    df['fdr'] = fdr(df['pvalue'])
    print('Proliferation, number sig. genes: ',len(df[df['fdr']<=0.25]))

    df_neg = df[df.log2fold_diff_mean <0]
    print('Proliferation, number sig. genes, negative fold change: ',len(df_neg[df_neg['fdr']<=0.25]))
    df_pos = df[df.log2fold_diff_mean >0]
    print('Proliferation, number sig. genes, positive fold change: ',len(df_pos[df_pos['fdr']<=0.25]))

../../data/screen_summary/temp/collated_screen_data_gene_pvalues_20210830.csv
Proliferation, number sig. genes:  69
Proliferation, number sig. genes, negative fold change:  65
Proliferation, number sig. genes, positive fold change:  4


In [175]:
df[df['fdr']<=0.25][:60]

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,exp,gene,log2fold_diff_mean,pvalue,fdr
249612,249612,249612.0,,,ECM_fibrin_goodcollagen,ACIN1,0.773727,0.0001472971,0.173976
249672,249672,249672.0,,,ECM_fibrin_goodcollagen,ACTL6A,-0.915443,0.0006363118,0.218637
249686,249686,249686.0,,,ECM_fibrin_goodcollagen,ACTR3,-0.919701,0.0005659667,0.209718
250407,250407,250407.0,,,ECM_fibrin_goodcollagen,ARHGAP30,-1.252607,1.136792e-05,0.053708
251235,251235,251235.0,,,ECM_fibrin_goodcollagen,C12orf60,-1.058056,0.0002319249,0.156533
251782,251782,251782.0,,,ECM_fibrin_goodcollagen,CAPZB,-0.899502,0.0008196558,0.242029
251939,251939,251939.0,,,ECM_fibrin_goodcollagen,CCDC174,-0.94003,0.0006865444,0.231684
252099,252099,252099.0,,,ECM_fibrin_goodcollagen,CCT4,-1.055717,0.0002012218,0.17285
252330,252330,252330.0,,,ECM_fibrin_goodcollagen,CDT1,-0.997653,0.0002463048,0.160506
253063,253063,253063.0,,,ECM_fibrin_goodcollagen,CPSF1,-1.078106,0.0001134848,0.17872


In [173]:
df[df['gene']=='CORO1A'][:60]

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,exp,gene,log2fold_diff_mean,pvalue,fdr
252990,252990,252990.0,,,ECM_fibrin_goodcollagen,CORO1A,-0.861733,0.001304,0.276827


In [172]:
df[df['fdr']<=0.25][60:]

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,exp,gene,log2fold_diff_mean,pvalue,fdr
265392,265392,265392.0,,,ECM_fibrin_goodcollagen,TADA1,-0.902238,0.000825,0.24
265408,265408,265408.0,,,ECM_fibrin_goodcollagen,TAF3,-0.911367,0.000869,0.245196
265442,265442,265442.0,,,ECM_fibrin_goodcollagen,TARDBP,-0.950331,0.000428,0.179536
265789,265789,265789.0,,,ECM_fibrin_goodcollagen,THOC1,-1.039575,0.000195,0.194247
265857,265857,265857.0,,,ECM_fibrin_goodcollagen,TIPIN,0.768956,0.000526,0.19893
265883,265883,265883.0,,,ECM_fibrin_goodcollagen,TLN1,-0.969467,0.000299,0.171375
266852,266852,266852.0,,,ECM_fibrin_goodcollagen,UBA5,-0.970704,0.000362,0.17533
266918,266918,266918.0,,,ECM_fibrin_goodcollagen,UBLCP1,-0.89771,0.000757,0.238548
267253,267253,267253.0,,,ECM_fibrin_goodcollagen,VPS29,-0.97128,0.000397,0.182944


In [42]:
# df[df['fdr']<=0.06].sort_values('log2fold_diff_mean')[:50]