In [1]:
import pandas as pd
import numpy as np
import glob
import os

In [24]:
# Count how many distinct genes and ontology annotations each gene/ontology
# result file contains, printing one short report per file.
# The paths are sorted because glob.glob returns matches in an arbitrary
# (filesystem-dependent) order, which made the report order unstable between runs.
for f_path in sorted(glob.glob('../../nf_pipeline/outputs/BICCN_20220601_direct/gene_ont/*.csv')):
    # NOTE: this rebinds the notebook-level name `df` on every iteration.
    df = pd.read_csv(f_path)
    print(f_path)
    print(df['gene'].nunique(),'genes')
    print(df['annotation'].nunique(),'onts')
    

../../nf_pipeline/outputs/BICCN_20220601_direct/gene_ont/BICCN_mouse1_punctate_gene_ont.csv
251 genes
19 onts
../../nf_pipeline/outputs/BICCN_20220601_direct/gene_ont/BICCN_mouse2_radial_gene_ont.csv
247 genes
19 onts
../../nf_pipeline/outputs/BICCN_20220601_direct/gene_ont/BICCN_mouse2_punctate_gene_ont.csv
247 genes
19 onts
../../nf_pipeline/outputs/BICCN_20220601_direct/gene_ont/BICCN_mouse2_peripheral_gene_ont.csv
252 genes
19 onts
../../nf_pipeline/outputs/BICCN_20220601_direct/gene_ont/BICCN_mouse1_central_gene_ont.csv
252 genes
19 onts
../../nf_pipeline/outputs/BICCN_20220601_direct/gene_ont/BICCN_mouse1_peripheral_gene_ont.csv
252 genes
19 onts
../../nf_pipeline/outputs/BICCN_20220601_direct/gene_ont/BICCN_mouse1_radial_gene_ont.csv
251 genes
19 onts
../../nf_pipeline/outputs/BICCN_20220601_direct/gene_ont/BICCN_mouse2_central_gene_ont.csv
252 genes
19 onts


In [69]:
# Per-metric summary of the gene/ontology result tables: how many
# (annotation, gene) pairs are significant in more than one experiment and
# how often the sign of `med_score` agrees across the rows of a pair.

# Glob pattern for the per-experiment result files; `{}` is the metric name.
GENE_ONT_GLOB = '../../nf_pipeline/outputs/BICCN_20220601_direct/gene_ont/*{}*.csv'
MAX_BH_P = 0.05          # rows are kept when bh_p <= this value
MIN_MED_GENE_SPOTS = 2   # rows are kept when med_gene_spots >= this value
GROUP_COLS = ['annotation','gene']


def count_same_direction_groups(scores_df, group_cols=('annotation','gene'), score_col='med_score'):
    """Count groups whose scores all share one strict sign.

    A group counts when every value of `score_col` in it is > 0, or every
    value is < 0. This replaces the earlier `prod() > 0` test, which is only
    valid for groups of exactly two rows: three negative scores have a
    negative product (wrongly "discordant") and two negatives plus one
    positive have a positive product (wrongly "concordant").

    Parameters
    ----------
    scores_df : pandas.DataFrame
        Table containing the columns named in `group_cols` and `score_col`.
    group_cols : sequence of str
        Columns that define a group.
    score_col : str
        Column holding the signed score.

    Returns
    -------
    int
        Number of groups with a consistent, non-zero sign (0 for an empty table).
    """
    if scores_df.empty:
        return 0
    grouped_scores = scores_df.groupby(list(group_cols))[score_col]
    # min > 0 means every score is positive; max < 0 means every score is negative.
    all_positive = grouped_scores.min().gt(0)
    all_negative = grouped_scores.max().lt(0)
    return int((all_positive | all_negative).sum())


metrics = ['peripheral','central','radial','punctate']
for metric in metrics:
    print(metric)
    # Sorted so the concatenation order does not depend on filesystem order.
    f_paths = sorted(glob.glob(GENE_ONT_GLOB.format(metric)))
    if not f_paths:
        # pd.concat raises ValueError when given nothing to concatenate.
        print('No gene_ont files found for this metric')
        print('')
        continue

    # `df` and `sig_df` stay at notebook level on purpose: later cells inspect
    # them, and they hold the values from the last metric processed here.
    df = pd.concat((pd.read_csv(p) for p in f_paths))

    print('Num genes',df['gene'].nunique())
    print('Num ontologies',df['annotation'].nunique())

    print('Num tot ann/gene groups',df.groupby(GROUP_COLS).ngroups)

    # Row-level significance filter.
    sig_df = df[
        df['bh_p'].le(MAX_BH_P) &
        df['med_gene_spots'].ge(MIN_MED_GENE_SPOTS)
    ]
    # Keep only pairs that pass the filter in more than one experiment.
    sig_df = sig_df.groupby(GROUP_COLS).filter(lambda g: g['experiment'].nunique() > 1)

    print('Num sig genes',sig_df['gene'].nunique())
    print('Num sig ontologies',sig_df['annotation'].nunique())

    num_groups = sig_df.groupby(GROUP_COLS).ngroups
    print('Num sig ann/gene groups',num_groups)

    # NOTE(review): this is the fraction of significant ROWS with a positive
    # score (each pair contributes one row per file it appears in), although
    # the printed label speaks of groups. The mean of a boolean column is the
    # fraction of True values, and is NaN when `sig_df` is empty.
    fraction_positive = sig_df['med_score'].gt(0).mean()
    print('Fraction sig ann/gene groups with a positive score {:.3f}'.format(
        fraction_positive
    ))

    num_groups_same_effect = count_same_direction_groups(sig_df, GROUP_COLS, 'med_score')
    print('Number sig ann/gene groups with same direction of effect {}'.format(
        num_groups_same_effect
    ))

    # Report NaN rather than dividing by zero when nothing is significant.
    fraction_same_effect = num_groups_same_effect/num_groups if num_groups else float('nan')
    print('Fraction sig ann/gene groups with same direction of effect {:.3f}'.format(
        fraction_same_effect
    ))

    print('There are Blank- MERFISH genes in the significant group',sig_df['gene'].str.contains('Blank-').any())
    print('')
    


peripheral
Num genes 252
Num ontologies 19
Num tot ann/gene groups 4653
Num sig genes 177
Num sig ontologies 19
Num sig ann/gene groups 828
Fraction sig ann/gene groups with a positive score 0.561
Number sig ann/gene groups with same direction of effect 823
Fraction sig ann/gene groups with same direction of effect 0.994
There are Blank- MERFISH genes in the significant group False

central
Num genes 252
Num ontologies 19
Num tot ann/gene groups 4653
Num sig genes 177
Num sig ontologies 19
Num sig ann/gene groups 814
Fraction sig ann/gene groups with a positive score 0.452
Number sig ann/gene groups with same direction of effect 812
Fraction sig ann/gene groups with same direction of effect 0.998
There are Blank- MERFISH genes in the significant group False

radial
Num genes 251
Num ontologies 19
Num tot ann/gene groups 3660
Num sig genes 216
Num sig ontologies 19
Num sig ann/gene groups 2389
Fraction sig ann/gene groups with a positive score 1.000
Number sig ann/gene groups with same 

In [67]:
# Largest `med_gene_spots` value among the rows of `sig_df`.
# NOTE(review): `sig_df` is not defined in this cell; it is whatever the
# metric-summary loop last assigned — confirm which metric that was when this ran.
sig_df.loc[:, 'med_gene_spots'].max()

89.0

In [70]:
# Show the rows of `sig_df` whose gene name contains 'Blank-'.
# NOTE(review): relies on `sig_df` left over from the metric-summary loop.
is_blank_gene = sig_df['gene'].str.contains('Blank-')
sig_df.loc[is_blank_gene]

Unnamed: 0,experiment,sample,metric,gene,annotation,num_cells,med_gene_spots,med_spots,med_score,z,p,bh_p
260,BICCN_mouse1,m1s1,puncta,Blank-4,L23_IT,257,2.0,648.0,0.322,6.984224,2.864375e-12,9.677231e-12
275,BICCN_mouse1,m1s1,puncta,Blank-7,L23_IT,240,2.0,607.5,0.216,3.948304,7.870688e-05,0.0001372847
277,BICCN_mouse1,m1s1,puncta,Blank-7,L5_IT,132,2.0,745.0,0.309,3.854889,0.000115782,0.0001992808
293,BICCN_mouse1,m1s1,puncta,Blank-9,L23_IT,219,2.0,643.0,0.334,5.701228,1.189474e-08,3.068035e-08
214,BICCN_mouse2,m2s1,puncta,Blank-4,L23_IT,51,2.0,824.0,0.364,3.476426,0.0005081436,0.0008130297
220,BICCN_mouse2,m2s1,puncta,Blank-7,L23_IT,151,2.0,763.0,0.196,3.116723,0.001828733,0.002743519
222,BICCN_mouse2,m2s1,puncta,Blank-7,L5_IT,59,2.0,869.0,0.272,2.639248,0.008309029,0.01139922
235,BICCN_mouse2,m2s1,puncta,Blank-9,L23_IT,100,2.0,750.5,0.236,2.858491,0.004256614,0.006089917


In [60]:
# Sorted list of the distinct gene names present in `sig_df`.
# NOTE(review): relies on `sig_df` left over from the metric-summary loop.
unique_sig_genes = pd.unique(sig_df['gene'])
sorted(unique_sig_genes)

['1810046K07Rik',
 '5031425F14Rik',
 '5730522E02Rik',
 'Acta2',
 'Adam2',
 'Adamts2',
 'Adamts4',
 'Adra1b',
 'Alk',
 'Ankfn1',
 'Ano4',
 'Aqp4',
 'Asic4',
 'B4galnt2',
 'B4galnt3',
 'Barx2',
 'Bcl11b',
 'Bdnf',
 'Bgn',
 'Blank-4',
 'Blank-7',
 'Blank-9',
 'Blnk',
 'Bmpr1b',
 'Brinp3',
 'C1ql3',
 'C1qtnf7',
 'Cacng5',
 'Calb2',
 'Camk2d',
 'Cbln2',
 'Cbln4',
 'Ccbe1',
 'Ccdc162',
 'Ccdc3',
 'Ccdc80',
 'Ccnb1',
 'Cd14',
 'Cd24a',
 'Cdca7',
 'Cdcp1',
 'Cdh12',
 'Cdh13',
 'Cdh20',
 'Cdh9',
 'Ceacam9',
 'Cemip',
 'Chat',
 'Chn2',
 'Chodl',
 'Chrm2',
 'Chrna2',
 'Cldn5',
 'Clrn1',
 'Cntnap5b',
 'Cobll1',
 'Col14a1',
 'Col15a1',
 'Col23a1',
 'Col24a1',
 'Col25a1',
 'Corin',
 'Cplx3',
 'Crhr2',
 'Crispld2',
 'Cspg4',
 'Ctss',
 'Cux2',
 'Cxcl14',
 'Daam2',
 'Dscaml1',
 'Egfem1',
 'Egfr',
 'Egln3',
 'Egr2',
 'Elfn1',
 'Enpp6',
 'Epha7',
 'Fam19a2',
 'Fam84b',
 'Fbxl7',
 'Fezf2',
 'Flrt3',
 'Flt1',
 'Fndc7',
 'Fosb',
 'Foxp2',
 'Frem2',
 'Fst',
 'Gfap',
 'Glra1',
 'Gpc6',
 'Grik1',
 'Grin3a',
 '

In [58]:
# Distinct gene names in `df`, in order of first appearance.
# NOTE(review): relies on `df` left over from an earlier cell's loop.
pd.unique(df['gene'])

array(['1700022I11Rik', '1810046K07Rik', '5031425F14Rik', '5730522E02Rik',
       'Acta2', 'Adam2', 'Adamts2', 'Adamts4', 'Adra1b', 'Alk', 'Ankfn1',
       'Ano4', 'Aqp4', 'Asic4', 'B4galnt2', 'B4galnt3', 'Barx2', 'Bcl11b',
       'Bdnf', 'Bgn', 'Blank-1', 'Blank-2', 'Blank-3', 'Blank-4',
       'Blank-5', 'Blank-6', 'Blank-7', 'Blank-8', 'Blank-9', 'Blnk',
       'Bmpr1b', 'Brinp3', 'C1ql3', 'C1qtnf7', 'Cacng5', 'Calb2',
       'Camk2d', 'Car3', 'Cbln2', 'Cbln4', 'Ccbe1', 'Ccdc162', 'Ccdc3',
       'Ccdc80', 'Ccnb1', 'Cd14', 'Cd24a', 'Cdca7', 'Cdcp1', 'Cdh12',
       'Cdh13', 'Cdh20', 'Cdh9', 'Ceacam9', 'Cemip', 'Chat', 'Chn2',
       'Chodl', 'Chrm2', 'Chrna2', 'Cldn5', 'Clrn1', 'Cntnap5b', 'Cobll1',
       'Col14a1', 'Col15a1', 'Col23a1', 'Col24a1', 'Col25a1', 'Corin',
       'Cplx3', 'Crhr2', 'Crispld2', 'Cspg4', 'Ctss', 'Cux2', 'Cxcl14',
       'Daam2', 'Dmkn', 'Dnase1l3', 'Dscaml1', 'Egfem1', 'Egfr', 'Egln3',
       'Egr2', 'Elfn1', 'Enpp6', 'Epha7', 'Fam19a2', 'Fam84b', 'Fbxl7',

In [57]:
# NOTE(review): 216 and 251 appear to be the 'Num sig genes' and 'Num genes'
# values printed for the 'radial' metric by the summary loop — confirm.
num_sig_genes = 216
num_genes = 251
num_sig_genes/num_genes

0.8605577689243028