In [1]:
import pandas as pd
import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides pandas warnings (e.g. about index alignment or
# deprecated behaviour), so problems in the analysis below will not be reported.
warnings.filterwarnings('ignore')

In [2]:
# Cells are kept only if their nuc_norm value is strictly above this threshold
# (rows without a nucleus have NaN there and are dropped by the same test).
MIN_NUCLEAR_SIGNAL = 125

# The imaging area is split in two halves: pheno_tile < TILE_SPLIT and
# pheno_tile >= TILE_SPLIT. Each half yields one row in the results table.
TILE_SPLIT = 98


def gfp_gate(gfp, hoechst):
    """Return whether a cell lies on the GFP-negative side of the linear gate.

    The gate is the line ``hoechst = 1.67 * gfp - 166.67``; a cell is called
    GFP negative when its ``hoechst`` value is strictly above that line.

    The expression is plain arithmetic, so it accepts scalars as well as
    pandas Series / NumPy arrays (element-wise result). Comparisons that
    involve NaN evaluate to False, i.e. such cells are not GFP negative.
    """
    return 1.67 * gfp - 166.67 < hoechst


def percentage(part, total):
    """Return ``part / total * 100``, or NaN when ``total`` is zero.

    Returning NaN instead of raising ZeroDivisionError lets the analysis
    finish (and the table be saved) even if one group happens to be empty.
    """
    if total == 0:
        return float('nan')
    return part / total * 100


def load_cells(condition, scrambled, data_dir='data_1F'):
    """Load and merge the tables for one condition into one row per cell.

    Parameters
    ----------
    condition : str
        Prefix of the input files (e.g. ``'PFA'``).
    scrambled : str
        Second part of the nuclear file names (e.g. ``'normal'``). The cell
        assignment and phenotype files depend on ``condition`` only.
    data_dir : str
        Directory containing the tab-separated input files.

    Returns
    -------
    pandas.DataFrame
        One row per phenotyped cell with ``nuc_norm > MIN_NUCLEAR_SIGNAL``,
        with a fresh 0..n-1 index and an added boolean ``gfp_neg`` column.
    """
    prefix = f'{data_dir}/{condition}'
    nuclei = pd.read_csv(f'{prefix}_{scrambled}_NuclearIntensities.txt', sep='\t')
    nuclei_seq = pd.read_csv(f'{prefix}_{scrambled}_NuclearSequences.txt', sep='\t')
    assignment = pd.read_csv(f'{prefix}_CellAssignments.txt', sep='\t')
    pheno = pd.read_csv(f'{prefix}_Phenotypes.txt', sep='\t')

    # Attach the nucleus assignment to every phenotyped cell; the left join
    # keeps cells that were not assigned to any nucleus.
    cells = pheno.merge(assignment, how='left', on=['pheno_tile', 'pheno_cell'])

    # Attach nuclear intensities; every assigned nucleus is expected to match.
    cells = cells.merge(nuclei, how='left',
                        left_on=['NIS_tile', 'NIS_nucleus'],
                        right_on=['tile', 'nucleus'])

    # Attach sequences; the left join keeps nuclei without a sequence.
    cells = cells.merge(nuclei_seq, how='left', on=['tile', 'nucleus'])

    # The merges can duplicate a phenotyped cell; keep its first row only.
    cells = cells.drop_duplicates(['pheno_tile', 'pheno_cell'])

    # Remove cells with no nucleus / low nucleus signal.
    cells = cells[cells.nuc_norm > MIN_NUCLEAR_SIGNAL].reset_index(drop=True)

    # Gate all cells at once into GFP negative (True) vs GFP positive (False).
    cells['gfp_neg'] = gfp_gate(cells['gfp_norm'], cells['nuc_norm'])
    return cells


def classification_summary(cells):
    """Summarise how well "has a sequence" predicts "is GFP negative".

    ``cells`` needs a ``sequence`` column (missing value = not sequenced) and
    a boolean ``gfp_neg`` column. The confusion matrix is defined as:

    * true positive  = sequence defined,   GFP negative
    * false negative = sequence undefined, GFP negative
    * true negative  = sequence undefined, GFP positive
    * false positive = sequence defined,   GFP positive

    Returns a dict with ``cell_number``, ``cells_sequenced`` and the
    ``sensitivity`` / ``specificity`` in percent (NaN if undefined).
    """
    # Work on plain boolean arrays and combine them with `&`: this avoids
    # chained indexing such as df[mask_a][mask_b], which forces pandas to
    # re-align the second mask and builds throw-away intermediate frames.
    sequenced = cells['sequence'].notna().to_numpy()
    gfp_neg = cells['gfp_neg'].to_numpy(dtype=bool)

    true_pos = int((sequenced & gfp_neg).sum())
    false_neg = int((~sequenced & gfp_neg).sum())
    true_neg = int((~sequenced & ~gfp_neg).sum())
    false_pos = int((sequenced & ~gfp_neg).sum())

    return {
        'cell_number': len(cells),
        'cells_sequenced': int(sequenced.sum()),
        'sensitivity': percentage(true_pos, true_pos + false_neg),
        'specificity': percentage(true_neg, true_neg + false_pos),
    }


# In a notebook cell or when run as a script __name__ is "__main__", so the
# analysis runs exactly as before; importing the helpers does not trigger I/O.
if __name__ == '__main__':
    results = {'condition': [],
               'cell_number': [],
               'cells_sequenced': [],
               'sensitivity': [],
               'specificity': [],
               }

    for scrambled in ['normal', 'scrambled']:
        for condition in ['PFA', 'methanol']:
            cells = load_cells(condition, scrambled)

            # Split imaging area in two halves; one result row per half.
            halves = [cells[cells.pheno_tile < TILE_SPLIT],
                      cells[cells.pheno_tile >= TILE_SPLIT]]
            for cells_ in halves:
                summary = classification_summary(cells_)
                results['condition'].append(f'{condition}_{scrambled}')
                for column, value in summary.items():
                    results[column].append(value)

    # Save results to file
    df = pd.DataFrame(results)
    df.to_csv('Fig1F_results.tsv', sep='\t')