### NIS-Seq, Figure 1F written by AH and PK

---

This script generates Figure 1F in [*Cell Type-Agnostic Optical Perturbation Screening Using Nuclear In-Situ Sequencing (NIS-Seq)*](https://www.biorxiv.org/content/10.1101/2024.01.18.576210v1). The goal is to compare PFA fixation and methanol fixation by calculating sensitivity and specificity values in an experimental setup where GFP-positive cells are added to our genome-wide knockout NIS cells containing barcodes, and the two populations are then distinguished from each other.


The inputs for this script were generated with [*JSB ImageFiend*](https://jsb-lab.bio/opticalscreening/AnalyzeInSituCombined_v1.htm) and consist of the following files for each cell type:
    
1. **ExperimentName_NuclearSequences.txt**
    * columns: tile number, nucleus id, x and y coordinates, barcode sequence, maximal intensity
        * *tile*: Tile number, according to order of files loaded; counting from 0
        * *nucleus*: Number of the nucleus in that tile, as defined by the masks loaded. Cellpose masks start counting at 1.
        * *x*: Pixel-wise center position of each nuclear mask in the first cycle.
        * *y*: Pixel-wise center position of each nuclear mask in the first cycle.
        * *sequence*: Library-matched consensus sequence detected in a nucleus.
        * *max_intensity*: For each nucleus, the maximum intensity over all cycles and channels is indicated; intensity is calculated as the integrated pixel intensity across the nuclear mask.
    
2. **ExperimentName_CellAssignments.txt**
    * columns: in-situ_tile, in-situ_nucleus, in-situ_nucleus_x, in-situ_nucleus_y, in-situ_nucleus_area, phenotype_tile, phenotype_cell, phenotype_nucleus_x, phenotype_nucleus_y, phenotype_nucleus_area
        * *in-situ_tile*: Tile number, according to order of files loaded; counting from 0
        * *in-situ_nucleus*: Number of the nucleus in that tile, as defined by the masks loaded. Cellpose masks start counting at 1
        * *in-situ_nucleus_x*: Pixel-wise center position of each nuclear mask in the first cycle
        * *in-situ_nucleus_y*: Pixel-wise center position of each nuclear mask in the first cycle
        * *in-situ_nucleus_area*: Pixel-wise area of each nuclear mask in the first cycle
        * *phenotype_tile*: Tile number, according to order of files loaded; counting from 0
        * *phenotype_cell*: Number of the membrane mask in that tile, as defined by the masks loaded. Cellpose masks start counting at 1
        * *phenotype_nucleus_x*: Pixel-wise center position of each nuclear mask
        * *phenotype_nucleus_y*: Pixel-wise center position of each nuclear mask
        * *phenotype_nucleus_area*: Pixel-wise area of each nuclear mask in the phenotype data
    
The optical pooled screening analysis pipeline can be installed from [*Here*](https://github.com/feldman4/OpticalPooledScreens_2019).

---

In [None]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import os
import numpy as np
from PIL import Image
from tqdm import tqdm

# Plot generation
import seaborn as sns
import matplotlib.pyplot as plt

### Load data and create output folders

In [None]:
# Data directory for all generated data
base = './data/figF/'
# Later cells write CSV/TSV files into `base`; create it up front so those
# writes do not fail on a fresh checkout.
os.makedirs(base, exist_ok=True)

methanol_path = '/home/jsblab/data1/JSB_P47/phenotype/C9/'
PFA_path = '/home/jsblab/data2/P45_PFA_decrosslinking/P45_live/'


paths_total = []

# Column name -> filename fragment that selects the images for that column.
channel_fragments = {'path_02': '2.tif', 'path_03': '3.tif', 'path_06': '6.tif'}

# Generate a dataframe with all image paths, the cellpose masks were generated on the membrane mask
for path in [methanol_path, PFA_path]:

    # List the directory once and reuse the listing for all three channels.
    filenames = os.listdir(path)

    # Each column is sorted independently; rows only line up across columns if
    # the sorted filenames of all channels correspond one-to-one
    # (NOTE(review): assumed from the naming scheme - confirm).
    paths = pd.DataFrame({
        column: sorted(path + name for name in filenames if fragment in name)
        for column, fragment in channel_fragments.items()
    })
    paths['cellpose'] = sorted(
        f'{path}masks/{name}' for name in os.listdir(path + 'masks/') if '6.tif' in name
    )
    paths_total.append(paths)


### Phenotype analysis

In [None]:
def gate(x, y):
    """Return whether the point (x, y) lies strictly above the gating line.

    The line is ``slope * x + intercept`` with the constants below; the result
    is True where ``y`` is greater than the line value at ``x``.
    """
    slope = 1.6666666666666667
    intercept = -166.66666666666674
    line_value = slope * x + intercept
    return line_value < y

In [None]:
# Load individual images and calculate GFP mean, GFP sum, hoechst mean, hoechst sum for each cell.
# One tab-separated table per fixation is written to `base`.

for fixation, path in zip(['methanol', 'PFA'], paths_total):

    # Collect one frame per tile and concatenate once at the end; concatenating
    # inside the loop re-copies all previous rows on every iteration.
    tile_frames = []
    for _, (nucleus_path, gfp_path, _membrane_path, cellpose) in tqdm(path.iterrows(), total=len(path)):

        hoechst = np.asarray(Image.open(nucleus_path))
        cellmask = np.asarray(Image.open(cellpose))
        gfp = np.asarray(Image.open(gfp_path))

        # Pixels belonging to any cell (label 0 is treated as background).
        inside = cellmask != 0
        cellid = cellmask[inside]

        # Labels actually present in this mask. Indexing the bincount results
        # by these labels keeps every per-cell array the same length as
        # `labels`, even when label numbers are not contiguous, and avoids
        # 0/0 means for labels that do not occur.
        labels = np.unique(cellid)
        count = np.bincount(cellid)[labels]
        gfp_sums = np.bincount(cellid, weights=gfp[inside])[labels]
        hoe_sums = np.bincount(cellid, weights=hoechst[inside])[labels]

        # Well and tile are parsed from the underscore-separated GFP image path;
        # the tile token is expected to carry a 4-character prefix followed by
        # a 1-based number, which is converted to a 0-based tile index.
        tokens = gfp_path.split('_')
        well = tokens[-4]
        tile = tokens[-2]
        pheno_tile = int(tile[4:]) - 1

        # Scalar entries are broadcast to the length of the per-cell arrays.
        tile_frames.append(pd.DataFrame({
            'well': well,
            'tile': tile,
            'pheno_tile': pheno_tile,
            'cell_no': labels,
            'gfp_sums': gfp_sums,
            'gfp_mean': gfp_sums / count,
            'hoechst_sums': hoe_sums,
            'hoechst_mean': hoe_sums / count,
        }))

    df = pd.concat(tile_frames)
    df.pheno_tile = df.pheno_tile.astype(int)
    df.to_csv(base + f'{fixation}_pheno_data.csv', sep='\t', header=True, index=False)

### Merge phenotype with in situ cell assignment from JSB tools

In [None]:
# For every condition: merge the per-cell phenotype table with the in-situ cell
# assignments and the nuclear barcode calls, then compute sensitivity and
# specificity of barcode detection for two groups of phenotype tiles.


def _percent(numerator, denominator):
    """Return numerator / denominator in percent, or NaN when the denominator is 0."""
    return numerator / denominator * 100 if denominator else float('nan')


condition_frames = []

for cond in ['PFA_normal', 'methanol_normal', 'PFA_scrambled', 'methanol_scrambled']:

    fixation = cond.split('_')[0]
    # `sep` must be passed by keyword: recent pandas versions accept only the
    # path as a positional argument.
    pheno_ = pd.read_csv(base + f'{fixation}_pheno_data.csv', sep='\t')

    # Load corresponding Cell assignments and NIS barcodes
    ass = pd.read_csv(f'data/{fixation}_CellAssignments.txt', sep='\t')[['pheno_tile', 'cell.1', 'in-situ_tile', 'cell']]
    seq = pd.read_csv(f'data/{cond}_NuclearSequences.txt', sep='\t')

    # Merge phenotype table and cell assignments and NIS barcodes.
    # Left joins keep every phenotype cell; cells without an assigned nucleus
    # or without a barcode call end up with NaN in `sequence`.
    merged = pheno_.merge(ass, how='left', left_on=['pheno_tile', 'cell_no'], right_on=['pheno_tile', 'cell.1'])
    merged_seq = merged.merge(seq, how='left', left_on=['in-situ_tile', 'cell'], right_on=['tile', 'nucleus'])
    # `tile_x` is the phenotype table's `tile` column (suffixed because the
    # sequence table also has a `tile` column); keep one row per phenotype cell.
    merged_seq = merged_seq.drop_duplicates(['tile_x', 'cell_no'])

    # Kill cells with no nucleus / low nucleus signal
    final_cells = merged_seq[merged_seq.hoechst_mean > 125].reset_index(drop=True)

    # Gate cells into GFP positive vs GFP negative cells based on linear function
    final_cells['gfp_neg'] = gate(final_cells.gfp_mean, final_cells.hoechst_mean)

    has_barcode = final_cells.sequence.notna()
    gfp_neg = final_cells.gfp_neg.astype(bool)

    # Two groups of phenotype tiles, split at tile index 98
    # (NOTE(review): presumably two wells/replicates - confirm).
    tile_groups = [final_cells.pheno_tile < 98, final_cells.pheno_tile >= 98]

    rows = []
    for in_group in tile_groups:
        # Combined masks instead of chained boolean indexing, so no implicit
        # index realignment is needed.
        n_neg = int((in_group & gfp_neg).sum())
        n_pos = int((in_group & ~gfp_neg).sum())
        n_neg_with_barcode = int((in_group & gfp_neg & has_barcode).sum())
        n_pos_without_barcode = int((in_group & ~gfp_neg & ~has_barcode).sum())

        rows.append({
            'cond': cond,
            'cell_number': int(in_group.sum()),
            'cells_mapped': int((in_group & has_barcode).sum()),
            # Sensitivity: share of GFP-negative cells that carry a barcode call.
            'sens': _percent(n_neg_with_barcode, n_neg),
            # Specificity: share of GFP-positive cells without a barcode call.
            'spec': _percent(n_pos_without_barcode, n_pos),
        })

    condition_frames.append(pd.DataFrame(rows, columns=['cond', 'cell_number', 'cells_mapped', 'sens', 'spec']))

# DataFrame.append is no longer available in recent pandas; concatenate once.
# The per-condition index (0, 1) is kept, as appending without ignore_index did.
dfres = pd.concat(condition_frames)

In [None]:
# Write the per-condition results as a tab-separated table (index included).
# `sep` is passed by keyword because newer pandas versions no longer accept it positionally.
dfres.to_csv(base + 'final_figF.tsv', sep='\t')

### Reformat data for final figure

In [None]:
# Reshape the results into long format: for every row of `dfres` emit one
# [metric name, value, condition] entry for specificity, then one for sensitivity.
res = [
    [metric, row[column], row['cond']]
    for _, row in dfres.iterrows()
    for metric, column in (('Specificity', 'spec'), ('Sensitivity', 'sens'))
]
# No column names are given, so the columns are labelled 0, 1 and 2.
df = pd.DataFrame(res)

### Generate final figure

In [None]:
# Final figure F: bar plot of the values in column 1 grouped by the metric name
# in column 0, with one panel per distinct value of column 2.
g = sns.catplot(
    data=df,
    kind="bar",
    x=0,
    y=1,
    col=2,
    palette="dark",
    alpha=0.6,
    height=7,
    aspect=5 / 10,
)
g.set_axis_labels('', '%')
g.despine(left=True)

# Save the figure as SVG.
plt.savefig('data/figF/figureF_AH.svg')