In [1]:
import os
import re
import anndata as ad
import numpy as np
import pandas as pd
import scanpy as sc
import scipy as sci
import seaborn as sns; sns.set(color_codes=True)
import matplotlib.pyplot as plt
%matplotlib inline
import scimap as sm

import tqdm as tqdm
from joblib import Parallel, delayed


IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html



In [2]:
# Folder that is scanned for per-dataset sub-directories.
root_dir = "../../outs/neighborhood_analysis"
# Keep only the directory entries whose name contains the substring 'reg'.
dat_dir = list(filter(lambda entry: 'reg' in entry, os.listdir(root_dir)))


In [50]:
def _gate_mask(adata, gate_df, marker, above=True):
    """Per-cell boolean mask comparing `marker` expression in `adata.X` with its gate.

    The threshold is the first value of row `marker` in `gate_df`. With
    `above=True` the mask is `expression > threshold`, otherwise it is
    `expression < threshold`.

    Raises:
        KeyError: if `marker` is missing from `gate_df` or from `adata.var_names`.
    """
    threshold = gate_df.loc[marker, :].values[0]
    column = np.asarray(adata.var_names == marker)
    if not column.any():
        raise KeyError(f"marker {marker!r} not found in adata.var_names")
    expression = adata.X[:, column]
    passed = expression > threshold if above else expression < threshold
    return np.asarray(passed).flatten()


def manual_gating(adata, gate_df):
    """Correct the kNN-predicted cell types with marker-threshold rules.

    Labels are written to ``adata.obs['new_pheno']`` in place; the predicted
    label is read from ``adata.obs['cell_type_pred_knn']`` and is never
    modified. The rules run in a fixed order because later rules read the
    labels written by earlier ones.

    Args:
        adata: AnnData-like object exposing a dense ``X`` (cells x markers),
            ``var_names`` and an ``obs`` DataFrame containing
            ``cell_type_pred_knn`` and ``new_pheno``.
        gate_df: DataFrame indexed by marker name whose first column holds the
            gate (threshold) for that marker, on the same scale as ``adata.X``.

    Returns:
        The same ``adata`` object, with ``obs['new_pheno']`` updated.

    Raises:
        KeyError: if a required marker is missing from ``gate_df`` or from
            ``adata.var_names``.
    """
    predicted = adata.obs['cell_type_pred_knn']

    def predicted_as(*labels):
        # Cells whose original kNN prediction is one of `labels`.
        return predicted.isin(labels).to_numpy()

    def currently(*labels):
        # Cells whose *current* corrected label is one of `labels`.
        return adata.obs['new_pheno'].isin(labels).to_numpy()

    def relabel(mask, label):
        adata.obs.loc[mask, 'new_pheno'] = label

    cd45_low = _gate_mask(adata, gate_df, "CD45", above=False)

    # STROMAL, FDC: predicted FDC cells default to Stromal; the CD45-low ones
    # keep a temporary "FDC" label until after the B-cell rule below.
    fdc_pred = predicted_as('FDC')
    relabel(fdc_pred, "Stromal")
    relabel(cd45_low & fdc_pred, "FDC")

    # STROMAL: CD45-low cells predicted as Endothelial.
    endo_pred = predicted_as('Endothelial')
    relabel(cd45_low & endo_pred, "Stromal")

    # ENDOTHELIAL: predicted Endothelial AND CD31-high AND CD34-high.
    # The CD34 mask must come from the CD34 comparison itself, otherwise the
    # rule degenerates to "CD31-high only".
    cd31_high = _gate_mask(adata, gate_df, "CD31")
    cd34_high = _gate_mask(adata, gate_df, "CD34")
    relabel(cd31_high & endo_pred & cd34_high, "Endothelial")

    # B-CELL AND B-CELL GERMINAL: germinal predictions are merged into B_Cell
    # (a separate germinal subtype is currently not assigned).
    relabel(predicted_as('B_Cell_germ'), "B_Cell")
    bcl2_high = _gate_mask(adata, gate_df, "BCL-2")
    cd20_high = _gate_mask(adata, gate_df, "CD20")
    # FDC was added to the candidate labels on 12/19.
    b_candidates = currently('cDC1', 'Stromal', 'unknown', 'FDC', 'B_Cell')
    relabel(b_candidates & (cd20_high | bcl2_high), "B_Cell")

    # 12/16: flip FDC to Stromal, after the B-cell rule so those cells are
    # not accidentally lost.
    relabel(currently('FDC'), 'Stromal')

    # MACROPHAGE: M2 / sinus histiocyte predictions, plus predicted
    # Neutrophils that are CD68-high or CD163-high.
    relabel(predicted_as('Macrophage_M2', 'Sinus_histiocyte'), "Macrophage")
    cd68_high = _gate_mask(adata, gate_df, "CD68")
    cd163_high = _gate_mask(adata, gate_df, "CD163")
    relabel((cd68_high | cd163_high) & predicted_as('Neutrophil'), "Macrophage")

    # PLASMA CELL, PROLIFERATING: reset to unknown, then keep CD138-high cells
    # as Plasma_Cell (a Ki67-based proliferating subtype is currently not
    # assigned).
    plasma_prolif_pred = predicted_as('Plasma_prolif')
    relabel(plasma_prolif_pred, "unknown")
    cd138_high = _gate_mask(adata, gate_df, "CD138")
    relabel(cd138_high & plasma_prolif_pred, "Plasma_Cell")

    # cDC1: reset predicted cDC1 to unknown, then require IRF8-high together
    # with CD1c-high or CLEC9A-high among B_Cell / unknown / Stromal cells.
    relabel(predicted_as('cDC1'), "unknown")
    clec9a_high = _gate_mask(adata, gate_df, "CLEC9A")
    irf8_high = _gate_mask(adata, gate_df, "IRF8")
    cd1c_high = _gate_mask(adata, gate_df, "CD1c")
    dc_candidates = currently('B_Cell', 'unknown', 'Stromal')
    relabel(dc_candidates & irf8_high & (cd1c_high | clec9a_high), "cDC1")

    # T cells: activated T-cell predictions are sent back to unknown.
    relabel(predicted_as('activated_T_Cell'), "unknown")

    return adata

In [5]:
def pheno_unknown(adata, gate_df, phenotype):
    """Re-phenotype the cells currently labelled "unknown" using scimap.

    The "unknown" cells are copied out, rescaled with ``sm.pp.rescale`` using
    the supplied gates (image id column ``'orig.ident'``), phenotyped with
    ``sm.tl.phenotype_cells`` and the resulting ``'phenotype'`` labels are
    written back into ``adata.obs['new_pheno']`` for exactly those cells.

    Args:
        adata: AnnData object whose ``obs`` has a ``new_pheno`` column.
        gate_df: gate table passed through to ``sm.pp.rescale`` as ``gate``.
        phenotype: phenotype workflow table passed through to
            ``sm.tl.phenotype_cells``.

    Returns:
        The same ``adata`` object, with ``obs['new_pheno']`` updated. If no
        cell is labelled "unknown" it is returned untouched.
    """
    unknown_mask = adata.obs['new_pheno'] == "unknown"
    # Nothing to re-phenotype: skip scimap rather than feeding it an empty subset.
    if not unknown_mask.any():
        return adata
    # Work on an explicit copy so scimap modifies a real object, not a view of `adata`.
    adata_unknown = adata[unknown_mask, :].copy()
    adata_unknown = sm.pp.rescale(adata_unknown, gate=gate_df, imageid='orig.ident')
    adata_unknown = sm.tl.phenotype_cells(adata_unknown, phenotype=phenotype, label="phenotype")
    # The assignment aligns on the obs index, so each cell receives its own label.
    adata.obs.loc[unknown_mask, "new_pheno"] = adata_unknown.obs['phenotype']
    return adata

In [105]:
# Loaded once at module level; gate_cell_type passes this table to pheno_unknown,
# which forwards it to sm.tl.phenotype_cells as the `phenotype` argument.
phenotype = pd.read_csv('unknown_pheno_matrix.csv')
def gate_cell_type(d):
    """Gate one dataset directory and write the corrected annotations to disk.

    Reads ``dat.h5ad`` from ``root_dir/d``, replaces ``X`` with the log1p of
    the densified raw matrix, applies ``manual_gating`` and ``pheno_unknown``,
    merges a few labels, then writes the ``obs`` table as CSV under
    ``../forQuPath/Data/d`` and the AnnData as ``gated_dat.h5ad`` next to the
    input file.

    Args:
        d: name of the sub-directory (relative to ``root_dir``) to process.

    Returns:
        None. Results are only written to disk.
    """
    print(d)
    gate_csv = 'cohort1_gates_orig_v2.csv'
    input_dir = os.path.join(root_dir, d)

    adata = sc.read_h5ad(os.path.join(input_dir, 'dat.h5ad'))

    # processing the data: dense raw values go into X, are snapshotted into
    # .raw, and only then is X log1p-transformed.
    adata.X = adata.raw.X.toarray()
    adata.raw = adata
    adata.X = np.log1p(adata.X)

    # doing the manual corrections, starting from the kNN prediction as strings
    adata.obs['new_pheno'] = adata.obs['cell_type_pred_knn'].astype(str)
    gates_by_marker = pd.read_csv(gate_csv, header=None, index_col=0, names=['gate'])
    adata = manual_gating(adata, gates_by_marker)

    # assigning the unknowns; the same gate file is re-read without an index
    # or column names for pheno_unknown
    gates_plain = pd.read_csv(gate_csv, header=None)
    adata = pheno_unknown(adata, gates_plain, phenotype)

    # reassigning B cell, germ. and FDC, and accepting likely-cDC1
    label_merges = (
        ('FDC', 'Stromal'),
        ('B_Cell_germ', 'B_Cell'),
        ('likely-cDC1', 'cDC1'),
    )
    for old_label, new_label in label_merges:
        adata.obs.loc[adata.obs['new_pheno'] == old_label, 'new_pheno'] = new_label

    annotation_csv = os.path.join('../forQuPath/Data', d, 'annotation_df_v3.csv')
    adata.obs.to_csv(annotation_csv)
    adata.write(os.path.join(input_dir, 'gated_dat.h5ad'))
    return None