In [4]:
import scanpy as sc
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib as plt

In [5]:
fc_labels = ['log2FC_NE_NS', 'log2FC_NE_M', 
             'log2FC_NE_D', 'log2FC_NE_T',
             'log2FC_NS_M', 'log2FC_NS_D', 
             'log2FC_NS_T', 'log2FC_M_D', 
             'log2FC_M_T', 'log2FC_D_T']

In [6]:
degs = pd.read_csv('EsoAtlas_DEGs_filtered_by_score_5_16_22.csv', index_col=0)
obs  = pd.read_csv('EsoAtlas_adatas_june2021_final_obs_May15.csv', index_col=0) 
cnts = pd.read_csv('count_means_by_celltype_and_dx_v4.csv.gz', index_col=0)

In [7]:
cells = list(set(degs.celltype))

In [8]:
cells

['mast_cells',
 'monocytes_macs_DCs',
 'NK_cells',
 'stromal',
 'fibroblasts',
 'naive_T_cells',
 'cd8_Tcells',
 'myofibroblasts',
 'B_cells',
 'endothelial',
 'neuroendocrine',
 'cd4_Tcells',
 'squamous_epithelial',
 'gi_epithelial']

In [9]:
alldf = pd.DataFrame()

for comp in fc_labels:
    for celli in cells:
        cell_degs = degs.loc[degs.celltype == celli]
        degs_sorted = cell_degs.sort_values(by=comp, axis=0, ascending=False, inplace=False)

        glist_up = degs_sorted.gene[degs_sorted[comp] > 0.58].values
        glist_dn = degs_sorted.gene[degs_sorted[comp] < -0.58].values
        fc_up = degs_sorted.loc[degs_sorted[comp] > 0.58, comp].values
        fc_dn = degs_sorted.loc[degs_sorted[comp] < -0.58, comp].values

        g = sns.scatterplot(data=degs_sorted, x=range(0,len(degs_sorted)), y=comp)
        g.axhline(0.58, ls='--', linewidth=3, color='red')
        g.axhline(-0.58, ls='--', linewidth=3, color='red')
        g.set(title=celli+"_"+comp)
        g.figure.savefig(celli+"_"+comp+".png")
        plt.pyplot.close()
        
        df_up = pd.DataFrame({
            'Cell':[celli for xi in range(0,len(glist_up))],
            'Comparison':[comp for xi in range(0,len(glist_up))],
            'Direction':["up" for xi in range(0,len(glist_up))],
            'Gene':glist_up,
            'log2FC':fc_up
        })
        df_dn = pd.DataFrame({
            'Cell':[celli for xi in range(0,len(glist_dn))],
            'Comparison':[comp for xi in range(0,len(glist_dn))],
            'Direction':["down" for xi in range(0,len(glist_dn))],
            'Gene':glist_dn,
            'log2FC':fc_dn
        })
        df = df_up.append(df_dn)
        df.to_csv(celli+"_"+comp+".csv")

        alldf = alldf.append(df)

alldf.to_csv('EsoAtlas_comparison_sorted_degs_5_16_22.csv')

In [10]:
alldf_table = alldf.loc[:, ['Cell','Comparison','Direction']].value_counts()
alldf_table

Cell           Comparison    Direction
gi_epithelial  log2FC_NE_D   down         2481
               log2FC_NE_M   down         2461
               log2FC_NE_T   down         2336
               log2FC_NE_NS  down         2277
               log2FC_NS_T   down         2175
                                          ... 
NK_cells       log2FC_NS_T   up              6
cd4_Tcells     log2FC_M_D    up              6
NK_cells       log2FC_D_T    up              5
               log2FC_M_T    up              5
               log2FC_NE_T   up              5
Length: 280, dtype: int64

In [11]:
alldf

Unnamed: 0,Cell,Comparison,Direction,Gene,log2FC
0,mast_cells,log2FC_NE_NS,up,AKAP12,2.748948
1,mast_cells,log2FC_NE_NS,up,PRKAR2B,2.462319
2,mast_cells,log2FC_NE_NS,up,RAB33A,2.079008
3,mast_cells,log2FC_NE_NS,up,ASIC3,2.064015
4,mast_cells,log2FC_NE_NS,up,AFF2,2.037438
...,...,...,...,...,...
616,gi_epithelial,log2FC_D_T,down,MAGEA6,-7.386328
617,gi_epithelial,log2FC_D_T,down,MAGEA12,-7.996341
618,gi_epithelial,log2FC_D_T,down,TMEM271,-12.376194
619,gi_epithelial,log2FC_D_T,down,HSPE1P5,-14.360315
