<a href="https://colab.research.google.com/github/nissimlab/scRNA-seq-fixed-pancreas/blob/main/supplementary_tables/SupplementaryTable4/CIP_all_DEGs_annotated_fine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Supplementary Table 4: DEGs annotated by timepoint
# All cells (by cell type)

# Loading packages

In [None]:
# Colab only: mount Google Drive at /content/drive so files stored there can be read.
from google.colab import drive
drive.mount('/content/drive',force_remount=True)

Mounted at /content/drive


In [None]:
!pip install scanpy anndata leidenalg watermark pandas==1.5.3 rpy2==3.4.5 umap-learn==0.5.3 plotnine==0.8.0 panel anndata2ri

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import scanpy as sc
import numpy as np
import re
import os
import pandas as pd
import seaborn as sb
import matplotlib
import scipy.sparse as sp
from scipy.sparse import issparse
import anndata2ri

import matplotlib as mpl
from scipy import stats as scistats
import matplotlib.pyplot as pl
import scanpy.external as sce
from matplotlib import colors
from pathlib import Path
from ipywidgets import interactive

sc._settings.ScanpyConfig.n_jobs=8

pl.rcParams['pdf.fonttype'] = 'truetype'
sc.set_figure_params(vector_friendly=False,dpi_save=300,transparent=True)
pl.rcParams['lines.linewidth'] = 0.1
sc.set_figure_params(color_map='viridis')
colorsComb = np.vstack([pl.cm.Reds(np.linspace(0, 1, 10)), pl.cm.Greys_r(np.linspace(0.7, 0.8, 0))])
mymap = colors.LinearSegmentedColormap.from_list('my_colormap', colorsComb)


%matplotlib inline
import panel as pn
pn.extension( comms='colab')


# Set up the working directory

In [None]:
import os

# Root folder of the dataset on the mounted Drive.
folder = '/content/drive/MyDrive/CIP_v3_data/CIP_all/FullDataset/'

# Run-level constants.
samplename = 'CIP_all_v3'
random_state = 42

# Make the dataset root the working directory so relative paths resolve against it.
os.chdir(folder)


In [None]:
# Load the dataset from an .h5ad file; the path is relative to the current
# working directory.
adata=sc.read('datasets/CIP_all_v3_no_W6_8__filteredNormalized_annotated_2023-02-14.h5ad')

In [None]:
# Display a summary of the loaded object.
adata

AnnData object with n_obs × n_vars = 68378 × 21686
    obs: 'sample', 'donor', 'date', 'n_counts', 'log_counts', 'n_genes', 'mt_frac', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mito', 'log1p_total_counts_mito', 'pct_counts_mito', 'scDblFinder_class', 'doublet_score', 'predicted_doublet', 'batch', 'total_counts_mt', 'pct_counts_mt', 'total_counts_rb', 'pct_counts_rb', 'QC', 'outlier', 'S_score', 'G2M_score', 'phase', 'leiden_r0.8', 'leiden_r0.5', 'leiden_r0.4', 'ADM', 'Acinar', 'Ductal', 'annotated', 'annotated_fine', 'annotated_ADM', 'annotated_immunegrouped', 'annotated_ADM2'
    var: 'feature_type', 'id', 'mito', 'n_cells', 'mt', 'rb', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'triku_distance', 'triku_distance_uncorrected', 'triku_highly_variable'
    u

### Rank Marker genes

In [None]:
#method : {‘logreg’, ‘t-test’, ‘wilcoxon’, ‘t-test_overestim_var’} | None (default: None)
# Rank all genes (n_genes = number of variables in adata) per 'annotated_fine'
# group with the Wilcoxon method on the 'PFlog1pPF_normalization' layer, using
# Bonferroni correction. Results are stored under
# adata.uns['wilcoxon_rank_genes_groups'].
sc.tl.rank_genes_groups(adata, "annotated_fine", method='wilcoxon',n_genes=adata.shape[1],layer='PFlog1pPF_normalization', rankby_abs=False,corr_method='bonferroni',key_added='wilcoxon_rank_genes_groups' )
# Table of the first 100 ranked gene names, one column per group.
wilcoxon=pd.DataFrame(adata.uns['wilcoxon_rank_genes_groups']['names']).head(100)
wilcoxon

Unnamed: 0,Acinar,ADM early,ADM late,Macrophage,Fibroblast,Fibroblast_activated,Endothelial,Neutrophil,Ductal,T / NK,B,cDC,Cycling Macrophage,Pericyte,Endocrine,Plasmacytoid DC,Mesothelial,Schwann
0,Pnlip,Gm2663,Gm5771,Lyz2,Dcn,Timp1,Fabp4,Csf3r,Spp1,Skap1,Bank1,H2-Eb1,Mki67,Cald1,Ptprn2,Siglech,Gpm6a,Prnp
1,Prss2,Clu,Prss1,Lgmn,Col3a1,Lox,Flt1,Srgn,Pkhd1,Itk,Cd79a,Cd74,Birc5,Notch3,Pcsk2,Mctp2,Upk3b,Scn7a
2,Cela1,Reg3b,Prss3,Csf1r,Gsn,Tpm2,Ptprb,S100a8,Atp1b1,Ptprc,Igkc,H2-Aa,Stmn1,Prkg1,Ccser1,Runx2,Cav1,Cdh19
3,2210010C04Rik,1810009J06Rik,Gm2663,Adgre1,Col1a2,Ckap4,Egfl7,Tyrobp,Magi1,Ets1,Ighm,H2-Ab1,Top2a,Myl9,Chga,Irf8,Pkhd1l1,Csmd1
4,Sycn,Cmss1,Try10,Ctss,Serping1,Fkbp10,Emcn,S100a9,Tm4sf4,Arhgap15,Cd74,Lsp1,Cenpf,Mfge8,Shroom3,Rnase6,Sox6,Slc35f1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Ppib,Ctrl,Psmb10,Dleu2,Rhoj,Fbn2,Etl4,Elmo1,Msi2,Cnot6l,mt-Atp6,H3f3a,Hist1h2ap,Gja4,Slc22a23,Fgfr1op2,Podxl,C4b
96,Gm10076,Try4,Rps21,Cd300c2,Ar,Fzd1,Shank3,Gcnt2,Hnf1b,Nsd3,Pou2af1,Taok3,Ckap2,Nedd4,Map2,Klra17,Agap1,Scd2
97,Tmem97,Rpl35,Rps3,Cyba,Pcdh7,Slc25a4,Jam2,Pygl,Pigr,P2ry10,Eml4,Lcp1,Arhgap11a,Rasal2,Snap25,Syngr2,Col18a1,Cadm4
98,Nucb2,Cela1,Aqp12,Cd53,Zeb1,Pknox2,Ebf1,H3f3b,Cystm1,Rps15a,Cd55,March1,Kif2c,Ptp4a3,Rph3al,Stk17b,Wt1os,Atp1a2


In [None]:
# Write the current `wilcoxon` table to a CSV under ./markers/, prefixed with samplename.
# NOTE(review): assumes the ./markers/ directory already exists - confirm.
savetable='./markers/'+samplename+ "_annotated_fine_wilcoxon_marker_genes_2023-05-22.csv"
wilcoxon.to_csv(savetable)

In [None]:
# Display the object summary again, after the ranking step.
adata

AnnData object with n_obs × n_vars = 68378 × 21686
    obs: 'sample', 'donor', 'date', 'n_counts', 'log_counts', 'n_genes', 'mt_frac', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mito', 'log1p_total_counts_mito', 'pct_counts_mito', 'scDblFinder_class', 'doublet_score', 'predicted_doublet', 'batch', 'total_counts_mt', 'pct_counts_mt', 'total_counts_rb', 'pct_counts_rb', 'QC', 'outlier', 'S_score', 'G2M_score', 'phase', 'leiden_r0.8', 'leiden_r0.5', 'leiden_r0.4', 'ADM', 'Acinar', 'Ductal', 'annotated', 'annotated_fine', 'annotated_ADM', 'annotated_immunegrouped', 'annotated_ADM2'
    var: 'feature_type', 'id', 'mito', 'n_cells', 'mt', 'rb', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'triku_distance', 'triku_distance_uncorrected', 'triku_highly_variable'
    u

In [None]:
# First 100 rows of the 'pvals' field of the ranking result, one column per group.
# NOTE(review): this rebinds `wilcoxon`, which until now held the gene-name
# table; re-running the CSV export cell after this one would write p-values
# under the marker-gene file name.
wilcoxon=pd.DataFrame(adata.uns['wilcoxon_rank_genes_groups']['pvals']).head(100)
wilcoxon

Unnamed: 0,Acinar,ADM early,ADM late,Macrophage,Fibroblast,Fibroblast_activated,Endothelial,Neutrophil,Ductal,T / NK,B,cDC,Cycling Macrophage,Pericyte,Endocrine,Plasmacytoid DC,Mesothelial,Schwann
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.0,0.000000e+00,0.000000e+00,5.861576e-73,5.495889e-105,1.468811e-111,4.436218e-49
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.0,0.000000e+00,3.125372e-263,5.617193e-69,1.953665e-101,8.055224e-106,6.386932e-47
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.0,0.000000e+00,4.141061e-257,1.314016e-68,1.440319e-100,4.734926e-100,2.180774e-46
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.0,0.000000e+00,3.327960e-256,4.129728e-66,1.369394e-99,1.182897e-99,2.604742e-46
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.0,0.000000e+00,1.990239e-255,2.365095e-65,4.585363e-98,1.940064e-99,5.535773e-44
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.450874e-157,1.672964e-198,0.0,8.749364e-160,8.467684e-105,6.967323e-34,2.173165e-50,1.173267e-57,8.656422e-18
96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.505638e-156,8.037064e-198,0.0,1.121623e-159,4.023633e-103,1.481058e-33,3.619981e-50,1.265477e-57,9.019640e-18
97,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.124697e-154,8.800418e-197,0.0,2.117392e-158,9.155127e-103,1.598911e-32,6.199350e-50,3.183740e-57,1.741656e-17
98,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.511750e-154,3.490167e-194,0.0,3.824646e-158,2.204439e-102,1.766171e-32,9.763880e-50,3.673574e-57,2.322378e-17


## DEG table for all cells

In [None]:
# Display the stored ranking result (its 'params' plus the per-group arrays).
adata.uns['wilcoxon_rank_genes_groups']

{'params': {'groupby': 'annotated_fine',
  'reference': 'rest',
  'method': 'wilcoxon',
  'use_raw': False,
  'layer': 'PFlog1pPF_normalization',
  'corr_method': 'bonferroni'},
 'names': rec.array([('Pnlip', 'Gm2663', 'Gm5771', 'Lyz2', 'Dcn', 'Timp1', 'Fabp4', 'Csf3r', 'Spp1', 'Skap1', 'Bank1', 'H2-Eb1', 'Mki67', 'Cald1', 'Ptprn2', 'Siglech', 'Gpm6a', 'Prnp'),
            ('Prss2', 'Clu', 'Prss1', 'Lgmn', 'Col3a1', 'Lox', 'Flt1', 'Srgn', 'Pkhd1', 'Itk', 'Cd79a', 'Cd74', 'Birc5', 'Notch3', 'Pcsk2', 'Mctp2', 'Upk3b', 'Scn7a'),
            ('Cela1', 'Reg3b', 'Prss3', 'Csf1r', 'Gsn', 'Tpm2', 'Ptprb', 'S100a8', 'Atp1b1', 'Ptprc', 'Igkc', 'H2-Aa', 'Stmn1', 'Prkg1', 'Ccser1', 'Runx2', 'Cav1', 'Cdh19'),
            ...,
            ('Ahnak', 'Vim', 'Mbnl1', 'Igfbp7', 'Ucp2', 'Tpt1', 'Cd63', 'Rps20', 'Emp3', 'Cst3', 'Cd63', 'Selenop', 'Nedd4', 'Rps12', 'Rpl32', 'Ifitm3', 'Fau', 'Rps12'),
            ('H2-D1', 'Psap', 'Ahnak', 'Sparc', 'Sec11c', 'Ftl1', 'Lgals1', 'Rps4x', 'Zeb2', 'Ifitm2', 'Cts

In [None]:
#Function without pct1 and pct2

def make_deg_table(adata_degs, filename, n_top=100):
    """Write the top-ranked genes of every group to a long-format CSV.

    Parameters
    ----------
    adata_degs : mapping
        A ranking result such as ``adata.uns['wilcoxon_rank_genes_groups']``.
        Its 'names', 'logfoldchanges', 'pvals_adj' and 'scores' entries must be
        structured arrays with one field per group.
    filename : str or path-like
        Destination of the CSV file.
    n_top : int, default 100
        Maximum number of leading rows exported per group. A group with fewer
        ranked genes contributes all of them instead of raising.

    Returns
    -------
    None
        The table is written to ``filename`` with the columns
        "Cell type", "Gene", "Log fold change", "Adjusted p-val" and "Score".
    """
    cell_types = adata_degs['names'].dtype.names

    rows = []
    for cell_type in cell_types:
        # Slice the per-group fields directly; this avoids materialising a
        # DataFrame of every gene for every group on each iteration.
        top_entries = zip(
            adata_degs['names'][cell_type][:n_top],
            adata_degs['logfoldchanges'][cell_type][:n_top],
            adata_degs['pvals_adj'][cell_type][:n_top],
            adata_degs['scores'][cell_type][:n_top],
        )
        # zip stops at the shortest field, so short groups are handled safely.
        rows.extend(
            [cell_type, gene, log_fold_change, adjusted_pval, score]
            for gene, log_fold_change, adjusted_pval, score in top_entries
        )

    df = pd.DataFrame(rows, columns=["Cell type", "Gene", "Log fold change", "Adjusted p-val", 'Score'])
    df.to_csv(filename, index=False)


In [None]:
# Export the per-group DEG table built from the stored ranking result.
make_deg_table(adata.uns['wilcoxon_rank_genes_groups'], filename= 'deg_table_test2.csv')