In [12]:
import pertpy as pt
import scanpy as sc
import pandas as pd
import anndata as ad
import numpy as np
import os

from scipy.stats import pearsonr


In [3]:
dlg_bs = sc.read_h5ad("data/dialogue_bs.h5ad")

In [4]:
dlg_bs.obs['mcp_0']

Cell barcode
AAATGCCTCATCTGCC.Pre_P007_t   -0.035555
AATCCAGAGCGTTCCG.Pre_P007_t    0.250260
ACAGCCGAGCACCGCT.Pre_P007_t    0.396963
ACCGTAAAGCTAACTC.Pre_P007_t    0.717449
ACGAGCCTCGTAGGTT.Pre_P007_t    0.238999
                                 ...   
CGGGTCAGTTCCCGAG.Pre_P004_t   -1.225605
GCTTGAACAGGCTGAA.Pre_P004_t   -0.810994
GTCGGGTGTCCGAATT.Pre_P004_t   -1.314777
TCATTTGTCAATACCG.Pre_P004_t    0.031566
TCTCATATCCAAAGTC.Pre_P004_t    0.119286
Name: mcp_0, Length: 42550, dtype: float64

In [7]:
dlg_bs.obs.describe(include='all')

Unnamed: 0,Sample,Patient,Origin,Tissue,Efficacy,Group,Treatment,n_counts,Number of genes,Major celltype,...,mcp_0,mcp_1,mcp_2,mcp_3,mcp_4,mcp_5,mcp_6,mcp_7,mcp_8,mcp_9
count,42550,42550,42550,42550,42550,42550,42550,42550.0,42550.0,42550,...,42550.0,42550.0,42550.0,42550.0,42550.0,42550.0,42550.0,42550.0,42550.0,42550.0
unique,11,11,1,4,2,1,2,,2522.0,3,...,,,,,,,,,,
top,Pre_P019_t,P019,t,lymph_node,PR,Pre-treatment,Anti-PD-L1+Chemo,,643.0,T cell,...,,,,,,,,,,
freq,14183,14183,42550,23191,30712,42550,29436,,63.0,29994,...,,,,,,,,,,
mean,,,,,,,,3912.805664,,,...,4.775916e-17,-3.3398010000000005e-17,1.0153e-16,2.137473e-17,1.162251e-16,1.082096e-16,-4.5421300000000007e-17,-4.5421300000000007e-17,1.335921e-16,-5.610866000000001e-17
std,,,,,,,,5263.518234,,,...,2.026775,1.759048,2.759948,2.199334,2.05699,2.055027,1.872101,1.696025,2.016699,1.915154
min,,,,,,,,601.0,,,...,-26.34698,-9.96118,-19.64123,-11.34135,-12.95388,-15.89589,-12.91576,-8.504128,-9.944482,-11.14316
25%,,,,,,,,1549.0,,,...,-0.5129249,-1.111769,-1.87691,-1.400654,-1.25732,-1.376782,-1.063405,-0.980413,-1.16896,-1.071811
50%,,,,,,,,2329.0,,,...,0.2545488,-0.0913087,-0.08717495,-0.1566021,0.1053856,-0.007704338,0.03025702,-0.03139693,-0.1515533,0.07497903
75%,,,,,,,,3774.0,,,...,0.9556792,0.9905855,1.672548,1.271432,1.320954,1.423238,1.100339,0.9864914,0.9186977,1.195355


In [8]:
tBmem = pd.read_csv("R-implementation/R_dlg_output/cca_output/tBmem-CD27_cca_scores.csv", index_col=0)

In [9]:
dlg_bs[dlg_bs.obs['Cluster'] == 'tBmem-CD27'].obs['mcp_0']

Series([], Name: mcp_0, dtype: float64)

In [11]:
dlg_bs[tBmem.index.values].obs.mcp_0

Cell barcode
AAATGCCTCATCTGCC.Pre_P007_t   -0.035555
AATCCAGAGCGTTCCG.Pre_P007_t    0.250260
ACAGCCGAGCACCGCT.Pre_P007_t    0.396963
ACCGTAAAGCTAACTC.Pre_P007_t    0.717449
ACGAGCCTCGTAGGTT.Pre_P007_t    0.238999
                                 ...   
CTGATAGTCAGGCAAG.Pre_P004_t   -0.298519
CTTAACTCAGTAAGCG.Pre_P004_t    0.051517
GCACATATCTGTTTGT.Pre_P004_t   -2.962961
TCATTTGCACGCTTTC.Pre_P004_t   -0.036066
TCGTAGATCCCTAATT.Pre_P004_t   -0.463854
Name: mcp_0, Length: 7283, dtype: float64

In [25]:
pearsonr(tBmem['MCP1'].vaglues, dlg_bs[tBmem.index.values].obs.mcp_0.values)

PearsonRResult(statistic=0.8641040327275515, pvalue=0.0)

In [26]:
dlg_bs.obs['Cluster'].cat.categories

Index(['t_Bmem-CD27', 't_CD4_Tcm-LMNA', 't_CD4_Treg-FOXP3', 't_CD8_MAIT-KLRB1',
       't_CD8_Tem-GZMK', 't_CD8_Trm-ZNF683', 't_Tn-LEF1', 't_mono-FCN1',
       't_pB-IGHG1'],
      dtype='object')

In [25]:
# import all files in R-implementation/R_dlg_output/cca_output/ pandas data frames stored
# in a dictionary indexed by the file name (without the extension)

cca_scores = {}
for f in os.listdir("R-implementation/R_dlg_output/cca_output/"):
    if f.endswith(".csv"):
        cca_scores[f[:-15]] = pd.read_csv("R-implementation/R_dlg_output/cca_output/" + f, index_col=0)
        

In [75]:
ps = []
for ct in cca_scores:
    scores = cca_scores[ct]
    ps.append(pearsonr(scores['MCP1'].values, dlg_bs[scores.index.values].obs.mcp_0.values).statistic)
    ps.append(pearsonr(scores['MCP2'].values, dlg_bs[scores.index.values].obs.mcp_1.values).statistic)
    ps.append(pearsonr(scores['MCP3'].values, dlg_bs[scores.index.values].obs.mcp_2.values).statistic)
    ps.append(pearsonr(scores['MCP4'].values, dlg_bs[scores.index.values].obs.mcp_3.values).statistic)
    ps.append(pearsonr(scores['MCP5'].values, dlg_bs[scores.index.values].obs.mcp_4.values).statistic)
    ps.append(pearsonr(scores['MCP6'].values, dlg_bs[scores.index.values].obs.mcp_5.values).statistic)
    ps.append(pearsonr(scores['MCP7'].values, dlg_bs[scores.index.values].obs.mcp_6.values).statistic)
    ps.append(pearsonr(scores['MCP8'].values, dlg_bs[scores.index.values].obs.mcp_7.values).statistic)
    ps.append(pearsonr(scores['MCP9'].values, dlg_bs[scores.index.values].obs.mcp_8.values).statistic)
    ps.append(pearsonr(scores['MCP10'].values, dlg_bs[scores.index.values].obs.mcp_9.values).statistic)



In [80]:
np.mean(ps)

-0.016952694149120203