In [None]:
import rpy2.robjects as robjects
import seaborn as sns
import statsmodels.api as sm
import matplotlib.pyplot as plt
import scanpy as sc
import pandas as pd
import numpy as np
import os
import sys
# Make the project's helper module importable before the wildcard import below.
sys.path.append('/home/qiuaodon/Desktop/PanCancer_scRNA_analysis/utils/')
# NOTE(review): wildcard import; scRNA2PseudoBulkAnnData (used in a later cell)
# presumably comes from here -- confirm and consider importing it by name.
from scRNA_utils import *
# Base directory for the data files; later cells build paths by plain string
# concatenation, so the trailing slash is required.
data_dir = '/home/qiuaodon/Desktop/'

In [None]:
# Read the six per-cell-type AnnData files, in the same order as the names
# on the left-hand side (T, B, M, Epi, Endo, Fibro).
adata_T, adata_B, adata_M, adata_Epi, adata_Endo, adata_Fibro = (
    sc.read(data_dir + relative_path)
    for relative_path in (
        'project_data_new/1863-counts_cells_cohort1_T_cells.h5ad',
        'project_data_new/1863-counts_cells_cohort1_B_cells.h5ad',
        'project_data_new/1863-counts_cells_cohort1_M_cells.h5ad',
        'project_data_new/1863-counts_cells_cohort1_Epi_cells.h5ad',
        'project_data_new/1863-counts_cells_cohort1_Endo_cells.h5ad',
        'project_data_new/1863-counts_cells_cohort1_Fibro_cells.h5ad',
    )
)

In [None]:
# Build one pseudobulk object per cell type, grouping cells by 'sample_id'.
pseudo_T, pseudo_B, pseudo_M, pseudo_Epi, pseudo_Endo, pseudo_Fibro = (
    scRNA2PseudoBulkAnnData(cell_type_adata, sample_id_col='sample_id')
    for cell_type_adata in (
        adata_T,
        adata_B,
        adata_M,
        adata_Epi,
        adata_Endo,
        adata_Fibro,
    )
)

In [None]:
# Drop every pseudobulk row whose patient_id is BIOKEY_22 or BIOKEY_28,
# applying the same mask expression to each cell type in turn.
pseudo_T, pseudo_B, pseudo_M, pseudo_Epi, pseudo_Endo, pseudo_Fibro = (
    pseudobulk[~pseudobulk.obs['patient_id'].isin(['BIOKEY_22', 'BIOKEY_28'])]
    for pseudobulk in (
        pseudo_T,
        pseudo_B,
        pseudo_M,
        pseudo_Epi,
        pseudo_Endo,
        pseudo_Fibro,
    )
)

In [None]:
# Load the DEG expression table; later cells read its 'sample_id' and
# 'treatment' columns plus one column per DEG name passed to plot_CIT_DEGcorr.
DEG_exp = pd.read_csv(data_dir + 'DEG-wholecelltype/DEG_exp.csv')

In [None]:
# Index the table by sample_id while keeping 'sample_id' available as a column,
# so rows can be aligned with the pseudobulk data by sample.
DEG_exp = DEG_exp.set_index('sample_id', drop=False)

In [None]:
# Display the table to check the sample_id index and the available columns.
DEG_exp

In [None]:
def _pseudobulk_gene_series(pseudo, gene):
    """Return the expression of ``gene`` in ``pseudo`` as a float Series indexed by sample_id."""
    values = pseudo[:, gene].X
    # .X may be a sparse matrix; densify it so it can be flattened.
    if hasattr(values, 'toarray'):
        values = values.toarray()
    values = np.asarray(values, dtype=float).reshape(-1)
    # Use plain values for the index so that a categorical obs column still
    # aligns with the sample_id index of DEG_exp.
    sample_ids = pd.Index(np.asarray(pseudo.obs['sample_id']), name='sample_id')
    return pd.Series(values, index=sample_ids, name=gene)


def _scatter_with_corr(x, y, treatment, x_label, y_label, title_prefix):
    """Scatter ``x`` against ``y`` coloured by treatment and title it with their correlation.

    All three inputs are Series indexed by sample_id. Returns the Pearson
    correlation computed on exactly the samples that are plotted.
    """
    # Building the frame from Series aligns the inputs on sample_id; dropna then
    # keeps only samples that have x, y and a treatment label.
    frame = pd.DataFrame({'x': x, 'y': y, 'treatment': treatment}).dropna()
    corr = frame['x'].corr(frame['y'])
    ax = sns.scatterplot(x='x', y='y', data=frame, hue='treatment')
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    plt.title(title_prefix + ' is ' + str(corr))
    plt.show()
    return corr


def plot_CIT_DEGcorr(g1, g2, L, R, DEG_exp, pseudo_1, pseudo_2):
    """Plot pairwise relationships between two DEGs and a ligand/receptor pair.

    Six scatter plots are shown, each coloured by treatment and titled with the
    Pearson correlation of the plotted points:
    g1 vs g2, g1 vs L, R vs g2, g1 vs the residual of g2 after regressing it on
    L*R, g1 vs L*R, and L*R vs g2.

    Parameters
    ----------
    g1, g2 : str
        Column names in ``DEG_exp``: the DEG of the first and of the second
        cell type.
    L, R : str
        Gene names of the ligand (looked up in ``pseudo_1``) and of the
        receptor (looked up in ``pseudo_2``).
    DEG_exp : pandas.DataFrame
        Table indexed by sample_id with the columns ``g1``, ``g2`` and
        ``'treatment'``.
    pseudo_1, pseudo_2 : AnnData-like
        Pseudobulk data; ``obs['sample_id']`` identifies each row.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If no sample has both a ``g2`` value and an ``L*R`` value, so the
        regression cannot be fitted.
    """
    g1_exp = DEG_exp[g1]
    g2_exp = DEG_exp[g2]
    treatment = DEG_exp['treatment']

    # Extract ligand / receptor expression as sample_id-indexed Series so every
    # later combination is aligned by sample instead of by row position.
    L_exp = _pseudobulk_gene_series(pseudo_1, L)
    R_exp = _pseudobulk_gene_series(pseudo_2, R)
    # Series multiplication aligns on sample_id; samples missing from either
    # pseudobulk object become NaN and are dropped before plotting / fitting.
    L_R = L_exp * R_exp
    lr_label = L + '*' + R

    # plot the scatterplot between DEG of g1 and g2
    _scatter_with_corr(g1_exp, g2_exp, treatment, g1, g2,
                       'Correlation between ' + g1 + ' and ' + g2)

    # plot the scatterplot between DEG of g1 and L
    _scatter_with_corr(g1_exp, L_exp, treatment, g1, L,
                       'Correlation between ' + g1 + ' and ' + L)

    # plot the scatterplot between R and DEG of g2
    _scatter_with_corr(R_exp, g2_exp, treatment, R, g2,
                       'Correlation between ' + R + ' and ' + g2)

    # plot the scatterplot between DEG of g1 and the residue of g2 ~ L*R
    fit_frame = pd.DataFrame({'g2': g2_exp, 'L_R': L_R}).dropna()
    if fit_frame.empty:
        raise ValueError(
            'No samples have both ' + g2 + ' and ' + lr_label + ' values; cannot fit the regression'
        )
    model = sm.OLS(fit_frame['g2'], sm.add_constant(fit_frame['L_R'])).fit()
    # With pandas inputs the residuals keep the sample_id index, so they can be
    # aligned with g1 and treatment.
    residue = model.resid
    _scatter_with_corr(g1_exp, residue, treatment, g1, 'residue of ' + g2,
                       'Correlation between ' + g1 + ' and residue of ' + g2)

    # plot the g1 and L*R
    _scatter_with_corr(g1_exp, L_R, treatment, g1, lr_label,
                       'Correlation between ' + g1 + ' and ' + lr_label)

    # plot the g2 and L*R (L*R on the x axis, g2 on the y axis)
    _scatter_with_corr(L_R, g2_exp, treatment, lr_label, g2,
                       'Correlation between ' + g2 + ' and ' + lr_label)


In [None]:

# DEG pair PMAIP1_T / TNFSF8_B with ligand TNFSF10 (from pseudo_T) and receptor TNFRSF10D (from pseudo_B).
plot_CIT_DEGcorr(
    g1='PMAIP1_T',
    g2='TNFSF8_B',
    L='TNFSF10',
    R='TNFRSF10D',
    DEG_exp=DEG_exp,
    pseudo_1=pseudo_T,
    pseudo_2=pseudo_B,
)

In [None]:
# DEG pair TXNIP_T / CLN8_M with ligand CD40LG (from pseudo_T) and receptor CD40 (from pseudo_M).
plot_CIT_DEGcorr(
    g1='TXNIP_T',
    g2='CLN8_M',
    L='CD40LG',
    R='CD40',
    DEG_exp=DEG_exp,
    pseudo_1=pseudo_T,
    pseudo_2=pseudo_M,
)

In [None]:
# DEG pair FKBP5_T / AC025164.1_B with ligand FASLG (from pseudo_T) and receptor FAS (from pseudo_B).
plot_CIT_DEGcorr(
    g1='FKBP5_T',
    g2='AC025164.1_B',
    L='FASLG',
    R='FAS',
    DEG_exp=DEG_exp,
    pseudo_1=pseudo_T,
    pseudo_2=pseudo_B,
)

In [None]:
# Load the CIT_test_results_M workbook. The path is built from data_dir like
# the other loads in this notebook, so it follows data_dir if that is changed.
results_M = pd.read_excel(data_dir + 'project_data_new/data_IVtest_result/CIT_test_results_M.xlsx')


In [None]:
# Distinct values in the 'g1' column of the loaded CIT test results.
results_M['g1'].unique()

In [None]:
# Distinct values in the 'g2' column of the loaded CIT test results.
results_M['g2'].unique()