# Human colon cancer data analysis
Python analysis using scanpy.


## Setup

In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt
import scanorama
import scipy
import os
import anndata as anndata
import scvelo as scv
#import scanpy_cluster_proportions
from pathlib import Path
import gzip, shutil

In [None]:
import matplotlib as mpl
# Matplotlib overrides: disable LaTeX text rendering and set the SVG font
# type to 'none'.
new_rc_params = {
    'text.usetex': False,
    "svg.fonttype": 'none',
}
mpl.rcParams.update(new_rc_params)

In [None]:
# Output directory; also used as scanpy's figure directory below.
path_to_results = '../results/'

# Global scanpy configuration.
sc.settings.verbosity = 3  # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
sc.settings.set_figure_params(
    dpi=100,
    facecolor='white',
    figsize=(4, 4),
    dpi_save=300,
    frameon=False,
)
sc.settings.figdir = path_to_results

# Plotting constants (not referenced by the cells visible in this section).
umap_point_size = 10
umap_transparency = 0.3
umap_continuous_point_size = 50
umap_continuous_transparency = 0.7
aspect_ratio = 1
save_figure = False

# Data import
Read the annotated dataset, which is stored as an AnnData `.h5ad` file.

In [None]:
# Input / output locations, all relative to the notebook's working directory.
data_path = Path('../data/raw/')
intermediate_data_path = Path('../data/intermediate')
# Create the intermediate folder (and any missing parents) if it is absent.
intermediate_data_path.mkdir(parents=True, exist_ok=True)

results_file = '../data/intermediate/preprocessed.h5ad'
# Kept as a plain string, not a Path object.
annotated_tpm_file = str(intermediate_data_path / 'tpm.h5ad')

In [None]:
# Load the annotated dataset.
# Downloaded from https://doi.org/10.6084/m9.figshare.25323397
# Article: https://www.nature.com/articles/s43018-024-00807-z
adata = sc.read(data_path / 'adata_AllAnnotated.h5ad')

In [None]:
# List the distinct sample identifiers present in the dataset.
adata.obs['Sample'].unique()

In [None]:
# Display the AnnData object's summary.
adata

In [None]:
# Display the variable names of the dataset.
adata.var_names

In [None]:
# UMAP coloured by the 'SubCluster' annotation.
sc.pl.umap(
    adata,
    color=['SubCluster'],
)

In [None]:
# UMAP coloured by the 'ParentalCluster' annotation.
sc.pl.umap(
    adata,
    color=['ParentalCluster'],
)

In [None]:
# UMAP coloured by the 'GrandparentalCluster' annotation.
sc.pl.umap(
    adata,
    color=['GrandparentalCluster'],
)

In [None]:
# One UMAP panel per metadata / QC field.
sc.pl.umap(
    adata,
    color=[
        'Dataset',
        'Class',
        'MMRstatus',
        'Position',
        'Site',
        'Grade',
        'nCount_RNA',
        'nFeature_RNA',
    ],
)

# Select Mono/Macro cells

In [None]:
# Keep only the cells whose ParentalCluster label is 'Mono/Macro'.
is_mono_macro = adata.obs['ParentalCluster'] == 'Mono/Macro'
ad_mac = adata[is_mono_macro]

In [None]:
# Display the summary of the Mono/Macro subset.
ad_mac

# Draw one violin plot per Class category: score distribution grouped by
# CXCL9/SPP1 label, titled with the category.
# NOTE(review): this reads obs columns 'KOvsWT_up_minus_down_score' and
# 'CXCL9_SPP1' from ad_mac, but in the visible code both are only written
# to ad_mac_tumor, and only further down. As placed, this snippet looks
# like it cannot run top-to-bottom -- confirm whether it is an inactive
# cell or needs the columns computed on ad_mac first.
for class_type in ad_mac.obs.Class.cat.categories:
    ax = sc.pl.violin(ad_mac[ad_mac.obs.Class == class_type], keys='KOvsWT_up_minus_down_score', groupby='CXCL9_SPP1', rotation = 45, show=False)
    ax.set_title(class_type)
    plt.show()

In [None]:
# Keep only the cells with Class == 'T'.
# Subsetting an AnnData object returns a view; the cells below write to this
# object (score_genes results and new obs columns), so take an explicit copy
# to get an independent object instead of relying on anndata's implicit
# view-to-copy conversion (which emits an ImplicitModificationWarning).
ad_mac_tumor = ad_mac[ad_mac.obs.Class == 'T'].copy()

In [None]:
# Number of distinct samples in the tumor subset
# (a missing value, if any, counts as one sample).
ad_mac_tumor.obs['Sample'].nunique(dropna=False)

In [None]:
# Load the KO-over-WT down- and up-regulated gene lists.
database_path = Path('../../../shared/databases/')
numof_genes = 150  # used as the prefix of the CSV file names

# Take the 'gene_name' column of each CSV and upper-case the symbols.
KOvsWT_down_genelist = pd.read_csv(
    database_path / f'{numof_genes}_KO--over--WT_Down-Regulated.csv',
    header=0,
)['gene_name'].str.upper()
KOvsWT_up_genelist = pd.read_csv(
    database_path / f'{numof_genes}_KO--over--WT_Up-Regulated.csv',
    header=0,
)['gene_name'].str.upper()

In [None]:
# Score each tumor macrophage for the KO-vs-WT down-regulated gene list;
# the result is read back later from obs['KOvsWT_down_score'].
sc.tl.score_genes(ad_mac_tumor, gene_list=KOvsWT_down_genelist, score_name='KOvsWT_down_score')

In [None]:
# Score each tumor macrophage for the KO-vs-WT up-regulated gene list;
# the result is read back later from obs['KOvsWT_up_score'].
sc.tl.score_genes(ad_mac_tumor, gene_list=KOvsWT_up_genelist, score_name='KOvsWT_up_score')

In [None]:
# Combined per-cell score: up-regulated score minus down-regulated score.
ad_mac_tumor.obs['KOvsWT_up_minus_down_score'] = (
    ad_mac_tumor.obs['KOvsWT_up_score']
    .sub(ad_mac_tumor.obs['KOvsWT_down_score'])
)

In [None]:
# Per-cell values of SPP1 and CXCL9 as a DataFrame (one column per key).
df = sc.get.obs_df(
    ad_mac_tumor,
    keys=['SPP1', 'CXCL9'],
)

In [None]:
# Label every cell by which of CXCL9 / SPP1 it expresses (value > 0).
# The conditions are evaluated in order and paired one-to-one with `results`.
conditions = [
    (df['CXCL9'] > 0) & (df['SPP1'] > 0),
    (df['CXCL9'] > 0) & (df['SPP1'] == 0),
    (df['CXCL9'] == 0) & (df['SPP1'] > 0),
    (df['CXCL9'] == 0) & (df['SPP1'] == 0),
]

# Label assigned for each condition above.
results = ['CXCL9 & SPP1', 'CXCL9', 'SPP1', 'none']

# np.select's implicit default is the integer 0, which does not share a
# dtype with the string labels: older NumPy silently turns it into a stray
# '0' label, and recent NumPy is expected to raise a TypeError. Pass an
# explicit string default instead.
# NOTE(review): a cell matching none of the conditions (negative or NaN
# expression) is labelled 'none' -- confirm that is the intended fallback.
ad_mac_tumor.obs['CXCL9_SPP1'] = np.select(conditions, results, default='none')

In [None]:
# Hex colour for each CXCL9/SPP1 expression label.
Cxcl9_Spp1_colors = {
    'CXCL9': '#40E0D0',
    'SPP1': '#FF1493',
    'CXCL9 & SPP1': '#5D29A6',
    'none': '#F2F2F2',
}

# Fig 1F

In [None]:
# Violin plot of the combined KO-vs-WT score per CXCL9/SPP1 group,
# with an inner box plot and no strip plot.
sc.pl.violin(
    ad_mac_tumor,
    keys='KOvsWT_up_minus_down_score',
    groupby='CXCL9_SPP1',
    palette=Cxcl9_Spp1_colors,
    stripplot=False,
    inner='box',
    rotation=45,
)