# Overview 

Results from the histology annotations and cell2location are used here to compare cell-type-specific gene expression between malignant and non-malignant regions. The analysis focuses on the genes identified via spatial matrisome clustering.

## Imports 

In [305]:
import pandas as pd 
import numpy as np 
import scanpy as sc 
import matplotlib.pyplot as plt
from statannotations.Annotator import Annotator
import seaborn as sns
import os 

## Helper functions 

In [64]:
def load_data(hist_path=None, adata_path=None):
    ''' Loads binarized histopathologist annot and cell2location processed adatas

    Parameters
    ----------
    hist_path : str, optional
        CSV file with a 'Pathology' column. Defaults to the original
        location inside the project's OneDrive checkout.
    adata_path : str, optional
        .h5ad file with the processed cell2location AnnData. Defaults to the
        original location inside the project's OneDrive checkout.

    Returns
    -------
    hist_labels : numpy.ndarray
        One label per annotated spot. 'Invasive carcinoma' becomes
        'Malignant'; the listed benign tissue classes become 'Non-malignant';
        any other label is passed through unchanged.
    c2l_adata : AnnData
        The cell2location AnnData restricted to the annotated spots.
    '''

    # Fall back to the paths the notebook was originally written against
    if hist_path is None:
        hist_path = 'C:/Users/MG/OneDrive - The University of Manchester/Documents/GitHub/Spatial-Transcriptomics-of-the-Prostate-Tumour-Matrisome/Pathology_unlabeled.csv'
    if adata_path is None:
        adata_path = 'C:/Users/MG/OneDrive - The University of Manchester/Documents/GitHub/Spatial-Transcriptomics-of-the-Prostate-Tumour-Matrisome/adata_c2l.h5ad'

    # Load original histologist markers (higher accuracy)
    hist_table = pd.read_csv(hist_path)
    unannotated = hist_table['Pathology'].isna()
    hist_labels = hist_table['Pathology'][~unannotated]

    # Convert annotations to binary
    non_malignant = ['Blood vessel', 'Fibro-muscular tissue', 'Fibrous tissue', 'Immune Cells', 'Normal gland', 'Nerve']
    hist_labels = hist_labels.replace(non_malignant, 'Non-malignant')
    hist_labels = hist_labels.replace('Invasive carcinoma', 'Malignant')
    hist_labels = hist_labels.reset_index(drop=True).values

    # Load processed cell2location data
    c2l_adata = sc.read_h5ad(adata_path)

    # Spots not annotated in histologist annotations ignored.
    # NOTE(review): the filter is applied positionally, so this assumes the
    # CSV rows are in the same order as the AnnData observations - confirm.
    c2l_adata = c2l_adata[(~unannotated).to_numpy()]

    return hist_labels, c2l_adata

In [335]:
def compare_ct_gene_exp(ct, gene, ax=None):
    ''' Compares a cell type's gene expression in malignant to non-malignant regions

    Reads the module-level ``c2l_adata`` and ``hist_labels``, draws a box plot
    of the gene's expression per pathology group and annotates it with a
    Mann-Whitney test between the two groups.

    Parameters
    ----------
    ct : str
        Key of the cell type layer in ``c2l_adata.layers``.
    gene : str
        Gene name, looked up in ``c2l_adata.var.index``.
    ax : matplotlib axes, optional
        Axes to draw on. When omitted seaborn draws on the current axes.

    Returns
    -------
    The axes holding the annotated box plot.

    Raises
    ------
    ValueError
        If ``gene`` does not match exactly one entry of
        ``c2l_adata.var.index``.
    '''

    # Find gene index in columns; fail here with a readable message rather
    # than later with a shape mismatch when the data frame is built
    gene_names = np.asarray(c2l_adata.var.index)
    gene_cols = np.flatnonzero(gene_names == gene)
    if gene_cols.size != 1:
        raise ValueError(
            f"Expected exactly one column for gene {gene!r} in "
            f"c2l_adata.var.index, found {gene_cols.size}"
        )

    # Gene expression for specific cell type: slice the single column first
    # so only that column is densified, not the whole layer
    layer = c2l_adata.layers[ct]
    column = layer[:, gene_cols]
    if hasattr(column, 'toarray'):  # sparse layer; dense layers lack toarray
        column = column.toarray()
    ct_gene_exp = np.asarray(column, dtype=float).ravel()

    # Combine data in df, one row per spot
    data = pd.DataFrame({
        'gene_exp': ct_gene_exp,
        'pathology': np.asarray(hist_labels).astype(str),
    })

    # Create plot
    x = 'pathology'
    y = 'gene_exp'
    order = ['Malignant', 'Non-malignant']

    ax = sns.boxplot(data=data, x=x, y=y, order=order, ax=ax)

    pairs = [('Malignant', 'Non-malignant')]

    annotator = Annotator(ax, pairs, data=data, x=x, y=y, order=order)

    annotator.configure(test='Mann-Whitney')
    annotator.apply_and_annotate()

    ax.set_title(ct + '_' + gene + '_' + 'expression')

    return ax




In [65]:
# Load processed cell2location adata and histologist annotations.
# Both names are module-level globals that compare_ct_gene_exp reads directly,
# so this cell has to run before that function is called.
hist_labels, c2l_adata = load_data()

In [349]:
def plot_ct_gene_exp(genes=None, celltypes=None, out_dir='c2l_regional_exp_results'):
    ''' Plots each cell types gene expression in malignant vs non-malignant

    For every (cell type, gene) pair a box plot is produced with
    ``compare_ct_gene_exp`` and saved as ``<out_dir>/<ct>/<ct>_<gene>``.

    Parameters
    ----------
    genes : list of str, optional
        Genes to plot. Defaults to the original hard-coded gene list.
    celltypes : list of str, optional
        Cell types to plot. Defaults to the original hard-coded list.
    out_dir : str, optional
        Root directory for the saved figures; created if missing.

    Returns
    -------
    None
    '''
    if genes is None:
        genes = ['THSD4', 'ANXA1', 'LMAN1L', 'CRISPLD2', 'FBLN1', 'SPOCK1', 'SPON2', 'LMAN1', 'COL9A2', 'GDF15', 'SERPINA3', 'BGN', 'SCUBE2', 'MUC12', 'MMP7', 'SPARCL1', 'COL6A2', 'COL6A1', 'TIMP2', 'DCN']
    if celltypes is None:
        celltypes = ['BE', 'CE', 'DC', 'Endothelial', 'Fibroblast', 'HE', 'LE-KLK3', 'LE-KLK4', 'Mac1', 'Mac2', 'Mac-MT1', 'Mast cell', 'Mono', 'NK cell', 'T cell', 'Treg']

    # exist_ok so that re-running the notebook does not fail on existing dirs
    os.makedirs(out_dir, exist_ok=True)
    for ct in celltypes:
        ct_dir = os.path.join(out_dir, ct)
        os.makedirs(ct_dir, exist_ok=True)

        for g in genes:
            # Start from a fresh figure so that plots do not accumulate on
            # the same axes; compare_ct_gene_exp draws on the current axes
            fig = plt.figure()
            try:
                ax = compare_ct_gene_exp(ct, g)
                ax.figure.savefig(os.path.join(ct_dir, ct + '_' + g))
            finally:
                # Always release the figure, even if plotting failed
                plt.close(fig)
    return
        
