In [None]:
import os

import pandas as pd
import numpy as np

import scanpy as sc
import quicat
from scipy import sparse as sp
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.lines as mlines


import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
from collections import defaultdict

In [None]:
# Output root for figures. Defaults to the original location; it can be
# redirected by setting QUICAT_REPORTS_DIR, so the notebook can run on another
# machine without editing the code. os.path.join(..., '') guarantees the
# trailing separator that the f-string path concatenations below rely on.
reports_dir = os.path.join(
    os.environ.get(
        'QUICAT_REPORTS_DIR',
        '/home/daniele/Code/github_synced/barcoding/quicat_paper_code/reports/',
    ),
    '',
)
# Resolution passed to every savefig call.
dpi = 300

In [None]:
# Directory holding one Visium output folder per slide. Defaults to the
# original location; override with QUICAT_GEX_DIR to run elsewhere.
# os.path.join(..., '') guarantees the trailing separator needed by the
# f'{gex_dir}{slide}' concatenation used when reading each slide.
gex_dir = os.path.join(
    os.environ.get('QUICAT_GEX_DIR', "/mnt/storage/Daniele/clonal_toolkit_data/visium/"),
    '',
)

In [None]:
# Load the quicat barcode output table. The result is used below like an
# AnnData object (.obs, boolean row indexing) — presumably spots x barcodes;
# confirm against quicat.read_sc.
quicat_output = quicat.read_sc('/mnt/storage/Daniele/quicat_benchmark/st/Ratz/quicat/barcodes_output.csv')

In [None]:
# Display the loaded object's summary representation.
quicat_output

In [None]:
# Label every spot by staining protocol: slides 1-4 -> 'No IHC',
# slides 5-8 -> 'IHC'; any other sample keeps its own sample name.
sample_ids = quicat_output.obs['sample']
no_ihc_mask = sample_ids.isin(['slide_1', 'slide_2', 'slide_3', 'slide_4'])
ihc_mask = sample_ids.isin(['slide_5', 'slide_6', 'slide_7', 'slide_8'])

ihc_or_sample = np.where(ihc_mask, 'IHC', sample_ids)
quicat_output.obs['IHC'] = np.where(no_ihc_mask, 'No IHC', ihc_or_sample)

In [None]:
# Names of the eight Visium slides analysed, in order: slide_1 ... slide_8.
slides = [f'slide_{number}' for number in range(1, 9)]

In [None]:
# For every slide: load the Visium gene-expression data, compute QC metrics on
# the barcode counts, align both objects on their shared spots and annotate
# each with the other's information.
#   adatas : list of [gep_adata, quicat_slide] pairs, one per slide
#   spots  : per-slide count of barcode-positive spots and of all spots
adatas = []
spots = defaultdict(dict)
for slide in slides:
    # Read the gene-expression AnnData for this slide.
    gep_adata = sc.read_visium(f'{gex_dir}{slide}')
    gep_adata.var_names_make_unique()

    # QC on the barcode matrix. Take an explicit copy: the boolean subset is a
    # view, and calculate_qc_metrics(inplace=True) writes into .obs/.var.
    quicat_slide = quicat_output[quicat_output.obs['sample'] == slide].copy()
    sc.pp.calculate_qc_metrics(quicat_slide, inplace=True, percent_top=None)

    # Merging: drop the "_<slide>" suffix so spot names match the Visium names.
    # partition() leaves a name untouched when the suffix is absent, whereas
    # slicing with find() == -1 would silently chop the last character.
    slide_suffix = f'_{slide}'
    quicat_slide.obs_names = [spot.partition(slide_suffix)[0] for spot in quicat_slide.obs_names]

    # Keep only spots present in both objects; this list also fixes the row
    # order reused below for the spatial coordinates.
    common_spots = list(quicat_slide.obs_names.intersection(gep_adata.obs_names))
    quicat_slide = quicat_slide[common_spots].copy()

    # Per-spot barcode totals / number of detected barcodes (index-aligned;
    # spots without any barcode become NaN and are filled with 0).
    # Assign the filled result back instead of fillna(inplace=True) on a column
    # selection, which does not update the frame under pandas copy-on-write.
    gep_adata.obs['barcode_counts'] = quicat_slide.obs['total_counts']
    gep_adata.obs['barcode_counts'] = gep_adata.obs['barcode_counts'].fillna(0)
    gep_adata.obs['n_barcodes_by_counts'] = quicat_slide.obs['n_genes_by_counts']
    gep_adata.obs['n_barcodes_by_counts'] = (
        gep_adata.obs['n_barcodes_by_counts'].fillna(0).astype(int).astype('category')
    )

    # Dominant barcode per spot. np.asarray(...).ravel() flattens both the
    # (n, 1) matrix returned for sparse X and the 1-D array returned for dense X.
    top_barcode_idx = np.asarray(quicat_slide.X.argmax(axis=1)).ravel()
    quicat_slide.obs['top_barcode'] = quicat_slide.var_names[top_barcode_idx].to_numpy()

    # Copy spatial coordinates / image metadata for the shared spots, in the
    # same row order as quicat_slide.
    gep_common = gep_adata[common_spots]
    quicat_slide.obsm['spatial'] = gep_common.obsm['spatial']
    quicat_slide.uns['spatial'] = gep_common.uns['spatial']

    # Propagate the dominant barcode to the gene-expression object.
    gep_adata.obs['top_barcode'] = quicat_slide.obs['top_barcode']
    gep_adata.obs['top_barcode'] = gep_adata.obs['top_barcode'].fillna('no barcode')

    spots[slide]['quicat'] = quicat_slide.n_obs
    spots[slide]['total'] = gep_adata.n_obs
    adatas.append([gep_adata, quicat_slide])

In [None]:
# Plain-dict snapshot of the per-slide spot counts (slide -> {'quicat', 'total'}).
data = {slide_name: slide_counts for slide_name, slide_counts in spots.items()}

In [None]:
# Grouped bar chart: for each slide, total Visium spots next to the number of
# spots in which at least one barcode was detected.
labels = list(data)
_total = [data[name]['total'] for name in labels]
_quicat = [data[name]['quicat'] for name in labels]

bar_width = 0.4  # Width of each bar
x = range(len(labels))
half_width = 0.5 * bar_width  # offset that places the two bars side by side

colors = sns.palettes.color_palette('Set2')[:2]
shared_bar_style = dict(width=bar_width, edgecolor='black', alpha=0.7)

plt.figure(figsize=(12, 6))
plt.bar([pos - half_width for pos in x], _total, color=colors[0], label='Total', **shared_bar_style)
plt.bar([pos + half_width for pos in x], _quicat, color=colors[1], label='Barcode positive', **shared_bar_style)

plt.xticks(x, labels, rotation=45, ha="right")
plt.ylabel('Visium Spots')
plt.title('Comparison of Counts for Each Slide')

# Legend sits outside the axes, to the right.
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))

plt.tight_layout()
plt.savefig(
    f'{reports_dir}figures/fig4/barplot_spots_with_detected_barcodes.pdf',
    dpi=dpi,
    bbox_inches='tight',
)
plt.show()

In [None]:
# Box plot of per-spot barcode counts ('total_counts') for each slide, coloured
# by staining protocol ('IHC' vs 'No IHC').
quicat.pl.boxplot(
    adata=quicat_output,
    groupby='sample',
    obs_key='total_counts',
    hue='IHC',
    palette='Set2',
    figsize=(12, 8),
    # Title now describes what is plotted (the previous text, 'Gene Expression
    # by Cell Type', was a leftover unrelated to this figure).
    title='Barcode counts per spot by slide',
    xlabel='Slides',
    ylabel='barcodes counts',
    save=f'{reports_dir}figures/fig4/boxplot_counts_per_spot_IHC_comparison.pdf',
    dpi=dpi
)


### Focus on a single slide

In [None]:
# Pick the second entry of `adatas` (index 1) as the focus slide and keep only
# barcodes detected in at least one spot.
gep_adata, barcode_adata = adatas[1]
detected_barcodes = barcode_adata.var['n_cells_by_counts'] > 0
barcode_adata = barcode_adata[:, detected_barcodes].copy()

In [None]:
# Flag the 10 most variable barcodes on the barcode matrix.
# NOTE(review): barcode_adata.X has not been normalised/log-transformed in the
# visible cells — confirm this is what the chosen HVG flavor expects.
sc.pp.highly_variable_genes(barcode_adata, n_top_genes=10)

In [None]:
# Remove noisy spots: keep only spots whose "number of barcodes per spot" value
# is shared by more than 5 spots on this slide.
barcodes_per_spot = barcode_adata.obs.n_barcodes_by_counts
class_sizes = barcodes_per_spot.value_counts()
frequent_classes = list(class_sizes[class_sizes > 5].index)
barcode_adata = barcode_adata[barcodes_per_spot.isin(frequent_classes)].copy()

In [None]:
# Standard gene-expression workflow on the focus slide, then copy the cluster
# labels onto the barcode object.
# NOTE: this cell mutates gep_adata.X in place (normalise + log1p); re-running
# it without reloading would transform the data a second time.
gep_adata.layers['counts'] = gep_adata.X.copy()  # keep the untransformed matrix
sc.pp.normalize_total(gep_adata)
sc.pp.log1p(gep_adata)
sc.pp.pca(gep_adata, n_comps=25)
sc.pp.neighbors(gep_adata)
sc.tl.leiden(gep_adata, resolution = .3)
# Index-aligned assignment: only spots present in barcode_adata receive a label.
barcode_adata.obs['leiden'] = gep_adata.obs['leiden']

In [None]:
# Spatial map of per-spot barcode counts on the focus slide; the colour scale
# is capped at 25 (vmax). show=False keeps the figure open so it can be
# adjusted and saved below.
sc.pl.spatial(
    gep_adata, 
    color = ['barcode_counts'], 
    title=['barcode counts'],  
    vmax = 25, 
    frameon=False,
    cmap='cividis',
    show=False,
)
# Blank out the title set above and remove the grid before saving.
plt.title('')
plt.grid(False)
plt.savefig(f'{reports_dir}figures/fig4/spatial_barcode_counts.pdf', dpi = dpi, bbox_inches='tight')

In [None]:
# Build the barcode -> clone-label mapping used for the 'clone' column.
# `top5` holds the 6 most frequent top_barcode values: head(6) leaves room for
# the 'no barcode' placeholder, which is skipped when numbering clones.
mapping = {}
top5 = list(gep_adata.obs.top_barcode.value_counts().head(6).index)

# Number clones by frequency rank (value_counts order), not by order of first
# appearance in .obs, so that "clone N" here is the same barcode as
# "barcode N" in the per-barcode spatial plots, which iterate `top5` in order.
clone = 1
for bc in top5:
    if bc == 'no barcode':
        continue
    mapping[bc] = f'clone {clone}'
    clone += 1

# Every barcode outside the top list is pooled into 'others'. A 'no barcode'
# entry inside the top list stays unmapped and keeps its own label.
for bc in gep_adata.obs.top_barcode.unique():
    if bc not in top5:
        mapping[bc] = 'others'
        

In [None]:
# Translate each spot's dominant barcode into its clone label and store it as a
# categorical whose categories are in sorted order.
clone_labels = gep_adata.obs.top_barcode.replace(mapping)
gep_adata.obs['clone'] = clone_labels
gep_adata.obs['clone'] = gep_adata.obs['clone'].astype('category')
sorted_clone_names = sorted(gep_adata.obs['clone'].unique())
gep_adata.obs['clone'] = gep_adata.obs['clone'].cat.reorder_categories(sorted_clone_names)

prettify

In [None]:
# Stacked bar plot: clone composition (in %) of every Leiden cluster.
# NOTE(review): `palette` is not assigned in any visible cell — confirm it is
# defined before this cell runs, otherwise this raises NameError on a fresh
# kernel (Restart & Run All).
quicat.pl.stacked_barplot(
    adata=gep_adata,
    groupby='leiden',
    obs_key='clone',
    figsize=(12, 6),
    xlabel='Leiden Cluster',
    ylabel='Clones frequencies (%)',
    title='',
    palette = palette,
    edgecolor='black',        
    linewidth=1.5,  
    dpi=dpi,
    save = f'{reports_dir}figures/fig4/stacked_barplot_leiden_by_clone.pdf'         
)

In [None]:
# Spatial map of the Leiden clusters on the focus slide.
# NOTE(review): 'leiden' is a categorical label, so vmax/cmap presumably have
# no effect here — confirm and drop if so.
sc.pl.spatial(
    gep_adata, 
    color = ['leiden'], 
    vmax = 50, 
    frameon=False,
    cmap='cividis',
    show=False
)
# Blank out the title and remove the grid before saving.
plt.title('')
plt.grid(False)
plt.savefig(f'{reports_dir}figures/fig4/spatial_leiden.pdf', dpi = dpi, bbox_inches='tight')

In [None]:
# Violin plot of the number of barcodes per spot, grouped by Leiden cluster.
violin = sc.pl.violin(
    gep_adata, 
    groupby='leiden', 
    keys='n_barcodes_by_counts', 
    show=False
)

# NOTE(review): n_barcodes_by_counts is stored as a categorical; the y-axis is
# flipped here, presumably to restore ascending order — confirm.
plt.gca().invert_yaxis()
plt.title('')
plt.xlabel('Leiden Cluster')
plt.ylabel('barcodes per spot')
plt.xticks(fontsize=8)
plt.yticks(fontsize=8)

plt.tight_layout()
# Use the shared `dpi` setting (was a hard-coded 300) so every figure follows
# the same configuration value.
plt.savefig(f'{reports_dir}figures/fig4/violin_plot_nr_of_barcodes_per_spot_by_leiden_cluster.pdf', dpi=dpi, bbox_inches='tight')
plt.show()

In [None]:
# Bar plot of how many spots carry each "number of barcodes per spot" value on
# the focus slide (after the noisy-class filter applied to barcode_adata).
# NOTE(review): relies on barcode_adata.obs having 'n_barcodes_by_counts';
# the visible cells only add that column to gep_adata.obs — confirm its origin.
quicat.pl.barplot(
    barcode_adata, 
    groupby = 'n_barcodes_by_counts', 
    color = '#DDCC77',
    xlabel = 'barcodes per spot', 
    edgecolor='black',      
    save = f'{reports_dir}figures/fig4/barplot_nr_of_barcode_focus_1_slide.pdf'
)

In [None]:
# Default (pale) colour for every top_barcode category.
# The hex string needs its leading '#': 'fff5f0' alone is not a valid
# matplotlib colour. Provide at least one entry per category (and never fewer
# than the original 20) — a too-short colour list is presumably discarded by
# scanpy in favour of its default palette; confirm against scanpy's behaviour.
n_top_barcode_categories = gep_adata.obs['top_barcode'].nunique()
gep_adata.uns['top_barcode_colors'] = ['#fff5f0'] * max(20, n_top_barcode_categories)

In [None]:
# One spatial map per top barcode, highlighting the spots where that barcode is
# the dominant one. Barcodes are numbered by frequency rank (order of `top5`).
# Filter out the 'no barcode' placeholder explicitly instead of assuming it is
# always the first (most frequent) entry of `top5`.
ranked_barcodes = [bc for bc in top5 if bc != 'no barcode']

for bc_count, bc in enumerate(ranked_barcodes, start=1):
    sc.pl.spatial(
        gep_adata, 
        color=['top_barcode'], 
        groups=[bc],
        title=['barcode counts'],  
        vmax=25, 
        frameon=False,
        show=False,
        na_in_legend=False,
        palette=['#FF0000'],
        alpha_img = .5,
        legend_loc=None
    )
    plt.title(f'barcode {bc_count}')
    plt.grid(False)
    # Save with the same number shown in the title. Previously the counter was
    # incremented before saving, so "barcode 1" was written to
    # spatial_clone_2_distribution.pdf and so on.
    plt.savefig(f'{reports_dir}figures/fig4/spatial_clone_{bc_count}_distribution.pdf', dpi=dpi, bbox_inches='tight')

plt.show()