# Comparison of spot sizes

In [1]:
from itertools import chain
from itertools import product
import numpy as np
import anndata as ad
import pandas as pd
import scanpy as sc
import scanpy.external as sce
# import scvelo as scv
# import cellrank as cr
import seaborn as sns
from scipy import io
from scipy import sparse

In [2]:
import gc
gc.enable()

In [3]:
import os
os.chdir('/local/workdir/dwm269/scCardiacOrganoid/')

In [4]:
from scripts.py.utils import *
from scripts.py.plots import *

## Load data & metadata

In [5]:
meta = pd.read_csv("resources/metadata.csv")
meta = meta.loc[meta["source"]=="Hoang et al",]
meta = meta.loc[meta["include"],]
meta.index = list(range(0, len(meta)))
meta

Unnamed: 0,sample,include,data.dir,pattern,pattern_int,timepoint,time_int,cell_line,source,soupx,soupx_rho_GeneFull
0,D0_600um,True,/workdir/dwm269/scCardiacOrganoid/data/STARsol...,600um,600.0,D0,0,GCaMP6f hiPSCs,Hoang et al,False,
1,D1_600um,True,/workdir/dwm269/scCardiacOrganoid/data/STARsol...,600um,600.0,D1,1,GCaMP6f hiPSCs,Hoang et al,False,
2,D4_200um,True,/workdir/dwm269/scCardiacOrganoid/data/STARsol...,200um,200.0,D4,4,GCaMP6f hiPSCs,Hoang et al,True,0.076
3,D4_600um,True,/workdir/dwm269/scCardiacOrganoid/data/STARsol...,600um,600.0,D4,4,GCaMP6f hiPSCs,Hoang et al,True,0.07
4,D4_1000um,True,/workdir/dwm269/scCardiacOrganoid/data/STARsol...,1000um,1000.0,D4,4,GCaMP6f hiPSCs,Hoang et al,True,0.14
5,D6_600um,True,/workdir/dwm269/scCardiacOrganoid/data/STARsol...,600um,600.0,D6,6,GCaMP6f hiPSCs,Hoang et al,True,0.029
6,D8_600um,True,/workdir/dwm269/scCardiacOrganoid/data/STARsol...,600um,600.0,D8,8,GCaMP6f hiPSCs,Hoang et al,True,0.015
7,D12_600um,True,/workdir/dwm269/scCardiacOrganoid/data/STARsol...,600um,600.0,D12,12,GCaMP6f hiPSCs,Hoang et al,True,0.017
8,D21_200um_B,True,/workdir/dwm269/scCardiacOrganoid/data/STARsol...,200um,200.0,D21,21,GCaMP6f hiPSCs,Hoang et al,True,0.011
9,D21_600um,True,/workdir/dwm269/scCardiacOrganoid/data/STARsol...,600um,600.0,D21,21,GCaMP6f hiPSCs,Hoang et al,True,0.039


Load the data 

In [6]:
adata = sc.read_h5ad(
    filename="data/pyobjs/scCO_v1c.h5ad"
)

Density along PHATE embeddings

In [7]:
sc.set_figure_params(
    figsize=(5,5),
    dpi=200
)

sc.tl.embedding_density(
    adata,
    basis='phate_harmony_s', 
    groupby='pattern'
)
sc.pl.embedding_density(
    adata,
    basis='phate_harmony_s',
    key='phate_harmony_s_density_pattern', 
    bg_dotsize=3,
    fg_dotsize=3,
    ncols=3,
    color_map="YlGnBu",
    group=['200um','600um','1000um']
)

ValueError: Cannot find the embedded representation `adata.obsm['X_phate_harmony_s']`. Compute the embedding first.

In [None]:
# import pandas as pd
tmp = np.core.defchararray.add(
        list(adata.obs['pattern']),
        list(adata.obs['timepoint'])
    )

for x in ['600umD0', '600umD1', '600umD12', '600umD6', '600umD8']:
    tmp[tmp==x] = "other"
    
print(np.unique(tmp))

adata.obs['pattern_timepoint'] = tmp

In [None]:
sc.set_figure_params(
    figsize=(5,5),
    dpi=200
)

sc.tl.embedding_density(
    adata,
    basis='phate_harmony_s',
    groupby='pattern_timepoint'
)

sc.pl.embedding_density(
    adata,
    basis='phate_harmony_s',
    key='phate_harmony_density_pattern_timepoint', 
    bg_dotsize=3,
    fg_dotsize=3,
    ncols=3,
    color_map="YlGnBu",
    group=['200umD4','600umD4','1000umD4','200umD21','600umD21','1000umD21']
)

### D4 DGEA

In [None]:
# Subset D4 cells
# d4_adata = adata[~adata.obs['timepoint'].isin(['D4']),:]
d4_adata = adata[adata.obs['timepoint']=='D4',:]

In [None]:
# Run DGEA across spot sizes
sc.tl.rank_genes_groups(
    d4_adata,
    'pattern',
    method='logreg' #wilcoxon
)

# visualize the results
sc.pl.rank_genes_groups(
    d4_adata,
    n_gene=30,
    ncols=5
)

In [None]:
d4_genes = {
    '200um':['STEAP1B','HAS2','DSCAM','BMPER','PRTG','COLEC12','SERPINE2','HAS2','HAPLN2'],
    '600um':['MT-RNR2','MT-ND1','NPM1','TKT','MYH6','MTATP6P1'],
    '1000um':['AFP','TTR','CDH2','MGST3','SPINK1','AMBP','MAGI1']
}

In [None]:
sc.set_figure_params(
#     figsize=(6,6),
    transparent=True
)

sc.pl.dotplot(
    d4_adata,
    var_names=d4_genes,
    groupby='leiden_harmony_types', 
#     layer="counts",
#     color_map="viridis",
    title="D4 Diff. Expressed Genes",
    color_map='Greys',
    use_raw=False,
    standard_scale ='var',
#     swap_axes=True,
    dendrogram=False
)

### D21 DGEA

In [None]:
# Subset D21 cells
# d21_adata = adata[~adata.obs['timepoint'].isin(['D21']),:]
d21_adata = adata[adata.obs['timepoint']=='D21',:]

In [None]:
# Run DGEA across spot sizes

sc.tl.rank_genes_groups(
    d21_adata,
    'patterm',
    method='logreg' #wilcoxon
)

# visualize the results
sc.pl.rank_genes_groups(
    d21_adata,
    n_gene=30,
    ncols=5
)

In [None]:
d21_genes = {
    '200um':['STEAP1B','LINC00458','SERPINE2','HAS2','DSCAM','FBN2','MIR302CHG','SFRP1','HAPLN2'],
    '600um':['GAPDH','SLC25A3','MT-ND4','MT-ND5','FTL','CKB','TPI1'],
    '1000um':['STEAP1B','HAS2','PRTG','DSCAM','HAPLN1','BMPER','CGNL1','TEAD1']
}

In [None]:
sc.set_figure_params(
#     figsize=(6,6),
    transparent=True
)

sc.pl.dotplot(
    d21_adata,
    var_names=d21_genes,
    groupby='leiden_harmony_types', 
#     layer="counts",
#     color_map="viridis",
    title="D21 Diff. Expressed Genes",
    color_map='Greys',
    use_raw=False,
    standard_scale ='var',
#     swap_axes=True,
    dendrogram=False
)

## Spot size density, split by timepoint 

In [None]:
# Day 4
sc.set_figure_params(figsize=(4,4),dpi=200)

sc.tl.embedding_density(
    d4_adata,
    basis='phate_harmony', 
    groupby='patterm'
)
sc.pl.embedding_density(
    d4_adata,
    basis='phate_harmony',
    key='phate_harmony_density_patterm', 
    bg_dotsize=5,
    fg_dotsize=5,
    ncols=3,
    color_map="YlGnBu",
    group=['200um','600um','1000um']
)

In [None]:
# Day 21
sc.set_figure_params(figsize=(4,4),dpi=200)

sc.tl.embedding_density(
    d21_adata,
    basis='phate_harmony', 
    groupby='patterm'
)
sc.pl.embedding_density(
    d21_adata,
    basis='phate_harmony',
    key='phate_harmony_density_patterm', 
    bg_dotsize=5,
    fg_dotsize=5,
    ncols=3,
    color_map="YlGnBu",
    group=['200um','600um','1000um']
)