# CO Organoids
## UMAP, clustering and visualization

In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
from plotnine import *
import seaborn as sns

In [None]:
# Configure scanpy's figure settings so figures are drawn with a white face color.
sc.settings.set_figure_params(facecolor='white')

In [None]:
# Continuous colormap blending from light gray to xkcd "blue"; it is passed
# as `cmap` to the UMAP plots in the cells below.
palette_endpoints = ["lightgray", sns.xkcd_rgb["blue"]]
my_cmap = sns.blend_palette(palette_endpoints, as_cmap=True)

In [None]:
# Load the annotated data object from an .h5ad file; the path is relative to
# the notebook's working directory.
# NOTE(review): later cells use a 'CO/SO' obs column, QC fields and
# 'dispersions_norm', so the file is presumably already preprocessed — confirm upstream.
adata = sc.read_h5ad('../../_m/adata.h5ad')

In [None]:
# Display the dimensions of the loaded object (observations x variables).
adata.shape

In [None]:
# Display the distinct values found in the 'CO/SO' column of adata.obs.
set(adata.obs['CO/SO'])

In [None]:
%%time 

# Run Leiden clustering at resolution 1 and store the labels under the
# 'leiden' key, which the plotting and rank_genes_groups cells below refer to.
# NOTE(review): no neighbors step appears in this notebook, so the neighbor
# graph is assumed to be stored in the loaded file — confirm.
sc.tl.leiden(adata, key_added='leiden', resolution=1)


In [None]:
# Compute the UMAP embedding that the sc.pl.umap calls below draw.
sc.tl.umap(adata)

# UMAP with QC variables and Leiden clustering

In [None]:
# QC view: color the UMAP by 'n_counts', with the Leiden clusters as a second panel.
sc.pl.umap(adata, color=['n_counts', 'leiden'])

In [None]:
# QC view: color the UMAP by 'n_genes', with the Leiden clusters as a second panel.
sc.pl.umap(adata, color=['n_genes', 'leiden'])

In [None]:
# QC view: color the UMAP by 'mt_frac', with the Leiden clusters as a second panel.
sc.pl.umap(adata, color=['mt_frac', 'leiden'])

# Samples and treatments

In [None]:
# Draw one UMAP per annotation key ('10X_Assay_ID', 'CO/SO', 'leiden'),
# each in its own figure.
sample_annotation_keys = ('10X_Assay_ID', 'CO/SO', 'leiden')
for var in sample_annotation_keys:
    sc.pl.umap(adata, cmap=my_cmap, color=[var])

# Marker genes

In [None]:
# Marker genes to overlay on the UMAP: one figure per gene, with the Leiden
# clusters drawn alongside for comparison. Each gene is listed exactly once
# so that no figure is rendered twice.
marker_genes = ['RBFOX3', 'DCX', 'PPP1R1B', 'TH', 'DRD1', 'DRD2', 'DRD3', 'DRD4',
                'SOX2', 'SOX11', 'GAD1', 'GAD2', 'SLC32A1', 'GFAP']
for gene in marker_genes:
    sc.pl.umap(adata, color=[gene, 'leiden'], cmap=my_cmap)

In [None]:
# Multi-panel UMAP for a fixed selection of eight features, one panel each.
overview_features = [
    'DCX', 'ALR/Alpha', 'RBFOX3', 'GFAP',
    'S100B', 'SOX2', 'PPP1R1B', 'L1HS',
]
sc.pl.umap(adata, color=overview_features, cmap=my_cmap)

In [None]:
# Two-panel UMAP colored by 'L1HS' and 'L1PA3'.
sc.pl.umap(adata, color=['L1HS', 'L1PA3'], cmap=my_cmap)

In [None]:
# Two-panel UMAP colored by 'LTR7' and 'AluY'.
sc.pl.umap(adata, color=['LTR7', 'AluY'], cmap=my_cmap)

In [None]:
# Two-panel UMAP colored by 'GFAP' and 'HERVK-int'.
sc.pl.umap(adata, color=['GFAP', 'HERVK-int'], cmap=my_cmap)

In [None]:
# Plot the six features named SVA_A through SVA_F, one panel each.
sva_features = [f'SVA_{letter}' for letter in 'ABCDEF']
sc.pl.umap(adata, color=sva_features, cmap=my_cmap)

# Top 20 most variable repeats

In [None]:
# Keep only the rows of adata.var whose 'type' is 'repeat', order them by
# 'dispersions_norm' from highest to lowest, and retain the first 20.
is_repeat = adata.var['type'] == 'repeat'
repeats_by_dispersion = adata.var[is_repeat].sort_values(
    'dispersions_norm', ascending=False
)
top20_variable_repeats = repeats_by_dispersion.head(20)
# Bare expression so the notebook renders the resulting table.
top20_variable_repeats

In [None]:
# Color the UMAP by every feature named in the index of
# top20_variable_repeats, one panel per feature.
top20_repeat_names = top20_variable_repeats.index.tolist()
sc.pl.umap(adata, color=top20_repeat_names, cmap=my_cmap)

In [None]:
# Combined panel: a fixed list of features plus the 'leiden' cluster labels,
# drawn in the order given.
features = [
    'PPP1R1B', 'TH', 'DRD1', 'DRD2', 'DRD3', 'DRD4',
    'GAD1', 'GAD2', 'SLC32A1',
    'leiden',
    'DCX', 'ALR/Alpha', 'RBFOX3',
]
sc.pl.umap(adata, cmap=my_cmap, color=features)

In [None]:
# Display the column names available in the per-variable table adata.var.
adata.var.columns

In [None]:
# Count the variables in each ('type', 'highly_variable') combination.
adata.var.groupby(['type', 'highly_variable']).size()

# Differential expression

In [None]:
%%time 

# Rank features for each Leiden cluster using the Wilcoxon method.
sc.tl.rank_genes_groups(adata, groupby='leiden', method='wilcoxon')
# Plot the top 25 ranked features per cluster; sharey=False lets every
# cluster panel use its own y-axis range.
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)

# Save adata

In [None]:
# Write the object to 'adata.h5ad' in the current working directory. This is
# a different relative path from the input file read at the top
# ('../../_m/adata.h5ad').
adata.write_h5ad('adata.h5ad')