In [None]:
import scanpy
import pandas as pd
import numpy as np
from kneed import KneeLocator

### Performing basic preprocessing of scRNA-seq data

In [None]:
# Load the annotated scRNA-seq dataset from an .h5ad file into `adata`.
# The path is relative, so it is resolved against the notebook's current
# working directory.
adata = scanpy.read(filename = "scRNA-seq_rna_umap.h5ad")

In [None]:
# Clear the names of both the obs and var indexes (step taken from the
# ContactTracing tutorial).
adata.obs.index.name = adata.var.index.name = None

In [None]:
# Cell barcodes (the obs index) must be unique: report how many are duplicated
# and, if any are, let AnnData make them unique and report the count again.
numdup = adata.obs.index.duplicated().sum()
print(f'{numdup} duplicated barcodes')
if numdup:
    adata.obs_names_make_unique()
    numdup = adata.obs.index.duplicated().sum()
    print(f'Now, {numdup} duplicated barcodes')

In [None]:
# Condition setting: copy the existing 'Sample' column of adata.obs into a
# 'Condition' column, so every cell's condition label equals its sample label.
# The filtering and plotting cells further down read 'Condition'.
adata.obs['Condition'] = adata.obs['Sample']

In [None]:
# Expose the existing 'new_annotation' labels under the column name
# 'cell type', which the filtering and plotting cells further down reference.
adata.obs['cell type'] = adata.obs['new_annotation']

In [None]:
# Display how many cells (rows of adata.obs) carry each 'Condition' value.
adata.obs['Condition'].value_counts()

In [None]:
# Removing small cell types.
# A cell type is dropped entirely when ANY ('cell type', 'Condition') pair
# reported by value_counts() has fewer than `minCell` cells.
# NOTE(review): a cell type that is completely absent from one condition is only
# caught if value_counts() emits a zero-count row for that pair -- verify this
# for the dtypes of these two columns in this dataset.
minCell = 50
ctcounts = adata.obs[['cell type','Condition']].value_counts()
remove_cellTypes = ctcounts[ctcounts < minCell].reset_index()['cell type'].unique()
if len(remove_cellTypes) > 0:
    # map(str, ...) keeps the join from raising TypeError on non-string labels.
    tmpstr = ','.join(map(str, remove_cellTypes))
    print(f'Removing celltypes: {tmpstr}')
    print(f'Original ncell {adata.shape[0]}')
    # Boolean indexing returns a *view* of the AnnData object. The following
    # cells assign to adata.X / adata.layers, which would force an implicit
    # copy (with an ImplicitModificationWarning); copy explicitly instead so
    # `adata` is a standalone object from here on.
    adata = adata[~adata.obs['cell type'].isin(remove_cellTypes)].copy()
    print(f'New ncell {adata.shape[0]}')
else:
    print('Keeping all cell types')

In [None]:
# Creating the logX layer.
# Densify X only when it still exposes .toarray() (i.e. is a sparse matrix).
# An unconditional call raises AttributeError on a dense ndarray, which is
# exactly what X becomes after this cell has run once, so the guard also makes
# the cell safe to re-run.
if hasattr(adata.X, 'toarray'):
    adata.X = adata.X.toarray()
# log10(x + 0.1) - log10(0.1) == log10(1 + 10*x): a shifted log transform under
# which a value of 0 maps to exactly 0.
adata.layers['logX'] = np.log10(adata.X + 0.1) - np.log10(0.1)

In [None]:

# Use the knee-point method to decide how many PCs to keep: run an initial PCA
# with `init_npcs` components, then locate the knee of the log explained
# variance ratio plotted against the 0-based component index.
# NOTE(review): pca is called without a layer argument, so it appears to run on
# adata.X rather than the 'logX' layer -- confirm this is intended.
init_npcs = 50
scanpy.pp.pca(adata, n_comps=init_npcs)
y = np.log(adata.uns['pca']['variance_ratio'])
x = list(range(len(y)))
knee = KneeLocator(
    x=x,
    y=y,
    curve='convex',
    direction='decreasing',
    online=True,
    interp_method='polynomial',
)
knee.plot_knee_normalized()

In [None]:
# Turn the detected knee into a PC count, then recompute PCA with that many
# components and build the neighbor graph and UMAP embedding on top of it.
if knee.knee is None:
    # KneeLocator leaves `knee` as None when it finds no knee point; the
    # original `knee.knee+1` would then raise TypeError. Keep every component
    # from the initial PCA instead of crashing.
    opt_n_pcs = init_npcs
    print(f'No knee point detected; falling back to init_npcs={init_npcs}')
else:
    # The knee is a 0-based component index, so +1 converts it to a count.
    # int() hands scanpy a plain Python int rather than a numpy scalar.
    opt_n_pcs = int(knee.knee) + 1
print('Optimal Number of PCs: ', str(opt_n_pcs))
scanpy.pp.pca(adata, n_comps=opt_n_pcs)
scanpy.pp.neighbors(adata)
scanpy.tl.umap(adata)

In [None]:
# Plot the UMAP embedding twice -- coloured by 'cell type' and by 'Condition' --
# with ncols=1 so the two panels are stacked in a single column.
scanpy.pl.umap(adata, color=['cell type', 'Condition'], ncols=1)
