In [None]:
import os
import csv
import scanpy as sc
import pandas as pd
import numpy as np
import harmonypy as hm
import anndata as ad
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib as mpl

import dask.dataframe as dd
import pyarrow
import loompy
import phate
import time
import mygene
import scanpy.external as sce 
import scipy.sparse
import seaborn as sns


from matplotlib.lines import Line2D
from matplotlib.colors import LinearSegmentedColormap
from anndata import AnnData

from scipy import sparse
from sklearn.manifold import Isomap

In [None]:
### Process Adata Object.
# Pipeline: counts -> total-count normalisation -> log1p -> HVG selection ->
# scaling -> PCA -> Harmony integration -> neighbours -> UMAP -> Leiden.
# Each intermediate matrix is snapshotted into its own layer so later cells can
# pick the representation they need.
# NOTE(review): `adata_merged` is not defined in the visible cells; it must
# already exist in the kernel before this cell runs.

adata = adata_merged.copy()  # work on a copy so `adata_merged` is left untouched

# Reset X from the 'counts_RNA' layer (presumably raw counts -- confirm).
adata.X = adata.layers['counts_RNA'].copy()

sc.pp.normalize_total(adata, target_sum = 1e4)
adata.layers["data"] = adata.X.copy()               # normalised, not yet logged
sc.pp.log1p(adata)
adata.layers["log1p_normalized"] = adata.X.copy()   # used later for DE testing and plotting
sc.pp.highly_variable_genes(adata, n_top_genes = 5000, flavor = 'seurat')
sc.pp.scale(adata, max_value = 10)
adata.layers["scale.data"] = adata.X.copy()

# PCA is computed once: `sc.tl.pca` is the same function as `sc.pp.pca`, so
# calling both with identical arguments only repeated the decomposition.
sc.pp.pca(adata, n_comps = 50)

adata.obs['Sample_ID_Dataset'] = adata.obs['Sample_ID'].astype(str) + "_" + adata.obs['Dataset'].astype(str)
# NOTE(review): 'Sample_ID_Dataset' is created above but Harmony is keyed on
# 'Sample_ID' only -- confirm which column is the intended batch variable.
sce.pp.harmony_integrate(adata, key = ['Sample_ID'], max_iter_harmony = 100, nclust = 40, max_iter_kmeans = 20)
sc.pp.neighbors(adata, use_rep = 'X_pca_harmony', n_neighbors = 20)
sc.tl.umap(adata)
sc.tl.leiden(adata, resolution = 0.1, key_added = "leiden_0.1") ## Different Resolutions. Also "louvain".

In [None]:
### Wilcoxon rank sum test.
# Marker detection per 'leiden_0.1' cluster on the "log1p_normalized" layer;
# results are stored under adata.uns['wilcoxon'].
# `use_raw = False` is passed explicitly: when `adata.raw` is populated scanpy
# defaults to `use_raw = True`, which cannot be combined with `layer` and
# raises a ValueError. With an empty `adata.raw` this is a no-op.

sc.tl.rank_genes_groups(
    adata,
    'leiden_0.1',
    method = 'wilcoxon',
    layer = "log1p_normalized",
    use_raw = False,
    key_added = "wilcoxon"
)

In [None]:
### Export Results.
# Flatten the per-cluster record arrays stored in adata.uns['wilcoxon'] into one
# long table (one row per gene per cluster) and write it to an Excel workbook.

result = adata.uns['wilcoxon']
groups = result['names'].dtype.names  # one record field per cluster

# Statistics copied verbatim for every cluster, in output column order.
fields = ('logfoldchanges', 'pvals', 'pvals_adj', 'scores')

dfs = []
for group in groups:
    columns = {'gene': result['names'][group]}
    columns.update({field: result[field][group] for field in fields})
    df = pd.DataFrame(columns).assign(cluster = group)
    dfs.append(df)

de_df = pd.concat(dfs)

de_df.to_excel("DGEA_Cell_Populations.xlsx", index = False)

In [None]:
### Basic UMAP Feature Plots.
# Colours the UMAP embedding by one feature using a custom light-blue -> dark-red
# gradient, forces a white background and saves the figure as a PNG.
# "Feature_Name" looks like a placeholder for the feature to plot -- confirm.

sc.settings.set_figure_params(dpi = 200, figsize = (30, 30))

colors = ["#D5E5F7", "#2488F0", "#7F3F98", "#E22929", "#A81B1B"]
custom_cmap = LinearSegmentedColormap.from_list("custom", colors, N = 256)

umap_kwargs = dict(
    color = "Feature_Name",
    layer = "log1p_normalized",
    size = 20,
    cmap = custom_cmap,
    frameon = False,
    title = "",
    show = False,  # keep the figure open so it can be restyled and saved below
)
sc.pl.umap(adata, **umap_kwargs)

# White background on both the current axes and the figure itself.
for target in (plt.gca(), plt.gcf()):
    target.set_facecolor("white")

plt.savefig("Feature.png", dpi = 300, bbox_inches = "tight", facecolor = "white")

plt.show()

In [None]:
### Basic DotPlots.
# Dot plot of the listed features grouped by "Cluster_Column", with expression
# standardised per variable (standard_scale = "var").
# NOTE(review): "Cluster_Column" is written to adata.obs by the "Annotate Cells"
# cell further down; unless the column already exists, run that cell first.

features = ["Feature_Name"]

dotplot_kwargs = dict(
    var_names = features,
    groupby = "Cluster_Column",
    standard_scale = "var",
    dot_min = 0.1,
    dot_max = 1.0,
)
sc.pl.dotplot(adata, **dotplot_kwargs)

In [None]:
### Annotate Cells.
# Seeds "Cluster_Column" with the Leiden cluster ids (as strings), then for the
# cells in clusters 0, 1 and 2 overwrites it with the values of the
# "New_Population" column.
# NOTE(review): "New_Population" is read as an existing adata.obs column and is
# not created in the visible cells -- confirm it is present (or whether a
# literal label was intended).

leiden_as_str = adata.obs["leiden_0.1"].astype(str)
adata.obs["Cluster_Column"] = leiden_as_str
adata.obs["leiden_0.1"] = leiden_as_str

mask = leiden_as_str.isin(["0", "1", "2"])
adata.obs.loc[mask, "Cluster_Column"] = adata.obs.loc[mask, "New_Population"]

In [None]:
### Save Adata Object
# Switches the working directory to the output folder and writes the annotated
# object there. The directory change persists for any cell run afterwards.

output_dir = "/folder/"
output_file = "adata_annotated.h5ad"

os.chdir(output_dir)
adata.write(output_file)