In [1]:
import hotspot
from itertools import groupby
from json import load
import pandas as pd
import scanpy as sc
import numpy as np
import sys
import modules.classifyClusters.classifyClusters as classify
import os
from matplotlib import pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import seaborn as sns

# Load the saved AnnData file, make gene names unique and work in float64.
adata = sc.read("./output/savedDataClustersFinal.h5ad")
adata.var_names_make_unique()
adata.X = adata.X.astype('float64')

# Drop genes with fewer than 3 total counts across all cells.
sc.pp.filter_genes(adata, min_counts=3)

# Per-cell normalisation to 10,000 counts, log1p transform, then flag
# highly variable genes.
sc.pp.normalize_total(adata, target_sum=10000)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)

# Keep the normalised, log-transformed full gene set reachable via adata.raw
# before subsetting to the highly variable genes.
adata.raw = adata
# Slicing an AnnData returns a *view*. The steps below (regress_out, scale)
# modify the object in place, so take an explicit copy here instead of
# relying on the view being silently converted to a real object.
adata = adata[:, adata.var.highly_variable].copy()

# Regress out the two QC covariates, scale (clipped at 10), then build the
# PCA -> neighbour graph -> UMAP -> Leiden pipeline.
sc.pp.regress_out(adata, ['total_counts', 'pct_counts_mt'])
sc.pp.scale(adata, max_value=10)
sc.tl.pca(adata, svd_solver='arpack')
sc.pp.neighbors(adata, n_neighbors=15, n_pcs=40) # n_neighbors=15 is default
sc.tl.umap(adata)
sc.tl.leiden(adata, resolution=0.3)

  sc.pp.regress_out(adata, ['total_counts', 'pct_counts_mt'])


In [2]:
# Reload the same file so Hotspot gets the matrix as stored on disk rather
# than the normalised / scaled values now held in `adata`.
adataHotspot = sc.read("./output/savedDataClustersFinal.h5ad")
adataHotspot.var_names_make_unique()
adataHotspot.X = adataHotspot.X.astype('float64')

# obsm arrays are attached by row position, and AnnData only validates the
# number of rows. Make sure both objects hold the same cells in the same
# order before reusing the PCA embedding computed on `adata`.
if not adata.obs_names.equals(adataHotspot.obs_names):
    raise ValueError(
        "adata and adataHotspot do not contain the same cells in the same "
        "order; cannot transfer obsm['X_pca'] by position."
    )
adataHotspot.obsm["X_pca"] = adata.obsm["X_pca"]
print(adataHotspot)

# Keep only genes with at least 50 total counts (the printed summaries show
# n_vars going from 36601 to 23606).
sc.pp.filter_genes(adataHotspot, min_counts=50)
print(adataHotspot)

AnnData object with n_obs × n_vars = 34626 × 36601
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'group', 'sample', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'n_genes', 'leiden'
    var: 'features', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'
    obsm: 'X_pca'
AnnData object with n_obs × n_vars = 34626 × 23606
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'group', 'sample', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'n_genes', 'leiden'
    var: 'features', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'n_counts'
    obsm: 'X_pca'


In [3]:
hs = hotspot.Hotspot(
    adataHotspot,
    model='danb',
    latent_obsm_key="X_pca",
)
%store hs



Stored 'hs' (Hotspot)


In [4]:
hs.create_knn_graph(weighted_graph=False, n_neighbors=30)
%store hs

Stored 'hs' (Hotspot)


In [5]:
# Compute per-gene autocorrelation statistics. The recorded progress bar shows
# 23606 genes taking roughly 50 minutes, so the result is persisted with
# %store to avoid recomputing it.
hs_results = hs.compute_autocorrelations()
%store hs_results

100%|██████████| 23606/23606 [50:15<00:00,  7.83it/s] 


Stored 'hs_results' (DataFrame)


In [6]:
# Show the autocorrelation table (columns C, Z, Pval, FDR; indexed by Gene).
hs.results

Unnamed: 0_level_0,C,Z,Pval,FDR
Gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
TEX14,0.515330,2594.070025,0.000000,0.000000
KCNMB2-AS1,0.492181,949.023499,0.000000,0.000000
MIR99AHG,0.725571,869.714643,0.000000,0.000000
TENM2,0.519401,847.999303,0.000000,0.000000
HSPA1B,0.426130,825.877416,0.000000,0.000000
...,...,...,...,...
AL118558.4,-0.002060,-1.871002,0.969328,0.969492
AC080013.3,-0.002133,-1.880602,0.969987,0.970110
AC141586.3,-0.001933,-2.036560,0.979153,0.979236
AC016866.1,-0.002329,-2.079601,0.981219,0.981260


In [7]:
# Export the autocorrelation table to a CSV file in the current working directory.
hs.results.to_csv("HsResults.csv")

In [8]:
hs_genes = hs_results.loc[hs_results.FDR < 0.05].index # Select genes
%store hs_genes
local_correlations = hs.compute_local_correlations(hs_genes, jobs=1) # jobs
%store local_correlations

Stored 'hs_genes' (Index)
Computing pair-wise local correlation on 21182 features...


100%|██████████| 21182/21182 [01:11<00:00, 294.55it/s]
  0%|          | 98148/224327971 [21:02<800:55:13, 77.77it/s] 


KeyboardInterrupt: 

In [None]:
# Inspect the gene index selected for the local-correlation step.
hs_genes

In [None]:
# Inspect the result returned by hs.compute_local_correlations.
local_correlations

In [None]:
modules = hs.create_modules(
    min_gene_threshold=30, core_only=True, fdr_threshold=0.05
)
%store modules

In [None]:
# Plot the local correlations; run after compute_local_correlations and
# create_modules have been called on `hs`.
hs.plot_local_correlations()