In [2]:
# Mount Google Drive at /content/drive; the dataset paths defined below live under this mount point.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [3]:
# Both input files live in the same Drive folder, so each path is built from that shared prefix.
gi_data_dir = '/content/drive/My Drive/GI_Data'
file_path_brain = gi_data_dir + '/Mouse_brain_cell_bin.h5ad'
file_path_embryo = gi_data_dir + '/E9.5_E1S1.MOSTA.h5ad'


In [4]:
!pip install scanpy

import numpy as np
import scanpy as sc

Collecting scanpy
  Downloading scanpy-1.10.1-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting anndata>=0.8 (from scanpy)
  Downloading anndata-0.10.7-py3-none-any.whl (122 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.4/122.4 kB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
Collecting legacy-api-wrap>=1.4 (from scanpy)
  Downloading legacy_api_wrap-1.4-py3-none-any.whl (15 kB)
Collecting pynndescent>=0.5 (from scanpy)
  Downloading pynndescent-0.5.12-py3-none-any.whl (56 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.8/56.8 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
Collecting session-info (from scanpy)
  Downloading session_info-1.0.0.tar.gz (24 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting umap-learn!=0.5.0,>=0.5 (from scanpy)
  Downloading umap_learn-0.5.6-py3-none-any.whl (85 kB)
[2K     [90m━━━━

In [5]:
# Load the brain and embryo datasets from the two .h5ad paths defined earlier in the notebook.
# Later cells refer to these objects by the names assigned here.
anndata_brain=sc.read(file_path_brain)
anndata_embryo=sc.read(file_path_embryo)

Only considering the two last: ['.MOSTA', '.h5ad'].
Only considering the two last: ['.MOSTA', '.h5ad'].


In [6]:
# Apply the same preprocessing to both samples (embryo first, then brain).
# All three calls are used for their in-place effect on the object passed in.
for _dataset in (anndata_embryo, anndata_brain):
    # QC: filter genes using a minimum of 10 cells.
    sc.pp.filter_genes(_dataset, min_cells=10)
    # Normalization: scale per-cell totals, then log1p-transform.
    sc.pp.normalize_total(_dataset, inplace=True)
    sc.pp.log1p(_dataset)

In [7]:
# Print the text summary of each preprocessed object, embryo first, one per line.
print(anndata_embryo, anndata_brain, sep='\n')

AnnData object with n_obs × n_vars = 5913 × 20055
    obs: 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'annotation', 'Regulon - 2310011J03Rik', 'Regulon - 5730507C01Rik', 'Regulon - Alx1', 'Regulon - Alx3', 'Regulon - Alx4', 'Regulon - Ar', 'Regulon - Arid3a', 'Regulon - Arid3c', 'Regulon - Arnt2', 'Regulon - Arx', 'Regulon - Ascl1', 'Regulon - Atf1', 'Regulon - Atf4', 'Regulon - Atf5', 'Regulon - Atf6', 'Regulon - Atf7', 'Regulon - Bach1', 'Regulon - Bach2', 'Regulon - Barhl1', 'Regulon - Barx1', 'Regulon - Batf', 'Regulon - Bcl11a', 'Regulon - Bcl3', 'Regulon - Bcl6', 'Regulon - Bcl6b', 'Regulon - Bclaf1', 'Regulon - Bdp1', 'Regulon - Bhlha15', 'Regulon - Bhlhe22', 'Regulon - Bhlhe23', 'Regulon - Bhlhe41', 'Regulon - Bmyc', 'Regulon - Boll', 'Regulon - Bptf', 'Regulon - Brca1', 'Regulon - Brf1', 'Regulon - Brf2', 'Regulon - Bsx', 'Regulon - Cdx1', 'Regulon - Cdx2', 'Regulon - Cebpa', 'Regulon - Cebpz', 'Regulon - Chd1', 'Regulon - Clock', 'Re

In [1]:
import scipy.sparse

def calculate_entropy(X):
    """Return the Shannon entropy, in bits, of the values contained in ``X``.

    Every entry of ``X`` counts as one observation. Entries are grouped by
    exact value and the entropy of the resulting empirical distribution is
    computed with a base-2 logarithm.

    Parameters
    ----------
    X : scipy sparse matrix/array or array-like
        Values to analyse. Sparse input is densified first; dense input
        (NumPy array, list, ...) is used as is.

    Returns
    -------
    numpy.float64
        Entropy in bits. Zero for empty input or when all entries are equal.
    """
    # Only sparse containers expose ``toarray``; calling it unconditionally
    # would raise on plain NumPy arrays.
    if scipy.sparse.issparse(X):
        X = X.toarray()
    values = np.asarray(X).ravel()

    if values.size == 0:
        # No observations: define the entropy as zero instead of dividing by zero.
        return np.float64(0.0)

    _, counts = np.unique(values, return_counts=True)
    probs = counts / values.size
    # Each probability comes from a count >= 1, so it is strictly positive and
    # log2 is always defined; no additive offset is needed inside the log.
    # Subtracting from 0.0 avoids returning negative zero.
    entropy = 0.0 - np.sum(probs * np.log2(probs))
    return entropy

In [None]:
def detect_spatially_variable_genes(adata, threshold):
    """Select genes whose min-max normalised expression entropy exceeds ``threshold``.

    For every gene (one column of ``adata.X``) the entropy of its expression
    values is computed with ``calculate_entropy``. The per-gene entropies are
    rescaled to [0, 1] across all genes, and the genes scoring strictly above
    ``threshold`` are returned.

    Note: the score is derived from expression values only; no spot/cell
    coordinates are consulted anywhere in this function.

    Parameters
    ----------
    adata : AnnData-like
        Object exposing ``X`` (observations x genes) and ``var_names``.
    threshold : float
        Cutoff applied to the normalised entropy.

    Returns
    -------
    list
        Gene names, in ``var_names`` order, whose normalised entropy is
        greater than ``threshold``. Empty when there are no genes or when
        every gene has the same entropy.
    """
    gene_names = adata.var_names
    matrix = adata.X
    # Genes are columns. Converting a sparse matrix to CSC once makes each
    # column slice cheap, instead of building a new AnnData view per gene.
    if scipy.sparse.issparse(matrix):
        matrix = matrix.tocsc()

    # ``col:col + 1`` keeps the slice two-dimensional for dense and sparse data alike.
    entropy_values = np.array(
        [calculate_entropy(matrix[:, col:col + 1]) for col in range(len(gene_names))],
        dtype=float,
    )

    if entropy_values.size == 0:
        return []

    lowest = entropy_values.min()
    spread = entropy_values.max() - lowest
    if spread == 0:
        # All genes tie, so min-max scaling would be 0/0 (NaN) and no gene
        # could exceed the threshold; return the empty selection directly.
        return []

    normalized_entropy = (entropy_values - lowest) / spread

    svg_indices = np.flatnonzero(normalized_entropy > threshold)
    svg_genes = [gene_names[i] for i in svg_indices]

    return svg_genes

In [None]:
# Cutoff handed to detect_spatially_variable_genes; the same value is used for both samples.
threshold = 0.6
svg_genes_embryo = detect_spatially_variable_genes(anndata_embryo, threshold)

svg_genes_brain = detect_spatially_variable_genes(anndata_brain, threshold)

# Report how many genes were selected in each sample.
print("Spatially variable genes in Mouse Embryo 9.5 sample:", len(svg_genes_embryo))
print("Spatially variable genes in Mouse Brain sample:", len(svg_genes_brain))

Spatially variable genes in Mouse Embryo 9.5 sample: 1669
Spatially variable genes in Mouse Brain sample: 73


In [None]:
def save_genes_to_txt(genes, filename):
    """Write the gene names in ``genes`` to ``filename``, one name per line.

    The file is opened in text write mode, so any existing content is
    replaced. Each name is followed by a newline character.
    """
    with open(filename, 'w') as handle:
        handle.writelines(name + '\n' for name in genes)

# Output file names, relative to the current working directory.
# NOTE(review): the "_06" suffix presumably records the 0.6 threshold used above — confirm, and keep in sync if the threshold changes.
embryo_filename = 'svg_genes_embryo_06.txt'
brain_filename = 'svg_genes_brain_06.txt'

# Write each sample's selected genes to its own text file.
save_genes_to_txt(svg_genes_embryo, embryo_filename)

save_genes_to_txt(svg_genes_brain, brain_filename)

# Confirm where each list was written.
print(f"Spatially variable genes for Mouse Embryo 9.5 saved to '{embryo_filename}'.")
print(f"Spatially variable genes for Mouse Brain saved to '{brain_filename}'.")

Spatially variable genes for Mouse Embryo 9.5 saved to 'svg_genes_embryo_06.txt'.
Spatially variable genes for Mouse Brain saved to 'svg_genes_brain_06.txt'.
