In [None]:
import csv
import os

import numpy as np
import pandas as pd
import NaiveDE
import SpatialDE
import matplotlib.pyplot as plt
import ipywidgets as w
w.IntSlider()

# cpu usage and time statistics
import psutil
import time
import platform

Get platform information

In [None]:
# ---- System info ----
# Collect basic hardware facts and store them next to the analysis results.
# platform.processor() returns an empty string when the CPU name cannot be
# determined, so fall back to the machine type instead of logging a blank.
cpu_model = platform.processor() or platform.machine() or "unknown"
cpu_cores = psutil.cpu_count(logical=False)
cpu_threads = psutil.cpu_count(logical=True)

total_ram = psutil.virtual_memory().total / (1024**3)  # GB

print("CPU model:", cpu_model)
print("CPU cores:", cpu_cores)
print("CPU threads:", cpu_threads)
print("Total RAM: {:.2f} GB".format(total_ram))

# ---------------- SAVE TO CSV -----------------
# `output_dir` is not assigned anywhere before this cell, so on a fresh kernel
# this cell used to stop with a NameError. Keep a value the user has already
# set; otherwise write into the current working directory.
output_dir = globals().get("output_dir") or os.getcwd()
os.makedirs(output_dir, exist_ok=True)
csv_file = os.path.join(output_dir, "system_info.csv")

# newline="" lets the csv module control the line endings itself.
with open(csv_file, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["CPU_model", "CPU_cores", "CPU_threads", "Total_RAM_GB"])
    writer.writerow([cpu_model, cpu_cores, cpu_threads, round(total_ram, 2)])

print(f"System info saved to {csv_file}")

Perform SpatialDE test on data in input files.

expression_csv : A CSV file with expression values. Columns are genes,
and rows are samples.

coordinate_csv : A CSV file with sample coordinates. Each row is a sample;
the columns with coordinates must be named 'x' and 'y'. For other formats
(e.g. 1d or 3d queries), it is recommended to write a custom Python
script to do the analysis.

results_csv : P-values and other relevant values for each gene
will be stored in this file, in CSV format.

In [None]:
# Edit these as needed
# Paths are assembled with os.path.join so the notebook also runs on systems
# where "\" is not a path separator; on Windows the resulting strings are the
# same as the previous hard-coded ones.
data_dir = os.path.join("data_after_qc", "SN048_A121573_Rep2")
expression_csv = os.path.join(data_dir, "count.not_normalized.csv")
coordinate_csv = os.path.join(data_dir, "idx.not_normalized.csv")

# Base folder to save all figures
base_dir = os.path.join(data_dir, "results_spatialDE")
# Create folder if it doesn't exist
os.makedirs(base_dir, exist_ok=True)
results_csv = os.path.join(base_dir, "results.csv")

# Settings read by the model-selection cell. They were commented out, which
# made that cell fail with a NameError when the notebook is run top to bottom.
model_selection_csv = os.path.join(base_dir, "model_selection.csv")
alfa_m = 0.05  # q-value cut-off for genes passed to the model search

In [None]:
# Expression table: first CSV column is used as the index
df = pd.read_csv(expression_csv, index_col=0)

# Filter practically unobserved genes (column total below 3)
gene_totals = df.sum(axis=0)
df = df.T[gene_totals >= 3].T

# Coordinate table: first CSV column is used as the index
sample_info = pd.read_csv(coordinate_csv, index_col=0)

# Remove empty features: keep only rows whose total count exceeds 5
sample_info['total_counts'] = df.sum(axis=1)
sample_info = sample_info[sample_info['total_counts'] > 5]

# Align the expression table with the remaining rows of sample_info
df = df.loc[sample_info.index]
X = sample_info[['x', 'y']]


# Convert data to log-scale, and account for depth
dfm = NaiveDE.stabilize(df.T).T
res = NaiveDE.regress_out(sample_info, dfm.T, 'np.log(total_counts)').T

# Perform Spatial DE test with default settings
results = SpatialDE.run(X, res)

# Save results and annotation in files for interactive plotting and interpretation
results.to_csv(results_csv)

Optionally, we can generate a model-selection file for the genes whose adjusted p-value (q-value) is less than alpha (`alfa_m`).


In [None]:
# Select the genes whose q-value is below alfa_m and run the model search on them
is_significant = results['qval'] < alfa_m
de_results = results.loc[is_significant].copy()
ms_results = SpatialDE.model_search(X, res, de_results)

# Store the model-search table as CSV
ms_results.to_csv(model_selection_csv)

# Visualisation
Volcano plot - the relation between significance and effect size. Effect size is the fraction of variance explained by spatial variation.

In [None]:
# Plot thresholds - edit these as needed
qval_thr = 0.01  # strict q-value cut-off used to pick the best genes
fsv_thr = 0.8    # FSV cut-off used to pick the best genes
alfa_v = 0.05    # significance level drawn as a horizontal line

In [None]:
x = results['FSV']      # Fraction of spatial variance (0–1)
q = results['qval']     # q-values (0–1)

# A log axis cannot place q == 0, so those genes would fall outside the
# visible range. Draw them at the smallest positive q-value instead; `q`
# itself is left untouched.
positive_q = q[q > 0]
q_floor = positive_q.min() if len(positive_q) else np.finfo(float).tiny
q_plot = q.clip(lower=q_floor)

# Use biological maximum (1.0) instead of dataset normalization
x_color = np.clip(x, 0, 1)   # red channel based on FSV
q_color = np.clip(q, 0, 1)   # blue channel based on q-value

# Combine channels into RGB (R = FSV, G = 0, B = q-value)
colors = np.stack([x_color, np.zeros_like(x_color), q_color], axis=1)

plt.figure(figsize=(6,5))
plt.title("Volcano plot for SpatialDE")   # plot title

plt.yscale('log')  # log scale for q-values
plt.scatter(x, q_plot, c=colors, label='All points')

# Threshold line for significance level
plt.axhline(alfa_v, c='black', lw=1, ls='--', label=f'Y threshold {alfa_v}')

plt.gca().invert_yaxis()  # invert y-axis to match volcano plot style

plt.xlabel('Fraction spatial variance')
plt.ylabel('Adj. P-value')

# The artists above carry labels, so the legend is no longer empty
plt.legend()
plt.tight_layout()

# Save volcano plot
volcano_file = os.path.join(base_dir, "volcano_plot.png")
plt.savefig(volcano_file, dpi=300)
plt.show()
plt.close()

Next, the best genes are selected: those with a fraction of spatial variance above `fsv_thr` and an adjusted p-value below `qval_thr`.

In [None]:
# A log axis cannot place q == 0, so the most significant genes (the ones this
# figure is meant to highlight) would fall outside the visible range. Draw
# them at the smallest positive q-value; selection below still uses `q`.
positive_q = q[q > 0]
q_floor = positive_q.min() if len(positive_q) else np.finfo(float).tiny
q_plot = q.clip(lower=q_floor)

plt.figure(figsize=(7,6))
plt.title("Volcano plot with the best genes for SpatialDE")   # plot title

plt.yscale('log')
plt.scatter(x, q_plot, c=colors, label='All points')

# ---- Threshold lines ----
plt.axhline(alfa_v, c='black', lw=1, ls='--', label=f'Y threshold {alfa_v}')
plt.axhline(qval_thr, c='red',   lw=1, ls='--', label=f'Y threshold {qval_thr}')

plt.axvline(fsv_thr, c='black', lw=1, ls='--', label=f'X threshold {fsv_thr}')

# ---- Detect points in the strict quadrant ----
# High FSV and low q-value: the upper-right quadrant once the y-axis is inverted
mask_strict = (x > fsv_thr) & (q < qval_thr)

# Plot strict points using another marker
plt.scatter(
    x[mask_strict],
    q_plot[mask_strict],
    c='yellow',
    marker='s',
    edgecolor='black',
    s=80,
    label='Strict quadrant points'
)

plt.gca().invert_yaxis()

plt.xlabel('Fraction spatial variance')
plt.ylabel('Adj. P-value')

plt.legend()
plt.tight_layout()

# Save volcano plot
volcano_file = os.path.join(base_dir, "volcano_plot_strict_quadrant.png")
plt.savefig(volcano_file, dpi=300)
plt.show()
plt.close()

Plot of the tissue in which the colour of each spot corresponds to the expression level of a selected gene, chosen from the genes marked as the best in the previous step.

In [None]:
# Get gene names that pass the strict threshold
strict_genes = results.loc[mask_strict, 'g']

# Per-gene lookup tables built from the results table (gene name -> value)
results_by_gene = results.set_index('g')
fsv_map = results_by_gene['FSV'].to_dict()
l_map = results_by_gene['l'].to_dict()
qval_map = results_by_gene['qval'].to_dict()

# Dropdown will contain only genes passing strict threshold
strict_gene_list = list(strict_genes)

# Function to plot a selected gene
def plot_gene(gene):
    """Draw, save and show the spatial expression map of one gene.

    Every row of `sample_info` is drawn at its (x, y) position and coloured
    by `res[gene]`. The figure is written to `<base_dir>/<gene>.png` and
    then displayed.

    Parameters
    ----------
    gene : str or None
        Gene name as found in the 'g' column of `results`. `None` (what the
        dropdown passes when it has no options) is ignored.
    """
    if gene is None:
        return

    fsv_value = fsv_map[gene]
    l_value = l_map[gene]
    qval_value = qval_map[gene]

    # The title used to print the 'l' column under the label "FSV"; show the
    # real FSV and keep 'l' under its own name.
    title = (
        f"{gene} | FSV = {fsv_value:.3f} | l = {l_value:.3f} "
        f"| qval = {qval_value:.2e}"
    )

    plt.figure(figsize=(10,3))
    plt.scatter(sample_info['x'], sample_info['y'], c=res[gene], cmap='viridis')
    plt.title(title)
    plt.axis('equal')
    plt.colorbar(label='Expression')
    plt.tight_layout()

    # Save figure automatically
    file_path = os.path.join(base_dir, f"{gene}.png")
    plt.savefig(file_path, dpi=300)

    # Display figure
    plt.show()
    plt.close()

# ---- Interactive dropdown ----
if strict_gene_list:
    w.interact(
        plot_gene,
        gene=w.Dropdown(options=strict_gene_list, description="Gene:")
    )
else:
    # Nothing to choose from: say so instead of showing an empty dropdown
    print(
        f"No gene passes the strict thresholds "
        f"(FSV > {fsv_thr}, qval < {qval_thr})."
    )