In [1]:
import pandas as pd

import warnings
warnings.filterwarnings("ignore")
from importlib import reload

# this ensures that I can update the class without losing my variables in my notebook
import xenium_cluster
reload(xenium_cluster)
from xenium_cluster import XeniumCluster
from utils.metrics import *


In [2]:
# Location of the gzip-compressed transcripts CSV for the hBreast dataset
file_path = 'data/hBreast/transcripts.csv.gz'

# Load the whole table into memory; the compression codec is stated explicitly
# instead of being inferred from the file extension.
df_transcripts = pd.read_csv(
    filepath_or_buffer=file_path,
    compression='gzip',
)

In [15]:
def run_experiment(data, dataset_name: str, current_spot_size: int, third_dim: bool, resolutions: list):
    """Run the spot-binning and graph-clustering pipeline for one configuration.

    A fresh ``XeniumCluster`` is built from ``data``; spot data is created for
    the requested spot size (with ``save_data=True``), its shape is printed,
    counts are normalized, a neighborhood graph is generated (PCA plots off),
    and Leiden and Louvain are each run over ``resolutions`` with the "umap"
    embedding and plot saving enabled.

    Args:
        data: Passed through to ``XeniumCluster(data=...)``.
        dataset_name: Passed through to ``XeniumCluster(dataset_name=...)``.
        current_spot_size: Value handed to ``set_spot_size``.
        third_dim: Forwarded to ``create_spot_data(third_dim=...)``.
        resolutions: Resolution values given to both ``Leiden`` and ``Louvain``.

    Returns:
        A 3-tuple ``(leiden, louvain, hierarchical)``. The first two are
        whatever ``XeniumCluster.Leiden`` / ``XeniumCluster.Louvain`` return;
        the third is currently the placeholder string "Not yet available.".
    """
    xc = XeniumCluster(data=data, dataset_name=dataset_name)
    xc.set_spot_size(current_spot_size)
    xc.create_spot_data(third_dim=third_dim, save_data=True)

    print(f"The size of the spot data is {xc.xenium_spot_data.shape}")

    # `xenium_spot_data` is looked up on the instance before every step rather
    # than cached in a local, in case a pipeline step rebinds the attribute.
    xc.normalize_counts(xc.xenium_spot_data)
    xc.generate_neighborhood_graph(xc.xenium_spot_data, plot_pcas=False)

    # Both graph-clustering methods are invoked with identical keyword options.
    shared_options = {"embedding": "umap", "save_plot": True}
    leiden_result = xc.Leiden(xc.xenium_spot_data, resolutions, **shared_options)
    louvain_result = xc.Louvain(xc.xenium_spot_data, resolutions, **shared_options)

    # Hierarchical clustering is not wired in yet, so a placeholder string is
    # returned in its slot to keep the return shape stable.
    hierarchical_result = "Not yet available."

    return leiden_result, louvain_result, hierarchical_result

In [16]:
# Results are stored as cluster_dict[method][spot_size][third_dim].
cluster_dict = {}

resolutions = [0.05, 1, 2, 0.5]

for spot_size in [100, 75, 50, 25]:
    for third_dim in [False, True]:
        Leiden_cluster, Louvain_cluster, hierarchical_cluster = run_experiment(
            df_transcripts, "hBreast", spot_size, third_dim, resolutions
        )

        # Accumulate into the nested dicts. Assigning a brand-new dict to
        # cluster_dict[method] on every iteration would discard all earlier
        # (spot_size, third_dim) results and keep only the final combination.
        for method, result in (
            ("Leiden", Leiden_cluster),
            ("Louvain", Louvain_cluster),
            ("hierarchical", hierarchical_cluster),
        ):
            cluster_dict.setdefault(method, {}).setdefault(spot_size, {})[third_dim] = result

        print(f"Cluster with spot size {(spot_size, third_dim)} completed.")

The size of the spot data is (7312, 545)
Cluster with resolutions 0.05 0       2
1       2
2       2
3       2
4       2
       ..
7307    2
7308    2
7309    2
7310    2
7311    2
Name: leiden_0.05, Length: 7312, dtype: category
Categories (3, object): ['0', '1', '2']
Cluster with resolutions 1 0       17
1       17
2       17
3       17
4       17
        ..
7307    10
7308    10
7309    10
7310    10
7311    10
Name: leiden_1, Length: 7312, dtype: category
Categories (18, object): ['0', '1', '2', '3', ..., '14', '15', '16', '17']
Cluster with resolutions 2 0       30
1       30
2       30
3       30
4       30
        ..
7307     9
7308     9
7309     9
7310     9
7311     9
Name: leiden_2, Length: 7312, dtype: category
Categories (31, object): ['0', '1', '2', '3', ..., '27', '28', '29', '30']
Cluster with resolutions 0.5 0       10
1       10
2       10
3       10
4       10
        ..
7307     4
7308     4
7309     4
7310     4
7311     4
Name: leiden_0.5, Length: 7312, dtype: cat

In [None]:
# Compare the stored Leiden results for spot sizes 100 and 75 (third_dim=True).
# NOTE(review): the recorded run of this cell raised NameError for `NMI`. The only
# candidate source in this notebook is `from utils.metrics import *` — confirm that
# module defines and exports `NMI`, and prefer importing it explicitly by name.
# NOTE(review): the import cell reloads `xenium_cluster` but not `utils.metrics`, so
# a function added to that module after the kernel first imported it would
# presumably not be visible until it is reloaded or the kernel restarts — verify.
# NOTE(review): this lookup needs cluster_dict["Leiden"] to hold entries for both
# spot sizes; also confirm the two results are comparable (different spot sizes may
# yield different numbers of spots) — TODO confirm.
NMI(cluster_dict["Leiden"][100][True], cluster_dict["Leiden"][75][True])

NameError: name 'NMI' is not defined