In [None]:
import pandas as pd
import json

import warnings 
warnings.filterwarnings("ignore")
from importlib import reload

# Reload the module so that edits to the XeniumCluster class are picked up without restarting the kernel (and losing the notebook's variables).
import xenium_cluster
reload(xenium_cluster)
from xenium_cluster import XeniumCluster
from utils.metrics import *

In [None]:
# Location of the gzip-compressed transcripts CSV, relative to the working directory.
file_path = "data/hBreast/transcripts.csv.gz"

# Load the whole table into a DataFrame; the compression codec is stated
# explicitly instead of being inferred from the file suffix.
df_transcripts = pd.read_csv(filepath_or_buffer=file_path, compression="gzip")

In [None]:
def run_experiment(data, dataset_name: str, current_spot_size: int, third_dim: bool, resolutions: list, n_clusters=3):
    """Build spot-level data for one configuration and run every clustering method on it.

    Args:
        data: Input passed straight to ``XeniumCluster(data=...)``.
        dataset_name: Name passed straight to ``XeniumCluster(dataset_name=...)``.
        current_spot_size: Value handed to ``set_spot_size``.
        third_dim: Forwarded to ``create_spot_data(third_dim=...)``.
        resolutions: Forwarded to the ``Leiden`` and ``Louvain`` calls.
        n_clusters: Cluster count used as ``num_clusters`` for ``Hierarchical``
            and as ``K`` for ``KMeans``.

    Returns:
        A 6-tuple, in this order: Leiden result, Louvain result,
        Hierarchical with ``include_spatial=True``, Hierarchical with
        ``include_spatial=False``, KMeans with the method's default
        ``include_spatial``, KMeans with ``include_spatial=False``.

    Side effects:
        Prints the spot-data shape. Spot data and plots are requested to be
        saved (``save_data=True`` / ``save_plot=True``); where they go is decided
        by ``XeniumCluster``.
    """
    xc = XeniumCluster(data=data, dataset_name=dataset_name)
    xc.set_spot_size(current_spot_size)
    xc.create_spot_data(third_dim=third_dim, save_data=True)

    print(f"The size of the spot data is {xc.xenium_spot_data.shape}")

    # Pre-processing. `xc.xenium_spot_data` is re-read for every call on purpose:
    # any of these methods may rebind the attribute.
    xc.normalize_counts(xc.xenium_spot_data)
    xc.generate_neighborhood_graph(xc.xenium_spot_data, plot_pcas=False)

    # Keyword arguments shared by the paired calls below.
    hierarchical_opts = dict(embedding="umap", save_plot=True, num_clusters=n_clusters)
    kmeans_opts = dict(save_plot=True, K=n_clusters)
    graph_opts = dict(embedding="umap", save_plot=True)

    # Execution order matches the original: Hierarchical x2, KMeans x2, Leiden, Louvain.
    hier_spatial = xc.Hierarchical(xc.xenium_spot_data, include_spatial=True, **hierarchical_opts)
    hier_plain = xc.Hierarchical(xc.xenium_spot_data, include_spatial=False, **hierarchical_opts)
    # The first KMeans call deliberately omits `include_spatial` and relies on the method's default.
    kmeans_default = xc.KMeans(xc.xenium_spot_data, **kmeans_opts)
    kmeans_plain = xc.KMeans(xc.xenium_spot_data, include_spatial=False, **kmeans_opts)
    leiden = xc.Leiden(xc.xenium_spot_data, resolutions, **graph_opts)
    louvain = xc.Louvain(xc.xenium_spot_data, resolutions, **graph_opts)

    return (
        leiden,
        louvain,
        hier_spatial,
        hier_plain,
        kmeans_default,
        kmeans_plain,
    )

In [None]:
# JSON file that the experiment loop rewrites after every completed run.
cluster_results_filepath = "results/clusters_w_plots_3_26.json"

# One (initially empty) result tree per clustering method.
cluster_dict = {method: {} for method in ("Leiden", "Louvain", "Hierarchical", "K-Means")}

In [None]:
# Sweep over spot size, 2D/3D spot construction, and cluster count. After every
# run the accumulated results are written to `cluster_results_filepath`, so a
# crash part-way through still leaves the completed runs on disk.
#
# Result layout:
#   cluster_dict[method][spot_size][third_dim] -> {resolution: labels}        (Leiden, Louvain)
#   cluster_dict[method][spot_size][third_dim] -> {True: labels, False: labels}
#       (Hierarchical, K-Means; key = whether spatial information was included)
#
# NOTE(review): results are not keyed by K, so each K overwrites the previous
# K's entries and only the last K survives in the JSON. Adding a K level would
# change the file's schema, so it is left unchanged here; confirm what the
# downstream readers of this file expect.
resolutions = [0.6]
for spot_size in [75]:
    for third_dim in [False, True]:
        for K in [6, 10]:
            (
                Leiden_cluster,
                Louvain_cluster,
                hierarchical_cluster,
                hierarchical_cluster_no_spatial,
                k_means_cluster,
                k_means_cluster_no_spatial,
            ) = run_experiment(df_transcripts, "hBreast", spot_size, third_dim, resolutions, n_clusters=K)

            # Graph-based methods: one label list per entry of the returned mapping.
            cluster_dict.setdefault("Leiden", {}).setdefault(spot_size, {})[third_dim] = {
                res: clusters.tolist() for res, clusters in Leiden_cluster.items()
            }
            cluster_dict.setdefault("Louvain", {}).setdefault(spot_size, {})[third_dim] = {
                res: clusters.tolist() for res, clusters in Louvain_cluster.items()
            }

            # Hierarchical and K-Means: keep BOTH the spatial (True) and the
            # non-spatial (False) labelling. Previously the non-spatial result was
            # assigned to the same slot right after the spatial one, replacing it,
            # so the spatial labels never reached the JSON file.
            cluster_dict.setdefault("Hierarchical", {}).setdefault(spot_size, {})[third_dim] = {
                True: hierarchical_cluster.tolist(),
                False: hierarchical_cluster_no_spatial.tolist(),
            }
            cluster_dict.setdefault("K-Means", {}).setdefault(spot_size, {})[third_dim] = {
                True: k_means_cluster.tolist(),
                False: k_means_cluster_no_spatial.tolist(),
            }

            print(f"Cluster with spot size {(spot_size, third_dim, K)} completed.")
            # Checkpoint after every run. json.dump converts the int/bool dict keys
            # to strings (e.g. 75 -> "75", True -> "true").
            with open(cluster_results_filepath, "w") as f:
                json.dump(cluster_dict, f, indent=4)