In [None]:
import scanpy as sc
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
from minisom import MiniSom

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

In [None]:
# Load the PBMC 3k dataset and preprocess it in place:
#   1. drop cells expressing fewer than 200 genes,
#   2. drop genes detected in fewer than 3 cells,
#   3. normalise each cell to 10,000 total counts,
#   4. apply a log1p transform.
adata = sc.datasets.pbmc3k()
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=3)
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

# Build the autoencoder input: densify the expression matrix when it offers
# `toarray`, then standardise every feature column with StandardScaler.
X = adata.X
if hasattr(X, 'toarray'):
    X = X.toarray()
X = StandardScaler().fit_transform(X)

In [None]:
# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so weight initialisation, dropout masks and batch shuffling are
# repeatable on a fresh kernel (some GPU kernels may still be non-deterministic).
tf.keras.utils.set_random_seed(42)

input_dim = X.shape[1]  # one input unit per feature column of X
encoding_dim = 64       # width of the bottleneck (latent) layer

# Encoder: input -> 512 (L2-regularised, batch-norm, dropout) -> bottleneck.
input_layer = Input(shape=(input_dim,))
x = Dense(512, activation='relu', kernel_regularizer=l2(1e-5))(input_layer)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
encoded = Dense(encoding_dim, activation='relu')(x)

# Decoder: bottleneck -> 512 -> linear reconstruction of the input.
x = Dense(512, activation='relu')(encoded)
decoded = Dense(input_dim, activation='linear')(x)

# `autoencoder` is trained end to end; `encoder` shares the same layers and
# exposes only the bottleneck activations.
autoencoder = Model(inputs=input_layer, outputs=decoded)
encoder = Model(inputs=input_layer, outputs=encoded)

autoencoder.compile(optimizer=Adam(1e-3), loss='mse')
autoencoder.summary()

In [None]:
# Train the network to reconstruct its own input: X is both input and target.
autoencoder.fit(
    x=X,
    y=X,
    batch_size=32,
    epochs=50,
    shuffle=True,
    verbose=1,
)

In [None]:
# Run every row of X through the encoder to obtain its bottleneck representation.
latent_X = encoder.predict(x=X)

In [None]:
som_size = 4  # 4x4 SOM grid
som_seed = 42  # fixes SOM weight initialisation and training sample order

# The SOM input length is taken from the data it is trained on, so this cell
# cannot drift out of sync with the encoder's bottleneck width.
som = MiniSom(
    x=som_size,
    y=som_size,
    input_len=latent_X.shape[1],
    sigma=1.0,
    learning_rate=0.5,
    random_seed=som_seed,
)

# Initialise node weights from randomly picked latent vectors, then train for
# 1000 iterations on randomly drawn samples.
som.random_weights_init(latent_X)
som.train_random(latent_X, 1000, verbose=True)

In [None]:
# Look up the winning SOM node (grid coordinates) for every latent vector.
win_map = np.asarray([som.winner(latent_vec) for latent_vec in latent_X])

# Turn each (row, col) coordinate pair into a "row-col" string label and
# attach the labels to the cells.
clusters = ["{}-{}".format(node_row, node_col) for node_row, node_col in win_map]
adata.obs['som_cluster'] = clusters





In [None]:
# Step 6: Visualization
# The SOM clusters were found in the autoencoder latent space, so the
# neighbour graph and UMAP are built on that same representation (stored in
# `adata.obsm`) rather than on the raw expression matrix in `adata.X`.
adata.obsm['X_autoencoder'] = latent_X
sc.pp.neighbors(adata, use_rep='X_autoencoder')
sc.tl.umap(adata)
sc.pl.umap(adata, color=['som_cluster'], title="SOM Clustering on Autoencoder Latent Space")

In [None]:
# Step 7: Evaluation (optional; an internal metric only, no true labels used)
# Silhouette of the SOM labels, measured in the autoencoder latent space.
latent_silhouette = silhouette_score(latent_X, clusters)
print("Silhouette Score:", latent_silhouette)