# Implementation

In [None]:
# %load_ext autoreload
# %autoreload 2
# %reload_ext autoreload
import numpy as np
import argparse
from data_loader import load_data, preprocess
from model import Autoencoder
from trainer import train_autoencoder, get_embeddings
from cluster import cluster_embeddings, evaluate_clustering

def main(filepath, file_format, n_clusters, encoding_dim, epochs, learning_rate, batch_size, normalize, scale, log_transform, n_top_genes, true_labels=None):
    """Run the full pipeline: load, preprocess, train, embed, cluster, evaluate.

    Args:
        filepath: Path handed to ``load_data``.
        file_format: Passed to ``load_data`` as its ``format`` keyword.
        n_clusters: Number of clusters requested from ``cluster_embeddings``.
        encoding_dim: Second argument to the ``Autoencoder`` constructor
            (presumably the size of the latent space -- confirm in ``model``).
        epochs: Forwarded to ``train_autoencoder``.
        learning_rate: Forwarded to ``train_autoencoder``.
        batch_size: Forwarded to ``train_autoencoder``.
        normalize: Forwarded to ``preprocess``.
        scale: Forwarded to ``preprocess``.
        log_transform: Forwarded to ``preprocess``.
        n_top_genes: Forwarded to ``preprocess``.
        true_labels: Optional ground-truth labels, one per row of the
            embeddings. When omitted, the evaluation step is skipped.

    Returns:
        A tuple ``(cluster_labels, embeddings, scaler)`` where ``scaler`` is
        the second value returned by ``preprocess``.

    Raises:
        ValueError: If ``true_labels`` is given but its length differs from
            the number of embedding rows.
    """
    # Load and preprocess data
    print("Loading and Preprocessing Data")
    data = load_data(filepath, format=file_format)
    processed_data, scaler = preprocess(data, normalize, scale, log_transform, n_top_genes)
    # The model's input width is taken from the preprocessed matrix.
    input_dim = processed_data.shape[1]

    # Define and train model
    print("Defining and Training Model")
    autoencoder = Autoencoder(input_dim, encoding_dim)
    trained_model = train_autoencoder(autoencoder, processed_data, epochs, batch_size, learning_rate)

    # Get embeddings
    print("Obtaining Embeddings")
    embeddings = get_embeddings(trained_model, processed_data)

    # Clustering
    print("Clustering")
    cluster_labels = cluster_embeddings(embeddings, n_clusters)

    # Evaluation (only when true labels are available). Scoring against
    # randomly generated labels would report meaningless, non-reproducible
    # ARI/NMI values, so the step is skipped rather than faked.
    if true_labels is None:
        print("Skipping Clustering Evaluation: no true labels provided")
    else:
        n_samples = embeddings.shape[0]
        if len(true_labels) != n_samples:
            raise ValueError(
                f"true_labels has {len(true_labels)} entries but there are "
                f"{n_samples} samples"
            )
        evaluation_results = evaluate_clustering(true_labels, cluster_labels)
        print(f"Clustering Evaluation: ARI={evaluation_results['ARI']:.4f}, NMI={evaluation_results['NMI']:.4f}")
    return cluster_labels, embeddings, scaler

# Hard-coded parameters for a manual test run
if __name__ == "__main__":
    # Input data
    file_format = 'h5'
    filepath = 'scDeepClustering_Sample_Data/mouse_bladder_cell_select_2100.h5'

    # Preprocessing options
    n_top_genes = 2000
    log_transform = True
    scale = True
    normalize = True

    # Model and training settings
    encoding_dim = 32
    batch_size = 32
    learning_rate = 0.001
    epochs = 100

    # Clustering
    n_clusters = 3

    # Keyword arguments keep each value visibly paired with its parameter.
    main(
        filepath=filepath,
        file_format=file_format,
        n_clusters=n_clusters,
        encoding_dim=encoding_dim,
        epochs=epochs,
        learning_rate=learning_rate,
        batch_size=batch_size,
        normalize=normalize,
        scale=scale,
        log_transform=log_transform,
        n_top_genes=n_top_genes,
    )

: 