# In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN
from scipy.spatial.distance import pdist, squareform
from skbio.diversity.alpha import shannon, simpson
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import silhouette_score, davies_bouldin_score
from sklearn.decomposition import PCA

def load_data(otu_file, taxonomy_file, metadata_file):
    """Read the OTU, taxonomy and metadata CSV files into DataFrames.

    Every file is parsed with ``pandas.read_csv`` and its first column is
    used as the row index.

    Args:
        otu_file: Path or file-like object for the OTU table CSV.
        taxonomy_file: Path or file-like object for the taxonomy CSV.
        metadata_file: Path or file-like object for the metadata CSV.

    Returns:
        A tuple ``(otu, taxonomy, metadata)`` of DataFrames, in that order.
    """
    sources = (otu_file, taxonomy_file, metadata_file)
    # The three tables share the same parsing options, so read them in one
    # pass and unpack in argument order.
    otu, taxonomy, metadata = (
        pd.read_csv(source, index_col=0) for source in sources
    )
    return otu, taxonomy, metadata

def filter_top_species(otu, top_n=30):
    """Keep each row's ``top_n`` largest values and set the rest to zero.

    Args:
        otu: DataFrame of numeric abundances; each row is processed
            independently.
        top_n: Number of largest values to retain per row.

    Returns:
        A DataFrame with the same row index as ``otu`` whose columns are the
        union of the columns that made it into at least one row's top
        ``top_n``. Entries outside a row's own top ``top_n`` are ``0``.
    """
    # Each row yields a Series holding only its own top_n columns; pandas then
    # aligns those Series on the union of their labels, which introduces NaN
    # wherever a column is not among a particular row's top_n.
    otu_top = otu.apply(lambda row: row.nlargest(top_n), axis=1)
    # The fill must happen *after* that alignment. Filling inside the lambda
    # (before alignment) leaves the alignment NaNs in place, and they then
    # propagate into distance and diversity computations.
    return otu_top.fillna(0)

def compute_diversity_indices(otu):
    """Compute Shannon and Simpson diversity for every row of ``otu``.

    Args:
        otu: DataFrame of counts; each row is passed as one vector to
            skbio's ``shannon`` and ``simpson`` functions.

    Returns:
        A DataFrame with columns ``'Shannon_Index'`` and ``'Simpson_Index'``,
        built from the two per-row results.
    """
    shannon_per_row = otu.apply(shannon, axis=1)
    simpson_per_row = otu.apply(simpson, axis=1)
    return pd.DataFrame({
        'Shannon_Index': shannon_per_row,
        'Simpson_Index': simpson_per_row,
    })

def apply_dbscan_clustering(data, metric='braycurtis', eps=0.5, min_samples=5):
    """Cluster the rows of ``data`` with DBSCAN on a precomputed distance matrix.

    Args:
        data: Observations to cluster, one per row (anything accepted by
            ``scipy.spatial.distance.pdist``).
        metric: Distance metric name forwarded to ``pdist``.
        eps: DBSCAN ``eps`` parameter, expressed in units of ``metric``.
        min_samples: DBSCAN ``min_samples`` parameter.

    Returns:
        The fitted model's ``labels_`` array, one label per row of ``data``.
    """
    # pdist returns the condensed pairwise distances; DBSCAN with
    # metric='precomputed' is given the square form.
    condensed = pdist(data, metric=metric)
    distance_matrix = squareform(condensed)

    model = DBSCAN(eps=eps, min_samples=min_samples, metric='precomputed')
    model.fit(distance_matrix)
    return model.labels_

def evaluate_clusters(data, labels):
    """Score a clustering with the silhouette and Davies-Bouldin indices.

    The scores are only computed when they are well defined:

    * there are at least two distinct labels,
    * no sample carries the label ``-1`` (the value scikit-learn's DBSCAN
      assigns to noise points), and
    * there are fewer distinct labels than samples; scikit-learn's scorers
      raise ``ValueError`` otherwise.

    Args:
        data: Feature matrix, one row per sample.
        labels: Cluster label for each row of ``data``.

    Returns:
        A dict with keys ``'Silhouette Score'`` and ``'Davies-Bouldin Index'``.
        Both values are ``None`` when the conditions above are not met.
    """
    undefined = {'Silhouette Score': None, 'Davies-Bouldin Index': None}

    unique_labels = set(labels)
    n_labels = len(unique_labels)

    # Ensure valid clusters exist: more than one label and no noise label.
    if n_labels < 2 or -1 in unique_labels:
        return undefined
    # Both scorers need 2 <= n_labels <= n_samples - 1; when every sample is
    # its own cluster they would raise instead of returning a score.
    if n_labels >= len(labels):
        return undefined

    silhouette = silhouette_score(data, labels, metric='euclidean')
    db_index = davies_bouldin_score(data, labels)
    return {'Silhouette Score': silhouette, 'Davies-Bouldin Index': db_index}

def visualize_clusters(data, labels, metadata):
    """Plot the clustered samples on their first two principal components.

    ``data`` is standardized, projected to two PCA components, joined with
    ``metadata`` on the row index and drawn as a scatter plot coloured by
    cluster and styled by the ``'Geography'`` metadata column.

    Args:
        data: Feature matrix, one row per sample. When it is a DataFrame its
            index is used to join against ``metadata``.
        labels: Cluster label for each row of ``data``, in row order.
        metadata: DataFrame indexed like ``data`` that contains a
            ``'Geography'`` column.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    pca = PCA(n_components=2)
    pca_result = pca.fit_transform(StandardScaler().fit_transform(data))

    # fit_transform returns a bare array, so the sample index of `data` has to
    # be re-attached explicitly; otherwise the index-based merge below joins a
    # RangeIndex against the metadata index and matches nothing (or the wrong
    # rows). Array-like input without an index keeps the default RangeIndex.
    sample_index = getattr(data, 'index', None)
    df = pd.DataFrame(pca_result, columns=['PC1', 'PC2'], index=sample_index)
    # Labels are positional (one per row of `data`), so bypass index alignment.
    df['Cluster'] = np.asarray(labels)
    df = df.merge(metadata, left_index=True, right_index=True)

    plt.figure(figsize=(8, 6))
    sns.scatterplot(data=df, x='PC1', y='PC2', hue='Cluster', style='Geography', alpha=0.7)
    plt.title('DBSCAN Clustering of Microbiome Data')
    plt.show()

