In [None]:
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns

In [None]:
# Load the cohort, keep the rows whose OLINK flag is 'No', and retain only
# rows with all three flow measurements and CMVD present.
df = pd.read_csv('')
df = df.loc[df['OLINK'] == 'No']
data = df[["global_stress", "global_rest", "global_reserve", 'CMVD']].dropna()
# Any cell that is not < 8 becomes NaN and its row is dropped; the comparison
# is applied to every selected column, CMVD included.
data = data.where(data < 8).dropna()

# One histogram per flow measure, side by side on a shared y axis.
fig, axes = plt.subplots(1, 3, figsize=(18, 6), sharey=True)
hist_color = sns.color_palette("Set2")[2]

for ax, (column, title) in zip(axes, [
    ("global_stress", "Stress Flow"),
    ("global_rest", "Rest Flow"),
    ("global_reserve", "Reserve Flow"),
]):
    sns.histplot(data=data, x=column, ax=ax, kde=False, color=hist_color)
    ax.set_title(title)

plt.tight_layout()
plt.show()


In [None]:
# Training matrix: rows with OLINK == 'No', the three flow features only,
# keeping rows where every feature is present and < 8.
df = pd.read_csv('')
df = df.loc[df['OLINK'] == 'No']
data = df[["global_stress", "global_rest", "global_reserve"]]
data = data.where(data < 8).dropna().values

# Elbow method: record the inertia of a fitted model for k = 1..10.
inertias = []
k_range = range(1, 11)

for k in k_range:
    kmeans = KMeans(n_clusters=k, random_state=42, n_init=10).fit(data)
    inertias.append(kmeans.inertia_)

# Elbow curve
plt.figure(figsize=(7, 5))
plt.plot(k_range, inertias, marker='o')
plt.title('Elbow Method for Optimal k')
plt.xlabel('Number of Clusters (k)')
plt.ylabel('Sum of Squared Distances (Inertia)')
plt.xticks(k_range)
plt.grid(True)
plt.show()

# Final model with the chosen k; `kmeans` is reused later for prediction.
n_clusters = 4
kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10).fit(data)

centroids, inertia, labels = (
    kmeans.cluster_centers_,
    kmeans.inertia_,
    kmeans.labels_,
)

# Feature table with the assigned cluster; built from the raw array, so it
# carries a fresh positional index.
df_clusters = pd.DataFrame(
    data, columns=["global_stress", "global_rest", "global_reserve"]
)
df_clusters["Cluster"] = labels

# Text summary of the fit.
print(f"Chosen number of clusters: {n_clusters}")

print("\nCluster centroids:")
for idx, centroid in enumerate(centroids):
    print(f"Cluster {idx}: {centroid}")

print(f"\nTotal Sum of Squared Distances (Inertia): {inertia:.2f}")

print("\nNumber of individuals per cluster:")
cluster_sizes = df_clusters['Cluster'].value_counts().sort_index()
print(cluster_sizes)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

def _cluster_sort_key(cluster_id):
    """Sort key for string cluster ids: integer-like ids first, in numeric order."""
    try:
        return (0, int(cluster_id), "")
    except ValueError:
        return (1, 0, cluster_id)


def _cluster_palette(cluster_ids):
    """Map each string cluster id to a Set2 colour.

    An integer-like id ``i`` always gets ``Set2[i % len(Set2)]``, so a given
    cluster keeps its colour even when some clusters are absent from the data.
    Other ids fall back to their position in ``cluster_ids``.
    """
    set2 = sns.color_palette("Set2")
    palette = {}
    for position, cluster_id in enumerate(cluster_ids):
        try:
            slot = int(cluster_id)
        except ValueError:
            slot = position
        palette[cluster_id] = set2[slot % len(set2)]
    return palette


def plot_kmeans_clusters(data, labels, out_file):
    """
    Plot 2D projections of k-means clusters for 3 features.

    Draws three scatter panels (Flow Reserve vs Stress Flow, Stress Flow vs
    Rest Flow, Flow Reserve vs Rest Flow) coloured by cluster, saves the
    figure to ``out_file`` at 300 dpi and shows it.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'global_stress', 'global_rest' and
        'global_reserve'. It is not modified; a renamed copy is plotted.
    labels : pandas.Series or numpy.ndarray
        One cluster id per row of ``data``. Converted with ``.astype(str)``
        so seaborn treats the clusters as categories. Any set of ids is
        accepted (the palette is built from the ids present).
    out_file : str
        Path handed to ``plt.savefig``.
    """
    cluster_plot = data.rename(columns = {'global_stress': 'Stress Flow', 'global_rest': 'Rest Flow', 'global_reserve': 'Flow Reserve'})
    cluster_plot["Cluster"] = labels.astype(str)  # convert to string for distinct color mapping

    # Build the palette from the ids actually present instead of a fixed
    # '0'..'3' table, and fix the hue order so the legend is sorted rather
    # than following order of appearance in the data.
    cluster_ids = sorted(cluster_plot["Cluster"].dropna().unique(), key=_cluster_sort_key)
    cluster_palette = _cluster_palette(cluster_ids)

    sns.set(style="whitegrid")
    fig, axes = plt.subplots(1, 3, figsize=(18, 6), sharey=False)
    title_fontsize = 20
    label_fontsize = 18

    # (x column, y column, panel title) for each of the three projections.
    # NOTE(review): every axis, including Flow Reserve, is labelled in
    # ml/min/g; if global_reserve is a stress/rest ratio it has no unit —
    # confirm before publishing.
    panels = [
        ("Stress Flow", "Flow Reserve", "Flow Reserve vs Stress Flow"),
        ("Rest Flow", "Stress Flow", "Stress Flow vs Rest Flow"),
        ("Rest Flow", "Flow Reserve", "Flow Reserve vs Rest Flow"),
    ]
    last_panel = len(panels) - 1

    for panel_idx, (x_col, y_col, title) in enumerate(panels):
        ax = axes[panel_idx]
        sns.scatterplot(
            data=cluster_plot,
            x=x_col,
            y=y_col,
            hue="Cluster",
            hue_order=cluster_ids,
            palette=cluster_palette,
            s=20,
            ax=ax,
            # Only the last panel draws a legend; its entries feed the
            # figure-level legend below.
            legend=(panel_idx == last_panel),
        )
        ax.set_title(title, fontsize=title_fontsize)
        ax.set_xlabel(f"{x_col} (ml/min/g)", fontsize=label_fontsize)
        ax.set_ylabel(f"{y_col} (ml/min/g)", fontsize=label_fontsize)
        ax.tick_params(axis='both', labelsize=16)

    # Distinct local names so the `labels` argument is not shadowed.
    legend_handles, legend_labels = axes[last_panel].get_legend_handles_labels()
    # NOTE(review): loc='upper right' anchored at figure point (0, 1) puts the
    # legend's upper-right corner on the figure's top-left corner, i.e. outside
    # the canvas; plt.savefig below does not request a tight bounding box, so
    # the saved file may not contain this legend — confirm intended placement.
    fig.legend(
        legend_handles,
        legend_labels,
        loc='upper right',
        fontsize=20,
        ncol=max(1, len(cluster_ids)),  # one row; equals 4 for four clusters
        title="Cluster",
        title_fontsize=20,
        bbox_to_anchor=(0, 1),
    )

    plt.tight_layout()
    plt.savefig(out_file, dpi=300)
    plt.show()

plot_kmeans_clusters(df_clusters, df_clusters['Cluster'], out_file='PMBB_Clustering_train.png')
plot_kmeans_clusters(df_project, df_project['Assigned_Cluster'], out_file='OLINK_Clustering_project.png')

In [None]:
df_full = pd.read_csv('')

# Apply same filtering as used in training
df_project = df_full[df_full['OLINK'] == 'Yes']
X_project = df_project[["global_stress", "global_rest", "global_reserve"]]
X_project = X_project[X_project < 8].dropna()

# Project onto trained KMeans clusters
projected_labels = kmeans.predict(X_project.values)
df_project = X_project.copy()
df_project["Assigned_Cluster"] = projected_labels

# Merge with original df_full
df_full_with_labels = df_full.merge(df_project["Assigned_Cluster"], how='left', left_index=True, right_index=True)
df_project

In [None]:
# PRS table, indexed by its 'Unnamed: 0' column.
prs = pd.read_csv('', index_col='Unnamed: 0')

# Rows with OLINK == 'Yes' from the original table.
df_orig = pd.read_csv('')
df_orig = df_orig.loc[df_orig['OLINK'] == 'Yes']

# Attach the projected cluster labels, de-duplicate, index by PMBB_ID, then
# inner-join the PRS table on that index.
# NOTE(review): labels are attached by matching on the three flow values, so
# rows sharing identical values match each other — confirm this is intended.
# The final drop requires an 'Unnamed: 0' column to be present after the join.
df_merged = (
    pd.merge(
        df_orig,
        df_project,
        how="left",
        on=["global_stress", "global_rest", "global_reserve"],
        suffixes=('', '_cluster'),
    )
    .drop_duplicates()
    .set_index('PMBB_ID')
    .join(prs, how='inner', lsuffix='_left', rsuffix='_right')
    .drop(columns='Unnamed: 0')
)
df_merged.to_csv('')