In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler, KBinsDiscretizer

from scipy.stats import sem

In [None]:
### LOAD ALL DATAFRAMES (JUST MMSE USED HERE FOR EXAMPLE) ###
# NOTE(review): both read_csv() calls below have no file path. pandas.read_csv
# requires a path or buffer as its first argument, so the paths must be filled
# in before this cell can run.
# Later cells in this notebook read these columns from `mmse`: 'MMSE_ROC_12',
# 'Age', 'Gender', 'Dementia_type', 'Dyad', 'Baseline MMSE',
# 'Baseline ADAS-Cog' and 'Baseline BADL'.

mmse = pd.read_csv()
DT = pd.read_csv() # Dataframe with dementia diagnoses for each participant

In [None]:
# Give the rate-of-change column the display name used throughout the notebook.
mmse = mmse.rename(columns={'MMSE_ROC_12': 'MMSE ROC'})

In [None]:
# Discretise the MMSE rate of change into three ordinal bins (codes 0, 1, 2)
# whose edges are chosen by 1-D k-means; the seed is fixed at 42.
kmean_discretizer = KBinsDiscretizer(
    n_bins=3,
    encode='ordinal',
    strategy='kmeans',
    random_state=42,
)

# The double brackets pass a one-column DataFrame (2-D input) to the
# discretizer; the returned codes are cast to int before being stored.
mmse['kmean_bins'] = (
    kmean_discretizer
    .fit_transform(mmse[['MMSE ROC']])
    .astype(int)
)

# Diagnostics: number of participants per bin, then the fitted bin edges.
print(mmse['kmean_bins'].value_counts())
print(kmean_discretizer.bin_edges_)

In [None]:
# Print the mean ± standard deviation of 'MMSE ROC' within each of the three bins.
# The previous version also computed a standard error and a 1.96*SEM interval
# (from an already-rounded SEM) but never reported either, so that dead code
# is removed; the printed output is unchanged.
for n in range(3):
    # Select the bin's rate-of-change values once instead of re-filtering per statistic.
    cluster_roc = mmse.loc[mmse['kmean_bins'] == n, 'MMSE ROC']
    mean_value = round(cluster_roc.mean(), 1)
    std_dev = round(cluster_roc.std(), 1)
    print(f"Cluster {n}: {mean_value} ± {std_dev}")

In [None]:
# Human-readable names for the three ordinal bin codes.
# NOTE(review): naming bin 0 'Steep Decline' assumes the lowest-coded bin holds
# the most negative rate of change — confirm against the printed bin edges.
cluster_map = {
    0: 'Steep Decline',
    1: 'Moderate Decline',
    2: 'Slow Decline',
}

# Replace the integer codes with their names, then expose the column as 'ROC Group'.
mmse = (
    mmse
    .assign(kmean_bins=mmse['kmean_bins'].map(cluster_map))
    .rename(columns={'kmean_bins': 'ROC Group'})
)

In [None]:
# Three-step blue palette, ordered from the lightest shade to the darkest.
mmse_scheme = ['#BFC9E4', '#7397CB', '#284080']

In [None]:
# Column order of the summary table after the 'Overall' column.
clusters = ['Slow Decline', 'Moderate Decline', 'Steep Decline']


def _mean_range(scores, flip_from=None):
    """Format a score column as ``"mean (min - max)"``.

    Parameters
    ----------
    scores : pandas.Series
        Numeric scores for one group of participants.
    flip_from : number, optional
        When given, every reported statistic is ``flip_from - score``, so the
        column's maximum becomes the reported minimum and vice versa.

    Returns
    -------
    str
        Mean rounded to one decimal, followed by the rounded minimum and maximum.
    """
    if flip_from is None:
        mean, low, high = scores.mean(), scores.min(), scores.max()
    else:
        mean = flip_from - scores.mean()
        low = flip_from - scores.max()
        high = flip_from - scores.min()
    return f"{round(mean, 1)} ({round(low)} - {round(high)})"


def _summarise_group(frame):
    """Return the summary-table cells for one group of participants.

    Parameters
    ----------
    frame : pandas.DataFrame
        Rows of ``mmse`` belonging to the group (or all rows for 'Overall').

    Returns
    -------
    dict
        Maps each table row label to its value; insertion order is the row order.
    """
    n_total = len(frame)
    n_female = len(frame[frame['Gender'] == 2])  # NOTE(review): assumes Gender == 2 codes female — confirm
    return {
        "n": n_total,
        "Slope": f"{round(frame['MMSE ROC'].mean(), 1)} ± {round(frame['MMSE ROC'].std(), 1)}",
        "Age": f"{round(frame['Age'].mean())} ± {round(frame['Age'].std())}",
        # '\\%' yields a literal backslash-percent for LaTeX; a bare '\%' is an
        # invalid escape sequence in a non-raw string and triggers a warning.
        "Female (%)": f"{n_female} ({round(n_female / n_total * 100)}\\%)",
        "Diagnosis: AD": len(frame[frame['Dementia_type'] == 'AD']),
        "Diagnosis: MCI": len(frame[frame['Dementia_type'] == 'MCI']),
        "Baseline MMSE (min-max)": _mean_range(frame['Baseline MMSE']),
        # NOTE(review): ADAS-Cog and BADL are reported as 90 - score and 60 - score;
        # presumably the stored columns are inverted — confirm the constants.
        "Baseline ADAS-Cog (min-max)": _mean_range(frame['Baseline ADAS-Cog'], flip_from=90),
        "Baseline BADL (min-max)": _mean_range(frame['Baseline BADL'], flip_from=60),
    }


# 'Overall' first, then one subset per ROC group in the order given by `clusters`.
group_frames = {"Overall": mmse}
for cluster in clusters:
    group_frames[cluster] = mmse[mmse['ROC Group'] == cluster]

# Same layout as before: {row label: {group name: value}}.
summary_data = {}
for group_name, frame in group_frames.items():
    for metric, value in _summarise_group(frame).items():
        summary_data.setdefault(metric, {})[group_name] = value

# Transpose so that metrics are rows and groups are columns.
summary_df = pd.DataFrame(summary_data).T
summary_df

In [None]:
# Export the summary table as LaTeX.
# The metric names live in the index of summary_df, so the index must be kept
# (index=False produced a table with unlabelled rows).
# escape=False leaves the pre-escaped '\%' in the cells untouched; the '%' in
# the row labels is therefore escaped by hand so LaTeX does not read it as a comment.
latex_ready = summary_df.rename(index=lambda label: label.replace('%', r'\%'))
print(latex_ready.to_latex(index=True, escape=False))

In [None]:
# Identifier and label columns are excluded from scaling.
mmse_to_scale = mmse.drop(columns=['Dyad', 'ROC Group', 'Dementia_type'])

# Rescale the remaining columns with MinMaxScaler, then re-attach the group
# labels. `.values` attaches them by position because the scaled frame has a
# fresh default index.
scaler = MinMaxScaler()
mmse_scaled = pd.DataFrame(
    scaler.fit_transform(mmse_to_scale),
    columns=mmse_to_scale.columns,
).assign(**{'ROC Group': mmse['ROC Group'].values})
mmse_scaled

In [None]:
# Shorten column names for plotting. Totals get an explicit 'Total' suffix, and
# the items renamed here for both MMSE and ADAS ('Naming', 'Commands') keep a
# short instrument tag.
mmse_scaled = mmse_scaled.rename(columns={
    'Baseline MMSE': 'MMSE Total',
    'Baseline ADAS-Cog': 'ADAS-Cog Total',
    'Naming (Baseline MMSE)': 'Naming (MMSE)',
    'Naming (Baseline ADAS)': 'Naming (ADAS)',
    'Commands (Baseline MMSE)': 'Commands (MMSE)',
    'Commands (Baseline ADAS)': 'Commands (ADAS)',
})

# Strip the instrument suffix from every remaining item column.
# A literal (regex=False) replacement matches the same text as the previous
# patterns without relying on '\(' / '\)' inside non-raw strings, which are
# invalid escape sequences and raise a warning on current Python versions.
for instrument_suffix in (' (Baseline MMSE)', ' (Baseline ADAS)', ' (Baseline BADL)'):
    mmse_scaled.columns = mmse_scaled.columns.str.replace(instrument_suffix, '', regex=False)

In [None]:
# Order in which the three groups are sorted.
hue_order = ['Slow Decline', 'Moderate Decline', 'Steep Decline']

# Convert 'ROC Group' to an ordered categorical so sorting follows hue_order
# rather than alphabetical order, then sort the rows by group.
mmse_scaled = (
    mmse_scaled
    .astype({'ROC Group': pd.CategoricalDtype(categories=hue_order, ordered=True)})
    .sort_values('ROC Group')
)

In [None]:
# Columns of mmse_scaled drawn on the radar chart (one spoke each) ...
labels = ['MMSE Total', 'Year', 'Season', 'Month', 'Date', 'Day', 'Country', 'County', 'City',
       'Building', 'Floor', 'Registration', 'World', 'Recall', 'Naming (MMSE)',
       'Repeat', 'Closeyoureyes', 'Writesentence', 'Copypentagons',
       'Commands (MMSE)']
# ... and the text shown next to each spoke, paired with `labels` by position.
plot_labels = ['MMSE Total', 'Year', 'Season', 'Month', 'Date', 'Day', 'Country', 'County', 'City',
       'Building', 'Floor', 'Registration', 'World', 'Recall', 'Naming',
       'Repeat', 'Close your eyes', 'Write sentence', 'Copy pentagons',
       'Commands']
# zip() below would silently drop trailing entries if the lists drifted apart.
assert len(labels) == len(plot_labels), "labels and plot_labels must have the same length"
num_vars = len(labels)

# One evenly spaced angle per variable; the first angle is repeated so that
# each group's polygon closes on itself.
angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()
angles += angles[:1]  # Complete the loop

fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(polar=True))

colours = mmse_scheme

# observed=False is the behaviour the implicit default already gave; passing it
# explicitly avoids pandas' FutureWarning when grouping on a categorical column.
for i, (group_name, group_data) in enumerate(mmse_scaled.groupby('ROC Group', observed=False)):
    # Mean scaled score per variable, closed back onto the first value.
    values = group_data[labels].mean().tolist()
    values += values[:1]

    ax.plot(angles, values, label=f"{group_name}", color=colours[i % len(colours)], linewidth=2)
    ax.fill(angles, values, color=colours[i % len(colours)], alpha=0.25)

# Keep a tick on every spoke but hide the default tick labels; custom labels
# are drawn below so that their alignment can be controlled.
ax.set_xticks(angles[:-1])
ax.set_xticklabels([])


def _label_alignment(angle):
    """Return the horizontal text alignment for a spoke label at ``angle`` (radians).

    Labels on the right half of the circle are left-aligned, labels on the left
    half are right-aligned, and labels at the very top or bottom are centred,
    so the text always grows away from the plot.
    """
    horizontal = np.cos(angle)
    if abs(horizontal) < 1e-9:  # top / bottom spoke, allowing for float error
        return 'center'
    return 'left' if horizontal > 0 else 'right'


# Derived from the angles instead of hard-coded, so it adapts to any number of
# variables; for the 20 MMSE variables it gives the same values as the former
# hand-written table.
specific_alignments = {idx: _label_alignment(angle) for idx, angle in enumerate(angles[:-1])}

for idx, (angle, label) in enumerate(zip(angles[:-1], plot_labels)):
    alignment = specific_alignments.get(idx, 'center')

    # x is the spoke angle (data coordinates); y=1.1 is in axes coordinates,
    # which places the text just outside the outer circle.
    ax.text(
        x=angle,
        y=1.1,
        s=label,
        fontsize=20,
        horizontalalignment=alignment,
        verticalalignment='center',
        transform=ax.get_xaxis_transform(),
    )

# The group names are passed as `label=` above; without a legend the three
# polygons cannot be told apart.
ax.legend(loc='upper left', bbox_to_anchor=(1.25, 1.15), fontsize=16, frameon=False)

plt.title("Relative MMSE component scores", fontsize=25, pad=45, y=1.05)

plt.show()

Repeat for the ADAS-Cog and BADL subquestions, and then repeat this entire pipeline for BADL clustering and profiling.