Running the fuzzy clustering algorithm to create cognitive and behavioral profiles.


In [1]:
# Imports
import os

import pandas as pd

from neurostatx.io.utils import load_df_in_any_format

In [2]:
# Setting up relevant paths to previous steps.
output_folder = "c:/Users/Rosalie Grégoire/OneDrive - SERVICE EXTERNE MAIN-D'OEUVRE DE DRUMMOND INC/Documents/UdeS/Hiver 2026/Crédits de recherche/" 
data_dir = f"{output_folder}/data/"
output_dir = f"{output_folder}/result/fuzzyclustering/"

# Create output directory if it does not exist. (avec ça, j'ai pas besoin de créer les dossiers avant)
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

In [None]:
# Running Clustering on raw variables using a CLI tool

!FuzzyClustering --in-dataset "{data_dir}/abcd_data_preprocessed.xlsx"\
    --out-folder "{output_dir}/ABCDFuzzyCMeans" \
    --desc-columns 22 --id-column "subjectkey" --pca --k 4 --m 2 --metric mahalanobis \
    --maxiter 5000 --error 1e-06 --cmap "bone_r" --radarplot \
    -v -f -s --processes 6

[32m2026-01-16 10:03:58[0m [35mPortableRosalie[0m [34mroot[10172][0m [1;30mINFO[0m Loading dataset(s)...
[32m2026-01-16 10:04:04[0m [35mPortableRosalie[0m [34mroot[10172][0m [1;30mINFO[0m Applying PCA dimensionality reduction.
[32m2026-01-16 10:04:04[0m [35mPortableRosalie[0m [34mroot[10172][0m [1;30mINFO[0m Bartlett's test of sphericity returned a p-value of 0.0 and Keiser-Meyer-Olkin (KMO) test returned a value of 0.6872199957929849.
[32m2026-01-16 10:04:19[0m [35mPortableRosalie[0m [34mroot[10172][0m [1;30mINFO[0m Generating dendrogram.
[32m2026-01-16 10:04:47[0m [35mPortableRosalie[0m [34mroot[10172][0m [1;30mINFO[0m Computing FCM from k=2 to k=4
[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed: 37.4min finished
[32m2026-01-16 10:42:14[0m [35mPortableRosalie[0m [34mroot[10172][0m [1;30mINFO[0m Plotting validation indicators and outputting final matrices.


In [4]:
# Projecting BANDA study using a CLI tool from neurostatx

!PredictFuzzyMembership --in-dataset "{data_dir}/banda_data_preprocessed.xlsx" \
    --out-folder "{output_dir}/BANDAProjected/" \
    --in-cntr "{output_dir}/ABCDFuzzyCMeans/CENTROIDS/clusters_centroids_4.xlsx" \
    --desc-columns 17 --id-column subjectkey --pca \
    --pca-model "{output_dir}/ABCDFuzzyCMeans/PCA/pca_model.pkl" \
    --m 2 --error 1e-06 --maxiter 5000 --metric mahalanobis --radarplot \
    --cmap "bone_r" -v -s -f

[32m2026-01-16 10:43:45[0m [35mPortableRosalie[0m [34mroot[21676][0m [1;30mINFO[0m Loading dataset(s)...
[32m2026-01-16 10:43:46[0m [35mPortableRosalie[0m [34mroot[21676][0m [1;30mINFO[0m Loading PCA model...
[32m2026-01-16 10:43:46[0m [35mPortableRosalie[0m [34mroot[21676][0m [1;30mINFO[0m Predicting membership matrix...
[32m2026-01-16 10:43:46[0m [35mPortableRosalie[0m [34mroot[21676][0m [1;30mINFO[0m Saving results...


In [5]:
# Projecting GESTE study using a CLI tool from neurostatx

!PredictFuzzyMembership --in-dataset "{data_dir}/geste_data_preprocessed.xlsx" \
    --out-folder "{output_dir}/GESTEProjected/" \
    --in-cntr "{output_dir}/ABCDFuzzyCMeans/CENTROIDS/clusters_centroids_4.xlsx" \
    --desc-columns 14 --id-column subjectkey --pca \
    --pca-model "{output_dir}/ABCDFuzzyCMeans/PCA/pca_model.pkl" \
    --m 2 --error 1e-06 --maxiter 5000 --metric mahalanobis --radarplot \
    --cmap "bone_r" -v -s -f

[32m2026-01-16 10:44:01[0m [35mPortableRosalie[0m [34mroot[26912][0m [1;30mINFO[0m Loading dataset(s)...
[32m2026-01-16 10:44:02[0m [35mPortableRosalie[0m [34mroot[26912][0m [1;30mINFO[0m Loading PCA model...
[32m2026-01-16 10:44:02[0m [35mPortableRosalie[0m [34mroot[26912][0m [1;30mINFO[0m Predicting membership matrix...
[32m2026-01-16 10:44:02[0m [35mPortableRosalie[0m [34mroot[26912][0m [1;30mINFO[0m Saving results...


Concatenating membership values from all studies together and compute a Graph Network object.

In [6]:
# load all datasets resulting from FCM analysis

abcd_fcm = load_df_in_any_format(f'{output_dir}/ABCDFuzzyCMeans/MEMBERSHIP_DF/clusters_membership_4.xlsx')
banda_fcm = load_df_in_any_format(f'{output_dir}/BANDAProjected/predicted_membership_matrix.xlsx')
geste_fcm = load_df_in_any_format(f'{output_dir}/GESTEProjected/predicted_membership_matrix.xlsx')

In [7]:
# Find difference in column names between ABCD and BANDA
abcd_banda_diff = set(abcd_fcm.columns) ^ set(banda_fcm.columns)

# Add missing columns to BANDA dataset.
for col in abcd_banda_diff:
    banda_fcm[col] = 0

# Find difference in column names between ABCD and GESTE
abcd_geste_diff = set(abcd_fcm.columns) ^ set(geste_fcm.columns)

# Add missing columns to GESTE dataset.
for col in abcd_geste_diff:
    geste_fcm[col] = 0

In [8]:
# Reorder columns to match ABCD dataset.
banda_matched = banda_fcm[abcd_fcm.columns]
geste_matched = geste_fcm[abcd_fcm.columns]

# Assert that all datasets have the same columns.
assert all(abcd_fcm.columns == banda_matched.columns), "Columns do not match between ABCD and BANDA."
assert all(abcd_fcm.columns == geste_matched.columns), "Columns do not match between ABCD and GESTE."

# Other sanity checks that the datasets still have the same number of rows.
assert len(banda_fcm) == len(banda_matched), "Number of rows in the matched dataset changed, please validate."
assert len(geste_fcm) == len(geste_matched), "Number of rows in the matched dataset changed, please validate."

# Sanity checks that random values are still the same in the datasets.
assert all(banda_fcm.loc[:, "Cluster #1"] == banda_matched.loc[:, "Cluster #1"]), "Random value in BANDA dataset changed, please validate."
assert all(banda_fcm.loc[:, "AgeMonths"] == banda_matched.loc[:, "AgeMonths"]), "Random value in BANDA dataset changed, please validate."
assert all(geste_fcm.loc[:, "Cluster #1"] == geste_matched.loc[:, "Cluster #1"]), "Random value in GESTE dataset changed, please validate."
assert all(geste_fcm.loc[:, "AgeMonths"] == geste_matched.loc[:, "AgeMonths"]), "Random value in GESTE dataset changed, please validate."

In [9]:
# Concatenate all datasets.
final_fcm = pd.concat([abcd_fcm, banda_matched, geste_matched],
                      axis = 0)

# Replace string cohort identifiers with integers. This will make handling of cohorts in the graph network object easier.
final_fcm["Cohort"] = final_fcm["Cohort"].replace({"ABCD" : 1, "BANDA" : 2, "GESTE": 3})

# Change Cohort column name to cohort.
final_fcm.rename(columns={"Cohort": "cohort"}, inplace=True)

# Save final dataset
final_fcm.to_excel(f"{output_dir}/merged_fcm_data.xlsx", index=False, header=True)

  final_fcm["Cohort"] = final_fcm["Cohort"].replace({"ABCD" : 1, "BANDA" : 2, "GESTE": 3})


Computing a Graph Network

In [13]:
# Using the merged dataset, we will used a CLI script to generate a graph network.

!ComputeGraphNetwork --in-dataset "{output_dir}/merged_fcm_data.xlsx" \
    --out-folder "{output_dir}/GraphNetwork/" --id-column "subjectkey" --desc-columns 28 \
    --layout spring --weight membership -v -f -s --import-data --plot-distribution

[32m2026-01-16 11:32:05[0m [35mPortableRosalie[0m [34mroot[10856][0m [1;30mINFO[0m Loading membership data.
[32m2026-01-16 11:32:13[0m [35mPortableRosalie[0m [34mroot[10856][0m [1;30mINFO[0m Computing graph network layout.
[32m2026-01-16 11:40:57[0m [35mPortableRosalie[0m [34mroot[10856][0m [1;30mINFO[0m Setting nodes position.
[32m2026-01-16 11:40:57[0m [35mPortableRosalie[0m [34mroot[10856][0m [1;30mINFO[0m Importing data within the .gml file.


In [14]:
# Copying and renaming the graph network file at the root of the output_dir.
# !cp comme dans In[9] du notebooks FCMeansClustering de Antho fonctionne juste sur MAC. Shutil.copy = sur windows
import shutil

shutil.copy(
    f"{output_dir}/GraphNetwork/network_graph_file.gml",
    f"{output_dir}/GraphNetwork.gml"
)

"c:/Users/Rosalie Grégoire/OneDrive - SERVICE EXTERNE MAIN-D'OEUVRE DE DRUMMOND INC/Documents/UdeS/Hiver 2026/Crédits de recherche//result/fuzzyclustering//GraphNetwork.gml"


Visualization of the Graph Network and clustering results.


In [24]:
# Visualizing the global graph network with all cohort merged, then highlighting subjects from each cohort within the global network.

!VisualizeGraphNetwork --in-graph "{output_dir}/GraphNetwork.gml" \
    --out-folder "{output_dir}/VizNetwork/" --weight membership --colormap bone_r \
    -v -s -f --title "Global clustering results" \
    --legend-title "Membership values"

[32m2026-01-16 19:18:14[0m [35mPortableRosalie[0m [34mroot[27232][0m [1;30mINFO[0m Loading graph data.
[32m2026-01-16 19:18:28[0m [35mPortableRosalie[0m [34mroot[27232][0m [1;30mINFO[0m Generating graph.


In [23]:
!pip show neurostatx


Name: NeuroStatX
Version: 0.1.0
Summary: Command-line toolbox to perform various statistical analysis on neuroscience data.
Home-page: https://github.com/gagnonanthony/NeuroStatX.git
Author: Anthony Gagnon
Author-email: anthony.gagnon7@usherbrooke.ca
License: MIT
Location: C:\Users\Rosalie Grégoire\OneDrive - SERVICE EXTERNE MAIN-D'OEUVRE DE DRUMMOND INC\Documents\UdeS\Hiver 2026\Crédits de recherche\Code\.venv\Lib\site-packages
Requires: coloredlogs, cyclopts, detect-delimiter, factor-analyzer, gdown, graphviz, ipykernel, kneed, matplotlib, networkx, numpy, openpyxl, p-tqdm, pandas, Pillow, pip, scikit-fuzzy, scikit-learn, scipy, seaborn, semopy, setuptools, strenum
Required-by: 




rendu ici

In [10]:
# Visualizing participants with a diagnosis of AD, ADHD, OCD, ODD, CD, DD, and PSYPATHO index using all cohorts.

!VisualizeGraphNetwork --in-graph "{output_dir}/GraphNetwork.gml" \
    --out-folder "{output_dir}/VizNetworkDxGlobal/" --weight memebrship --colormap bone_r \
    -v -s -f --label-name AD --label-name ADHD --label-name OCD --label-name ODD \
    --label-name CD --label-name DD --label-name PSYPATHO --title "Global clustering results" \
    --legend-title "Membership values"

[32m2026-01-16 09:28:31[0m [35mPortableRosalie[0m [34mroot[17892][0m [1;30mINFO[0m Loading graph data.
[32m2026-01-16 09:28:50[0m [35mPortableRosalie[0m [34mroot[17892][0m [1;30mINFO[0m Generating graph.
[32m2026-01-16 09:28:55[0m [35mPortableRosalie[0m [34mroot[17892][0m [1;30mINFO[0m Constructing graph(s) with custom labels.


In [11]:
# Visualizing participants with a diagnosis of AD, ADHD, OCD, ODD, CD, DD, and PSYPATHO index using only the ABCD cohort.

!VisualizeGraphNetwork --in-graph "{output_dir}/GraphNetwork.gml" \
    --out-folder "{output_dir}/VizNetworkDxABCD/" --weight memebrship --colormap bone_r \
    -v -s -f --label-name AD --label-name ADHD --label-name OCD --label-name ODD \
    --label-name CD --label-name DD --label-name PSYPATHO --title "ABCD clustering results" \
    --legend-title "Membership values" --cohort 1


[32m2026-01-16 09:50:53[0m [35mPortableRosalie[0m [34mroot[26904][0m [1;30mINFO[0m Loading graph data.
[32m2026-01-16 09:51:07[0m [35mPortableRosalie[0m [34mroot[26904][0m [1;30mINFO[0m Generating graph.
[32m2026-01-16 09:51:10[0m [35mPortableRosalie[0m [34mroot[26904][0m [1;30mINFO[0m Constructing graph(s) with custom labels.


In [12]:
# Visualizing participants with a diagnosis of AD, ADHD, OCD, ODD, CD, DD, and PSYPATHO index using only the BANDA cohort.

!VisualizeGraphNetwork --in-graph "{output_dir}/GraphNetwork.gml" \
    --out-folder "{output_dir}/VizNetworkDxBANDA/" --weight memebrship --colormap bone_r \
    -v -s -f --label-name AD --label-name ADHD --label-name OCD --label-name ODD \
    --label-name CD --label-name DD --label-name PSYPATHO --title "BANDA clustering results" \
    --legend-title "Membership values" --cohort 2

[32m2026-01-16 09:52:02[0m [35mPortableRosalie[0m [34mroot[18088][0m [1;30mINFO[0m Loading graph data.
[32m2026-01-16 09:52:14[0m [35mPortableRosalie[0m [34mroot[18088][0m [1;30mINFO[0m Generating graph.
[32m2026-01-16 09:52:16[0m [35mPortableRosalie[0m [34mroot[18088][0m [1;30mINFO[0m Constructing graph(s) with custom labels.


In [13]:
# Visualizing participants with a diagnosis of ADHD and PSYPATHO index using only the GESTE cohort.

!VisualizeGraphNetwork --in-graph "{output_dir}/GraphNetwork.gml" \
    --out-folder "{output_dir}/VizNetworkDxGESTE/" --weight memebrship --colormap bone_r \
    -v -s -f --label-name ADHD --label-name PSYPATHO --title "GESTE clustering results" \
    --legend-title "Membership values" --cohort 3

[32m2026-01-16 09:52:59[0m [35mPortableRosalie[0m [34mroot[9436][0m [1;30mINFO[0m Loading graph data.
[32m2026-01-16 09:53:10[0m [35mPortableRosalie[0m [34mroot[9436][0m [1;30mINFO[0m Generating graph.
[32m2026-01-16 09:53:15[0m [35mPortableRosalie[0m [34mroot[9436][0m [1;30mINFO[0m Constructing graph(s) with custom labels.
