In [4]:
#!/usr/bin/env python3
import os
import glob
import json
import sys

root_dir = "/neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_HULL/SOr_left_UKB40_32_16"

# Match: <root>/<config>/<date>/<run>/troiani_custom_embeddings/<region>/test_values.json
pattern = os.path.join(
    root_dir,
    "*",                      # config
    "*",                      # date
    "*",                      # run
    "troiani_custom_embeddings",
    "*",                      # region
    "test_values.json"
)

json_files = sorted(glob.glob(pattern))
if not json_files:
    print(f"No JSON files found under {root_dir}!")
    print("Pattern used was:", pattern)
    sys.exit(1)

# Header
print(f"\nChecking folder: {root_dir}")
print("="*120)
print(f"{'Config':<30} {'Region':<30} {'cv_score':>10} {'cv_std':>10} {'Path'}")
print("-"*120)

for jp in json_files:
    # relpath from root_dir:
    rel = os.path.relpath(jp, root_dir)
    parts = rel.split(os.sep)
    # parts == [ config, date, run, "troiani_custom_embeddings", region, "test_values.json" ]
    config = parts[0]
    region = parts[4]

    # load JSON
    try:
        with open(jp) as f:
            data = json.load(f)
        cv_score = float(data.get("cv_score", float("nan")))
        cv_std   = float(data.get("cv_std",   float("nan")))
    except Exception as e:
        print(f"Error reading {jp}: {e}")
        cv_score = cv_std = float("nan")

    print(f"{config:<30} {region:<30} {cv_score:10.4f} {cv_std:10.4f} {jp}")



Checking folder: /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_HULL/SOr_left_UKB40_32_16
Config                         Region                           cv_score     cv_std Path
------------------------------------------------------------------------------------------------------------------------
sigma_0.2_factor_0.2_FACTOR_batch_16 Left_OFC                           0.7881     0.0126 /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_HULL/SOr_left_UKB40_32_16/sigma_0.2_factor_0.2_FACTOR_batch_16/2025-09-16/20-03-30_0/troiani_custom_embeddings/Left_OFC/test_values.json
sigma_0.4_factor_0.4_FACTOR_batch_16 Left_OFC                           0.7849     0.0176 /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_HULL/SOr_left_UKB40_32_16/sigma_0.4_factor_0.4_FACTOR_batch_16/2025-09-16/20-03-33_0/troiani_custom_embeddings/Left_OFC/test_values.json
sigma_0.6_factor_0.6_FACTOR_batch_16 Left_OFC                           0.7819     0.0073 /neu

In [4]:
import os
import json

root_dir = "/neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_HULL/SC-sylv_left_UKB40_32_16"

print(f"\nChecking folder: {root_dir}")
print("="*100)
print(f"{'Config':<35} {'Run':<15} {'Avg Test R2':>10}")
print("-"*100)

for config_name in sorted(os.listdir(root_dir)):
    config_path = os.path.join(root_dir, config_name)
    if not os.path.isdir(config_path):
        continue

    # -- go into each date folder under this config --
    for date_name in sorted(os.listdir(config_path)):
        date_path = os.path.join(config_path, date_name)
        if not os.path.isdir(date_path):
            continue

        # -- go into each run folder under that date --
        for run_name in sorted(os.listdir(date_path)):
            run_path = os.path.join(date_path, run_name)
            if not os.path.isdir(run_path):
                continue

            # finally locate the isomap embeddings directory
            isomap_dir = os.path.join(run_path, "hcp_isomap_custom_embeddings")
            if not os.path.isdir(isomap_dir):
                continue

            # gather R2 over dims 1–6
            r2_values = []
            for dim in range(1, 7):
                json_path = os.path.join(
                    isomap_dir,
                    f"Isomap_central_left_dim{dim}",
                    "test_values.json"
                )
                if os.path.isfile(json_path):
                    try:
                        with open(json_path, "r") as f:
                            data = json.load(f)
                        r2 = data.get("test_r2", None)
                        if r2 is not None:
                            r2_values.append(r2)
                    except Exception as e:
                        print(f"Error reading {json_path}: {e}")

            # compute and print
            if r2_values:
                avg_r2 = sum(r2_values) / len(r2_values)
                print(f"{config_name:<35} {run_name:<15} {avg_r2:10.4f}")
            else:
                print(f"{config_name:<35} {run_name:<15} {'No valid R2':>10}")

print("="*100)



Checking folder: /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_HULL/SC-sylv_left_UKB40_32_16
Config                              Run             Avg Test R2
----------------------------------------------------------------------------------------------------
sigma_0.2_factor_0.2_FACTOR_batch_16 19-37-44_0          0.5319
sigma_0.4_factor_0.4_FACTOR_batch_16 19-40-08_0          0.5170
sigma_0.6_factor_0.6_FACTOR_batch_16 19-55-38_0          0.5226
sigma_0.8_factor_0.8_FACTOR_batch_16 19-55-37_0          0.5243
sigma_1.0_factor_1.0_FACTOR_batch_16 19-57-56_0          0.5265
sigma_1.2_factor_1.2_FACTOR_batch_16 19-59-31_0          0.5053
sigma_1.4_factor_1.4_FACTOR_batch_16 20-00-06_0          0.5217


In [4]:
import pandas as pd

# Path to the input file
input_file = "/neurospin/dico/data/deep_folding/current/datasets/UkBioBank40/OCCIPITAL_left_BT_seed_1.csv_clusters.csv"

# Load the CSV file
df = pd.read_csv(input_file)

# Check if 'cluster' column exists
if "cluster" not in df.columns:
    raise ValueError("The column 'cluster' was not found in the CSV file.")

# Sort by the 'cluster' column
df_sorted = df.sort_values(by="cluster")

# Save the sorted DataFrame to a new file
output_file = input_file.replace(".csv", "_sorted.csv")
df_sorted.to_csv(output_file, index=False)

print(f"Sorted file saved to: {output_file}")


Sorted file saved to: /neurospin/dico/data/deep_folding/current/datasets/UkBioBank40/OCCIPITAL_left_BT_seed_1_sorted.csv_clusters_sorted.csv


In [None]:
#!/usr/bin/env python3
import os
import shutil

BASE = "/neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/volume"
TARGET_DIR = "14-03-45_0"

for root, dirs, files in os.walk(BASE):
    if TARGET_DIR in dirs:
        path = os.path.join(root, TARGET_DIR)
        print(f"Removing: {path}")
        shutil.rmtree(path)


Removing: /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/volume/OCCIPITAL_left_32/sigma_null_factor_1.0_batch_128/2025-07-29/14-03-45_0
Removing: /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/volume/OCCIPITAL_left_32/sigma_null_factor_0.8_batch_128/2025-07-29/14-03-45_0
Removing: /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/volume/OCCIPITAL_left_32/sigma_null_factor_0.6_batch_128/2025-07-29/14-03-45_0
Removing: /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/volume/SOr_left_UKB40_32/sigma_null_factor_0.2_batch_128/2025-07-29/14-03-45_0
Removing: /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/volume/SOr_left_UKB40_32/sigma_null_factor_0.4_batch_128/2025-07-29/14-03-45_0
Removing: /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/volume/SOr_left_UKB40_32/sigma_null_factor_0.8_batch_128/2025-07-29/14-03-45_0
Removing: /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/volume/SOr_left_UKB40_32/sigma_null_factor_0.6_bat

In [38]:
import os
import json

root_dir = "/neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/volume/LARGE_CINGULATE_right_UKB40_32"

print(f"\nChecking folder: {root_dir}")
print("="*60)
print(f"{'Config':<30} {'Run':<15} {'CV Score':>10} {'CV Std':>10}")
print("-"*60)

for config_name in sorted(os.listdir(root_dir)):
    config_path = os.path.join(root_dir, config_name)
    if not os.path.isdir(config_path):
        continue

    for date_name in sorted(os.listdir(config_path)):
        date_path = os.path.join(config_path, date_name)
        if not os.path.isdir(date_path):
            continue

        for run_name in sorted(os.listdir(date_path)):
            run_path = os.path.join(date_path, run_name)
            if not os.path.isdir(run_path):
                continue

            # point at the ACC_custom_embeddings/Right_PCS folder
            acc_dir = os.path.join(run_path, "ACC_custom_embeddings", "Right_PCS")
            json_path = os.path.join(acc_dir, "test_values.json")
            if not os.path.isfile(json_path):
                continue

            try:
                with open(json_path, "r") as f:
                    data = json.load(f)
                cv_score = data.get("cv_score", None)
                cv_std   = data.get("cv_std",   None)
            except Exception as e:
                print(f"Error reading {json_path}: {e}")
                continue

            if cv_score is not None and cv_std is not None:
                print(f"{config_name:<30} {run_name:<15} {cv_score:10.4f} {cv_std:10.4f}")
            else:
                print(f"{config_name:<30} {run_name:<15} {'No CV':>10} {'No CV':>10}")

print("="*60)



Checking folder: /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/volume/LARGE_CINGULATE_right_UKB40_32
Config                         Run               CV Score     CV Std
------------------------------------------------------------
sigma_0.01_factor_0.2_batch_128 10-04-30_0          0.7936     0.0636
sigma_0.01_factor_0.2_batch_16 10-04-31_0          0.8729     0.0517
sigma_0.01_factor_0.4_batch_128 10-04-30_0          0.7781     0.0656
sigma_0.01_factor_0.4_batch_16 10-04-30_0          0.8697     0.0497
sigma_0.01_factor_0.6_batch_128 10-04-30_0          0.7543     0.1078
sigma_0.01_factor_0.6_batch_16 10-04-30_0          0.8693     0.0498
sigma_0.01_factor_0.8_batch_128 10-04-30_0          0.7496     0.0535
sigma_0.01_factor_0.8_batch_16 10-04-30_0          0.8821     0.0501
sigma_0.01_factor_1.0_batch_128 10-04-29_0          0.7325     0.0917
sigma_0.01_factor_1.0_batch_16 10-04-29_0          0.8322     0.0672
sigma_0.01_factor_1.2_batch_128 10-04-30_0          0.7359   

In [4]:
#!/usr/bin/env python3
import os
import shutil

# Root folder containing all the batch_32_seed_* configs
root_dir = "/neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_TOTAL_test/OCCIPITAL_left_16_16"

# Destination base in your home
dst_base = "/home/cb283697/Bureau/ST"

# Name of the file to copy
target_file = "full_embeddings.csv"

# Make sure the destination base exists
os.makedirs(dst_base, exist_ok=True)

for config_name in sorted(os.listdir(root_dir)):
    # on ne garde que les dossiers batch_32_seed_*
    if not config_name.startswith("batch_16_seed_"):
        continue

    config_path = os.path.join(root_dir, config_name)
    if not os.path.isdir(config_path):
        continue

    # For each date folder under this batch
    for date_name in sorted(os.listdir(config_path)):
        date_path = os.path.join(config_path, date_name)
        if not os.path.isdir(date_path):
            continue

        # For each run folder under that date
        for run_name in sorted(os.listdir(date_path)):
            run_path = os.path.join(date_path, run_name)
            if not os.path.isdir(run_path):
                continue

            # Path to the embedding output
            embed_dir = os.path.join(
                run_path,
                "ukb40_FCLp_no_classiffier_random_embeddings"
            )
            if not os.path.isdir(embed_dir):
                continue

            src_path = os.path.join(embed_dir, target_file)
            if not os.path.isfile(src_path):
                continue

            # Create a subfolder for this batch in the destination
            dst_dir = os.path.join(dst_base, config_name)
            os.makedirs(dst_dir, exist_ok=True)

            dst_path = os.path.join(dst_dir, target_file)
            try:
                shutil.copy2(src_path, dst_path)
                print(f"Copied {src_path} → {dst_path}")
            except Exception as e:
                print(f"Failed to copy {src_path}: {e}")


Copied /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_TOTAL_test/OCCIPITAL_left_16_16/batch_16_seed_128/2025-09-15/16-42-39_0/ukb40_FCLp_no_classiffier_random_embeddings/full_embeddings.csv → /home/cb283697/Bureau/ST/batch_16_seed_128/full_embeddings.csv
Copied /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_TOTAL_test/OCCIPITAL_left_16_16/batch_16_seed_212/2025-09-15/16-42-50_0/ukb40_FCLp_no_classiffier_random_embeddings/full_embeddings.csv → /home/cb283697/Bureau/ST/batch_16_seed_212/full_embeddings.csv
Copied /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_TOTAL_test/OCCIPITAL_left_16_16/batch_16_seed_42/2025-09-15/16-42-50_0/ukb40_FCLp_no_classiffier_random_embeddings/full_embeddings.csv → /home/cb283697/Bureau/ST/batch_16_seed_42/full_embeddings.csv
Copied /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_TOTAL_test/OCCIPITAL_left_16_16/batch_16_seed_48/2025-09-15/16-42-39_0/ukb40_FCLp_no_classiffier_random_emb

In [58]:
#!/usr/bin/env python3
import os
import shutil

# === CONFIG ===
SRC_DIR = "/neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/volume_seed/OCCIPITAL_left_32_16/residualized"
DST_DIR = "/home/cb283697/Bureau/volume-seed-res"
# =============

def copy_contents(src: str, dst: str):
    """
    Recursively copy all files and folders from src into dst.
    Creates dst if it doesn't exist. Overwrites existing files of the same name.
    """
    if not os.path.isdir(src):
        raise FileNotFoundError(f"Source directory not found: {src}")
    os.makedirs(dst, exist_ok=True)

    for root, dirs, files in os.walk(src):
        # compute relative path from SRC_DIR
        rel_path = os.path.relpath(root, src)
        # target directory under DST_DIR
        target_dir = os.path.join(dst, rel_path) if rel_path != "." else dst
        os.makedirs(target_dir, exist_ok=True)

        # copy each file
        for fname in files:
            src_file = os.path.join(root, fname)
            dst_file = os.path.join(target_dir, fname)
            shutil.copy2(src_file, dst_file)

    print(f"All contents from\n  {src}\nhave been copied into\n  {dst}")

if __name__ == "__main__":
    copy_contents(SRC_DIR, DST_DIR)


All contents from
  /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/volume_seed/OCCIPITAL_left_32_16/residualized
have been copied into
  /home/cb283697/Bureau/volume-seed-res


In [5]:
# #!/usr/bin/env python3
# import os
# import shutil

# # Root folder for the FCLp-subsc-FCLa-INSULA_left_32 configs
# src_root = (
#     "/neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/BT/"
#     "FCLp-subsc-FCLa-INSULA_left_32_32"
# )

# # Destination base for all full_embeddings.csv files
# dst_root = (
#     "/neurospin/dico/babdelghani/Runs/02_champollion_v1/"
#     "Output/BT"
#     "FCLP-embeddings"
# )

# # Make sure the destination base exists
# os.makedirs(dst_root, exist_ok=True)

# for config_name in sorted(os.listdir(src_root)):
#     # only the batch_128 sigma_null_factor configs
#     if not (config_name.startswith("sigma_null_factor") and config_name.endswith("_batch_128")):
#         continue

#     config_path = os.path.join(src_root, config_name)
#     if not os.path.isdir(config_path):
#         continue

#     # Dive into each date/run under this config
#     for date_name in sorted(os.listdir(config_path)):
#         date_path = os.path.join(config_path, date_name)
#         if not os.path.isdir(date_path):
#             continue

#         for run_name in sorted(os.listdir(date_path)):
#             run_path = os.path.join(date_path, run_name)
#             if not os.path.isdir(run_path):
#                 continue

#             # Look for the embeddings directory
#             embed_dir = os.path.join(run_path, "schiz_random_embeddings")
#             src_file = os.path.join(embed_dir, "full_embeddings.csv")
#             if not os.path.isfile(src_file):
#                 continue

#             # Prepare destination subfolder for this config
#             dst_config_dir = os.path.join(dst_root, config_name)
#             os.makedirs(dst_config_dir, exist_ok=True)

#             # Copy the file
#             dst_file = os.path.join(dst_config_dir, "full_embeddings.csv")
#             try:
#                 shutil.copy2(src_file, dst_file)
#                 print(f"Copied: {src_file}\n      → {dst_file}")
#             except Exception as e:
#                 print(f"Error copying {src_file}: {e}")


#!/usr/bin/env python3
#!/usr/bin/env python3
import os
import shutil

# === Configuration ===
src_root = (
    "/neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_HULL/FCLp-subsc-FCLa-INSULA_left_32_16"
)
dst_root = (
    "/neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_HULL/"
    "FCLP-embeddings"
)
# ======================

# Ensure the destination directory exists
os.makedirs(dst_root, exist_ok=True)

for config_name in sorted(os.listdir(src_root)):
    # Include all batch_32_seed_* configs
    if not config_name.startswith("sigma_"):
        continue

    config_path = os.path.join(src_root, config_name)
    if not os.path.isdir(config_path):
        continue

    # Dive into each date folder under this config
    for date_name in sorted(os.listdir(config_path)):
        date_path = os.path.join(config_path, date_name)
        if not os.path.isdir(date_path):
            continue

        # Dive into each run folder
        for run_name in sorted(os.listdir(date_path)):
            run_path = os.path.join(date_path, run_name)
            if not os.path.isdir(run_path):
                continue

            # Look for the embeddings directory
            embed_dir = os.path.join(run_path, "schiz_random_embeddings")
            src_file  = os.path.join(embed_dir, "full_embeddings.csv")
            if not os.path.isfile(src_file):
                continue

            # Prepare a subdirectory in dst_root for this config
            dst_config_dir = os.path.join(dst_root, config_name)
            os.makedirs(dst_config_dir, exist_ok=True)

            # Copy the embeddings file
            dst_file = os.path.join(dst_config_dir, "full_embeddings.csv")
            try:
                shutil.copy2(src_file, dst_file)
                print(f"Copied: {src_file}\n      → {dst_file}")
            except Exception as e:
                print(f"Error copying {src_file}: {e}")


In [6]:
import os
import shutil

# === Configuration ===
src_root = (
    "/neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_HULL_test/INSULA_left_16_16"
)
dst_root = (
    "/neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_HULL_test/"
    "INSULA-embeddings"
)
# ======================

# Ensure the destination directory exists
os.makedirs(dst_root, exist_ok=True)

for config_name in sorted(os.listdir(src_root)):
    # Include all batch_32_seed_* configs
    if not config_name.startswith("batch_16"):
        continue

    config_path = os.path.join(src_root, config_name)
    if not os.path.isdir(config_path):
        continue

    # Dive into each date folder under this config
    for date_name in sorted(os.listdir(config_path)):
        date_path = os.path.join(config_path, date_name)
        if not os.path.isdir(date_path):
            continue

        # Dive into each run folder
        for run_name in sorted(os.listdir(date_path)):
            run_path = os.path.join(date_path, run_name)
            if not os.path.isdir(run_path):
                continue

            # Look for the embeddings directory
            embed_dir = os.path.join(run_path, "schiz_random_embeddings")
            src_file  = os.path.join(embed_dir, "full_embeddings.csv")
            if not os.path.isfile(src_file):
                continue

            # Prepare a subdirectory in dst_root for this config
            dst_config_dir = os.path.join(dst_root, config_name)
            os.makedirs(dst_config_dir, exist_ok=True)

            # Copy the embeddings file
            dst_file = os.path.join(dst_config_dir, "full_embeddings.csv")
            try:
                shutil.copy2(src_file, dst_file)
                print(f"Copied: {src_file}\n      → {dst_file}")
            except Exception as e:
                print(f"Error copying {src_file}: {e}")


Copied: /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_HULL_test/INSULA_left_16_16/batch_16_seed_128/2025-09-17/18-46-16_0/schiz_random_embeddings/full_embeddings.csv
      → /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_HULL_test/INSULA-embeddings/batch_16_seed_128/full_embeddings.csv
Copied: /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_HULL_test/INSULA_left_16_16/batch_16_seed_212/2025-09-17/18-58-25_0/schiz_random_embeddings/full_embeddings.csv
      → /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_HULL_test/INSULA-embeddings/batch_16_seed_212/full_embeddings.csv
Copied: /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_HULL_test/INSULA_left_16_16/batch_16_seed_42/2025-09-17/18-31-43_0/schiz_random_embeddings/full_embeddings.csv
      → /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_HULL_test/INSULA-embeddings/batch_16_seed_42/full_embeddings.csv
Copied: /neurospin/dico/

In [None]:
import os
import shutil

# === Configuration ===
src_root = (
    "/neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_HULL_test/INSULA_left_16_16"
)
dst_root = (
    "/neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_HULL_test/"
    "INSULA-embeddings"
)
# ======================

# Ensure the destination directory exists
os.makedirs(dst_root, exist_ok=True)

for config_name in sorted(os.listdir(src_root)):
    # Include all batch_32_seed_* configs
    if not config_name.startswith("batch_16"):
        continue

    config_path = os.path.join(src_root, config_name)
    if not os.path.isdir(config_path):
        continue

    # Dive into each date folder under this config
    for date_name in sorted(os.listdir(config_path)):
        date_path = os.path.join(config_path, date_name)
        if not os.path.isdir(date_path):
            continue

        # Dive into each run folder
        for run_name in sorted(os.listdir(date_path)):
            run_path = os.path.join(date_path, run_name)
            if not os.path.isdir(run_path):
                continue

            # Look for the embeddings directory
            embed_dir = os.path.join(run_path, "schiz_random_embeddings")
            src_file  = os.path.join(embed_dir, "full_embeddings.csv")
            if not os.path.isfile(src_file):
                continue

            # Prepare a subdirectory in dst_root for this config
            dst_config_dir = os.path.join(dst_root, config_name)
            os.makedirs(dst_config_dir, exist_ok=True)

            # Copy the embeddings file
            dst_file = os.path.join(dst_config_dir, "full_embeddings.csv")
            try:
                shutil.copy2(src_file, dst_file)
                print(f"Copied: {src_file}\n      → {dst_file}")
            except Exception as e:
                print(f"Error copying {src_file}: {e}")


In [2]:
# INSULA=======================================================================================================================================================
import os
import shutil

# === Configuration ===
src_root = (
    "/neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_TOTAL/INSULA_left_32_16"
)
dst_root = (
    "/neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_TOTAL/"
    "INSULA-embeddings"
)
# ======================

# Ensure the destination directory exists
os.makedirs(dst_root, exist_ok=True)

for config_name in sorted(os.listdir(src_root)):
    # Include all batch_32_seed_* configs
    if not config_name.startswith("sigma_"):
        continue

    config_path = os.path.join(src_root, config_name)
    if not os.path.isdir(config_path):
        continue

    # Dive into each date folder under this config
    for date_name in sorted(os.listdir(config_path)):
        date_path = os.path.join(config_path, date_name)
        if not os.path.isdir(date_path):
            continue

        # Dive into each run folder
        for run_name in sorted(os.listdir(date_path)):
            run_path = os.path.join(date_path, run_name)
            if not os.path.isdir(run_path):
                continue

            # Look for the embeddings directory
            embed_dir = os.path.join(run_path, "schiz_random_embeddings")
            src_file  = os.path.join(embed_dir, "full_embeddings.csv")
            if not os.path.isfile(src_file):
                continue

            # Prepare a subdirectory in dst_root for this config
            dst_config_dir = os.path.join(dst_root, config_name)
            os.makedirs(dst_config_dir, exist_ok=True)

            # Copy the embeddings file
            dst_file = os.path.join(dst_config_dir, "full_embeddings.csv")
            try:
                shutil.copy2(src_file, dst_file)
                print(f"Copied: {src_file}\n      → {dst_file}")
            except Exception as e:
                print(f"Error copying {src_file}: {e}")


Copied: /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_TOTAL/INSULA_left_32_16/sigma_0.2_factor_0.2_FACTOR_batch_16/2025-09-15/16-42-11_0/schiz_random_embeddings/full_embeddings.csv
      → /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_TOTAL/INSULA-embeddings/sigma_0.2_factor_0.2_FACTOR_batch_16/full_embeddings.csv
Copied: /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_TOTAL/INSULA_left_32_16/sigma_0.4_factor_0.4_FACTOR_batch_16/2025-09-15/16-42-11_0/schiz_random_embeddings/full_embeddings.csv
      → /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_TOTAL/INSULA-embeddings/sigma_0.4_factor_0.4_FACTOR_batch_16/full_embeddings.csv
Copied: /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_TOTAL/INSULA_left_32_16/sigma_0.6_factor_0.6_FACTOR_batch_16/2025-09-15/16-42-12_0/schiz_random_embeddings/full_embeddings.csv
      → /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_TOTAL/INSULA-

In [1]:
# INSULA_TEST ================================================================================================================================================
import os
import shutil

# === Configuration ===
src_root = (
    "/neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_TOTAL/INSULA_left_16_16"
)
dst_root = (
    "/neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_TOTAL_test/"
    "FCLP-embeddings"
)
# ======================

# Ensure the destination directory exists
os.makedirs(dst_root, exist_ok=True)

for config_name in sorted(os.listdir(src_root)):
    # Include all batch_32_seed_* configs
    if not config_name.startswith("batch_16"):
        continue

    config_path = os.path.join(src_root, config_name)
    if not os.path.isdir(config_path):
        continue

    # Dive into each date folder under this config
    for date_name in sorted(os.listdir(config_path)):
        date_path = os.path.join(config_path, date_name)
        if not os.path.isdir(date_path):
            continue

        # Dive into each run folder
        for run_name in sorted(os.listdir(date_path)):
            run_path = os.path.join(date_path, run_name)
            if not os.path.isdir(run_path):
                continue

            # Look for the embeddings directory
            embed_dir = os.path.join(run_path, "schiz_random_embeddings")
            src_file  = os.path.join(embed_dir, "full_embeddings.csv")
            if not os.path.isfile(src_file):
                continue

            # Prepare a subdirectory in dst_root for this config
            dst_config_dir = os.path.join(dst_root, config_name)
            os.makedirs(dst_config_dir, exist_ok=True)

            # Copy the embeddings file
            dst_file = os.path.join(dst_config_dir, "full_embeddings.csv")
            try:
                shutil.copy2(src_file, dst_file)
                print(f"Copied: {src_file}\n      → {dst_file}")
            except Exception as e:
                print(f"Error copying {src_file}: {e}")


Copied: /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_TOTAL_test/FCLp-subsc-FCLa-INSULA_left_16_16/batch_16_seed_128/2025-09-15/11-00-35_0/schiz_random_embeddings/full_embeddings.csv
      → /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_TOTAL_test/FCLP-embeddings/batch_16_seed_128/full_embeddings.csv
Copied: /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_TOTAL_test/FCLp-subsc-FCLa-INSULA_left_16_16/batch_16_seed_212/2025-09-15/11-00-34_0/schiz_random_embeddings/full_embeddings.csv
      → /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_TOTAL_test/FCLP-embeddings/batch_16_seed_212/full_embeddings.csv
Copied: /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_TOTAL_test/FCLp-subsc-FCLa-INSULA_left_16_16/batch_16_seed_42/2025-09-15/10-59-22_0/schiz_random_embeddings/full_embeddings.csv
      → /neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/SURFACE_TOTAL_test/FCLP-embeddings/batch_16_seed

In [44]:
#!/usr/bin/env python3
import os
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

# === CONFIG ===
BASE_DIR  = "/neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/BT/OCCIPITAL_left_32_32"
AGE_CSV   = "/neurospin/dico/data/deep_folding/current/datasets/UkBioBank40/log_thickness_surface_Age.csv"
COVARIATE = "log_surface_total"
OUT_DIR   = os.path.join(BASE_DIR, "residualized")
# ============

def find_embedding_paths(base_dir):
    for cfg in os.listdir(base_dir):
        cfg_dir = os.path.join(base_dir, cfg)
        if not os.path.isdir(cfg_dir): continue
        for date in os.listdir(cfg_dir):
            date_dir = os.path.join(cfg_dir, date)
            if not os.path.isdir(date_dir): continue
            for run in os.listdir(date_dir):
                emb_dir = os.path.join(
                    date_dir, run,
                    "ukb40_FCLp_no_classiffier_random_embeddings"
                )
                emb_file = os.path.join(emb_dir, "full_embeddings.csv")
                if os.path.isfile(emb_file):
                    # flatten the run path into a single name
                    rel = f"{cfg}_{date}_{run}"
                    yield rel, emb_file

def residualize(emb_df, cov_ser):
    """Regress out cov_ser from each column of emb_df."""
    X = cov_ser.values.reshape(-1, 1)
    R = np.zeros_like(emb_df.values)
    lm = LinearRegression()
    for j in range(emb_df.shape[1]):
        y = emb_df.iloc[:, j].values
        lm.fit(X, y)
        R[:, j] = y - lm.predict(X)
    return pd.DataFrame(R, index=emb_df.index, columns=emb_df.columns)

def main():
    # load covariate
    parts = pd.read_csv(AGE_CSV, index_col=0)
    if COVARIATE not in parts.columns:
        raise KeyError(f"{COVARIATE} not in {AGE_CSV}")
    cov = parts[COVARIATE]

    os.makedirs(OUT_DIR, exist_ok=True)

    for rel, emb_path in find_embedding_paths(BASE_DIR):
        emb = pd.read_csv(emb_path, index_col=0)

        # intersect IDs and drop any NaN in the covariate
        common = emb.index.intersection(cov.index)
        cov_sub = cov.loc[common]
        emb_sub = emb.loc[common]
        valid = cov_sub.notna()
        if valid.sum() == 0:
            print(f"[SKIP] {rel}: no valid covariate values")
            continue

        emb_sub = emb_sub.loc[valid]
        cov_sub = cov_sub.loc[valid]

        # residualize
        resid_df = residualize(emb_sub, cov_sub)

        # write out
        out_file = os.path.join(OUT_DIR, f"{rel}_resid.csv")
        resid_df.to_csv(out_file)
        print(f"[OK] {rel} → {out_file}")

if __name__ == "__main__":
    main()


: 

In [55]:
#!/usr/bin/env python3
import os
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

# === CONFIG ===
BASE_DIR  = "/neurospin/dico/babdelghani/Runs/02_champollion_v1/Output/BT/FCLP-embeddings"
AGE_CSV   = "/neurospin/dico/data/deep_folding/current/datasets/UkBioBank40/log_thickness_surface_Age.csv"
COVARIATE = "log_surface_total"
OUT_DIR   = os.path.join(BASE_DIR, "residualized")
# ============

def find_embedding_paths(base_dir):
    for cfg in sorted(os.listdir(base_dir)):
        cfg_dir = os.path.join(base_dir, cfg)
        if not os.path.isdir(cfg_dir):
            continue
        # recurse until we find full_embeddings.csv
        for root, _, files in os.walk(cfg_dir):
            if "full_embeddings.csv" in files:
                yield cfg, os.path.join(root, "full_embeddings.csv")

def residualize(emb_df, cov_ser):
    X = cov_ser.values.reshape(-1, 1)
    R = np.zeros_like(emb_df.values)
    lm = LinearRegression()
    for j in range(emb_df.shape[1]):
        y = emb_df.iloc[:, j].values
        lm.fit(X, y)
        R[:, j] = y - lm.predict(X)
    return pd.DataFrame(R, index=emb_df.index, columns=emb_df.columns)

def main():
    # 1) load and prefix covariate
    parts = pd.read_csv(AGE_CSV, index_col=0)
    if COVARIATE not in parts.columns:
        raise KeyError(f"Covariate '{COVARIATE}' not found in {AGE_CSV}")
    if not str(parts.index[0]).startswith("sub-"):
        parts.index = ["sub-" + str(i) for i in parts.index.astype(str)]
    cov = parts[COVARIATE].astype(float)

    # 2) make output dir
    os.makedirs(OUT_DIR, exist_ok=True)

    # 3) process every embedding file found
    for cfg, emb_path in find_embedding_paths(BASE_DIR):
        print(f"[-->] Residualizing {cfg}")
        emb = pd.read_csv(emb_path, index_col=0)

        # align IDs and drop NaNs
        common = emb.index.intersection(cov.index)
        emb_sub = emb.loc[common]
        cov_sub = cov.loc[common].dropna()
        emb_sub = emb_sub.loc[cov_sub.index]

        if emb_sub.shape[0] == 0:
            print(f"[ SKIP ] {cfg}: no overlapping IDs or all covariate NaN")
            continue

        # residualize
        resid_df = residualize(emb_sub, cov_sub)

        # save
        out_file = os.path.join(OUT_DIR, f"{cfg}_resid.csv")
        resid_df.to_csv(out_file)
        print(f"[   OK ] {cfg} → {out_file}")

if __name__ == "__main__":
    main()


[-->] Residualizing batch_32_seed_128
[ SKIP ] batch_32_seed_128: no overlapping IDs or all covariate NaN
[-->] Residualizing batch_32_seed_212
[ SKIP ] batch_32_seed_212: no overlapping IDs or all covariate NaN
[-->] Residualizing batch_32_seed_42
[ SKIP ] batch_32_seed_42: no overlapping IDs or all covariate NaN
[-->] Residualizing batch_32_seed_48
[ SKIP ] batch_32_seed_48: no overlapping IDs or all covariate NaN
[-->] Residualizing batch_32_seed_75
[ SKIP ] batch_32_seed_75: no overlapping IDs or all covariate NaN
