In [1]:
import os
import random

# === PARAMETERS ===
# Directory that receives the rebuilt fold files; created up front if missing.
output_dir = "folds_updated"
os.makedirs(output_dir, exist_ok=True)

# === LOAD THE SUBJECTS TO EXCLUDE AND THE RECOVERABLE ONES ===
# Both files are read as one subject ID per line: surrounding whitespace is
# stripped and blank lines are ignored.
with open("sarcopenia_subj_DIASEM.txt") as f:
    diasem_subjects = {subject for subject in map(str.strip, f) if subject}

with open("sarcopenia_subj_DIASEM_PROBLEM.txt") as f:
    problem_subjects = {subject for subject in map(str.strip, f) if subject}

# IDs that appear in the PROBLEM file but not in the DIASEM file.
recoverable_subjects = list(problem_subjects.difference(diasem_subjects))

# === LOAD THE ORIGINAL FOLDS ===
# folds[i] holds the whitespace-stripped, non-blank lines of
# train_cases_<i>.txt and test_cases_<i>.txt for i = 1..5.
folds = {}
for i in range(1, 6):
    with open(f"train_cases_{i}.txt") as f_train, open(f"test_cases_{i}.txt") as f_test:
        # filter(None, ...) drops the empty strings left by blank lines.
        train_raw = list(filter(None, map(str.strip, f_train)))
        test_raw = list(filter(None, map(str.strip, f_test)))
    folds[i] = {"train_raw": train_raw, "test_raw": test_raw}

# === REBUILD THE FOLDS ===
# For each of the 5 folds: drop the DIASEM subjects from the original train and
# test lists, then top both lists back up to their target sizes with randomly
# drawn recoverable subjects (test is served first).

# Exact sizes every rebuilt split must reach.
TEST_SIZE = 16
TRAIN_SIZE = 63

# Fixed seed for a dedicated generator: the top-up is a random draw, so without
# a seed the generated folds would differ on every run.
FOLD_SEED = 42
rng = random.Random(FOLD_SEED)

new_folds = {}

for i in range(1, 6):
    print(f"\n--- Fold {i} ---")

    # Clean the original splits (new lists, so `folds` is left untouched).
    train = [s for s in folds[i]['train_raw'] if s not in diasem_subjects]
    test = [s for s in folds[i]['test_raw'] if s not in diasem_subjects]

    used_in_fold = set(train + test)

    # Recoverable subjects not already present in this fold. They are sorted
    # before shuffling so that the draw depends only on the seed and not on the
    # incoming order of `recoverable_subjects`, which is built from a set and
    # therefore varies with per-process string-hash randomization.
    candidates = sorted(s for s in recoverable_subjects if s not in used_in_fold)
    rng.shuffle(candidates)

    # Number of subjects each split is missing; clamped at 0 because a negative
    # slice bound would select from the wrong end of the list.
    missing_test = max(0, TEST_SIZE - len(test))
    missing_train = max(0, TRAIN_SIZE - len(train))

    # Test gets the first draw (priority), train takes the following ones.
    # The two slices are disjoint, so no subject can land in both splits here.
    test_extra = candidates[:missing_test]
    train_extra = candidates[missing_test:missing_test + missing_train]
    test.extend(test_extra)
    train.extend(train_extra)
    used_in_fold.update(test_extra, train_extra)

    # Integrity checks. Raised explicitly (rather than via `assert`) so they
    # still run under `python -O`; the exception type is kept as AssertionError.
    if len(test) != TEST_SIZE:
        raise AssertionError(f"Fold {i}: test incomplet ({len(test)} sujets)")
    if len(train) != TRAIN_SIZE:
        raise AssertionError(f"Fold {i}: train incomplet ({len(train)} sujets)")
    if set(train) & set(test):
        raise AssertionError(f"Fold {i}: train et test se recoupent")
    if any(s in diasem_subjects for s in train + test):
        raise AssertionError(f"Fold {i}: sujet DIASEM dÃ©tectÃ©")

    new_folds[i] = {'train': train, 'test': test}
    print(f"âœ… Train: {len(train)} | Test: {len(test)}")

# === WRITE THE NEW FOLD FILES ===
# Each split is written to output_dir as one subject ID per line, with a
# trailing newline, under the same file names as the original folds.
for i in range(1, 6):
    for filename, split in ((f"train_cases_{i}.txt", 'train'), (f"test_cases_{i}.txt", 'test')):
        destination = os.path.join(output_dir, filename)
        with open(destination, "w") as handle:
            handle.write("\n".join(new_folds[i][split]) + "\n")

print(f"\nðŸŽ‰ Tous les nouveaux folds sont enregistrÃ©s dans le dossier `{output_dir}`")



--- Fold 1 ---
âœ… Train: 63 | Test: 16

--- Fold 2 ---
âœ… Train: 63 | Test: 16

--- Fold 3 ---
âœ… Train: 63 | Test: 16

--- Fold 4 ---
âœ… Train: 63 | Test: 16

--- Fold 5 ---
âœ… Train: 63 | Test: 16

ðŸŽ‰ Tous les nouveaux folds sont enregistrÃ©s dans le dossier `folds_updated`
