In this notebook, the goal is to create a .csv file containing the anomaly score (computed with a One-Class SVM after residualization) for each subject in PEPR_Marseille (including those who were not selected), mirroring the existing file that contains the reconstruction_error.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
from scipy.stats import ks_2samp
from scipy import stats
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler
import statsmodels.api as sm

In [2]:

def read_error_csv(path):
    """Load a CSV file into a DataFrame.

    Parameters
    ----------
    path : str
        Location of the CSV file.

    Returns
    -------
    pandas.DataFrame or None
        The parsed table, or ``None`` when ``path`` does not exist.
    """
    # Guard clause: a missing file is reported with None rather than an error.
    if not os.path.exists(path):
        return None
    return pd.read_csv(path)

In [3]:
def remove_first_column(Embeddings, n_features=75):
    """Drop the leading identifier column and keep the feature columns.

    Parameters
    ----------
    Embeddings : pandas.DataFrame
        Table whose first column is skipped; the following columns are the
        features to keep.
    n_features : int or None, default 75
        Number of columns to keep after the first one. The default selects
        columns 1..75, i.e. the same ``iloc[:, 1:76]`` slice as before.
        ``None`` keeps every column after the first.

    Returns
    -------
    pandas.DataFrame
        The selected feature columns (all rows).
    """
    stop = None if n_features is None else 1 + n_features
    return Embeddings.iloc[:, 1:stop]

In [4]:
# Load the tab-separated participants table of the epilepsy_PBS dataset.
# The "Diagnosis" and "Index" columns of this table are used further down.
Participants_epilepsy=pd.read_csv("/neurospin/dico/data/human/epilepsy_PBS/participants.tsv", sep="\t")

In [5]:
# Keep only the participants whose "Diagnosis" is exactly "controls".
epilepsy_controls = Participants_epilepsy.loc[
    Participants_epilepsy["Diagnosis"] == "controls"
]


In [6]:
def separate_epilepsy_PBS(Embeddings):
    """Return the epilepsy_PBS embeddings restricted to control participants.

    The first column of ``Embeddings`` is renamed to "Index" and inner-joined
    with the "Index" column of the module-level ``epilepsy_controls`` table.
    Only the control rows are returned; no other diagnosis group is produced.

    Parameters
    ----------
    Embeddings : pandas.DataFrame
        Embedding table whose first column holds the participant identifier.

    Returns
    -------
    pandas.DataFrame
        Rows of ``Embeddings`` whose identifier appears in ``epilepsy_controls``.
    """
    id_column = Embeddings.columns[0]
    renamed = Embeddings.rename(columns={id_column: "Index"})
    control_ids = epilepsy_controls[["Index"]]
    return pd.merge(renamed, control_ids, on="Index", how="inner")

In [7]:
def Normalisation(X1, X2, X3,X4):
    """Standardize four feature sets with statistics learned on ``X1`` only.

    A ``StandardScaler`` is fitted on ``X1`` and the same fitted scaler is
    then applied to all four inputs.

    Parameters
    ----------
    X1 : array-like
        Reference set used to fit the scaler.
    X2, X3, X4 : array-like
        Additional sets transformed with the scaler fitted on ``X1``.

    Returns
    -------
    tuple
        The four scaled arrays, in the order ``(X1, X2, X3, X4)``.
    """
    reference_scaler = StandardScaler().fit(X1)
    return tuple(
        reference_scaler.transform(block) for block in (X1, X2, X3, X4)
    )

In [8]:
def Residualization(X1,X2,X3,X4):
    """Regress a one-hot site indicator out of every feature column.

    For each feature, an OLS model is fitted on the stacked rows of
    ``X1``, ``X2`` and ``X3`` against site dummies ('UKB', 'hcp',
    'epilepsy_PBS', 'PEPR'). The training residuals are returned, and the
    fitted model is used to compute residuals for ``X4``.

    No separate intercept column is added: on the training rows the site
    dummies already sum to one, so a constant would be collinear with them.

    Parameters
    ----------
    X1, X2, X3 : array-like of shape (n_i, n_features)
        Training sets, labelled 'UKB', 'hcp' and 'epilepsy_PBS' respectively.
    X4 : array-like of shape (n_4, n_features)
        Held-out set, labelled 'PEPR'; it does not take part in the fit.

    Returns
    -------
    tuple of numpy.ndarray
        Residualized ``(X1, X2, X3, X4)``, each with the shape of its input.
    """
    n1, n2, n3, n4 = len(X1), len(X2), len(X3), len(X4)
    n_train = n1 + n2 + n3

    # One-hot encode the site label of every row, training rows first.
    site_labels = pd.DataFrame({
        'Site': ['UKB'] * n1
                + ['hcp'] * n2
                + ['epilepsy_PBS'] * n3
                + ['PEPR'] * n4
    })
    design = pd.get_dummies(site_labels).astype(float)
    design_train = design.iloc[:n_train]
    design_test = design.iloc[n_train:]

    # Work on float ndarrays so column indexing works for any array-like input
    # and residuals are never truncated to an integer dtype.
    train_values = np.concatenate((X1, X2, X3), axis=0).astype(float)
    test_values = np.asarray(X4, dtype=float)

    train_resid = np.zeros_like(train_values)
    test_resid = np.zeros_like(test_values)

    # NOTE(review): the 'Site_PEPR' dummy is all zeros on the training rows, so
    # its coefficient is not identified by the fit. With statsmodels' default
    # pseudo-inverse solver it is presumably 0, which would leave X4 essentially
    # unchanged (no site correction for PEPR) - confirm this is intended.
    for j in range(train_values.shape[1]):
        model = sm.OLS(train_values[:, j], design_train).fit()
        train_resid[:, j] = np.asarray(model.resid)
        test_resid[:, j] = test_values[:, j] - np.asarray(model.predict(design_test))

    return (
        train_resid[:n1, :],
        train_resid[n1:n1 + n2, :],
        train_resid[n1 + n2:n_train, :],
        test_resid,
    )


In [9]:
# Root directory that is scanned for sub-folders in the main loop below; each
# sub-folder is expected to hold the Embeddings.csv files read there.
dir_path= "/neurospin/tmp/fred/models/2025-11-10" 

In [10]:
import re

Calcul du score d'anomalies

In [11]:
def Calcul_score(X1, X4, nu=0.02):
    """Score ``X4`` with a One-Class SVM trained on ``X1``.

    Parameters
    ----------
    X1 : array-like
        Training samples used to fit the One-Class SVM.
    X4 : array-like
        Samples to score.
    nu : float, default 0.02
        ``nu`` parameter forwarded to ``OneClassSVM``.

    Returns
    -------
    numpy.ndarray
        The negated ``decision_function`` values for ``X4``, so that larger
        values correspond to samples the model considers more anomalous.
    """
    detector = OneClassSVM(kernel='rbf', gamma='scale', nu=nu).fit(X1)
    return -detector.decision_function(X4)

In [12]:
import re


In [13]:
# One sub-folder of dir_path per trained model; processed in sorted order.
subfolders = sorted(
    entry for entry in os.listdir(dir_path)
    if os.path.isdir(os.path.join(dir_path, entry))
)

for i, folder in enumerate(subfolders):
    print(i)

    current_path = os.path.join(dir_path, folder)
    output_dir = os.path.join(current_path, "PEPR_Marseille")

    ukb_file = os.path.join(current_path, "Embeddings.csv")
    hcp_file = os.path.join(current_path, "hcp", "Embeddings.csv")
    epi_file = os.path.join(current_path, "epilepsy_PBS", "Embeddings.csv")
    PEPR_file = os.path.join(output_dir, "Embeddings.csv")

    # read_error_csv returns None for a missing file; fail here with the
    # offending path instead of an opaque AttributeError further down.
    for required_file in (ukb_file, hcp_file, epi_file, PEPR_file):
        if not os.path.exists(required_file):
            raise FileNotFoundError(
                f"Missing embeddings file for '{folder}': {required_file}"
            )

    # Load the embeddings of each cohort.
    Embeddings_ukb = read_error_csv(ukb_file)
    Embeddings_hcp = read_error_csv(hcp_file)
    Embeddings_epilepsy = read_error_csv(epi_file)
    Embeddings_PEPR = read_error_csv(PEPR_file)

    # Only the control participants of epilepsy_PBS are used.
    Embeddings_control = separate_epilepsy_PBS(Embeddings_epilepsy)

    X1 = remove_first_column(Embeddings_ukb)
    X2 = remove_first_column(Embeddings_hcp)
    X3 = remove_first_column(Embeddings_control)
    X4 = remove_first_column(Embeddings_PEPR)

    X1, X2, X3, X4 = Normalisation(X1, X2, X3, X4)
    X1, X2, X3, X4 = Residualization(X1, X2, X3, X4)

    # The One-Class SVM is fitted on the UKB set (X1) and scores the PEPR set (X4).
    score_PEPR = Calcul_score(X1, X4)

    # One row per PEPR subject: identifier (first column of the PEPR table) and score.
    dico_score = {"ID": Embeddings_PEPR.iloc[:, 0],
                  "Score": score_PEPR}
    df_score = pd.DataFrame(dico_score)
    df_score.to_csv(os.path.join(output_dir, "Residualisation_Score_SVM.csv"))


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
