<a href="https://colab.research.google.com/github/matteo9910/StressDetectionBasedOnWearableSensorData/blob/main/VerBIO_Public_Cross_Test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive (Google Colab only); the dataset path
# used later in this notebook lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import warnings
# NOTE(review): this suppresses every warning category for the rest of the
# notebook, including numerical and deprecation warnings raised by the
# signal-processing calls further down. Consider narrowing the filter.
warnings.filterwarnings('ignore')

In [None]:
import zipfile
import os
import pandas as pd
import pickle
import matplotlib.pyplot as plt
from scipy.signal import butter, filtfilt, iirnotch
import seaborn as sns
import scipy.stats as stats
import numpy as np
from scipy.stats import shapiro
import math
from scipy.stats import mannwhitneyu
from scipy.signal import welch
import ipywidgets as widgets
from IPython.display import display
import statsmodels.api as sm
from statsmodels.formula.api import ols
from scipy.signal import resample
from scipy.signal import find_peaks
import scipy.signal
from scipy.interpolate import interp1d
import glob
from glob import glob
import math

In [None]:
def read_signal_file(filepath, signal, condition):
    """
    Read one E4 signal CSV and return it with canonical column names.

    The first line of the file is skipped and the following line is used as
    the header by ``pd.read_csv``.

    Layout rules:
    - ``condition == 'PPT'``: the first column is discarded. ``'ACC'`` needs at
      least 4 columns (columns 1..3 are kept), ``'BVP'``/``'EDA'``/``'TEMP'``
      need at least 2 (column 1 is kept).
    - any other condition: ``'ACC'`` must have exactly 3 columns, the scalar
      signals exactly 1.

    Parameters:
        filepath: path of the CSV file.
        signal: one of ``'ACC'``, ``'BVP'``, ``'EDA'``, ``'TEMP'``.
        condition: protocol condition; only ``'PPT'`` is treated specially.

    Returns:
        A DataFrame with columns ``ACC1, ACC2, ACC3`` or ``[signal]``, or
        ``None`` when the file cannot be read or has an unexpected layout
        (the error is printed, never raised).
    """
    acc_names = ['ACC1', 'ACC2', 'ACC3']
    scalar_signals = ('BVP', 'EDA', 'TEMP')

    try:
        frame = pd.read_csv(filepath, skiprows=1)
        n_cols = frame.shape[1]
        has_leading_column = condition == 'PPT'

        if signal == 'ACC':
            if has_leading_column and n_cols >= 4:
                frame = frame.iloc[:, 1:4]
                frame.columns = acc_names
                return frame
            if not has_leading_column and n_cols == 3:
                frame.columns = acc_names
                return frame
        elif signal in scalar_signals:
            if has_leading_column and n_cols >= 2:
                frame = frame.iloc[:, [1]]
                frame.columns = [signal]
                return frame
            if not has_leading_column and n_cols == 1:
                frame.columns = [signal]
                return frame

        # No layout rule matched: report through the common error path below.
        raise ValueError(f"Formato inatteso in {filepath}")

    except Exception as e:
        print(f"[ERROR] Errore nel file {filepath}: {e}")
        return None

In [None]:
def upsample_signals(signals_dict, target_freq=64):
    """
    Resample every signal in ``signals_dict`` to the length of the longest one.

    Each signal is resampled with ``scipy.signal.resample`` (FFT based) so that
    all columns of the returned frame have the same number of rows.

    NOTE(review): this implicitly assumes that all signals cover the same time
    span and that the longest one is already sampled at the target rate —
    confirm against the recording setup.

    Parameters:
        signals_dict: mapping ``column name -> 1-D array`` of samples.
        target_freq: kept for backward compatibility. The previous
            implementation divided the maximum length by this value and
            multiplied it back, which is a no-op at best and drops one sample
            through floating-point rounding at worst (e.g. 29 / 100 * 100
            truncates to 28). The target length is now the maximum length.

    Returns:
        pd.DataFrame with one column per input signal and
        ``max(len(signal))`` rows.

    Raises:
        ValueError: if ``signals_dict`` is empty.
    """
    if not signals_dict:
        raise ValueError("signals_dict is empty: nothing to resample")

    target_length = max(len(signal) for signal in signals_dict.values())

    upsampled_signals = {
        name: resample(signal, target_length)
        for name, signal in signals_dict.items()
    }
    return pd.DataFrame(upsampled_signals)

In [None]:
def _load_session_signals(session_dir, subject, session_label, conditions, signals):
    """
    Load, resample and tag the signals of one session folder.

    For every condition the files ``<session_dir>/E4/<subject>/<signal>_<condition>.csv``
    are read with ``read_signal_file``; the columns that could be read are
    brought to a common length with ``upsample_signals`` and tagged with the
    ``subject`` / ``session`` / ``phase`` metadata columns.

    Returns:
        list of DataFrames, one per condition that produced at least one signal.
    """
    frames = []
    for condition in conditions:
        signal_dict = {}
        for signal in signals:
            file_path = os.path.join(session_dir, "E4", subject, f"{signal}_{condition}.csv")
            df_signal = read_signal_file(file_path, signal, condition)
            if df_signal is not None:
                for col in df_signal.columns:
                    signal_dict[col] = df_signal[col].values

        # A condition for which no file could be read contributes nothing.
        if signal_dict:
            df_upsampled = upsample_signals(signal_dict)
            df_upsampled["subject"] = subject
            df_upsampled["session"] = session_label
            df_upsampled["phase"] = condition
            frames.append(df_upsampled)
    return frames


def load_subject_data(subject, base_dir, valid_tests_map):
    """
    Load every available recording of one subject into a single DataFrame.

    Sessions are read in this order: ``PRE``, ``POST`` (under
    ``<base_dir>/<phase>``), then each TEST session listed for the subject
    (under ``<base_dir>/TEST/<test_name>``). Within a session the ``RELAX``
    condition precedes ``PPT``.

    Parameters:
        subject: subject identifier used as folder name (e.g. ``"P005"``).
        base_dir: root directory of the dataset.
        valid_tests_map: mapping ``subject -> list of TEST session names``.
            A subject missing from the map is treated as having no TEST
            sessions instead of raising ``KeyError`` after PRE/POST were
            already loaded.

    Returns:
        DataFrame with the available signal columns (``ACC1, ACC2, ACC3, BVP,
        EDA, TEMP``) followed by ``subject``, ``session`` and ``phase``; an
        empty DataFrame when nothing could be loaded.
    """
    conditions = ['RELAX', 'PPT']
    signals = ['ACC', 'BVP', 'EDA', 'TEMP']

    # (session label, session folder) pairs in loading order.
    sessions = [(phase, os.path.join(base_dir, phase)) for phase in ['PRE', 'POST']]
    sessions += [
        (test_name, os.path.join(base_dir, "TEST", test_name))
        for test_name in valid_tests_map.get(subject, [])
    ]

    data = []
    for session_label, session_dir in sessions:
        data.extend(
            _load_session_signals(session_dir, subject, session_label, conditions, signals)
        )

    if data:
        df_final = pd.concat(data, ignore_index=True)
        signal_cols = ['ACC1', 'ACC2', 'ACC3', 'BVP', 'EDA', 'TEMP']
        meta_cols = ['subject', 'session', 'phase']
        # Keep only the signal columns that were actually loaded.
        all_cols = [col for col in signal_cols if col in df_final.columns] + meta_cols
        return df_final[all_cols]
    else:
        print(f"Nessun dato caricato per {subject}")
        return pd.DataFrame()

In [None]:
def load_all_subjects_data(base_dir, complete_subjects_map):
    """
    Load the recordings of every subject in ``complete_subjects_map``.

    Each subject is loaded with ``load_subject_data`` (which receives the whole
    map); subjects that yield an empty frame are dropped.

    Parameters:
        base_dir: root directory of the dataset.
        complete_subjects_map: mapping ``subject -> list of TEST session names``.

    Returns:
        The row-wise concatenation of all non-empty subject frames with a fresh
        index, or an empty DataFrame when no subject produced data.
    """
    frames = []
    for subject in complete_subjects_map:
        valid_tests = complete_subjects_map[subject]
        print(f"Caricamento soggetto {subject} con TEST validi: {valid_tests}")
        subject_frame = load_subject_data(subject, base_dir, complete_subjects_map)
        if subject_frame.empty:
            continue
        frames.append(subject_frame)

    if not frames:
        print("Nessun dato disponibile per i soggetti completi.")
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

In [None]:
# Root folder of the dataset on the mounted Google Drive.
base_dir = "/content/drive/MyDrive/VerBIO_Public"

# Subjects to load, each mapped to the TEST sessions to read for that subject
# (PRE and POST are always attempted by load_subject_data).
complete_subjects_map = {
    "P005": ["TEST01", "TEST05"],
    "P008": ["TEST01", "TEST05"],
    "P023": ["TEST01", "TEST05"],
    "P032": ["TEST01", "TEST05"],
    "P035": ["TEST01", "TEST05"],
    "P037": ["TEST01", "TEST05"],
    "P038": ["TEST01"],
    "P041": ["TEST01", "TEST05"],
    "P043": ["TEST01"],
    "P044": ["TEST01", "TEST05"],
    "P046": ["TEST01"],
    "P047": ["TEST01", "TEST05"],
    "P049": ["TEST01"],
    "P058": ["TEST01"],
    "P062": ["TEST01", "TEST05"],
    "P065": ["TEST01", "TEST05"],
    "P071": ["TEST01", "TEST05"],
}

# One long frame with every subject/session/phase stacked row-wise.
df_all = load_all_subjects_data(base_dir, complete_subjects_map)

Caricamento soggetto P005 con TEST validi: ['TEST01', 'TEST05']
Caricamento soggetto P008 con TEST validi: ['TEST01', 'TEST05']
Caricamento soggetto P023 con TEST validi: ['TEST01', 'TEST05']
Caricamento soggetto P032 con TEST validi: ['TEST01', 'TEST05']
Caricamento soggetto P035 con TEST validi: ['TEST01', 'TEST05']
Caricamento soggetto P037 con TEST validi: ['TEST01', 'TEST05']
Caricamento soggetto P038 con TEST validi: ['TEST01']
Caricamento soggetto P041 con TEST validi: ['TEST01', 'TEST05']
Caricamento soggetto P043 con TEST validi: ['TEST01']
Caricamento soggetto P044 con TEST validi: ['TEST01', 'TEST05']
Caricamento soggetto P046 con TEST validi: ['TEST01']
Caricamento soggetto P047 con TEST validi: ['TEST01', 'TEST05']
Caricamento soggetto P049 con TEST validi: ['TEST01']
Caricamento soggetto P058 con TEST validi: ['TEST01']
Caricamento soggetto P062 con TEST validi: ['TEST01', 'TEST05']
Caricamento soggetto P065 con TEST validi: ['TEST01', 'TEST05']
Caricamento soggetto P071 

In [None]:
df_all.head()

Unnamed: 0,ACC1,ACC2,ACC3,BVP,EDA,TEMP,subject,session,phase
0,12.0,13.0,60.0,-8.26,0.13347,32.05,P005,PRE,RELAX
1,5.769194,15.372568,68.929877,-10.13,0.130896,32.054514,P005,PRE,RELAX
2,14.0,13.0,61.0,-12.01,0.128816,32.058249,P005,PRE,RELAX
3,18.849692,12.25662,55.569971,-13.52,0.127238,32.061199,P005,PRE,RELAX
4,15.0,14.0,61.0,-14.26,0.126157,32.063375,P005,PRE,RELAX


In [None]:
# Number of rows in the PPT phase for every (subject, session) pair, stored as
# a dict keyed by the (subject, session) tuple.
ppt_lengths = (
    df_all[df_all['phase'] == 'PPT']
    .groupby(['subject', 'session'])
    .size()
    .to_dict()
)

In [None]:
def upsample_fused_column_to_length(fused_values, target_length):
    """
    Resample the Fused annotation column to exactly ``target_length`` samples.

    Thin wrapper around ``scipy.signal.resample``.

    Parameters:
        fused_values: 1-D array of annotation scores.
        target_length: number of samples of the returned array.

    Returns:
        The resampled array of length ``target_length``.
    """
    return resample(fused_values, target_length)


def create_fused_dataset(base_dir, complete_subjects_map, ppt_lengths):
    all_data = []

    for subject, valid_tests in complete_subjects_map.items():
        subject_data = []

        # --- Fasi PRE e POST ---
        for phase in ["PRE", "POST"]:
            anno_path = os.path.join(base_dir, phase, "Annotation", f"{phase}_{subject}_annotation.csv")
            if os.path.exists(anno_path):
                df = pd.read_csv(anno_path)
                if 'Fused' in df.columns:
                    key = (subject, phase)
                    if key in ppt_lengths:
                        target_len = ppt_lengths[key]
                        upsampled = upsample_fused_column_to_length(df['Fused'].values, target_len)
                        df_upsampled = pd.DataFrame({
                            "Fused_score": upsampled,
                            "subject": subject,
                            "session": phase
                        })
                        subject_data.append(df_upsampled)
                    else:
                        print(f"[WARN] Lunghezza non trovata per {key}, saltato.")
                else:
                    print(f"[WARN] Colonna 'Fused' non trovata in {anno_path}")
            else:
                print(f"[WARN] File non trovato: {anno_path}")

        # --- Fase TEST ---
        for test in valid_tests:
            anno_path = os.path.join(base_dir, "TEST", test, "Annotation", f"{test}_{subject}_annotation.csv")
            if os.path.exists(anno_path):
                df = pd.read_csv(anno_path)
                if 'Fused' in df.columns:
                    key = (subject, test)
                    if key in ppt_lengths:
                        target_len = ppt_lengths[key]
                        upsampled = upsample_fused_column_to_length(df['Fused'].values, target_len)
                        df_upsampled = pd.DataFrame({
                            "Fused_score": upsampled,
                            "subject": subject,
                            "session": test
                        })
                        subject_data.append(df_upsampled)
                    else:
                        print(f"[WARN] Lunghezza non trovata per {key}, saltato.")
                else:
                    print(f"[WARN] Colonna 'Fused' non trovata in {anno_path}")
            else:
                print(f"[WARN] File non trovato: {anno_path}")

        # --- Aggregazione per soggetto ---
        if subject_data:
            all_data.append(pd.concat(subject_data, ignore_index=True))

    # --- Dataset finale ---
    if all_data:
        return pd.concat(all_data, ignore_index=True)
    else:
        print("Nessun dato disponibile.")
        return pd.DataFrame()

In [None]:
# `base_dir` and `complete_subjects_map` are defined once in the data-loading
# cell that builds `df_all`; they are reused here instead of being redefined so
# the signal table and the annotation table can never drift apart.
subjects_annotation = create_fused_dataset(base_dir, complete_subjects_map, ppt_lengths)

In [None]:
subjects_annotation.head()

Unnamed: 0,Fused_score,subject,session
0,0.005876,P005,PRE
1,0.001657,P005,PRE
2,-0.002391,P005,PRE
3,-0.006267,P005,PRE
4,-0.009969,P005,PRE


In [None]:
subjects_annotation.groupby(['subject', 'session']).size()

Unnamed: 0_level_0,Unnamed: 1_level_0,0
subject,session,Unnamed: 2_level_1
P005,POST,16850
P005,PRE,11271
P005,TEST01,13275
P005,TEST05,10887
P008,POST,15789
...,...,...
P065,TEST05,14294
P071,POST,15841
P071,PRE,17998
P071,TEST01,14709


In [None]:
df_all[df_all['phase'] == 'PPT'].groupby(['subject', 'session']).size()

Unnamed: 0_level_0,Unnamed: 1_level_0,0
subject,session,Unnamed: 2_level_1
P005,POST,16850
P005,PRE,11271
P005,TEST01,13275
P005,TEST05,10887
P008,POST,15789
...,...,...
P065,TEST05,14294
P071,POST,15841
P071,PRE,17998
P071,TEST01,14709


In [None]:
def label_dataset(df_signals, df_annotations, threshold=0.2):
    """
    Add a binary ``label`` column to the signal table.

    - ``label = 0`` for every row outside the PPT phase.
    - For PPT rows, ``label = 1`` where the aligned ``Fused_score`` is strictly
      greater than ``threshold``, else 0. Scores are matched to rows
      positionally within each ``(subject, session)`` group.
    - If the number of PPT rows and annotation scores differ for a group, a
      warning is printed and the group keeps ``label = 0``.

    Parameters:
        df_signals: signal table with ``subject``, ``session``, ``phase``
            columns. It is modified in place.
        df_annotations: table with ``Fused_score``, ``subject``, ``session``.
        threshold: score above which a PPT sample is labelled 1.

    Returns:
        ``df_signals`` itself (same object), now carrying the ``label`` column.
    """
    df_signals['label'] = 0

    # Split the annotation scores by (subject, session) once instead of
    # re-filtering the whole annotation table for every group.
    fused_by_session = {
        key: scores['Fused_score'].values
        for key, scores in df_annotations.groupby(['subject', 'session'])
    }

    df_ppt = df_signals[df_signals['phase'] == 'PPT']

    for (subject, session), group in df_ppt.groupby(['subject', 'session']):
        fused_values = fused_by_session.get((subject, session))
        n_fused = 0 if fused_values is None else len(fused_values)

        # The group's index already identifies the PPT rows of this
        # (subject, session) in df_signals; no need to rebuild it with masks.
        signal_idx = group.index

        if len(signal_idx) == n_fused:
            df_signals.loc[signal_idx, 'label'] = (fused_values > threshold).astype(int)
        else:
            print(f"[WARN] Mismatch di lunghezza per {subject} - {session}: "
                  f"{len(signal_idx)} vs {n_fused}")

    return df_signals

In [None]:
df = label_dataset(df_all, subjects_annotation)

In [None]:
df.head()

Unnamed: 0,ACC1,ACC2,ACC3,BVP,EDA,TEMP,subject,session,phase,label
0,12.0,13.0,60.0,-8.26,0.13347,32.05,P005,PRE,RELAX,0
1,5.769194,15.372568,68.929877,-10.13,0.130896,32.054514,P005,PRE,RELAX,0
2,14.0,13.0,61.0,-12.01,0.128816,32.058249,P005,PRE,RELAX,0
3,18.849692,12.25662,55.569971,-13.52,0.127238,32.061199,P005,PRE,RELAX,0
4,15.0,14.0,61.0,-14.26,0.126157,32.063375,P005,PRE,RELAX,0


In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2159612 entries, 0 to 2159611
Data columns (total 10 columns):
 #   Column   Dtype  
---  ------   -----  
 0   ACC1     float64
 1   ACC2     float64
 2   ACC3     float64
 3   BVP      float64
 4   EDA      float64
 5   TEMP     float64
 6   subject  object 
 7   session  object 
 8   phase    object 
 9   label    int64  
dtypes: float64(6), int64(1), object(3)
memory usage: 164.8+ MB


In [None]:
df.describe()

Unnamed: 0,ACC1,ACC2,ACC3,BVP,EDA,TEMP,label
count,2159612.0,2159612.0,2159612.0,2159612.0,2159612.0,2159612.0,2159612.0
mean,5.537891,-2.094674,12.85213,-0.01148349,0.712568,32.45926,0.3151126
std,45.64637,29.81659,31.48243,51.05985,2.126496,2.010616,0.4645608
min,-147.6892,-152.274,-154.4291,-1187.2,-0.03896454,26.37496,0.0
25%,-44.0,-17.98167,-9.0,-8.34,0.1041351,31.21319,0.0
50%,18.24339,-1.898498,12.27897,0.36,0.2151499,32.67925,0.0
75%,47.37183,14.94794,37.98685,9.8,0.4571115,34.09125,1.0
max,170.1855,133.1136,161.8245,1269.4,23.84075,35.9593,1.0


In [None]:
df.isna().sum()

Unnamed: 0,0
ACC1,0
ACC2,0
ACC3,0
BVP,0
EDA,0
TEMP,0
subject,0
session,0
phase,0
label,0


LABEL DISTRIBUTION ANALYSIS

In [None]:
# Overall label distribution: absolute counts and relative frequencies (%)
# over every row of `df`, RELAX and PPT phases together.
freq_absolute_global = df["label"].value_counts()
freq_relative_global = df["label"].value_counts(normalize=True)

global_distribution = pd.DataFrame({
    'Frequenza Assoluta': freq_absolute_global,
    'Frequenza Relativa (%)': (freq_relative_global * 100).round(2)
})

global_distribution

Unnamed: 0_level_0,Frequenza Assoluta,Frequenza Relativa (%)
label,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1479091,68.49
1,680521,31.51


In [None]:
# Label distribution per session, restricted to the PPT phase.
ppt_df = df[df['phase'] == 'PPT']

# Rows: session; columns: label values present in ppt_df; cells: row counts.
phase_distribution = ppt_df.groupby(['session', 'label']).size().unstack(fill_value=0)

# The column lookups [0] and [1] below raise KeyError if one of the two label
# values never occurs in the PPT rows.
phase_distribution['Totale'] = phase_distribution.sum(axis=1)
phase_distribution['% Label 0'] = ((phase_distribution[0] / phase_distribution['Totale']) * 100).round(2)
phase_distribution['% Label 1'] = ((phase_distribution[1] / phase_distribution['Totale']) * 100).round(2)

# Reorder the columns and give them display names.
phase_distribution = phase_distribution[['Totale', 0, 1, '% Label 0', '% Label 1']]
phase_distribution.columns = ['Totale', 'Label 0', 'Label 1', 'Label 0 (%)', 'Label 1 (%)']

phase_distribution

Unnamed: 0_level_0,Totale,Label 0,Label 1,Label 0 (%),Label 1 (%)
session,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
POST,280734,103500,177234,36.87,63.13
PRE,279194,83905,195289,30.05,69.95
TEST01,273701,65028,208673,23.76,76.24
TEST05,151711,52386,99325,34.53,65.47


In [None]:
# Count rows per subject and label within the PPT phase
etichetta_ppt = df[df['phase'] == 'PPT'].groupby(['subject', 'label']).size().unstack(fill_value=0)

# Rename the columns for readability; this assignment requires exactly two
# label columns (it fails if only one label value occurs in the PPT rows)
etichetta_ppt.columns = ['Label 0 (Baseline)', 'Label 1 (Stress)']

# Add a column with the per-subject total
etichetta_ppt['Totale'] = etichetta_ppt.sum(axis=1)

# Sort so that subjects with the most label-1 (stress) rows come first
etichetta_ppt = etichetta_ppt.sort_values(by='Label 1 (Stress)', ascending=False)

# NOTE(review): this changes a global pandas display option for every later cell.
pd.set_option('display.max_rows', None)  # optional: show all subjects
etichetta_ppt

Unnamed: 0_level_0,Label 0 (Baseline),Label 1 (Stress),Totale
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
P047,1451,68613,70064
P032,3025,64488,67513
P041,2070,54518,56588
P071,4869,53388,58257
P062,2399,53219,55618
P058,2549,52197,54746
P043,3080,48661,51741
P023,11647,46430,58077
P005,6765,45518,52283
P044,8479,43001,51480


In [None]:
def remove_subjects_without_positive_labels(df, label_col='label', subject_col='subject'):
    """
    Drop every row belonging to a subject that has no positive label.

    A subject is kept when at least one of its rows has ``label_col == 1``.

    Parameters:
        df (pd.DataFrame): the full dataset.
        label_col (str): name of the label column.
        subject_col (str): name of the subject column.

    Returns:
        df_filtered (pd.DataFrame): rows of the kept subjects, index reset.
        removed_subjects (list): sorted identifiers of the dropped subjects.
    """
    positive_rows = df[label_col] == 1
    subjects_with_positive = set(df.loc[positive_rows, subject_col].unique())

    every_subject = set(df[subject_col].unique())
    removed_subjects = sorted(every_subject.difference(subjects_with_positive))

    is_removed = df[subject_col].isin(removed_subjects)
    df_filtered = df[~is_removed].reset_index(drop=True)

    return df_filtered, removed_subjects

In [None]:
df, soggetti_rimossi = remove_subjects_without_positive_labels(df)

print("Soggetti rimossi:", soggetti_rimossi)

Soggetti rimossi: []


In [None]:
pip install neurokit2

Collecting neurokit2
  Downloading neurokit2-0.2.11-py2.py3-none-any.whl.metadata (37 kB)
Downloading neurokit2-0.2.11-py2.py3-none-any.whl (696 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m696.5/696.5 kB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: neurokit2
Successfully installed neurokit2-0.2.11


In [None]:
import neurokit2 as nk

In [None]:
# Clean the BVP (PPG) trace with NeuroKit2 and store the result as BVP_CLEAN.
# NOTE(review): `df` stacks all subjects/sessions/phases row-wise, so the
# processing runs across recording boundaries — confirm this is intended.
# NOTE(review): only the "PPG_Clean" column of the result is used below.
ppg_raw = df["BVP"].values

fs = 64  # sampling rate passed to NeuroKit2 (Hz)

ppg_signals, ppg_info = nk.ppg_process(ppg_raw, sampling_rate=fs)

ppg_clean = ppg_signals["PPG_Clean"]

df['BVP_CLEAN'] = ppg_clean

In [None]:
# Sliding-window HRV and PPG-morphology features from the cleaned BVP signal.
#
# NOTE(review): windows are cut from the concatenated frame of all recordings,
# so a window that starts near the end of one subject/session/phase runs into
# the next one while carrying the metadata of its first row. The windowing is
# left unchanged here to keep the rows in step with the EDA feature table.
# NOTE(review): `range(0, len(df) - window_size, ...)` never yields the start
# `len(df) - window_size`, i.e. a final full window can be skipped.
# `find_peaks` comes from the import cell at the top of the notebook.

# Parameters
fs = 64  # sampling rate (Hz)
window_sec = 60
overlap = 0.75
window_size = int(fs * window_sec)
step_size = int(window_size * (1 - overlap))

# Collects one single-row feature frame per accepted window
features_list = []

# Slide over the signal
for start in range(0, len(df) - window_size, step_size):
    window = df["BVP_CLEAN"].iloc[start:start + window_size].values
    labels = df["label"].iloc[start:start + window_size].values
    subject = df["subject"].iloc[start]
    phase = df["phase"].iloc[start]
    session = df["session"].iloc[start]

    # Count the labels inside the window
    counts = np.bincount(labels.astype(int), minlength=2)
    total = counts.sum()
    dominant_label = np.argmax(counts)
    dominant_ratio = counts[dominant_label] / total

    # Skip the window unless one class covers at least 70% of it
    if dominant_ratio < 0.7:
        continue

    label = dominant_label  # the window takes the dominant label

    try:
        # HRV features (NeuroKit2)
        signals, info = nk.ppg_peaks(window, sampling_rate=fs)
        hrv_features = nk.hrv(info, sampling_rate=fs, show=False)

        # --- Hand-made time-domain features (SciPy) ---
        peaks, _ = find_peaks(window, distance=fs * 0.5, height=0)
        if len(peaks) < 2:
            raise ValueError("Troppi pochi picchi")

        # Amplitude: mean height of the detected peaks
        amplitudes = window[peaks]
        amp_mean = np.mean(amplitudes)

        # Rise time: time from the trough that precedes each peak to the peak.
        # The trough is searched between the previous peak (or the window
        # start) and the current peak. Searching the whole `window[:peak]`, as
        # done before, picks the global minimum seen so far and produced rise
        # times of tens of seconds.
        rise_times = []
        search_start = 0
        for peak in peaks:
            segment = window[search_start:peak]
            if len(segment) > 0:
                min_idx = search_start + np.argmin(segment)
                rise_times.append((peak - min_idx) / fs)
            search_start = peak
        rise_time_mean = np.mean(rise_times)

        # Duration: mean inter-peak interval in seconds
        ibi = np.diff(peaks) / fs
        duration_mean = np.mean(ibi)

        # Attach the extra features and the metadata to the HRV row
        hrv_features["PPG_Amplitude"] = amp_mean
        hrv_features["PPG_RiseTime"] = rise_time_mean
        hrv_features["PPG_Duration"] = duration_mean
        hrv_features["label"] = label
        hrv_features["subject"] = subject
        hrv_features["phase"] = phase
        hrv_features["session"] = session

        features_list.append(hrv_features)

    except Exception as e:
        # Best effort: a window that cannot be processed is reported and skipped.
        print(f"Errore nella finestra {start}-{start + window_size}: {e}")

# Stack all window rows into one DataFrame
bvp_features = pd.concat(features_list, ignore_index=True) if features_list else pd.DataFrame()

In [None]:
print("Shape of BVP Features:",bvp_features.shape)
bvp_features.head()

Shape of BVP Features: (1977, 90)


Unnamed: 0,HRV_MeanNN,HRV_SDNN,HRV_SDANN1,HRV_SDNNI1,HRV_SDANN2,HRV_SDNNI2,HRV_SDANN5,HRV_SDNNI5,HRV_RMSSD,HRV_SDSD,...,HRV_KFD,HRV_LZC,PPG_Amplitude,PPG_RiseTime,PPG_Duration,label,subject,phase,session,HRV_DFA_alpha2
0,774.465461,241.082297,,,,,,,363.338649,365.727612,...,3.505378,1.315353,11.754638,26.448333,0.805321,0,P005,RELAX,PRE,
1,761.160714,227.699015,,,,,,,327.306506,329.473459,...,3.557895,1.139416,8.777484,10.878495,0.792083,0,P005,RELAX,PRE,
2,762.378247,227.041769,,,,,,,304.170445,306.154532,...,2.784,1.302189,8.843158,20.045847,0.784792,0,P005,RELAX,PRE,
3,763.392857,196.316732,,,,,,,260.292597,262.012245,...,3.367048,1.139416,7.955898,20.556818,0.773438,0,P005,RELAX,PRE,
4,746.594551,165.391042,,,,,,,220.122664,221.556692,...,5.517238,1.128149,7.331028,8.75,0.788035,0,P005,RELAX,PRE,


In [None]:
bvp_features = bvp_features.replace([np.inf, -np.inf], np.nan)

In [None]:
bvp_features.isna().sum()

Unnamed: 0,0
HRV_MeanNN,0
HRV_SDNN,0
HRV_SDANN1,1977
HRV_SDNNI1,1977
HRV_SDANN2,1977
HRV_SDNNI2,1977
HRV_SDANN5,1977
HRV_SDNNI5,1977
HRV_RMSSD,0
HRV_SDSD,0


In [None]:
# WESAD feature names to keep (HRV/PPG, SCR, accelerometer-magnitude and
# temperature columns); used as the keep-list for filter_features.
wesad_features = [
    'HRV_MeanNN', 'HRV_SDNN', 'HRV_MedianNN', 'HRV_MadNN',
    'HRV_SDRMSSD', 'HRV_Prc20NN', 'HRV_pNN50', 'HRV_MinNN',
    'HRV_HTI', 'HRV_TINN', 'HRV_MFDFA_alpha1_Max', 'HRV_MFDFA_alpha1_Fluctuation',
    'HRV_SampEn', 'HRV_FuzzyEn', 'HRV_MSEn', 'HRV_CD',
    'HRV_HFD', 'HRV_KFD', 'HRV_LZC', 'PPG_Amplitude',
    'PPG_Duration', 'SCR_Peaks_N', 'SCR_Peaks_Amplitude_Mean',
    'AccMag_Mean', 'AccMag_Std', 'AccMag_IQR', 'AccMag_Skew', 'AccMag_Kurtosis',
    'Temp_Mean', 'Temp_Std', 'Temp_Slope'
]

# Column filter
def filter_features(df, keep_features, exclude_cols=None):
    """
    Return ``df`` restricted to the wanted feature and metadata columns.

    A column is kept when its name appears in ``keep_features`` or in
    ``exclude_cols``; the original column order of ``df`` is preserved.

    Parameters:
        df: input DataFrame.
        keep_features: names of the feature columns to keep.
        exclude_cols: names of columns exempt from filtering (e.g. metadata);
            ``None`` means no such columns.

    Returns:
        ``df`` indexed by the list of retained column names.
    """
    always_kept = [] if exclude_cols is None else exclude_cols
    selected = []
    for name in df.columns:
        if name in keep_features or name in always_kept:
            selected.append(name)
    return df[selected]

In [None]:
# Keep only the WESAD features present in bvp_features, plus the metadata columns
exclude_columns = ['label', 'subject', 'phase', 'session']
bvp_features = filter_features(bvp_features, wesad_features, exclude_columns)

In [None]:
for col in bvp_features.columns:
  print(col)

HRV_MeanNN
HRV_SDNN
HRV_MedianNN
HRV_MadNN
HRV_SDRMSSD
HRV_Prc20NN
HRV_pNN50
HRV_MinNN
HRV_HTI
HRV_TINN
HRV_MFDFA_alpha1_Max
HRV_MFDFA_alpha1_Fluctuation
HRV_SampEn
HRV_FuzzyEn
HRV_MSEn
HRV_CD
HRV_HFD
HRV_KFD
HRV_LZC
PPG_Amplitude
PPG_Duration
label
subject
phase
session


In [None]:
bvp_features.isna().sum()

Unnamed: 0,0
HRV_MeanNN,0
HRV_SDNN,0
HRV_MedianNN,0
HRV_MadNN,0
HRV_SDRMSSD,0
HRV_Prc20NN,0
HRV_pNN50,0
HRV_MinNN,0
HRV_HTI,0
HRV_TINN,0


In [None]:
bvp_features = bvp_features.fillna(bvp_features.median(numeric_only = True))

In [None]:
sum(bvp_features.isna().sum())

0

In [None]:
print("Final Shape of BVP Features:", bvp_features.shape)
bvp_features.head()

Final Shape of BVP Features: (1977, 25)


Unnamed: 0,HRV_MeanNN,HRV_SDNN,HRV_MedianNN,HRV_MadNN,HRV_SDRMSSD,HRV_Prc20NN,HRV_pNN50,HRV_MinNN,HRV_HTI,HRV_TINN,...,HRV_CD,HRV_HFD,HRV_KFD,HRV_LZC,PPG_Amplitude,PPG_Duration,label,subject,phase,session
0,774.465461,241.082297,726.5625,162.159375,0.66352,609.375,84.210526,312.5,12.666667,359.375,...,1.912942,2.017399,3.505378,1.315353,11.754638,0.805321,0,P005,RELAX,PRE
1,761.160714,227.699015,718.75,138.99375,0.695675,612.5,84.415584,312.5,11.0,390.625,...,1.902365,1.983479,3.557895,1.139416,8.777484,0.792083,0,P005,RELAX,PRE
2,762.378247,227.041769,703.125,162.159375,0.746429,612.5,85.714286,312.5,11.0,390.625,...,1.826509,1.981984,2.784,1.302189,8.843158,0.784792,0,P005,RELAX,PRE
3,763.392857,196.316732,750.0,138.99375,0.754216,628.125,80.519481,328.125,15.4,343.75,...,1.82669,1.999609,3.367048,1.139416,7.955898,0.773438,0,P005,RELAX,PRE
4,746.594551,165.391042,750.0,162.159375,0.751359,625.0,78.205128,328.125,15.6,343.75,...,2.355539,1.973491,5.517238,1.128149,7.331028,0.788035,0,P005,RELAX,PRE


In [None]:
# Clean the EDA trace with NeuroKit2 and store the result as EDA_CLEAN.
# NOTE(review): `df` stacks all subjects/sessions/phases row-wise, so the
# processing runs across recording boundaries — confirm this is intended.
eda_raw = df["EDA"].values

fs = 64  # sampling rate passed to NeuroKit2 (Hz)

eda_signals, eda_info = nk.eda_process(eda_raw, sampling_rate=fs)

eda_clean = eda_signals["EDA_Clean"]

df['EDA_CLEAN'] = eda_clean

In [None]:
# Sliding-window SCR features from the cleaned EDA signal.
#
# NOTE(review): windows are cut from the concatenated frame of all recordings,
# so a window that starts near the end of one subject/session/phase runs into
# the next one while carrying the metadata of its first row.
# NOTE(review): `range(0, len(df) - window_size, ...)` never yields the start
# `len(df) - window_size`, i.e. a final full window can be skipped.
# NOTE(review): the loop rebinds `eda_signals` / `eda_info`, which the
# EDA-cleaning cell assigned for the whole recording; after this cell they
# refer to the last processed window.

# Parameters
fs = 64  # sampling rate (Hz)
window_sec = 60
overlap = 0.75
window_size = int(fs * window_sec)
step_size = int(window_size * (1 - overlap))

# Collects one feature frame per accepted window
eda_features_list = []

# Slide over the signal
for start in range(0, len(df) - window_size, step_size):
    window_signal = df["EDA_CLEAN"].iloc[start:start + window_size].values
    labels = df["label"].iloc[start:start + window_size].values
    subject = df["subject"].iloc[start]
    phase = df["phase"].iloc[start]
    session = df["session"].iloc[start]

    # Count the labels inside the window
    counts = np.bincount(labels.astype(int), minlength=2)
    total = counts.sum()
    dominant_label = np.argmax(counts)
    dominant_ratio = counts[dominant_label] / total

    # Skip ambiguous windows (dominant class below 70%)
    if dominant_ratio < 0.7:
        continue

    label = dominant_label

    try:
        # Extract EDA features from the window
        # NOTE(review): eda_peaks receives the cleaned EDA signal; presumably
        # it expects the phasic component — confirm against the NeuroKit2 docs.
        eda_signals, eda_info = nk.eda_peaks(window_signal, sampling_rate=fs)
        features = nk.eda_intervalrelated(eda_signals, sampling_rate=fs)

        # Attach the metadata
        features["label"] = label
        features["subject"] = subject
        features["phase"] = phase
        features["session"] = session

        eda_features_list.append(features)

    except Exception as e:
        # Best effort: a window that cannot be processed is reported and skipped.
        print(f"Errore nella finestra {start}-{start + window_size}: {e}")

# Stack all window rows into one DataFrame
if eda_features_list:
    eda_features = pd.concat(eda_features_list, ignore_index=True)
else:
    print("Nessuna finestra valida.")
    eda_features = pd.DataFrame()

In [None]:
print("Shape of EDA features:", eda_features.shape)
eda_features.head()

Shape of EDA features: (1977, 9)


Unnamed: 0,SCR_Peaks_N,SCR_Peaks_Amplitude_Mean,EDA_Sympathetic,EDA_SympatheticN,EDA_Autocorrelation,label,subject,phase,session
0,37.0,0.004352,,,,0,P005,RELAX,PRE
1,36.0,0.004107,,,,0,P005,RELAX,PRE
2,35.0,0.003905,,,,0,P005,RELAX,PRE
3,38.0,0.004363,,,,0,P005,RELAX,PRE
4,41.0,0.004438,,,,0,P005,RELAX,PRE


In [None]:
eda_features.isna().sum()

Unnamed: 0,0
SCR_Peaks_N,0
SCR_Peaks_Amplitude_Mean,0
EDA_Sympathetic,1977
EDA_SympatheticN,1977
EDA_Autocorrelation,1977
label,0
subject,0
phase,0
session,0


In [None]:
# WESAD feature names to keep; used as the keep-list for filter_features.
# NOTE(review): this list is redefined with the same contents as in the BVP
# section of this notebook; consider keeping a single definition.
wesad_features = [
    'HRV_MeanNN', 'HRV_SDNN', 'HRV_MedianNN', 'HRV_MadNN',
    'HRV_SDRMSSD', 'HRV_Prc20NN', 'HRV_pNN50', 'HRV_MinNN',
    'HRV_HTI', 'HRV_TINN', 'HRV_MFDFA_alpha1_Max', 'HRV_MFDFA_alpha1_Fluctuation',
    'HRV_SampEn', 'HRV_FuzzyEn', 'HRV_MSEn', 'HRV_CD',
    'HRV_HFD', 'HRV_KFD', 'HRV_LZC', 'PPG_Amplitude',
    'PPG_Duration', 'SCR_Peaks_N', 'SCR_Peaks_Amplitude_Mean',
    'AccMag_Mean', 'AccMag_Std', 'AccMag_IQR', 'AccMag_Skew', 'AccMag_Kurtosis',
    'Temp_Mean', 'Temp_Std', 'Temp_Slope'
]

# Column filter
# NOTE(review): same function as the one defined in the BVP section of this
# notebook; this later definition silently replaces it.
def filter_features(df, keep_features, exclude_cols=None):
    """
    Return ``df`` restricted to the wanted feature and metadata columns.

    A column is kept when its name appears in ``keep_features`` or in
    ``exclude_cols``; the original column order of ``df`` is preserved.

    Parameters:
        df: input DataFrame.
        keep_features: names of the feature columns to keep.
        exclude_cols: names of columns exempt from filtering (e.g. metadata);
            ``None`` means no such columns.

    Returns:
        ``df`` indexed by the list of retained column names.
    """
    protected = exclude_cols
    if protected is None:
        protected = []

    def is_wanted(name):
        return name in keep_features or name in protected

    return df[list(filter(is_wanted, df.columns))]

In [None]:
# Keep only the WESAD features present in eda_features, plus the metadata columns
exclude_columns = ['label', 'subject', 'phase', 'session']
eda_features = filter_features(eda_features, wesad_features, exclude_columns)

In [None]:
for col in eda_features.columns:
  print(col)

SCR_Peaks_N
SCR_Peaks_Amplitude_Mean
label
subject
phase
session


In [None]:
eda_features.isna().sum()

Unnamed: 0,0
SCR_Peaks_N,0
SCR_Peaks_Amplitude_Mean,0
label,0
subject,0
phase,0
session,0


In [None]:
print("Final Shape of EDA Features:", eda_features.shape)
eda_features.head()

Final Shape of EDA Features: (1977, 6)


Unnamed: 0,SCR_Peaks_N,SCR_Peaks_Amplitude_Mean,label,subject,phase,session
0,37.0,0.004352,0,P005,RELAX,PRE
1,36.0,0.004107,0,P005,RELAX,PRE
2,35.0,0.003905,0,P005,RELAX,PRE
3,38.0,0.004363,0,P005,RELAX,PRE
4,41.0,0.004438,0,P005,RELAX,PRE


In [None]:
def butter_highpass(cutoff, fs, order=5):
    """
    Design a digital Butterworth high-pass filter.

    Parameters:
        cutoff: cutoff frequency, in the same unit as ``fs``.
        fs: sampling frequency.
        order: filter order (default 5).

    Returns:
        ``(b, a)`` numerator and denominator coefficients from
        ``scipy.signal.butter``.
    """
    # butter() expects the cutoff normalised by the Nyquist frequency (fs / 2).
    return butter(order, cutoff / (0.5 * fs), btype='high', analog=False)

def butter_lowpass(cutoff, fs, order=5):
    """
    Design a digital Butterworth low-pass filter.

    Parameters:
        cutoff: cutoff frequency, in the same unit as ``fs``.
        fs: sampling frequency.
        order: filter order (default 5).

    Returns:
        ``(b, a)`` numerator and denominator coefficients from
        ``scipy.signal.butter``.
    """
    # butter() expects the cutoff normalised by the Nyquist frequency (fs / 2).
    return butter(order, cutoff / (0.5 * fs), btype='low', analog=False)

In [None]:
def acc_denoise(acc_signal, fs):
    """Band-limit an accelerometer trace with two forward-backward filters.

    The signal is first high-pass filtered at 0.5 and then low-pass
    filtered at 20.0 (same frequency unit as `fs`), each stage being applied
    with `filtfilt`.

    Parameters
    ----------
    acc_signal : array-like
        Samples to filter.
    fs : float
        Sampling frequency of `acc_signal`.

    Returns
    -------
    The filtered signal, as returned by the last `filtfilt` call.
    """
    # (filter designer, cut-off) pairs, applied in this order
    filter_stages = ((butter_highpass, 0.5), (butter_lowpass, 20.0))

    for design_filter, cutoff in filter_stages:
        b, a = design_filter(cutoff, fs)
        acc_signal = filtfilt(b, a, acc_signal)

    return acc_signal

In [None]:
# Collapse the three accelerometer axes into one magnitude channel:
# sqrt(ACC1^2 + ACC2^2 + ACC3^2).
# NOTE(review): the per-axis columns are dropped right after, so this cell
# cannot be re-run on the same `df` without reloading it.
df['ACC_MAG'] = np.sqrt(sum(df[axis_col] ** 2 for axis_col in ('ACC1', 'ACC2', 'ACC3')))
df = df.drop(columns=['ACC1', 'ACC2', 'ACC3'])

In [None]:
# Sampling frequency used to design the accelerometer filters
fs = 64

# NOTE(review): the whole ACC_MAG column is filtered as one continuous signal;
# if `df` stacks several recordings, the filter also runs across their
# boundaries -- confirm this is acceptable.
acc_mag_raw = df['ACC_MAG'].values
df['ACC_MAG_CLEAN'] = acc_denoise(acc_mag_raw, fs=fs)

In [None]:
# Windowing parameters
fs = 64  # sampling frequency
window_sec = 60
overlap = 0.75
window_size = int(fs * window_sec)
step_size = int(window_size * (1 - overlap))

# One dict of features per accepted window
acc_features_list = []

# Windows are cut subject by subject, so no window mixes two subjects
for subject_id in df["subject"].unique():
    print(f"Elaborazione soggetto {subject_id}")
    df_s = df[df["subject"] == subject_id].reset_index(drop=True)

    # Only full-length windows are generated
    for start in range(0, len(df_s) - window_size + 1, step_size):
        end = start + window_size
        span = df_s.iloc[start:end]
        window = span["ACC_MAG_CLEAN"].values
        labels = span["label"].values
        phases = span["phase"].values
        session = span["session"].iloc[0]  # session of the window's first sample

        # Label vote: keep the window only if one class covers >= 70% of it
        counts = np.bincount(labels.astype(int), minlength=2)
        dominant_label = counts.argmax()
        dominant_ratio = counts[dominant_label] / counts.sum()
        if dominant_ratio < 0.7:
            continue

        # The phase assigned to the window is the most frequent one
        protocol_phase = pd.Series(phases).mode()[0]

        try:
            acc_max = np.max(window)
            acc_min = np.min(window)
            q75 = np.percentile(window, 75)
            q25 = np.percentile(window, 25)

            acc_features_list.append({
                "AccMag_Mean": np.mean(window),
                "AccMag_Std": np.std(window),
                "AccMag_Max": acc_max,
                "AccMag_Min": acc_min,
                "AccMag_Range": acc_max - acc_min,
                "AccMag_Median": np.median(window),
                "AccMag_IQR": q75 - q25,
                "AccMag_Skew": scipy.stats.skew(window),
                "AccMag_Kurtosis": scipy.stats.kurtosis(window),
                "label": dominant_label,
                "subject": subject_id,
                "phase": protocol_phase,
                "session": session
            })
        except Exception as e:
            # A failing window is reported and skipped, not fatal
            print(f"Errore finestra {start}-{end}: {e}")
            continue

# Collect all window records into a single DataFrame
acc_features = pd.DataFrame(acc_features_list)

Elaborazione soggetto P005
Elaborazione soggetto P008
Elaborazione soggetto P023
Elaborazione soggetto P032
Elaborazione soggetto P035
Elaborazione soggetto P037
Elaborazione soggetto P038
Elaborazione soggetto P041
Elaborazione soggetto P043
Elaborazione soggetto P044
Elaborazione soggetto P046
Elaborazione soggetto P047
Elaborazione soggetto P049
Elaborazione soggetto P058
Elaborazione soggetto P062
Elaborazione soggetto P065
Elaborazione soggetto P071


In [None]:
# Size of the raw ACC feature table, followed by a preview of its first rows
print("Shape of ACC Features", acc_features.shape)
acc_features.head(n=5)

Shape of ACC Features (1945, 13)


Unnamed: 0,AccMag_Mean,AccMag_Std,AccMag_Max,AccMag_Min,AccMag_Range,AccMag_Median,AccMag_IQR,AccMag_Skew,AccMag_Kurtosis,label,subject,phase,session
0,-0.006293,4.48604,54.849013,-38.144843,92.993857,-0.066113,1.28271,1.657764,30.981339,0,P005,RELAX,PRE
1,-0.00014,4.799695,54.849013,-32.159447,87.00846,-0.072885,1.483798,1.857713,27.438934,0,P005,RELAX,PRE
2,-0.001071,4.273247,44.474567,-32.159447,76.634014,-0.039412,1.489626,1.614605,28.032515,0,P005,RELAX,PRE
3,-0.001218,2.900711,44.474567,-24.412122,68.886689,-0.023265,0.877292,1.81705,49.947262,0,P005,RELAX,PRE
4,0.002359,2.429859,44.474567,-24.412122,68.886689,-0.02817,0.43483,3.031965,93.492645,0,P005,RELAX,PRE


In [None]:
# Keep only the WESAD-compatible ACC features; the metadata columns listed in
# exclude_columns are passed through untouched.
exclude_columns = ['label', 'subject', 'phase', 'session']
acc_features = filter_features(
    acc_features,
    keep_features=wesad_features,
    exclude_cols=exclude_columns,
)

In [None]:
# List the columns that survived the filter, one per line
# (iterating a DataFrame yields its column labels)
for col in acc_features:
    print(col)

AccMag_Mean
AccMag_Std
AccMag_IQR
AccMag_Skew
AccMag_Kurtosis
label
subject
phase
session


In [None]:
# Count of missing values per column of the filtered ACC table
acc_features.isna().sum(axis=0)

Unnamed: 0,0
AccMag_Mean,0
AccMag_Std,0
AccMag_IQR,0
AccMag_Skew,0
AccMag_Kurtosis,0
label,0
subject,0
phase,0
session,0


In [None]:
# Size of the filtered ACC table, followed by a preview of its first rows
print("Final Shape of ACC Features:", acc_features.shape)
acc_features.head(n=5)

Final Shape of ACC Features: (1945, 9)


Unnamed: 0,AccMag_Mean,AccMag_Std,AccMag_IQR,AccMag_Skew,AccMag_Kurtosis,label,subject,phase,session
0,-0.006293,4.48604,1.28271,1.657764,30.981339,0,P005,RELAX,PRE
1,-0.00014,4.799695,1.483798,1.857713,27.438934,0,P005,RELAX,PRE
2,-0.001071,4.273247,1.489626,1.614605,28.032515,0,P005,RELAX,PRE
3,-0.001218,2.900711,0.877292,1.81705,49.947262,0,P005,RELAX,PRE
4,0.002359,2.429859,0.43483,3.031965,93.492645,0,P005,RELAX,PRE


In [None]:
def temp_denoise(temp_signal, fs):
    """Smooth a temperature trace with a forward-backward low-pass filter.

    Parameters
    ----------
    temp_signal : array-like
        Samples to filter.
    fs : float
        Sampling frequency of `temp_signal`; the cut-off is fixed at 0.5
        in the same unit.

    Returns
    -------
    The filtered signal, as returned by `filtfilt`.
    """
    coefficients = butter_lowpass(0.5, fs)  # (b, a)
    return filtfilt(*coefficients, temp_signal)

In [None]:
# Sampling frequency used to design the temperature filter
fs = 64

# NOTE(review): the whole TEMP column is filtered as one continuous signal;
# if `df` stacks several recordings, the filter also runs across their
# boundaries -- confirm this is acceptable.
temp_raw = df['TEMP'].values
df['TEMP_CLEAN'] = temp_denoise(temp_raw, fs=fs)

In [None]:
# Windowing parameters
fs = 64  # sampling frequency
window_sec = 60
overlap = 0.75
window_size = int(window_sec * fs)
step_size = int(window_size * (1 - overlap))

# One dict of features per accepted window
features_list = []

# NOTE(review): unlike the per-subject ACC extraction, the windows here slide
# over the whole `df`, so a window can contain rows of two different subjects;
# subject and session are read from the window's first row only. Confirm this
# is intended before relying on row-by-row alignment with the other tables.
for start in range(0, len(df) - window_size + 1, step_size):
    end = start + window_size
    span = df.iloc[start:end]
    window_signal = span["TEMP_CLEAN"].values
    window_labels = span["label"].values
    window_phases = span["phase"].values
    window_subject = span["subject"].iloc[0]
    window_session = span["session"].iloc[0]

    # Label vote: keep the window only if one class covers >= 70% of it
    counts = np.bincount(window_labels.astype(int), minlength=2)
    dominant_label = counts.argmax()
    dominant_ratio = counts[dominant_label] / counts.sum()
    if dominant_ratio < 0.7:
        continue

    # The phase assigned to the window is the most frequent one
    protocol_phase = pd.Series(window_phases).mode()[0]

    try:
        # Slope of the least-squares line fitted against the sample index
        sample_index = np.arange(len(window_signal))

        features_list.append({
            "Temp_Mean": np.mean(window_signal),
            "Temp_Std": np.std(window_signal),
            "Temp_Min": np.min(window_signal),
            "Temp_Max": np.max(window_signal),
            "Temp_Slope": np.polyfit(sample_index, window_signal, 1)[0],
            "label": dominant_label,
            "subject": window_subject,
            "phase": protocol_phase,
            "session": window_session
        })
    except Exception as e:
        # A failing window is reported and skipped, not fatal
        print(f"Errore nella finestra {start}-{end}: {e}")
        continue

# Collect all window records into a single DataFrame (empty if none survived)
if not features_list:
    print("Nessuna finestra valida.")
    temp_features = pd.DataFrame()
else:
    temp_features = pd.DataFrame(features_list)

In [None]:
# Size of the raw TEMP feature table, followed by a preview of its first rows
print("Shape of TEMP Features", temp_features.shape)
temp_features.head(n=5)

Shape of TEMP Features (1977, 9)


Unnamed: 0,Temp_Mean,Temp_Std,Temp_Min,Temp_Max,Temp_Slope,label,subject,phase,session
0,32.051672,0.008743,32.026621,32.071759,1e-06,0,P005,RELAX,PRE
1,32.05567,0.011645,32.032514,32.088154,7e-06,0,P005,RELAX,PRE
2,32.056677,0.012085,32.030968,32.088154,3e-06,0,P005,RELAX,PRE
3,32.053362,0.016861,32.005182,32.088154,-8e-06,0,P005,RELAX,PRE
4,32.043338,0.024214,31.992677,32.088154,-1.8e-05,0,P005,RELAX,PRE


In [None]:
# Keep only the WESAD-compatible TEMP features; the metadata columns listed in
# exclude_columns are passed through untouched.
exclude_columns = ['label', 'subject', 'phase', 'session']
temp_features = filter_features(
    temp_features,
    keep_features=wesad_features,
    exclude_cols=exclude_columns,
)

In [None]:
# List the columns that survived the filter, one per line
# (iterating a DataFrame yields its column labels)
for col in temp_features:
    print(col)

Temp_Mean
Temp_Std
Temp_Slope
label
subject
phase
session


In [None]:
# Count of missing values per column of the filtered TEMP table
temp_features.isna().sum(axis=0)

Unnamed: 0,0
Temp_Mean,0
Temp_Std,0
Temp_Slope,0
label,0
subject,0
phase,0
session,0


In [None]:
# Size of the filtered TEMP table, followed by a preview of its first rows
print("Final Shape of TEMP Features:", temp_features.shape)
temp_features.head(n=5)

Final Shape of TEMP Features: (1977, 7)


Unnamed: 0,Temp_Mean,Temp_Std,Temp_Slope,label,subject,phase,session
0,32.051672,0.008743,1e-06,0,P005,RELAX,PRE
1,32.05567,0.011645,7e-06,0,P005,RELAX,PRE
2,32.056677,0.012085,3e-06,0,P005,RELAX,PRE
3,32.053362,0.016861,-8e-06,0,P005,RELAX,PRE
4,32.043338,0.024214,-1.8e-05,0,P005,RELAX,PRE


In [None]:
# Side-by-side size check of the four per-signal feature tables
shape_report = (
    ("Final Shape of BVP Features:", bvp_features),
    ("Final Shape of EDA Features:", eda_features),
    ("Final Shape of ACC Features:", acc_features),
    ("Final Shape of TEMP Features:", temp_features),
)
for caption, frame in shape_report:
    print(caption, frame.shape)

Final Shape of BVP Features: (1977, 25)
Final Shape of EDA Features: (1977, 6)
Final Shape of ACC Features: (1945, 9)
Final Shape of TEMP Features: (1977, 7)


In [None]:
# Feature tables in concatenation order. BVP is the reference table: its
# metadata columns (label / subject / phase / session) are the ones kept in
# the final dataset.
feature_frames = {
    "BVP": bvp_features,
    "EDA": eda_features,
    "ACC": acc_features,
    "TEMP": temp_features,
}
meta_cols = ['label', 'subject', 'phase', 'session']


def _meta_column_agrees(left, right):
    """Row-wise equality of two metadata columns as a boolean array.

    Numeric columns are compared as floats so that int vs float storage of
    the same label does not count as a mismatch; everything else is compared
    through its string form.
    """
    if pd.api.types.is_numeric_dtype(left) and pd.api.types.is_numeric_dtype(right):
        return (left.to_numpy(dtype=float, na_value=np.nan)
                == right.to_numpy(dtype=float, na_value=np.nan))
    return left.astype(str).to_numpy() == right.astype(str).to_numpy()


# Find the length of the shortest table
min_len = min(len(frame) for frame in feature_frames.values())

# Truncate every table to the common length and renumber the rows.
# Rows are paired purely by position, so report how much is thrown away.
for name, frame in feature_frames.items():
    n_dropped = len(frame) - min_len
    if n_dropped:
        print(f"[WARNING] {name}: {n_dropped} trailing rows dropped to match "
              f"the shortest table ({min_len} rows)")

bvp_trimmed = bvp_features.iloc[:min_len].reset_index(drop=True)
eda_trimmed = eda_features.iloc[:min_len].reset_index(drop=True)
acc_trimmed = acc_features.iloc[:min_len].reset_index(drop=True)
temp_trimmed = temp_features.iloc[:min_len].reset_index(drop=True)

# Verify that the metadata columns really are aligned with the reference
# table before discarding them. A mismatch means the same row index refers to
# different windows in the two tables, i.e. the concatenated features of that
# row do not belong to the label stored next to them.
# (print is used instead of warnings.warn because the notebook silences
# warnings globally.)
for name, candidate in (("EDA", eda_trimmed), ("ACC", acc_trimmed), ("TEMP", temp_trimmed)):
    shared = [c for c in meta_cols if c in candidate.columns and c in bvp_trimmed.columns]
    if not shared:
        continue
    rows_agree = np.ones(min_len, dtype=bool)
    for c in shared:
        rows_agree &= _meta_column_agrees(candidate[c], bvp_trimmed[c])
    n_misaligned = int((~rows_agree).sum())
    if n_misaligned:
        print(f"[WARNING] {name}: {n_misaligned} of {min_len} rows disagree with BVP "
              f"on {shared}; tables are joined by position, so these rows are misaligned")

# Metadata columns are kept only once (from bvp_trimmed)
eda_trimmed = eda_trimmed.drop(columns=meta_cols, errors='ignore')
acc_trimmed = acc_trimmed.drop(columns=meta_cols, errors='ignore')
temp_trimmed = temp_trimmed.drop(columns=meta_cols, errors='ignore')

# Final column-wise concatenation
all_features = pd.concat([bvp_trimmed, eda_trimmed, acc_trimmed, temp_trimmed], axis=1)

# Size check
print("Final shape of concatenated dataset:", all_features.shape)

Final shape of concatenated dataset: (1945, 35)


In [None]:
# Preview of the first rows of the merged dataset
all_features.head(n=5)

Unnamed: 0,HRV_MeanNN,HRV_SDNN,HRV_MedianNN,HRV_MadNN,HRV_SDRMSSD,HRV_Prc20NN,HRV_pNN50,HRV_MinNN,HRV_HTI,HRV_TINN,...,SCR_Peaks_N,SCR_Peaks_Amplitude_Mean,AccMag_Mean,AccMag_Std,AccMag_IQR,AccMag_Skew,AccMag_Kurtosis,Temp_Mean,Temp_Std,Temp_Slope
0,774.465461,241.082297,726.5625,162.159375,0.66352,609.375,84.210526,312.5,12.666667,359.375,...,37.0,0.004352,-0.006293,4.48604,1.28271,1.657764,30.981339,32.051672,0.008743,1e-06
1,761.160714,227.699015,718.75,138.99375,0.695675,612.5,84.415584,312.5,11.0,390.625,...,36.0,0.004107,-0.00014,4.799695,1.483798,1.857713,27.438934,32.05567,0.011645,7e-06
2,762.378247,227.041769,703.125,162.159375,0.746429,612.5,85.714286,312.5,11.0,390.625,...,35.0,0.003905,-0.001071,4.273247,1.489626,1.614605,28.032515,32.056677,0.012085,3e-06
3,763.392857,196.316732,750.0,138.99375,0.754216,628.125,80.519481,328.125,15.4,343.75,...,38.0,0.004363,-0.001218,2.900711,0.877292,1.81705,49.947262,32.053362,0.016861,-8e-06
4,746.594551,165.391042,750.0,162.159375,0.751359,625.0,78.205128,328.125,15.6,343.75,...,41.0,0.004438,0.002359,2.429859,0.43483,3.031965,93.492645,32.043338,0.024214,-1.8e-05


In [None]:
# Destination of the merged feature table (CSV format)
save_path_csv = "/content/drive/MyDrive/CROSS TEST/DATASET/VERBIO_E4_60SEC_75OL.csv"

# Create the target folder if it is missing; to_csv does not create it
os.makedirs(os.path.dirname(save_path_csv), exist_ok=True)

# Write the table without the positional index
all_features.to_csv(save_path_csv, index=False)

# The file is a CSV, so the message says so
print(f"Dataset salvato in formato CSV in: {save_path_csv}")

Dataset salvato in formato pickle in: /content/drive/MyDrive/CROSS TEST/DATASET/VERBIO_E4_60SEC_75OL.csv
