In [None]:
import joblib
import numpy as np 
import mne
import numpy as np
from scipy.stats import skew, kurtosis
from mne.preprocessing import ICA
from mne.time_frequency import psd_array_welch
import pandas as pd
from scipy.fftpack import fft
import pywt  # For wavelet transform

In [None]:
def preprocess_eeg_data(vhdr_file_path, l_freq=1.0, h_freq=40.0, notch_freq=50):
    raw = mne.io.read_raw_brainvision(vhdr_file_path, preload=True)
    eog_channels = ['VPVA', 'VNVB', 'HPHL', 'HNHR']
    raw.set_channel_types({ch: 'eog' for ch in eog_channels if ch in raw.ch_names})
    raw.notch_filter(freqs=[notch_freq], picks='eeg')
    raw.filter(l_freq=l_freq, h_freq=h_freq, picks='eeg')
    raw.set_eeg_reference('average', projection=True)
    
    # ICA for artifact removal
    ica = ICA(n_components=20, random_state=97, max_iter=800)
    ica.fit(raw)
    eog_indices, _ = ica.find_bads_eog(raw)
    ica.exclude = eog_indices
    raw = ica.apply(raw)
    
    return raw

In [None]:
def extract_channel_features(raw, fmin=0.5, fmax=50):
    # Select only EEG channels
    raw.pick_types(eeg=True)  # This removes non-EEG channels
    data = raw.get_data()
    channel_names = raw.ch_names
    features = {ch: {} for ch in channel_names}

    # Time-domain features
    for i, ch in enumerate(channel_names):
        features[ch]['mean'] = np.mean(data[i])
        features[ch]['variance'] = np.var(data[i])
        features[ch]['skewness'] = skew(data[i])
        features[ch]['kurtosis'] = kurtosis(data[i])
        features[ch]['peak_to_peak'] = np.ptp(data[i])

        # Fourier Transform (FFT)
        fft_values = np.abs(fft(data[i]))
        features[ch]['fft_mean'] = np.mean(fft_values)
        features[ch]['fft_std'] = np.std(fft_values)
        features[ch]['fft_max'] = np.max(fft_values)

        # Wavelet Transform (DWT) using Daubechies wavelet (db4) #morle
        coeffs = pywt.wavedec(data[i], 'db4', level=4)
        features[ch]['wavelet_energy'] = sum(np.sum(np.square(c)) for c in coeffs)
        features[ch]['wavelet_entropy'] = 0  # Initialize wavelet_entropy
        
        for c in coeffs:
            c = c[np.isfinite(c)]
            c_norm = c / (np.sum(np.abs(c)) + 1e-10)
            features[ch]['wavelet_entropy'] += -np.sum(c_norm * np.log2(c_norm + 1e-10))

    # Frequency-domain features using PSD
    psd = raw.compute_psd(method='welch', fmin=fmin, fmax=fmax, n_fft=2048)
    psd_data = psd.get_data()
    freqs = psd.freqs
    psd_df = pd.DataFrame(psd_data, columns=freqs, index=channel_names)

    bands = {'delta': (0.5, 4), 'theta': (4, 8), 'slow_alpha': (6, 9), 'alpha': (8, 12),
             'beta': (12, 30), 'gamma': (30, 50)}

    for band, (low, high) in bands.items():
        band_power = psd_df.loc[:, (freqs >= low) & (freqs <= high)].mean(axis=1)
        for ch in channel_names:
            features[ch][f'{band}_power'] = band_power[ch]

    # Frontal Alpha Asymmetry (F3-F4)
    if 'F3' in channel_names and 'F4' in channel_names:
        features['F3_F4_alpha_asymmetry'] = features['F4']['alpha_power'] - features['F3']['alpha_power']

    # Convert features to DataFrame
    features_df = pd.DataFrame(features).T

    return features_df


In [None]:
def process_and_combine(eo_file_path, ec_file_path, output_file):
    all_features = []

    # Process EO file
    raw_eo = preprocess_eeg_data(eo_file_path)
    features_eo = extract_channel_features(raw_eo)
    #features_eo['condition'] = 'EO'
    all_features.append(features_eo)

    # Process EC file
    raw_ec = preprocess_eeg_data(ec_file_path)
    features_ec = extract_channel_features(raw_ec)
    #features_ec['condition'] = 'EC'
    all_features.append(features_ec)

    # Combine EO and EC features
    combined_features = pd.concat(all_features, keys=['EO', 'EC'], names=['condition', 'channel'])
    
    # Save combined features to a single CSV file
    combined_features.to_csv(output_file)
    print(f"Features successfully saved to {output_file}")
    # return combined_features

In [None]:
process_and_combine("../ibrain_dataset/mdds/sub-87999321/ses-1/eeg/sub-87999321_ses-1_task-restEO_eeg.vhdr","../ibrain_dataset/mdds/sub-87999321/ses-1/eeg/sub-87999321_ses-1_task-restEC_eeg.vhdr","preprocessed.csv")

In [None]:
data = pd.read_csv("preprocessed.csv")
data.head()

In [None]:
data.columns

In [None]:
import pandas as pd
import joblib
from sklearn.preprocessing import LabelEncoder


# Modified Function to Load and Preprocess a Single CSV File
def load_and_preprocess_single_csv(df):
    """
    Loads and preprocesses a single CSV file for prediction.

    Args:
        file_path (str): The path to the CSV file.

    Returns:
        pandas.DataFrame: Preprocessed DataFrame.
    """
    

    # Handle categorical data
    for col in df.select_dtypes(include=['object']).columns:
        df[col] = LabelEncoder().fit_transform(df[col])

    # Handle missing values
    df.fillna(df.median(), inplace=True)

    return df
def predict(eo_path, ec_path):
    # Load and Preprocess the Single CSV File
    outut_file = "preprocessed.csv"
    process_and_combine(eo_path, ec_path, outut_file)
    csv_data = pd.read_csv(outut_file)
    X_predict = load_and_preprocess_single_csv(csv_data)
    # Load Pre-trained Model and Scaler
    
    best_model = joblib.load("../model_weights/xgb_model.pkl")
    scaler = joblib.load("../scaler/xgb_scaler.pkl")  # Load the same scaler used during training
    # Apply Same Scaling as Training Data
    X_predict_scaled = scaler.transform(X_predict)

    # Predict on the Single CSV Data
    prediction = best_model.predict(X_predict_scaled)
    predicted_class = prediction[0]

    if predicted_class == 0:
        prediction_label = "Healthy (0)"
    elif predicted_class == 1:
        prediction_label = "MDD (1)"
    else:
        prediction_label = "Unknown" # Should not happen in binary classification, but for robustness

    print(f"Class: {predicted_class} - {prediction_label}")

    try:
        probability = best_model.predict_proba(X_predict_scaled)[0] # Probabilities for both classes
        probability_healthy = probability[0] * 100
        probability_mdd = probability[1] * 100
        print(f"Probability of being Healthy: {probability_healthy:.2f}%")
        print(f"Probability of having MDD: {probability_mdd:.2f}%")
    except AttributeError:
        print("Prediction probabilities are not available for this model.")
    except Exception as e:
        print(f"Error getting prediction probabilities: {e}")


In [81]:
predict("../ibrain_dataset/mdds/sub-87999321/ses-1/eeg/sub-87999321_ses-1_task-restEO_eeg.vhdr","../ibrain_dataset/mdds/sub-87999321/ses-1/eeg/sub-87999321_ses-1_task-restEC_eeg.vhdr")
print("\n Prediction process completed.")

Extracting parameters from ../ibrain_dataset/mdds/sub-87999321/ses-1/eeg/sub-87999321_ses-1_task-restEO_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 59990  =      0.000 ...   119.980 secs...
Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 49 - 51 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 49.38
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 49.12 Hz)
- Upper passband edge: 50.62 Hz
- Upper transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 50.88 Hz)
- Filter length: 3301 samples (6.602 s)

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin

[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.0s


Fitting ICA took 1.0s.
Using EOG channels: VPVA, VNVB, HPHL, HNHR
... filtering ICA sources
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 5000 samples (10.000 s)

... filtering target
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.5

[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.0s


... filtering ICA sources
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 5000 samples (10.000 s)

... filtering target
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 5000 sam

[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.0s


Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 5000 samples (10.000 s)

Applying ICA to Raw instance
    Transforming to ICA space (20 components)
    Zeroing out 1 ICA component
    Projecting back using 29 PCA components
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).


  features[ch]['wavelet_entropy'] += -np.sum(c_norm * np.log2(c_norm + 1e-10))


Effective window size : 4.096 (s)
Extracting parameters from ../ibrain_dataset/mdds/sub-87999321/ses-1/eeg/sub-87999321_ses-1_task-restEC_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 60014  =      0.000 ...   120.028 secs...
Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 49 - 51 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 49.38
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 49.12 Hz)
- Upper passband edge: 50.62 Hz
- Upper transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 50.88 Hz)
- Filter length: 3301 samples (6.602 s)

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- W

[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.0s


Fitting ICA took 1.0s.
Using EOG channels: VPVA, VNVB, HPHL, HNHR
... filtering ICA sources
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 5000 samples (10.000 s)

... filtering target
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.5

[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.0s


- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 5000 samples (10.000 s)

... filtering target
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 5000 samples (10.000 s)

... filtering ICA sources
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design 

[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.0s


... filtering target
Setting up band-pass filter from 1 - 10 Hz

FIR filter parameters
---------------------
Designing a two-pass forward and reverse, zero-phase, non-causal bandpass filter:
- Windowed frequency-domain design (firwin2) method
- Hann window
- Lower passband edge: 1.00
- Lower transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 0.75 Hz)
- Upper passband edge: 10.00 Hz
- Upper transition bandwidth: 0.50 Hz (-12 dB cutoff frequency: 10.25 Hz)
- Filter length: 5000 samples (10.000 s)

Applying ICA to Raw instance
    Transforming to ICA space (20 components)
    Zeroing out 2 ICA components
    Projecting back using 29 PCA components
NOTE: pick_types() is a legacy function. New code should use inst.pick(...).


  features[ch]['wavelet_entropy'] += -np.sum(c_norm * np.log2(c_norm + 1e-10))


Effective window size : 4.096 (s)
Features successfully saved to preprocessed.csv
Class: 1 - MDD (1)
Probability of being Healthy: 33.44%
Probability of having MDD: 66.56%

 Prediction process completed.
