### Task 1 - Preprocess the data

In [None]:
!pip install mne

In [None]:
import numpy as np
import mne
from scipy.io import loadmat
# from scipy.signal import welch
from scipy.stats import ttest_ind
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [None]:
# Read the three healthy and three schizophrenia EEG recordings from .mat files
healthy_data1, healthy_data2, healthy_data3 = [
    loadmat(mat_path)
    for mat_path in ('/content/H1.mat', '/content/H2.mat', '/content/H3.mat')
]

schizophrenia_data1, schizophrenia_data2, schizophrenia_data3 = [
    loadmat(mat_path)
    for mat_path in ('/content/Sch1.mat', '/content/Sch2.mat', '/content/Sch3.mat')
]

OSError: ignored

In [None]:
# Recording parameters shared by every subject
sfreq = 250      # sampling frequency handed to mne.create_info
CH_NUMBER = 19   # number of EEG channels handed to mne.create_info


def _make_raw(mat_contents, key):
    """Wrap the array stored under ``key`` in an MNE RawArray of EEG channels."""
    samples = mat_contents.get(key)
    info = mne.create_info(CH_NUMBER, sfreq, ch_types='eeg')
    return mne.io.RawArray(samples, info)


# One Raw object per subject
healthy_raw1 = _make_raw(healthy_data1, 'H1')
healthy_raw2 = _make_raw(healthy_data2, 'H2')
healthy_raw3 = _make_raw(healthy_data3, 'H3')

schizophrenia_raw1 = _make_raw(schizophrenia_data1, 'Sch1')
schizophrenia_raw2 = _make_raw(schizophrenia_data2, 'Sch2')
schizophrenia_raw3 = _make_raw(schizophrenia_data3, 'Sch3')

# Join the subjects of each group into one continuous recording
healthy_raw = mne.concatenate_raws([healthy_raw1, healthy_raw2, healthy_raw3])
schizophrenia_raw = mne.concatenate_raws(
    [schizophrenia_raw1, schizophrenia_raw2, schizophrenia_raw3]
)

_group_raws = (healthy_raw, schizophrenia_raw)

# Band-pass 1-50 Hz on both groups (adjust the frequency range as needed)
for _group_raw in _group_raws:
    _group_raw.filter(l_freq=1, h_freq=50)

# Notch out power line interference (50 Hz) on both groups
for _group_raw in _group_raws:
    _group_raw.notch_filter(freqs=50)


### Task 2 - Segment the data into 2-second epochs

In [None]:
# Cut each group's continuous recording into fixed-length epochs
duration = 2  # epoch length in seconds
healthy_epochs, schizophrenia_epochs = [
    mne.make_fixed_length_epochs(group_raw, duration=duration)
    for group_raw in (healthy_raw, schizophrenia_raw)
]

### Task 3 - Extract
- Mean
- Standard deviation
- Total signal power
- Alpha band power
- Beta band power
- Theta band power
- Fourier series coefficients

for each segment

In [None]:

# Frequency bands used for the band-power features, as (low, high) edges
# that are later passed to the PSD routine as fmin / fmax.
freq_bands = {
    'theta': (4, 8),
    'alpha': (8, 13),
    'beta': (13, 30),
}

# Function to calculate band power
def calculate_band_power(data, sfreq, freq_band, verbose=False):
    """Sum the Welch power spectral density of ``data`` over one band.

    Parameters
    ----------
    data : ndarray
        Signal(s) with time along the last axis, as expected by
        ``mne.time_frequency.psd_array_welch``.
    sfreq : float
        Sampling frequency of ``data``.
    freq_band : sequence
        ``freq_band[0]`` and ``freq_band[1]`` are the lower and upper band
        edges, forwarded as ``fmin`` / ``fmax``.
    verbose : bool | str | int | None
        Forwarded to MNE. Defaults to ``False`` because this function is
        typically called once per epoch and band, and MNE's per-call
        "Effective window size" message otherwise floods the output.

    Returns
    -------
    ndarray
        PSD summed over the frequency bins inside the band (last axis
        removed). The bins are summed, not multiplied by the frequency
        resolution, so the value is proportional to the band power.
    """
    band_low, band_high = freq_band[0], freq_band[1]
    psd, _ = mne.time_frequency.psd_array_welch(
        data, sfreq=sfreq, fmin=band_low, fmax=band_high, verbose=verbose
    )
    return np.sum(psd, axis=-1)

# Function to extract features from each segment
def extract_features(epochs):
    """Compute per-channel features for every epoch.

    Parameters
    ----------
    epochs : iterable
        Iterable yielding one array per epoch with time along the last axis
        (e.g. an MNE ``Epochs`` object). If it exposes ``info['sfreq']`` that
        sampling rate is used; otherwise the module-level ``sfreq`` is used.

    Returns
    -------
    ndarray
        One entry per epoch. Assuming each epoch is a 2-D
        (n_channels, n_times) array, the result has shape
        (n_epochs, n_channels, 6 + n_times) and the last axis holds, in order:
        mean, standard deviation, total power (sum of squared samples),
        alpha, beta and theta band power, then the magnitudes of all
        ``n_times`` FFT coefficients.
    """
    # Take the sampling rate from the data itself when available instead of
    # relying on (and re-declaring) a notebook-level global.
    epochs_info = getattr(epochs, 'info', None)
    sampling_rate = sfreq if epochs_info is None else epochs_info['sfreq']

    # Order in which band powers appear in the feature vector.
    band_order = ('alpha', 'beta', 'theta')
    features = []

    for data in epochs:
        # Time-domain statistics; keepdims keeps a trailing axis of length 1
        # so they can be concatenated with the other feature columns.
        mean = np.mean(data, axis=-1, keepdims=True)
        std = np.std(data, axis=-1, keepdims=True)
        total_power = np.sum(data ** 2, axis=-1, keepdims=True)

        # Band powers, each reshaped to one column per channel.
        band_powers = []
        for band in band_order:
            power = calculate_band_power(data, sampling_rate, freq_bands[band])
            band_powers.append(power.reshape(power.shape[0], -1))

        # Magnitude of the full (two-sided) FFT along time.
        fourier_coeffs = np.abs(np.fft.fft(data, axis=-1)).reshape(data.shape[0], -1)

        segment_features = np.concatenate(
            [mean, std, total_power, *band_powers, fourier_coeffs], axis=-1
        )
        features.append(segment_features)

    return np.array(features)

# Build the per-epoch feature arrays for the healthy and schizophrenia groups
healthy_features, schizophrenia_features = (
    extract_features(group_epochs)
    for group_epochs in (healthy_epochs, schizophrenia_epochs)
)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window size : 1.024 (s)
Effective window 

### Task 4 & 5
- Extract features for each channel
- Build the feature matrix for both groups with the label in the last column

In [None]:
  # Assuming healthy_epochs and schizophrenia_epochs are MNE Epochs objects
# Extract the data from epochs; the code below treats axis 0 as epochs,
# axis 1 as channels and axis 2 as the per-channel values ("features").
# NOTE(review): these values are the raw epoch samples, not the Task 3
# features; swap in healthy_features / schizophrenia_features if the
# engineered features are what should be tested and tabulated - confirm.
healthy_data = healthy_epochs.get_data()
schizophrenia_data = schizophrenia_epochs.get_data()

num_healthy = healthy_data.shape[0]
num_schizophrenia = schizophrenia_data.shape[0]
num_channels = healthy_data.shape[1]
num_features = healthy_data.shape[2]

# Stack both groups along the epoch axis. Labels are kept in a separate 1-D
# vector rather than being tiled into the data array, so no label values can
# leak into the feature columns or into the t-tests.
all_data = np.concatenate([healthy_data, schizophrenia_data], axis=0)

# One label per epoch: 0 for healthy, 1 for schizophrenia
labels = np.concatenate([np.zeros(num_healthy), np.ones(num_schizophrenia)])

# Independent-samples t-test across epochs for every (channel, feature) pair
# in one vectorised call; t_test_results[channel, feature] is the p-value.
_, t_test_results = ttest_ind(healthy_data, schizophrenia_data, axis=0)

# Flatten each epoch to a single row: (n_epochs, n_channels * n_features)
features_matrix = all_data.reshape(all_data.shape[0], -1)

# One column name per (channel, feature) pair, in the same channel-major
# order that reshape uses. Epochs are rows, so they do not appear here.
column_names = [
    f'Channel_{ch}_Feature_{f}'
    for ch in range(num_channels)
    for f in range(num_features)
]

# Convert the features_matrix to a Pandas DataFrame
df = pd.DataFrame(features_matrix, columns=column_names)

# Add the labels as the last column of the feature matrix
df['Label'] = labels

# Display the DataFrame
print(df)


NameError: ignored