<a href="https://colab.research.google.com/github/rishabhvenkat/Pavlov_VS_IITM/blob/main/Pavlov_VS_IITM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install MNE and pandas with the %pip magic so the packages land in the
# environment of the running kernel (rather than relying on automagic).
%pip install mne pandas

Collecting mne
  Downloading mne-1.8.0-py3-none-any.whl.metadata (21 kB)
Downloading mne-1.8.0-py3-none-any.whl (7.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m50.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: mne
Successfully installed mne-1.8.0


In [2]:
# Install pymatreader with the %pip magic so it targets the running kernel's
# environment instead of whichever pip the shell resolves.
%pip install pymatreader


Collecting pymatreader
  Downloading pymatreader-1.0.0-py3-none-any.whl.metadata (1.5 kB)
Collecting xmltodict (from pymatreader)
  Downloading xmltodict-0.14.2-py2.py3-none-any.whl.metadata (8.0 kB)
Downloading pymatreader-1.0.0-py3-none-any.whl (9.3 kB)
Downloading xmltodict-0.14.2-py2.py3-none-any.whl (10.0 kB)
Installing collected packages: xmltodict, pymatreader
Successfully installed pymatreader-1.0.0 xmltodict-0.14.2


In [3]:
import os

# Confirm that the .set file and its companion .fdt file are both reachable
# from the current working directory before trying to load anything.
set_file = 'VegetativeState_44.set'
fdt_file = 'VegetativeState_44.fdt'

set_found = os.path.exists(set_file)
fdt_found = os.path.exists(fdt_file)

print(f"Set file exists: {set_found}")
print(f"FDT file exists: {fdt_found}")


Set file exists: True
FDT file exists: True


In [4]:
# Upgrade MNE through the %pip magic so the upgrade is applied to the
# environment of the running kernel.
%pip install --upgrade mne




In [11]:
import mne
import pandas as pd

# Function to load EEG data
def load_eeg_data(set_file):
    """Read an EEGLAB ``.set`` recording with MNE, preferring a preloaded read.

    The file is first read with ``preload=True``. If that attempt raises a
    ``RuntimeError``, the error is printed and the read is retried once with
    ``preload=False``.

    Parameters
    ----------
    set_file : str
        Path of the ``.set`` file handed to ``mne.io.read_raw_eeglab``.

    Returns
    -------
    object or None
        Whatever ``mne.io.read_raw_eeglab`` returns, or ``None`` when the
        retry without preloading fails as well (the error is printed).

    Raises
    ------
    Exception
        Any non-``RuntimeError`` raised by the first (preloading) attempt is
        not handled here and propagates to the caller.
    """
    # First attempt: read everything up front.
    try:
        return mne.io.read_raw_eeglab(set_file, preload=True)
    except RuntimeError as preload_error:
        print(f"RuntimeError occurred: {preload_error}")
        print("Trying to load with preload=False.")

    # Second attempt: only reached after a RuntimeError above.
    try:
        return mne.io.read_raw_eeglab(set_file, preload=False)
    except Exception as fallback_error:
        print(f"An error occurred while loading the file: {fallback_error}")
        return None

# Read the recording through load_eeg_data; it returns None when both
# loading attempts fail.
set_file = 'VegetativeState_44.set'
eeg_data = load_eeg_data(set_file)

if eeg_data is None:
    print("Failed to load EEG data.")
else:
    # Report the size of the recording
    print(f'Number of channels: {len(eeg_data.ch_names)}')
    print(f'Number of samples: {eeg_data.n_times}')

    # Pull the sample matrix together with its time axis
    data, times = eeg_data.get_data(return_times=True)

    # Transpose so each row is one sample and each column one channel,
    # then append the time stamps as the last column
    df = pd.DataFrame(data.T, columns=eeg_data.ch_names)
    df['Time'] = times

    # Write the table to disk without the DataFrame index
    df.to_csv('output_file.csv', index=False)
    print("Data successfully saved to output_file.csv.")


Reading /content/VegetativeState_44.fdt
Reading 0 ... 22118399  =      0.000 ... 86399.996 secs...


  eeg_data = mne.io.read_raw_eeglab(set_file, preload=True)


Number of channels: 5
Number of samples: 22118400
Data successfully saved to output_file.csv.


In [12]:
import pandas as pd

# Loading the CSV into a DataFrame.
# Use the same relative path the export step wrote to ('output_file.csv');
# the previous hardcoded '/content/...' path only matched that file when the
# working directory happened to be /content.
df = pd.read_csv('output_file.csv')

df.head()

Unnamed: 0,C3:REF,C4:REF,Fz:REF,Cz:REF,Pz:REF,Time
0,3.7e-05,5e-05,2.7e-05,5.9e-05,4.4e-05,0.0
1,3.9e-05,5.3e-05,3e-05,6.1e-05,4.5e-05,0.003906
2,4.5e-05,6.1e-05,3.3e-05,5.3e-05,5e-05,0.007812
3,4.1e-05,5.6e-05,2.9e-05,6.1e-05,4.6e-05,0.011719
4,4e-05,5e-05,3.1e-05,6.4e-05,4.5e-05,0.015625


In [15]:
import mne
import numpy as np
import pandas as pd
from scipy import stats  # Import stats module for z-score calculation

# Loading the .SET file
eeg_data = mne.io.read_raw_eeglab('VegetativeState_44.set', preload=True)

# Step 1: Bandpass Filter (1-40 Hz)
eeg_data.filter(l_freq=1., h_freq=40.)

# Selecting only EEG channels, excluding bad ones
picks = mne.pick_types(eeg_data.info, eeg=True, exclude='bads')
n_channels = len(picks)
# Cap the number of ICA components at the number of picked channels
n_components = min(20, n_channels)

# Step 2: Fitting ICA (fixed random_state so repeated runs use the same seed)
ica = mne.preprocessing.ICA(n_components=n_components, random_state=97)
ica.fit(eeg_data, picks=picks)

# Step 3: Mark components for exclusion
# NOTE(review): components 0 and 1 are excluded unconditionally, without
# inspecting them first -- confirm these really are artifact components.
ica.exclude = [0, 1]  # Adjust based on your analysis

# Step 4: Apply ICA
ica.apply(eeg_data)

# Step 5: Epoching the data
events, event_id = mne.events_from_annotations(eeg_data)
unique_events = np.unique(events, axis=0)  # sorted, exact-duplicate rows removed
epochs = mne.Epochs(eeg_data, unique_events, event_id, tmin=-0.2, tmax=0.5,
                    baseline=(None, 0), detrend=1, reject=dict(eeg=100e-6),
                    event_repeated='drop')

# Step 6: Average the epochs to create an ERP
erp = epochs.average()

# Step 7: Converting to a DataFrame and saving to CSV
data = epochs.get_data()  # Get the data without return_times
n_epochs, n_channels, n_times = data.shape

# Stack every epoch's (time x channel) table below the previous one in a single
# reshape. The row order matches building one DataFrame per epoch and
# concatenating them, but an empty epoch set now yields an empty table instead
# of pd.concat raising on an empty list.
channel_columns = [f'Channel_{i+1}' for i in range(n_channels)]
stacked_samples = data.transpose(0, 2, 1).reshape(-1, n_channels)
final_df = pd.DataFrame(stacked_samples, columns=channel_columns)
final_df['Time'] = np.tile(epochs.times, n_epochs)  # epoch time axis repeated per epoch

# Save the final DataFrame to CSV
final_df.to_csv('epochs_data.csv', index=False)
print("Data successfully saved to epochs_data.csv.")

# Step 8: Removing outliers using z-score method
# Filter the epoched table built above. The previous code filtered `df`, a
# leftover from an earlier cell holding the raw, unfiltered samples, so the
# "cleaned" file never reflected the preprocessing done here. 'Time' is left
# out of the z-score because it is a time axis, not a measurement.
channel_values = final_df.drop(columns=['Time']).to_numpy()
within_limits = (np.abs(stats.zscore(channel_values)) < 3).all(axis=1)
df_cleaned = final_df[within_limits]

# Step 9: Saving cleaned DataFrame to another CSV
df_cleaned.to_csv('cleaned_eeg_data.csv', index=False)

# Name the files that were actually written ('epochs_data.csv', not 'processed_eeg_data.csv')
print("EEG preprocessing complete. Processed data saved to 'epochs_data.csv' and 'cleaned_eeg_data.csv'.")


Reading /content/VegetativeState_44.fdt
Reading 0 ... 22118399  =      0.000 ... 86399.996 secs...


  eeg_data = mne.io.read_raw_eeglab('VegetativeState_44.set', preload=True)


Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth: 10.00 Hz (-6 dB cutoff frequency: 45.00 Hz)
- Filter length: 845 samples (3.301 s)

Fitting ICA to data using 5 channels (please be patient, this may take a while)
Selecting by number: 5 components
Fitting ICA took 64.1s.
Applying ICA to Raw instance
    Transforming to ICA space (5 components)
    Zeroing out 2 ICA components
    Projecting back using 5 PCA components
Used Annotations descriptions: ['Start prescoring', 'boundary']
Multiple event values for single event times found. Keeping the first occurrence and dr