# COGS 189 Final Project

This project aims to analyze emotional responses to a social media feed using a brain-computer interface (BCI).<br>
Group Members: <br>
Stephen Gelinas (A15816513) <br>
Aditya Tomar (A17162996) <br>
Shay Samat <br>
Rolando Restua <br>
Kevin Wong 

## Data Loading

We will first load and inspect the raw EEG data we collected with OpenBCI.

In [None]:
import pandas as pd
from IPython.display import Image
import numpy as np
import mne
import matplotlib as plt
import matplotlib.pyplot as plt

In [None]:
# Read the raw recording (a comma-separated text file) into a DataFrame
df = pd.read_csv('data/eeg.txt')
# Preview the first rows to check that the columns were parsed as expected
df.head()

There appear to be no missing values in the raw EEG data from the data collection process.

In [None]:
# Count the missing (NaN) entries in every column
df.isna().sum()

The image below shows the locations of the 8 channels selected for data collection, along with the GND and REF electrodes.

In [None]:
# Display the channel-location diagram stored alongside the data, 400 wide
Image("data/channels.png", width=400)

## Data Cleaning/Preprocessing

In [None]:
# Columns left out of the cleaned frame (note that each name starts with a space)
dropped = [' Other', ' Other.7', ' Analog Channel 0', ' Analog Channel 1', ' Analog Channel 2']
# NOTE(review): df_cleaned is reassigned further down to raw.get_data(), and the
# MNE pipeline below is built from df rather than from this frame.
df_cleaned = df.drop(columns=dropped)

In [None]:
def plot_raw_data(df):
    """Draw eight EXG channels of ``df`` as vertically stacked line plots.

    The first row of ``df`` is skipped, the columns whose name contains
    ``'EXG'`` are selected in column order, and the first eight of them are
    plotted on subplots sharing one x-axis. Each subplot is titled with a
    fixed electrode label; the column-order-to-electrode mapping is
    hard-coded here.

    Side effect: sets the global ``figure.figsize`` rcParam to ``(40, 20)``.
    """
    electrode_titles = ('Fp1', 'Fz', 'Cz', 'Pz', 'O1', 'O2', 'C3', 'C4')

    # Skip the first row, then pick out the EXG columns
    trimmed = df.iloc[1:, :]
    exg_columns = [name for name in trimmed.columns if 'EXG' in name]

    # One row of axes per channel, all sharing the x-axis
    plt.rcParams["figure.figsize"] = (40, 20)
    _, axes = plt.subplots(nrows=8, ncols=1, sharex=True)

    # Plot the k-th EXG column on the k-th axis under its electrode title
    for position, (axis, title) in enumerate(zip(axes, electrode_titles)):
        trimmed[exg_columns[position]].plot(ax=axis)
        axis.set_title(title)

    # Only the bottom subplot carries the x-axis label
    axes[-1].set_xlabel('Timestamp')

    plt.show()


In [None]:
# Estimate the sampling frequency as the reciprocal of the mean spacing
# between consecutive timestamps.
# NOTE(review): this is in Hz only if ' Timestamp' is expressed in seconds — confirm.
sfreq = 1 / np.mean(np.diff(df[' Timestamp']))

# Wrap the eight EEG columns (columns 1 through 8 of df) in an MNE RawArray
ch_names = df.columns[1:9].tolist()
ch_types = ['eeg'] * len(ch_names)
info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
# Transpose so that rows are channels and columns are samples.
# NOTE(review): the values are passed to MNE unscaled; MNE's convention for EEG
# is volts — confirm the unit stored in the file and rescale if it differs.
data = df[ch_names].values.T.astype(np.float32)
raw = mne.io.RawArray(data, info)

In [None]:
# Plot the EXG channels straight from the loaded DataFrame (no filtering applied)
plot_raw_data(df)

In [None]:
# Plot the power spectral density of `raw` as it stands before the filter cells below
raw.plot_psd()

In [None]:
# Gather the three accelerometer axes as a (3, n_samples) array
accelerometer_data = df[[' Accel Channel 0', ' Accel Channel 1', ' Accel Channel 2']].values.T.astype(np.float32)

# Magnitude of the acceleration vector at every sample
acceleration = np.linalg.norm(accelerometer_data, axis=0)

# Flag the samples whose acceleration exceeds the 95th percentile
# (treated here as head movement)
threshold = np.percentile(acceleration, 95)
bad_segments = np.where(acceleration > threshold)[0]

# MNE annotations take onset and duration in SECONDS, so the flagged sample
# indices are divided by the sampling rate. Runs of consecutive flagged
# samples are merged into one annotation each instead of one per sample.
if bad_segments.size:
    run_breaks = np.flatnonzero(np.diff(bad_segments) > 1) + 1
    first_samples = bad_segments[np.concatenate(([0], run_breaks))]
    last_samples = bad_segments[np.concatenate((run_breaks - 1, [bad_segments.size - 1]))]
    sampling_rate = raw.info['sfreq']
    movement = mne.Annotations(
        onset=first_samples / sampling_rate,
        duration=(last_samples - first_samples + 1) / sampling_rate,
        description=['bad'] * len(first_samples),
        orig_time=raw.annotations.orig_time,
    )
    raw.set_annotations(raw.annotations + movement)

# NOTE: interpolate_bads repairs the *channels* listed in raw.info['bads'];
# it does not alter the annotated time spans above. No channel is marked bad
# in this notebook, so this call is kept only for parity with the original run.
raw.interpolate_bads(reset_bads=True)

In [None]:
# Electrode labels used to rename the MNE channels and to title the per-channel plots;
# assumed to be listed in the same order as the EEG columns of df — confirm against the montage.
channels = ['Fp1', 'Fz', 'Cz', 'Pz', 'O1', 'O2', 'C3', 'C4'] 

In [None]:
# High-pass at 1 Hz to suppress slow drifts. Raw.filter works in place and
# returns the same object, so `raw` itself is filtered from here on.
raw.filter(l_freq=1.0, h_freq=None)

# Samples-by-channels table of the filtered signal, labelled with the
# original column names so that plot_raw_data can find the EXG columns
high_pass = pd.DataFrame(raw.get_data().T, columns=ch_names)
high_pass.head()

In [None]:
# Plot the channels of the high-pass-filtered table
plot_raw_data(high_pass)

In [None]:
# Power spectral density of `raw` after the high-pass filter cell
raw.plot_psd()

In [None]:
# Low-pass at 40 Hz to attenuate high-frequency noise. The filter is applied
# in place to `raw`, on top of whatever filtering `raw` already carries.
raw.filter(l_freq=None, h_freq=40.0)

# Samples-by-channels table of the filtered signal, labelled with the
# original column names so that plot_raw_data can find the EXG columns
low_pass = pd.DataFrame(raw.get_data().T, columns=ch_names)
low_pass.head()

In [None]:
# Plot the channels of the low-pass-filtered table
plot_raw_data(low_pass)

In [None]:
# Power spectral density of `raw` after the low-pass filter cell
raw.plot_psd()

In [None]:
# Pull the current contents of `raw` out as a numpy array.
# This replaces the DataFrame that df_cleaned was bound to in the column-drop cell.
df_cleaned = raw.get_data()

In [None]:
# Length of every epoch, in seconds
epoch_length = 10

# Number of samples that make up one epoch
n_samples_per_epoch = int(epoch_length * raw.info['sfreq'])

# First sample of every complete, non-overlapping epoch in the recording
epoch_starts = range(0, raw.n_times - n_samples_per_epoch + 1, n_samples_per_epoch)

# Number of leading epochs left out of the analysis; the labels below start
# at the epoch that follows them.
n_skipped_epochs = 9

# Integer event code for each emotion label
event_dict = {'happy': 1, 'sad': 2, 'neutral': 3}
event_id = dict(event_dict)

# Emotion label of every analysed epoch, in recording order
epoch_ids = ['sad', 'neutral', 'neutral', 'neutral', 'happy', 'sad', 'happy', 'happy', 'sad', 'happy',
             'neutral', 'sad', 'sad', 'neutral', 'happy', 'sad', 'sad', 'neutral', 'happy', 'neutral',
             'sad', 'happy', 'neutral', 'neutral', 'happy', 'sad', 'sad', 'neutral', 'happy', 'neutral',
             'sad', 'sad', 'neutral', 'sad', 'neutral', 'happy', 'neutral', 'neutral', 'happy', 'happy',
             'sad', 'sad', 'sad', 'happy', 'sad', 'happy', 'sad', 'happy', 'sad', 'sad', 'neutral',
             'happy', 'neutral', 'happy', 'happy', 'happy', 'happy', 'happy', 'sad', 'neutral', 'happy',
             'neutral', 'sad', 'neutral', 'sad', 'neutral', 'sad', 'neutral', 'happy', 'sad', 'sad',
             'happy', 'happy', 'neutral', 'happy', 'happy', 'neutral', 'neutral', 'happy', 'neutral',
             'sad', 'neutral', 'happy', 'happy', 'sad', 'sad', 'neutral', 'neutral', 'sad', 'neutral']

event_codes = np.array([event_dict[label] for label in epoch_ids])

# Every analysed epoch needs exactly one label; fail early with a clear
# message instead of a shape error from inside MNE.
kept_starts = epoch_starts[n_skipped_epochs:]
if len(kept_starts) != len(event_codes):
    raise ValueError(
        f"{len(kept_starts)} epochs remain after skipping the first "
        f"{n_skipped_epochs}, but {len(event_codes)} labels were provided"
    )

# Events array in MNE layout: (sample index of the epoch onset, 0, event code).
# The first column holds the real onset sample of each kept epoch rather
# than a running counter.
events = np.column_stack((np.array(kept_starts), np.zeros(len(event_codes)), event_codes)).astype(int)

# Cut the continuous recording into fixed-length epochs and attach the labels
epochs_data = [raw.get_data(start=start, stop=start + n_samples_per_epoch) for start in epoch_starts]
epochs_array = mne.EpochsArray(data=epochs_data[n_skipped_epochs:], info=raw.info, events=events, event_id=event_id)
epochs_array.rename_channels(dict(zip(epochs_array.ch_names, channels)))

# Print the number of epochs
print(f"Number of epochs: {len(epochs_array)}")


In [None]:
# Select the epochs belonging to each emotion label
happy_epochs, sad_epochs, neutral_epochs = (
    epochs_array[label] for label in ('happy', 'sad', 'neutral')
)

# Report how many epochs fall under each label
print(f"Number of happy epochs: {len(happy_epochs)}")
print(f"Number of sad epochs: {len(sad_epochs)}")
print(f"Number of neutral epochs: {len(neutral_epochs)}")

In [None]:
from matplotlib.lines import Line2D

# One row of subplots per channel, one column per emotion.
# squeeze=False keeps `axs` two-dimensional even for a single channel.
nrows = len(channels)
ncols = 3
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 15), squeeze=False)

# Line colour used for each emotion
colors = {'happy': 'blue', 'sad': 'red', 'neutral': 'green'}

# Emotion shown in each subplot column, left to right.
# NOTE: this rebinds `epoch_ids`, which earlier held the per-epoch label list.
epoch_ids = ['happy', 'sad', 'neutral']

# Fetch each condition's (n_epochs, n_channels, n_times) array once, instead
# of copying it out of the Epochs object again for every channel
happy_all = happy_epochs.get_data()
sad_all = sad_epochs.get_data()
neutral_all = neutral_epochs.get_data()

# Iterate over each channel
for i, channel in enumerate(channels):
    # Every epoch of this channel, per condition: shape (n_epochs, n_times)
    happy_data = happy_all[:, i, :]
    sad_data = sad_all[:, i, :]
    neutral_data = neutral_all[:, i, :]

    # Overlay all epochs of one condition in that condition's subplot
    for j, epoch_data in enumerate([happy_data, sad_data, neutral_data]):
        axs[i, j].plot(epoch_data.T, color=colors[epoch_ids[j]])
        axs[i, j].set_title(f'{channel} - {epoch_ids[j]}')
        axs[i, j].set_xlabel('Time')
        axs[i, j].set_ylabel('Amplitude')

# A legend needs artists as handles, not colour names: build one proxy line
# per emotion so the legend actually shows its entries.
legend_handles = [Line2D([0], [0], color=color) for color in colors.values()]
fig.legend(handles=legend_handles, labels=list(colors.keys()), loc='upper center', bbox_to_anchor=(0.5, 0.95), ncol=3)

# Adjust the layout and leave room at the top for the legend
fig.tight_layout(pad=3.0)
plt.subplots_adjust(top=0.90)


In [None]:
# Average the epochs of each emotion into one response per condition
erp = epochs_array['happy'].average()
erp2 = epochs_array['sad'].average()
erp3 = epochs_array['neutral'].average()

# NOTE(review): `colors` is redefined here but not passed to any of the plot calls below.
colors = {'happy': 'blue', 'sad': 'red', 'neutral': 'green'}

# Plot the three averages. Only the 'Happy' plot sets spatial_colors=False;
# the other two use the library default — confirm this difference is intended.
erp.plot(titles = 'Happy',spatial_colors= False)
erp2.plot(titles = 'Sad')
erp3.plot(titles = 'Neutral')

In [None]:
# Use the array bound to df_cleaned (rows are channels, columns are samples);
# nothing is read from disk here.
data = df_cleaned

# Rows of `data` to draw. This deliberately does not reuse the name
# `channels`, which holds the electrode labels that the epoching and
# grid-plot cells need when they are re-run.
channel_indices = range(8)

# Sample window to display. A slice (unlike indexing with the full range)
# is clipped to the recording length instead of raising on short recordings.
time_points = range(10000, 50000)
window = slice(time_points.start, time_points.stop)

# Plot the EEG data, one subplot per channel
fig, ax = plt.subplots(8,figsize=(12, 6))
for i in channel_indices:
    ax[i].plot(data[i, window])
    ax[i].set_xlabel('Time')
    ax[i].set_ylabel('Amplitude')
    ax[i].set_title('Channel '+str(i))
plt.show()

In [None]:
def ttest_1samp_no_p(X, sigma=0.0, method='relative'):
    """Compute a one-sample t statistic against zero along the first axis.

    Parameters
    ----------
    X : numpy.ndarray
        Observations; axis 0 indexes the observations, and the statistic is
        computed independently for every position of the remaining axes.
    sigma : float
        Variance regularisation. When greater than zero, a constant is added
        to the variance before the statistic is formed.
    method : str
        With ``'relative'`` the added constant is ``sigma * max(variance)``;
        with any other value it is ``sigma`` itself.

    Returns
    -------
    t_vals : numpy.ndarray
        ``mean / sqrt(variance / n)`` for every position.
    dof : int
        Degrees of freedom, ``n - 1``.
    """
    n_observations = X.shape[0]
    # A t statistic uses the unbiased sample variance (ddof=1); the
    # population variance (ddof=0) would inflate every t value.
    var = np.var(X, axis=0, ddof=1)
    if sigma > 0.0:
        limit = sigma * np.max(var) if method == 'relative' else sigma
        var += limit
    t_vals = np.mean(X, axis=0) / np.sqrt(var / n_observations)
    dof = n_observations - 1
    return t_vals, dof

# Get the data from the epochs
happy_epochs_data = happy_epochs.get_data()
sad_epochs_data = sad_epochs.get_data()
neutral_epochs_data = neutral_epochs.get_data()

# Number of happy epochs (kept for reference; the test reads the number of
# observations from each array itself)
n_epochs = happy_epochs_data.shape[0]

# The second positional parameter of ttest_1samp_no_p is `sigma`, a variance
# regularisation term, not an epoch count. Passing the epoch count there
# shrank every t value, so the tests are run with the default sigma.

# Perform t-test for happy epochs
happy_t, happy_df = ttest_1samp_no_p(happy_epochs_data)

# Perform t-test for sad epochs
sad_t, sad_df = ttest_1samp_no_p(sad_epochs_data)

# Perform t-test for neutral epochs
neutral_t, neutral_df = ttest_1samp_no_p(neutral_epochs_data)

# Print the t-values for each condition
print(f"Happy t-value: {happy_t}")
print(f"Sad t-value: {sad_t}")
print(f"Neutral t-value: {neutral_t}")

In [None]:
# Collapse each condition's t-values into a single mean over all entries
happy_avg_t, sad_avg_t, neutral_avg_t = (
    np.mean(t_values) for t_values in (happy_t, sad_t, neutral_t)
)

# Report the mean t-value of every condition
print(f"Happy average t-value: {happy_avg_t}")
print(f"Sad average t-value: {sad_avg_t}")
print(f"Neutral average t-value: {neutral_avg_t}")