In [216]:
import numpy as np
import xgboost as xgb
from xgboost import XGBClassifier #, XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import os

In [89]:
class EEGEnjoymentClassifier:
  """
  Binary enjoyment classifier built on XGBoost (XGBClassifier, 'binary:logistic').

  Every sample passed to fit() is appended to an internal history, and the
  underlying model is retrained on the whole accumulated history on each call.
  """
  def __init__(self, n_estimators = 2, learning_rate = 0.3, max_depth = 5):
    """
    Create the underlying XGBClassifier and start with an empty training history.

    Args:
      n_estimators: Number of boosting rounds passed to XGBClassifier.
      learning_rate: Learning rate passed to XGBClassifier.
      max_depth: Maximum tree depth passed to XGBClassifier.
    """
    self.model = XGBClassifier(n_estimators = n_estimators, learning_rate = learning_rate, max_depth = max_depth, objective = 'binary:logistic')
    self.X_train_history = None  # accumulated feature rows, shape (n_samples_seen, n_features)
    self.y_train_history = None  # accumulated labels, shape (n_samples_seen,)

  def fit(self, X_train, y_train = None):
    """
    Add new samples to the training history and retrain the model on all of it.

    If y_train is not given (i.e. post calibration), the current model labels
    X_train itself and those predictions are treated as ground truth. This
    requires that the model has already been fitted with real labels at least
    once; otherwise the underlying predict() call fails.

    Args:
      X_train: Array-like of shape (n_samples, n_features), or a single sample
        of shape (n_features,), holding EEG features.
      y_train: Optional array-like of shape (n_samples,) with the class label
        of each sample.
    """
    # A single 1-D sample becomes one row, so it can be predicted on and stacked.
    if np.ndim(X_train) == 1:
      X_train = np.reshape(X_train, (1, -1))

    # If y_train is not provided, use classifyData to generate labels
    if y_train is None:
      y_train = self.classifyData(X_train)
    y_train = np.atleast_1d(y_train)

    if self.X_train_history is None:
      # Copy, so later changes to the caller's array cannot alter the history.
      self.X_train_history = np.array(X_train)
    else:
      self.X_train_history = np.vstack([self.X_train_history, X_train])

    if self.y_train_history is None:
      self.y_train_history = np.array(y_train)
    else:
      self.y_train_history = np.hstack([self.y_train_history, y_train])

    print("X_train_history", self.X_train_history.shape)
    print("y_train_history", self.y_train_history.shape)

    # Train the model on everything seen so far
    self.model.fit(self.X_train_history, self.y_train_history)

  def classifyData(self, brainData):
    """
    Predict the enjoyment class of one or more EEG samples.

    Args:
      brainData: Array-like of shape (n_samples, n_features), or a single
        sample of shape (n_features,).

    Returns:
      Whatever the underlying model's predict() returns for the samples: one
      prediction per row. A single 1-D sample is treated as one row, so it
      yields one prediction.
    """
    # predict() is given 2-D input, so wrap a lone sample as a single row.
    if np.ndim(brainData) == 1:
      brainData = np.reshape(brainData, (1, -1))
    return self.model.predict(brainData)

In [219]:
# Load every CSV file found in preprocessing/junk_data.
# The last file (in sorted filename order) is treated as the label table; all the
# others are stacked as per-sample feature matrices.
junk_data_folder = os.path.join("preprocessing", "junk_data")  # portable path separator
data_list = []
# os.listdir() returns names in arbitrary order, so sort them to make both the sample
# order and the choice of "last file" deterministic.
# NOTE(review): confirm the label file really sorts after every sample file.
for file in sorted(os.listdir(junk_data_folder)):
    # Load the CSV data into a NumPy array
    data = np.genfromtxt(os.path.join(junk_data_folder, file), delimiter=",")
    data_list.append(data)
junk_data = np.stack(data_list[:-1], axis = 0) # (20, 16, 4) = samples, channels, features
labels = data_list[-1][:,1] # assuming the labels are in order of filename appearance, and that 1 = liked, 0 = disliked
# Fail early if the label table does not have exactly one entry per sample.
assert len(labels) == len(junk_data), "number of labels does not match number of samples"
print(junk_data.shape)
print(labels.shape)

(20, 16, 4)
(20,)


In [220]:
# Flatten every sample into a single feature vector (one row per sample), then
# hold out 20% of the samples for testing.
brain_data = junk_data.reshape(len(junk_data), -1)  # e.g. (20, 16, 4) -> (20, 64)
enjoyment_levels = labels  # one label per sample; assumed 1 = liked, 0 = disliked
X_train, X_test, y_train, y_test = train_test_split(
    brain_data,
    enjoyment_levels,
    random_state=123,
    test_size=0.2,
)

# Create the EEG enjoyment classifier and train it on the labelled training split
classifier = EEGEnjoymentClassifier()
classifier.fit(X_train, y_train)

X_train_history (16, 64)
y_train_history (16,)


In [221]:
# Simulate real-time scenario with new brain data: classify the held-out samples
y_pred = classifier.classifyData(X_test)
print("Predicted enjoyment level:", y_pred)
print("Actual enjoyment level:", y_test)
# accuracy_score expects (y_true, y_pred); keep that order so the call stays
# correct if it is ever swapped for a metric where order matters.
print("Accuracy", accuracy_score(y_test, y_pred))

Predicted enjoyment level: [1 1 0 1]
Actual enjoyment level: [1. 1. 1. 1.]
Accuracy 0.75


In [222]:
# Retrain the model with one additional sample and no label.
# (For now the "new" sample is simply the first recording, junk_data[0].)
# Note: each run of this cell appends another row to the training data used by the model.
classifier.fit(np.reshape(junk_data[0], (1, -1)))

X_train_history (17, 64)
y_train_history (17,)


In [223]:
# Simulate real-time scenario with new brain data: classify the held-out samples again
y_pred = classifier.classifyData(X_test)
print("Predicted enjoyment level:", y_pred)
print("Actual enjoyment level:", y_test)
# accuracy_score expects (y_true, y_pred); keep that order so the call stays
# correct if it is ever swapped for a metric where order matters.
print("Accuracy", accuracy_score(y_test, y_pred))

Predicted enjoyment level: [1 1 0 1]
Actual enjoyment level: [1. 1. 1. 1.]
Accuracy 0.75


In [6]:
# Dustin note: I don't remember what this code was for or if I even wrote it
# Simulate receiving enjoyment feedback (loss can be calculated here)
# NOTE(review): `predicted_enjoyment` is not assigned in any cell visible here, so this
# cell presumably raises NameError on a fresh kernel. Confirm, then either define it or
# drop the cell.
actual_enjoyment = 3.8  # Assume user provides actual enjoyment level (0-5)
loss = abs(predicted_enjoyment - actual_enjoyment)  # Simple loss: absolute difference between the two values