Configure Google Colab

In [None]:
!pip install sktime

In [None]:
# Move into the dataset folder: the cells below build every emotion directory
# path from os.getcwd(), so the working directory must be the csv_files folder.
# NOTE(review): the path is relative, so re-running this cell after it has
# already succeeded will fail; it presumably also expects Google Drive to be
# mounted beforehand — confirm.
%cd drive/MyDrive/Datasets/Iteration_5/csv_files/
# List the folder contents as a quick sanity check.
%ls

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import pickle
from math import ceil

from sklearn.linear_model import RidgeClassifierCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, Normalizer, MinMaxScaler

# from sktime.transformations.panel.rocket import Rocket

Helpers

In [7]:
# constants
# Directory names double as class labels: later cells derive each recording's
# label from the name of the directory that contains it.
happiness_dir_name = 'happiness'
holocaust_dir_name = 'holocaust'
horror_dir_name = 'horror'
nature_dir_name = 'nature'
plank_dir_name = 'plank'
relax_dir_name = 'relax'
rest_dir_name = 'rest'
rollercoaster_dir_name = 'rollercoaster'


def _emotion_dir(dir_name):
  """Return the path of `dir_name` inside the current working directory."""
  return os.path.join(os.getcwd(), dir_name)


def _list_files(directory):
  """Return the sorted full paths of the regular files directly inside `directory`.

  os.listdir returns entries in arbitrary order, so the result is sorted to
  make the order of the recordings reproducible between runs and machines.
  """
  candidates = (os.path.join(directory, name) for name in os.listdir(directory))
  return sorted(path for path in candidates if os.path.isfile(path))


happiness_dir = _emotion_dir(happiness_dir_name)
holocaust_dir = _emotion_dir(holocaust_dir_name)
horror_dir = _emotion_dir(horror_dir_name)
nature_dir = _emotion_dir(nature_dir_name)
plank_dir = _emotion_dir(plank_dir_name)
relax_dir = _emotion_dir(relax_dir_name)
rest_dir = _emotion_dir(rest_dir_name)
rollercoaster_dir = _emotion_dir(rollercoaster_dir_name)

happiness_filepaths = _list_files(happiness_dir)
holocaust_filepaths = _list_files(holocaust_dir)
horror_filepaths = _list_files(horror_dir)
nature_filepaths = _list_files(nature_dir)
plank_filepaths = _list_files(plank_dir)
relax_filepaths = _list_files(relax_dir)
rest_filepaths = _list_files(rest_dir)
rollercoaster_filepaths = _list_files(rollercoaster_dir)

# All recordings, grouped by emotion in a fixed order.
emotions_filepaths = (
    happiness_filepaths + holocaust_filepaths +
    horror_filepaths + nature_filepaths + plank_filepaths +
    relax_filepaths + rest_filepaths + rollercoaster_filepaths
)
chunk_size = 40 # how many time-series samples are given one label

In [8]:
def get_chunk_from_df(dataframe, chunk_size=chunk_size):
  """Yield consecutive, non-overlapping groups of rows from a dataframe.

  Parameters
  ----------
  dataframe : pandas.DataFrame
      From csv file exported from NeuroSky app
  chunk_size : int
      Number of dataframe rows corresponding with one label. Defaults to the
      value the module-level ``chunk_size`` had when this cell was executed.

  Yields
  ------
  pandas.DataFrame
      The next ``chunk_size`` rows. The last chunk is shorter when the number
      of rows is not a multiple of ``chunk_size``.

  Raises
  ------
  ValueError
      If ``chunk_size`` is not positive. Because this is a generator, the
      error is raised when iteration starts, not when the function is called.
  """
  if chunk_size <= 0:
    raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

  n_rows = dataframe.shape[0]
  for start_row in range(0, n_rows, chunk_size):
    # iloc clamps a slice that runs past the last row, so the tail chunk needs no special case
    yield dataframe.iloc[start_row:start_row + chunk_size, :]

In [9]:
# Names of the optional processing steps accepted in `scalers`.
normalize_cmd = 'normalizer'
minmax_cmd = 'minmax'
standarize_cmd = 'standarize'
smooth_cmd = 'smooth'
# Width (in rows) of the moving-average window used by the 'smooth' step.
filter_size = 10

def process_input_file(df, remove_first=10, remove_last=0, scalers=(), make_double=False):
  """Clean one recording and optionally upsample, scale and smooth it.

  Parameters
  ----------
  df : pandas.DataFrame
      One recording. Must contain the columns 'attention' and 'meditation',
      which are dropped; every remaining column is treated as a signal.
  remove_first : int
      Rows whose index label is <= remove_first are dropped.
      NOTE(review): with a default 0..n-1 index this removes remove_first + 1
      rows, while remove_last removes exactly remove_last rows. Confirm that
      the asymmetry is intended.
  remove_last : int
      Number of rows dropped from the end of the recording.
  scalers : iterable of str
      Any of normalize_cmd, minmax_cmd, standarize_cmd, smooth_cmd. The steps
      always run in that order, regardless of their order in `scalers`.
      Scalers are fitted on this recording only.
  make_double : bool
      If True, double the number of rows by inserting a linearly interpolated
      row after every original row (the final inserted row repeats the last
      value), then truncate the values to integers.

  Returns
  -------
  pandas.DataFrame
      The processed recording with a fresh 0..n-1 index.
  """
  df = df.drop(['attention', 'meditation'], axis=1)
  df = df[(df.index > remove_first) & (df.index < len(df)-remove_last)]
  # discard rows where any signal is exactly zero
  df = df.loc[(df!=0).all(axis=1)]
  df = df.reset_index(drop=True)

  if make_double:
    # Put the original rows on even positions and leave the odd positions
    # empty, then fill the gaps by linear interpolation. Placing the rows
    # explicitly avoids depending on the order in which a sort arranges
    # duplicated index labels.
    doubled = df.copy()
    doubled.index = np.arange(len(df)) * 2
    doubled = doubled.reindex(np.arange(2 * len(df)))
    df = doubled.interpolate().astype(int).reset_index(drop=True)

  if normalize_cmd in scalers:
    scaled_values = Normalizer().fit_transform(df.values)
    df = pd.DataFrame(scaled_values, columns=df.columns)
  if minmax_cmd in scalers:
    scaled_values = MinMaxScaler().fit_transform(df.values)
    df = pd.DataFrame(scaled_values, columns=df.columns)
  if standarize_cmd in scalers:
    scaled_values = StandardScaler().fit_transform(df.values)
    df = pd.DataFrame(scaled_values, columns=df.columns)
  if smooth_cmd in scalers:
    # Work on a float copy: writing the averages back into an integer array
    # would silently truncate them, and a view would modify `df` in place.
    X_smooth = df.to_numpy(dtype=float, copy=True)
    window = np.ones(filter_size)
    # np.convolve(..., 'same') returns max(len(signal), filter_size) samples,
    # so a recording shorter than filter_size rows fails on the assignment.
    for i in range(X_smooth.shape[1]):
        X_smooth[:, i] = np.convolve(X_smooth[:, i], window, 'same') / filter_size
    # fixed rescaling by 10**6 (presumably to bring raw values to order ~1 — confirm)
    X_smooth = X_smooth/10**6
    df = pd.DataFrame(data=X_smooth, columns=df.columns)

  return df

# Process data for time series classification (sktime)

In [11]:
# Signal columns kept for the time-series dataset, in output column order.
eeg_bands = ('delta', 'theta', 'lowAlpha', 'highAlpha',
             'lowBeta', 'highBeta', 'lowGamma', 'highGamma')

chunks_list = []
labels_list = []
for f in emotions_filepaths:
  # the name of the directory that holds the recording is its label
  label = os.path.normpath(f).split(os.sep)[-2]
  df = process_input_file(pd.read_csv(f, sep=';'), scalers=[minmax_cmd, standarize_cmd])
  get_chunk = get_chunk_from_df(df)

  # consume only the complete chunks; a shorter trailing chunk is never requested
  for chunk_num in range(df.shape[0] // chunk_size):
    chunk = next(get_chunk)
    # one row per chunk: every cell stores the whole series of one column
    new_chunk = pd.DataFrame({band: [chunk[band].to_numpy()] for band in eeg_bands})
    chunks_list.append(new_chunk)
    labels_list.append(label)

df_time = pd.concat(chunks_list).reset_index(drop=True)
df_time["label"] = labels_list
df_time

Unnamed: 0,delta,theta,lowAlpha,highAlpha,lowBeta,highBeta,lowGamma,highGamma,label
0,"[0.34956081273667705, 0.4109626059926764, -0.7...","[-0.5193361485023096, -0.46639700977187537, 1....","[-0.18252571171280607, -0.5934969165663018, 3....","[-0.24953066873675012, -0.5002952719369379, 2....","[0.3785034849720095, -0.41437890350187917, 1.9...","[0.0126386901709675, -0.35865868146430285, 0.2...","[-0.346051234476952, -0.5435309748792886, 1.17...","[-0.3590095539175621, -0.46370700481464106, 0....",happiness
1,"[-1.2193328131642378, -0.3728417334512287, 0.5...","[-0.3445865448134677, -0.3200676703867306, -0....","[-0.14981299139246324, 0.11539168768126626, 0....","[-0.2597275010131657, -0.05014804994379826, -0...","[-0.29773663209834544, 0.038659601586994524, -...","[-0.15380464524418128, -0.3268485395793597, -0...","[0.0003661102730891319, -0.33605142200185045, ...","[0.003328179947150259, -0.32618279971705283, -...",happiness
2,"[-0.8359739849152328, 0.9829670692014476, 0.50...","[0.42144319182790235, -0.5111523027604072, -0....","[-0.19194686216775803, 0.08478354220018557, -0...","[-0.4527240133496356, -0.21403019825489664, -0...","[-0.35189747542508537, 0.16973770231961482, -0...","[-0.31982918657612813, 0.29450531660423557, 0....","[0.18281302000093527, 0.09605967998515366, 0.2...","[-0.14682935176699752, 0.30687618128913247, -0...",happiness
3,"[0.4071907603775057, 0.8400593232514181, -0.36...","[-0.42849001696736555, -0.27953676606497224, -...","[-0.5137490672879529, -0.09003741609207862, -0...","[-0.46897194321924235, -0.18362814391125096, 0...","[-0.5204663590650667, -0.41242395129787346, 0....","[-0.37025745672515503, 0.45747994450416873, 0....","[-0.4495261152209664, 0.7504844937530578, 0.77...","[-0.4550029412008696, 0.38516301630671074, -0....",happiness
4,"[-0.6123368863113889, -0.8243383228623037, -0....","[-0.5182292425372186, -0.6402284258896745, -0....","[-0.48668621230791104, -0.6402778015839942, -0...","[-0.47863485511978815, -0.5330109812426861, -0...","[-0.4317134550644915, -0.5706595161452531, -0....","[-0.4505839827271164, -0.47754157577843764, -0...","[-0.5626696888217014, -0.6124038555025045, -0....","[-0.4528642284272001, -0.4909631583023366, -0....",happiness
...,...,...,...,...,...,...,...,...,...
602,"[0.3982678582722847, -0.10182100823120972, 0.4...","[0.37363602048622396, 3.5748094799452232, 0.36...","[1.7713024056633528, 0.8413751133293906, 0.363...","[0.7469676266546412, 2.0264151838627558, -0.53...","[-0.2630793388149663, -0.1494860912866692, -0....","[1.4880756071161185, 0.06991749421093901, 1.82...","[0.9671671363474181, 1.7226645703325405, 0.012...","[0.27268054578394446, 1.6785712761355467, 0.25...",rollercoaster
603,"[-0.3450139387291717, 0.06790508039139945, 2.0...","[2.098508350071126, 0.18087203531304052, 3.984...","[-0.02802189312698045, -0.527559129983794, 8.4...","[1.5583555152574033, -0.19110855425017584, -0....","[0.22164974709088026, 0.18662836539311584, 2.6...","[0.3823412784602395, 0.2844853470902825, 1.957...","[0.16762712719654066, 0.45773320887428587, 0.4...","[0.5087638232086141, -0.3175134929790767, 1.38...",rollercoaster
604,"[0.8995709745656576, -0.8005350254710867, -0.8...","[0.2034277159140864, -0.7498155748534173, -0.6...","[4.506748869681674, -0.2146403923506665, -0.07...","[1.5734967820520214, -0.30372071391838407, -0....","[1.7902095647472083, -0.5379638574367683, -0.1...","[1.4094912493427665, -0.16827382708127878, -0....","[0.3822075448976343, -0.3657515045195673, -0.2...","[0.4686475946151064, -0.5974650406389125, -0.0...",rollercoaster
605,"[-1.023226229898515, 0.7055711906628157, -0.61...","[-0.6332292994082938, -0.3821423726078545, -0....","[-0.4923562563544098, -0.3293419587662767, -0....","[-0.16138470911485855, -0.17397544166332446, -...","[-0.3819056854467774, 0.2039992339101845, -0.5...","[-0.6254453611171087, -0.28175534689618686, -0...","[-0.2872169277884454, 0.03693480632218459, -0....","[-0.482873634019545, 1.2503136942011142, -0.58...",rollercoaster


In [12]:
# Number of chunks available per label.
df_time['label'].value_counts()

happiness        99
nature           96
plank            82
horror           72
holocaust        70
relax            68
rest             60
rollercoaster    60
Name: label, dtype: int64

In [None]:
# Persist the chunked dataset in the current working directory.
# NOTE(review): every signal cell holds a NumPy array, which CSV presumably
# stores only as its string form — confirm that the consumer parses it back.
df_time.to_csv('df_time_series.csv')

# Process data for traditional processing

In [13]:
chunks_list = []
labels_list = []
df_list = []
for f in emotions_filepaths:
  # the name of the directory that holds the recording is its label
  label = os.path.normpath(f).split(os.sep)[-2]
  df = process_input_file(
      pd.read_csv(f, sep=';'),
      remove_first=25,
      remove_last=25,
      scalers=[smooth_cmd],
  ).assign(label=label)
  df_list.append(df)

# stack all recordings row-wise and renumber the rows from zero
df_seconds = pd.concat(df_list).reset_index(drop=True)
df_seconds

Unnamed: 0,delta,theta,lowAlpha,highAlpha,lowBeta,highBeta,lowGamma,highGamma,label
0,0.186188,0.035327,0.008678,0.019436,0.012044,0.008582,0.005851,0.004385,happiness
1,0.249095,0.052902,0.012438,0.020675,0.012932,0.009894,0.006477,0.004649,happiness
2,0.318001,0.056180,0.012778,0.020908,0.013623,0.010612,0.006561,0.004672,happiness
3,0.383998,0.062920,0.018410,0.021940,0.014660,0.010950,0.006762,0.004787,happiness
4,0.498938,0.065180,0.019788,0.022802,0.017354,0.012368,0.007395,0.004895,happiness
...,...,...,...,...,...,...,...,...,...
21607,1.397923,0.288205,0.090126,0.060092,0.074694,0.043253,0.059317,0.020761,rollercoaster
21608,1.236768,0.254862,0.048056,0.038609,0.021826,0.032543,0.050653,0.013683,rollercoaster
21609,0.933506,0.205315,0.038986,0.026887,0.014375,0.025006,0.036829,0.008533,rollercoaster
21610,0.834086,0.185413,0.036705,0.025343,0.012945,0.023059,0.035161,0.007757,rollercoaster


In [14]:
# Number of rows available per label.
df_seconds['label'].value_counts()

happiness        3551
nature           3510
plank            3051
horror           2524
holocaust        2467
relax            2376
rest             2178
rollercoaster    1955
Name: label, dtype: int64

In [None]:
# Persist the row-level dataset in the current working directory.
df_seconds.to_csv('df_processed.csv')

# Process files for neural network

In [16]:
# Rows per network input window, counted after make_double has doubled the rows.
samples_per_window = 40

chunks_list = []
labels_list = []
for f in emotions_filepaths:
  # the name of the directory that holds the recording is its label
  label = os.path.normpath(f).split(os.sep)[-2]
  df = process_input_file(pd.read_csv(f, sep=';'), scalers=[smooth_cmd], make_double=True)
  get_chunk = get_chunk_from_df(df, chunk_size=samples_per_window)

  # consume only the complete windows; a shorter trailing window is never requested
  for chunk_num in range(df.shape[0] // samples_per_window):
    chunk = next(get_chunk)
    new_chunk = chunk.to_numpy()
    chunks_list.append(new_chunk)
    labels_list.append(label)

# stack to (windows, rows, columns), then swap the last two axes
arr_eegnet = np.swapaxes(np.asarray(chunks_list), 1, 2)
print(f"Output array shape: {arr_eegnet.shape}")

Output array shape: (1265, 8, 40)


In [19]:
# prepare labels for 8-class classification
# The position in this tuple is the class id; note that 'nature' comes last.
class_order = (
    happiness_dir_name,
    holocaust_dir_name,
    horror_dir_name,
    plank_dir_name,
    relax_dir_name,
    rest_dir_name,
    rollercoaster_dir_name,
    nature_dir_name,
)
labels_mapping = {dir_name: class_id for class_id, dir_name in enumerate(class_order)}

# one integer class id per window, in the same order as labels_list
labels_8class = np.asarray([labels_mapping[label] for label in labels_list])

In [20]:
# map the eight label names to two classes
class_one_dirs = (
    happiness_dir_name,      # 8-class id 0
    horror_dir_name,         # 8-class id 2
    plank_dir_name,          # 8-class id 3
    rollercoaster_dir_name,  # 8-class id 6
)
class_zero_dirs = (
    holocaust_dir_name,      # 8-class id 1
    relax_dir_name,          # 8-class id 4
    rest_dir_name,           # 8-class id 5
    nature_dir_name,         # 8-class id 7
)
labels_mapping_binary = {
    **dict.fromkeys(class_one_dirs, 1),
    **dict.fromkeys(class_zero_dirs, 0),
}

# one binary class id per window, in the same order as labels_list
labels_2class = np.asarray([labels_mapping_binary[label] for label in labels_list])

In [None]:
# Persist both label vectors and the window array in the current working directory.
pd.DataFrame(data=labels_8class).to_csv("y_8classes.csv")
pd.DataFrame(data=labels_2class).to_csv("y_2classes.csv")
# NOTE(review): the file name says 639 while the array printed above holds
# 1265 windows — confirm whether the name should be updated.
# The context manager closes the file even if pickling fails.
with open("X_639_smooth.pkl", "wb") as pkl_file:
  pickle.dump(arr_eegnet, pkl_file)