# Data Preprocessing and Feature Engineering
## 1. Data Preprocessing
### 1.1 Encode categorical features

In [53]:
# import libraries
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

from scipy import signal
from biosppy.signals import ecg 
from biosppy.signals import eeg
from biosppy.signals import resp
from scipy.interpolate import interp1d 

import warnings
warnings.filterwarnings("ignore")

In [2]:
# read the raw train and test CSV files into dataframes
train_df, test_df = (
    pd.read_csv(csv_path) for csv_path in ("../data/train.csv", "../data/test.csv")
)

In [3]:
# pull out the target ("event") and the categorical columns first, then keep
# only the remaining (numerical) columns in train_df for scaling
y_train = train_df.loc[:, "event"]
cat_features = train_df.loc[:, ["crew", "seat"]]
train_df = train_df.drop(columns=["crew", "seat", "experiment", "event"])

In [4]:
# learn the one-hot categories from the training rows only;
# handle_unknown="ignore" keeps transform() from failing on unseen categories
cat_encoder = OneHotEncoder(handle_unknown="ignore").fit(cat_features)
cat_encoder

In [55]:
# store the one hot encoder, we will use this on real time data
# (the context manager guarantees the file is flushed and closed, even if
# pickling raises; the previous inline open() never closed the handle)
with open('cat_encoder.pkl', 'wb') as encoder_file:
    pickle.dump(cat_encoder, encoder_file)

In [5]:
# apply the fitted encoder to the training rows, then densify the sparse result
sparse_encoded = cat_encoder.transform(cat_features)
cat_features_encoded = sparse_encoded.toarray()

In [6]:
# sanity check: (rows, one-hot columns) of the encoded categorical features
cat_features_encoded.shape

(4867421, 11)

### 1.2 Scale numerical features

In [7]:
# estimate per-column mean and standard deviation from the training data only
scaler = StandardScaler().fit(train_df)
scaler

In [8]:
# standardise the remaining train_df columns with the scaler fitted on this same data
scaled_features = scaler.transform(train_df)

In [61]:
# sanity check: (rows, scaled columns); the row count should match the encoded categorical features
scaled_features.shape

(4867421, 24)

In [54]:
# store the standard scaler, we will use this on real time data
# (the context manager guarantees the file is flushed and closed, even if
# pickling raises; the previous inline open() never closed the handle)
with open('standard_scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

In [11]:
# X_train holds the final feature matrix: the one-hot encoded categorical
# columns followed by the scaled columns, joined side by side
encoded_frame = pd.DataFrame(
    cat_features_encoded,
    columns=cat_encoder.get_feature_names_out(),
)
scaled_frame = pd.DataFrame(scaled_features, columns=train_df.columns)
X_train = pd.concat([encoded_frame, scaled_frame], axis=1)

In [12]:
# preview the first rows of the combined feature matrix
X_train.head(5)

Unnamed: 0,crew_1,crew_2,crew_3,crew_4,crew_5,crew_6,crew_7,crew_8,crew_13,seat_0,...,eeg_f4,eeg_c4,eeg_p4,eeg_poz,eeg_c3,eeg_cz,eeg_o2,ecg,r,gsr
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.044743,1.791526,1.035794,1.036489,1.010907,-0.222505,0.606679,-0.807615,0.978227,-0.918807
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.14013,1.489485,1.153979,1.063078,1.049268,0.045027,0.744553,-0.807615,0.978227,-0.918807
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.273377,-0.614862,1.153774,1.252968,1.10256,0.055446,0.935755,-0.807615,0.978227,-0.918807
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.243756,0.875642,1.380259,1.224039,0.947126,0.106441,0.797634,-0.807615,0.978227,-0.918807
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.161679,0.289967,1.356063,1.225626,0.99554,0.063637,0.959812,-0.807615,0.978227,-0.918807


In [13]:
# sanity check: column count should equal one-hot columns + scaled columns
X_train.shape

(4867421, 35)

In [14]:
# list the feature names: one-hot columns first, then the scaled columns
X_train.columns

Index(['crew_1', 'crew_2', 'crew_3', 'crew_4', 'crew_5', 'crew_6', 'crew_7',
       'crew_8', 'crew_13', 'seat_0', 'seat_1', 'time', 'eeg_fp1', 'eeg_f7',
       'eeg_f8', 'eeg_t4', 'eeg_t6', 'eeg_t5', 'eeg_t3', 'eeg_fp2', 'eeg_o1',
       'eeg_p3', 'eeg_pz', 'eeg_f3', 'eeg_fz', 'eeg_f4', 'eeg_c4', 'eeg_p4',
       'eeg_poz', 'eeg_c3', 'eeg_cz', 'eeg_o2', 'ecg', 'r', 'gsr'],
      dtype='object')

## 2. Extract new features

We will be using the biosppy module for processing biosignals.

1. ECG data: we will use biosppy.signals.ecg.ecg for deriving heart rate from ecg signals.
2. Respiration data: we will use biosppy.signals.resp.resp for deriving respiration rate from respiration data.
3. EEG data: we will use biosppy.signals.eeg.get_power_features for deriving frequency bands from eeg electrodes data.

These functions take the raw signal samples and derive the output from them. However, they do not return a value for every timestamp in the raw data. Hence, we will interpolate the derived data to obtain an output for every timestamp in our data.


In [24]:
#https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.interp1d.html
def interpolation_fn(timestamps,biosppy_ts, biosppy_values):
    """
    Resample a derived series onto the timestamps of the raw data.

    A piecewise-linear interpolant is fitted through the
    (biosppy_ts, biosppy_values) pairs and evaluated at every entry of
    timestamps. Entries that fall outside the range of biosppy_ts are
    linearly extrapolated.

    Parameters
    ----------
    timestamps : array-like
        Points at which values are required.
    biosppy_ts : array-like
        Time axis of the derived series.
    biosppy_values : array-like
        Derived values, one per entry of biosppy_ts.

    Returns
    -------
    numpy.ndarray
        Interpolated values, one per entry of timestamps.
    """
    linear_fit = interp1d(
        biosppy_ts,
        biosppy_values,
        kind="linear",
        fill_value="extrapolate",
    )
    resampled = linear_fit(timestamps)
    return resampled


The train data is grouped by crew; within each crew, the rows are grouped by the cognitive state that the experiment tried to reproduce (column 'experiment') and are sorted by time.

Since our data is already grouped and sorted, we can store each group in its own dataframe and pass the groups to these functions separately, because each crew and cognitive state could have different values for the signals.
### 2.1 Store data to separate dataframes

In [15]:
# read the raw CSV files again so that train_df holds every original column
TRAIN_CSV, TEST_CSV = "../data/train.csv", "../data/test.csv"
train_df = pd.read_csv(TRAIN_CSV)
test_df = pd.read_csv(TEST_CSV)

In [17]:
# one dataframe per cognitive state, selected on the "experiment" column
train_ca_df, train_da_df, train_ss_df = (
    train_df.loc[train_df["experiment"].eq(state_code)]
    for state_code in ("CA", "DA", "SS")
)

In [18]:
# get data of different crews for case "CA"
# The boolean mask is built from train_ca_df itself so that it shares the
# index of the frame being filtered. Masking with train_df["crew"] only worked
# because pandas silently reindexes a mismatched boolean Series (and the
# resulting UserWarning is hidden by warnings.filterwarnings("ignore")).
(train_ca_1_df, train_ca_2_df, train_ca_3_df,
 train_ca_4_df, train_ca_5_df, train_ca_6_df,
 train_ca_7_df, train_ca_8_df, train_ca_13_df) = (
    train_ca_df[train_ca_df["crew"] == crew_id]
    for crew_id in (1, 2, 3, 4, 5, 6, 7, 8, 13)
)

In [19]:
# get data of different crews for case "DA"
# The boolean mask is built from train_da_df itself so that it shares the
# index of the frame being filtered. Masking with train_df["crew"] only worked
# because pandas silently reindexes a mismatched boolean Series (and the
# resulting UserWarning is hidden by warnings.filterwarnings("ignore")).
(train_da_1_df, train_da_2_df, train_da_3_df,
 train_da_4_df, train_da_5_df, train_da_6_df,
 train_da_7_df, train_da_8_df, train_da_13_df) = (
    train_da_df[train_da_df["crew"] == crew_id]
    for crew_id in (1, 2, 3, 4, 5, 6, 7, 8, 13)
)

In [20]:
# get data of different crews for case "SS"
# The boolean mask is built from train_ss_df itself so that it shares the
# index of the frame being filtered. Masking with train_df["crew"] only worked
# because pandas silently reindexes a mismatched boolean Series (and the
# resulting UserWarning is hidden by warnings.filterwarnings("ignore")).
(train_ss_1_df, train_ss_2_df, train_ss_3_df,
 train_ss_4_df, train_ss_5_df, train_ss_6_df,
 train_ss_7_df, train_ss_8_df, train_ss_13_df) = (
    train_ss_df[train_ss_df["crew"] == crew_id]
    for crew_id in (1, 2, 3, 4, 5, 6, 7, 8, 13)
)

### 2.2 Heart rate from ECG data

#### 2.2.1 Derive heart rate

In [21]:
# Run BioSPPy's ECG pipeline on the "ecg" column of every (state, crew) segment.
# NOTE(review): sampling_rate is not passed, so BioSPPy's default is used --
# confirm it matches the recording rate of this dataset.
# NOTE(review): segments are split by experiment and crew only, not by "seat" --
# confirm that each segment is a single continuous ECG trace.

# for state "CA"
(ecg_bio_ca_1, ecg_bio_ca_2, ecg_bio_ca_3,
 ecg_bio_ca_4, ecg_bio_ca_5, ecg_bio_ca_6,
 ecg_bio_ca_7, ecg_bio_ca_8, ecg_bio_ca_13) = [
    ecg.ecg(segment_df["ecg"], show=False)
    for segment_df in (
        train_ca_1_df, train_ca_2_df, train_ca_3_df,
        train_ca_4_df, train_ca_5_df, train_ca_6_df,
        train_ca_7_df, train_ca_8_df, train_ca_13_df,
    )
]

# for state "DA"
(ecg_bio_da_1, ecg_bio_da_2, ecg_bio_da_3,
 ecg_bio_da_4, ecg_bio_da_5, ecg_bio_da_6,
 ecg_bio_da_7, ecg_bio_da_8, ecg_bio_da_13) = [
    ecg.ecg(segment_df["ecg"], show=False)
    for segment_df in (
        train_da_1_df, train_da_2_df, train_da_3_df,
        train_da_4_df, train_da_5_df, train_da_6_df,
        train_da_7_df, train_da_8_df, train_da_13_df,
    )
]

# for state "SS"
(ecg_bio_ss_1, ecg_bio_ss_2, ecg_bio_ss_3,
 ecg_bio_ss_4, ecg_bio_ss_5, ecg_bio_ss_6,
 ecg_bio_ss_7, ecg_bio_ss_8, ecg_bio_ss_13) = [
    ecg.ecg(segment_df["ecg"], show=False)
    for segment_df in (
        train_ss_1_df, train_ss_2_df, train_ss_3_df,
        train_ss_4_df, train_ss_5_df, train_ss_6_df,
        train_ss_7_df, train_ss_8_df, train_ss_13_df,
    )
]

#### 2.2.2 Interpolation of heart rate

In [25]:
# Interpolate the derived heart rate onto every raw timestamp of each segment.
# NOTE(review): "heart_rate_ts" comes from BioSPPy while "time" comes from the
# dataset -- confirm both are expressed on the same time axis.

# for state "CA"
(heart_rate_ca_1, heart_rate_ca_2, heart_rate_ca_3,
 heart_rate_ca_4, heart_rate_ca_5, heart_rate_ca_6,
 heart_rate_ca_7, heart_rate_ca_8, heart_rate_ca_13) = [
    interpolation_fn(segment_df["time"], segment_ecg["heart_rate_ts"], segment_ecg["heart_rate"])
    for segment_df, segment_ecg in (
        (train_ca_1_df, ecg_bio_ca_1), (train_ca_2_df, ecg_bio_ca_2), (train_ca_3_df, ecg_bio_ca_3),
        (train_ca_4_df, ecg_bio_ca_4), (train_ca_5_df, ecg_bio_ca_5), (train_ca_6_df, ecg_bio_ca_6),
        (train_ca_7_df, ecg_bio_ca_7), (train_ca_8_df, ecg_bio_ca_8), (train_ca_13_df, ecg_bio_ca_13),
    )
]

# for state "DA"
(heart_rate_da_1, heart_rate_da_2, heart_rate_da_3,
 heart_rate_da_4, heart_rate_da_5, heart_rate_da_6,
 heart_rate_da_7, heart_rate_da_8, heart_rate_da_13) = [
    interpolation_fn(segment_df["time"], segment_ecg["heart_rate_ts"], segment_ecg["heart_rate"])
    for segment_df, segment_ecg in (
        (train_da_1_df, ecg_bio_da_1), (train_da_2_df, ecg_bio_da_2), (train_da_3_df, ecg_bio_da_3),
        (train_da_4_df, ecg_bio_da_4), (train_da_5_df, ecg_bio_da_5), (train_da_6_df, ecg_bio_da_6),
        (train_da_7_df, ecg_bio_da_7), (train_da_8_df, ecg_bio_da_8), (train_da_13_df, ecg_bio_da_13),
    )
]

# for state "SS"
(heart_rate_ss_1, heart_rate_ss_2, heart_rate_ss_3,
 heart_rate_ss_4, heart_rate_ss_5, heart_rate_ss_6,
 heart_rate_ss_7, heart_rate_ss_8, heart_rate_ss_13) = [
    interpolation_fn(segment_df["time"], segment_ecg["heart_rate_ts"], segment_ecg["heart_rate"])
    for segment_df, segment_ecg in (
        (train_ss_1_df, ecg_bio_ss_1), (train_ss_2_df, ecg_bio_ss_2), (train_ss_3_df, ecg_bio_ss_3),
        (train_ss_4_df, ecg_bio_ss_4), (train_ss_5_df, ecg_bio_ss_5), (train_ss_6_df, ecg_bio_ss_6),
        (train_ss_7_df, ecg_bio_ss_7), (train_ss_8_df, ecg_bio_ss_8), (train_ss_13_df, ecg_bio_ss_13),
    )
]

In [28]:
# Stitch the per-segment arrays back together crew by crew (CA, DA, SS within
# each crew) so the result lines up row-for-row with the train dataframe,
# because it will be added as a column next to the other features.
heart_rate_segments = [
    heart_rate_ca_1, heart_rate_da_1, heart_rate_ss_1,      # crew 1
    heart_rate_ca_2, heart_rate_da_2, heart_rate_ss_2,      # crew 2
    heart_rate_ca_3, heart_rate_da_3, heart_rate_ss_3,      # crew 3
    heart_rate_ca_4, heart_rate_da_4, heart_rate_ss_4,      # crew 4
    heart_rate_ca_5, heart_rate_da_5, heart_rate_ss_5,      # crew 5
    heart_rate_ca_6, heart_rate_da_6, heart_rate_ss_6,      # crew 6
    heart_rate_ca_7, heart_rate_da_7, heart_rate_ss_7,      # crew 7
    heart_rate_ca_8, heart_rate_da_8, heart_rate_ss_8,      # crew 8
    heart_rate_ca_13, heart_rate_da_13, heart_rate_ss_13,   # crew 13
]
heart_rate = np.concatenate(heart_rate_segments)

In [29]:
# sanity check: the length should equal the number of rows in X_train
heart_rate.shape

(4867421,)

In [59]:
# add the interpolated heart rate as a feature column; the array is assigned
# positionally, so its order must match the row order of X_train
X_train["heart_rate"] = heart_rate

### 2.3 Respiration rate from respiration data
#### 2.3.1 Derive respiration rate

In [30]:
# Run BioSPPy's respiration pipeline on the "r" column of every (state, crew) segment.
# NOTE(review): sampling_rate is not passed, so BioSPPy's default is used --
# confirm it matches the recording rate of this dataset.

# for state "CA"
(resp_bio_ca_1, resp_bio_ca_2, resp_bio_ca_3,
 resp_bio_ca_4, resp_bio_ca_5, resp_bio_ca_6,
 resp_bio_ca_7, resp_bio_ca_8, resp_bio_ca_13) = [
    resp.resp(segment_df["r"], show=False)
    for segment_df in (
        train_ca_1_df, train_ca_2_df, train_ca_3_df,
        train_ca_4_df, train_ca_5_df, train_ca_6_df,
        train_ca_7_df, train_ca_8_df, train_ca_13_df,
    )
]

# for state "DA"
(resp_bio_da_1, resp_bio_da_2, resp_bio_da_3,
 resp_bio_da_4, resp_bio_da_5, resp_bio_da_6,
 resp_bio_da_7, resp_bio_da_8, resp_bio_da_13) = [
    resp.resp(segment_df["r"], show=False)
    for segment_df in (
        train_da_1_df, train_da_2_df, train_da_3_df,
        train_da_4_df, train_da_5_df, train_da_6_df,
        train_da_7_df, train_da_8_df, train_da_13_df,
    )
]

# for state "SS"
(resp_bio_ss_1, resp_bio_ss_2, resp_bio_ss_3,
 resp_bio_ss_4, resp_bio_ss_5, resp_bio_ss_6,
 resp_bio_ss_7, resp_bio_ss_8, resp_bio_ss_13) = [
    resp.resp(segment_df["r"], show=False)
    for segment_df in (
        train_ss_1_df, train_ss_2_df, train_ss_3_df,
        train_ss_4_df, train_ss_5_df, train_ss_6_df,
        train_ss_7_df, train_ss_8_df, train_ss_13_df,
    )
]

#### 2.3.2 Interpolation for respiration rate

In [34]:
# Interpolate the derived respiration rate onto every raw timestamp of each segment.

# for state "CA"
(resp_rate_ca_1, resp_rate_ca_2, resp_rate_ca_3,
 resp_rate_ca_4, resp_rate_ca_5, resp_rate_ca_6,
 resp_rate_ca_7, resp_rate_ca_8, resp_rate_ca_13) = [
    interpolation_fn(segment_df["time"], segment_resp["resp_rate_ts"], segment_resp["resp_rate"])
    for segment_df, segment_resp in (
        (train_ca_1_df, resp_bio_ca_1), (train_ca_2_df, resp_bio_ca_2), (train_ca_3_df, resp_bio_ca_3),
        (train_ca_4_df, resp_bio_ca_4), (train_ca_5_df, resp_bio_ca_5), (train_ca_6_df, resp_bio_ca_6),
        (train_ca_7_df, resp_bio_ca_7), (train_ca_8_df, resp_bio_ca_8), (train_ca_13_df, resp_bio_ca_13),
    )
]

# for state "DA"
(resp_rate_da_1, resp_rate_da_2, resp_rate_da_3,
 resp_rate_da_4, resp_rate_da_5, resp_rate_da_6,
 resp_rate_da_7, resp_rate_da_8, resp_rate_da_13) = [
    interpolation_fn(segment_df["time"], segment_resp["resp_rate_ts"], segment_resp["resp_rate"])
    for segment_df, segment_resp in (
        (train_da_1_df, resp_bio_da_1), (train_da_2_df, resp_bio_da_2), (train_da_3_df, resp_bio_da_3),
        (train_da_4_df, resp_bio_da_4), (train_da_5_df, resp_bio_da_5), (train_da_6_df, resp_bio_da_6),
        (train_da_7_df, resp_bio_da_7), (train_da_8_df, resp_bio_da_8), (train_da_13_df, resp_bio_da_13),
    )
]

# for state "SS"
(resp_rate_ss_1, resp_rate_ss_2, resp_rate_ss_3,
 resp_rate_ss_4, resp_rate_ss_5, resp_rate_ss_6,
 resp_rate_ss_7, resp_rate_ss_8, resp_rate_ss_13) = [
    interpolation_fn(segment_df["time"], segment_resp["resp_rate_ts"], segment_resp["resp_rate"])
    for segment_df, segment_resp in (
        (train_ss_1_df, resp_bio_ss_1), (train_ss_2_df, resp_bio_ss_2), (train_ss_3_df, resp_bio_ss_3),
        (train_ss_4_df, resp_bio_ss_4), (train_ss_5_df, resp_bio_ss_5), (train_ss_6_df, resp_bio_ss_6),
        (train_ss_7_df, resp_bio_ss_7), (train_ss_8_df, resp_bio_ss_8), (train_ss_13_df, resp_bio_ss_13),
    )
]

In [37]:
# Stitch the per-segment arrays back together crew by crew (CA, DA, SS within
# each crew) so the result lines up row-for-row with the train dataframe,
# because it will be added as a column next to the other features.
resp_rate_segments = [
    resp_rate_ca_1, resp_rate_da_1, resp_rate_ss_1,      # crew 1
    resp_rate_ca_2, resp_rate_da_2, resp_rate_ss_2,      # crew 2
    resp_rate_ca_3, resp_rate_da_3, resp_rate_ss_3,      # crew 3
    resp_rate_ca_4, resp_rate_da_4, resp_rate_ss_4,      # crew 4
    resp_rate_ca_5, resp_rate_da_5, resp_rate_ss_5,      # crew 5
    resp_rate_ca_6, resp_rate_da_6, resp_rate_ss_6,      # crew 6
    resp_rate_ca_7, resp_rate_da_7, resp_rate_ss_7,      # crew 7
    resp_rate_ca_8, resp_rate_da_8, resp_rate_ss_8,      # crew 8
    resp_rate_ca_13, resp_rate_da_13, resp_rate_ss_13,   # crew 13
]
resp_rate = np.concatenate(resp_rate_segments)

In [38]:
# sanity check: the length should equal the number of rows in X_train
resp_rate.shape

(4867421,)

In [58]:
# add the interpolated respiration rate as a feature column; the array is
# assigned positionally, so its order must match the row order of X_train
X_train["resp_rate"] = resp_rate

### 2.4 Categorize EEG features

In [39]:
# list the raw column names so the EEG electrode columns can be picked out below
train_df.columns

Index(['crew', 'experiment', 'time', 'seat', 'eeg_fp1', 'eeg_f7', 'eeg_f8',
       'eeg_t4', 'eeg_t6', 'eeg_t5', 'eeg_t3', 'eeg_fp2', 'eeg_o1', 'eeg_p3',
       'eeg_pz', 'eeg_f3', 'eeg_fz', 'eeg_f4', 'eeg_c4', 'eeg_p4', 'eeg_poz',
       'eeg_c3', 'eeg_cz', 'eeg_o2', 'ecg', 'r', 'gsr', 'event'],
      dtype='object')

In [40]:
# EEG electrode columns, in the same order as they appear in the train dataframe
# (the position in this list is later used as the electrode's column index)
eeg_features = [
    "eeg_fp1", "eeg_f7", "eeg_f8", "eeg_t4",
    "eeg_t6", "eeg_t5", "eeg_t3", "eeg_fp2",
    "eeg_o1", "eeg_p3", "eeg_pz", "eeg_f3",
    "eeg_fz", "eeg_f4", "eeg_c4", "eeg_p4",
    "eeg_poz", "eeg_c3", "eeg_cz", "eeg_o2",
]

#### 2.4.1 Derive frequency bands for EEG 

In [41]:
# Compute BioSPPy band-power features from the EEG electrode columns of every
# (state, crew) segment.
# NOTE(review): sampling_rate is not passed, so BioSPPy's default is used --
# confirm it matches the recording rate of this dataset.

# for state "CA"
(eeg_bio_ca_1, eeg_bio_ca_2, eeg_bio_ca_3,
 eeg_bio_ca_4, eeg_bio_ca_5, eeg_bio_ca_6,
 eeg_bio_ca_7, eeg_bio_ca_8, eeg_bio_ca_13) = [
    eeg.get_power_features(segment_df[eeg_features])
    for segment_df in (
        train_ca_1_df, train_ca_2_df, train_ca_3_df,
        train_ca_4_df, train_ca_5_df, train_ca_6_df,
        train_ca_7_df, train_ca_8_df, train_ca_13_df,
    )
]

# for state "DA"
(eeg_bio_da_1, eeg_bio_da_2, eeg_bio_da_3,
 eeg_bio_da_4, eeg_bio_da_5, eeg_bio_da_6,
 eeg_bio_da_7, eeg_bio_da_8, eeg_bio_da_13) = [
    eeg.get_power_features(segment_df[eeg_features])
    for segment_df in (
        train_da_1_df, train_da_2_df, train_da_3_df,
        train_da_4_df, train_da_5_df, train_da_6_df,
        train_da_7_df, train_da_8_df, train_da_13_df,
    )
]

# for state "SS"
(eeg_bio_ss_1, eeg_bio_ss_2, eeg_bio_ss_3,
 eeg_bio_ss_4, eeg_bio_ss_5, eeg_bio_ss_6,
 eeg_bio_ss_7, eeg_bio_ss_8, eeg_bio_ss_13) = [
    eeg.get_power_features(segment_df[eeg_features])
    for segment_df in (
        train_ss_1_df, train_ss_2_df, train_ss_3_df,
        train_ss_4_df, train_ss_5_df, train_ss_6_df,
        train_ss_7_df, train_ss_8_df, train_ss_13_df,
    )
]

Unlike heart rate and respiration rate, which each have only one main output, here we have 5 frequency bands, each with values for 20 electrodes.
Hence, we have to interpolate separately for each of the 5 bands and for each of the 20 electrodes.

#### 2.4.2. Interpolation for "theta" band

In [44]:
# extracting theta band frequency
theta_feature = [electrode + "_theta" for electrode in eeg_features]

# (raw segment, BioSPPy EEG output) pairs listed crew by crew, each crew in
# CA -> DA -> SS order. This is the row order of the train dataframe (grouped
# by crew, then by experiment) and the order used for heart_rate and
# resp_rate. The previous version concatenated all CA segments, then all DA,
# then all SS, which assigned the band values to the wrong rows of X_train.
theta_segments = [
    (train_ca_1_df, eeg_bio_ca_1), (train_da_1_df, eeg_bio_da_1), (train_ss_1_df, eeg_bio_ss_1),
    (train_ca_2_df, eeg_bio_ca_2), (train_da_2_df, eeg_bio_da_2), (train_ss_2_df, eeg_bio_ss_2),
    (train_ca_3_df, eeg_bio_ca_3), (train_da_3_df, eeg_bio_da_3), (train_ss_3_df, eeg_bio_ss_3),
    (train_ca_4_df, eeg_bio_ca_4), (train_da_4_df, eeg_bio_da_4), (train_ss_4_df, eeg_bio_ss_4),
    (train_ca_5_df, eeg_bio_ca_5), (train_da_5_df, eeg_bio_da_5), (train_ss_5_df, eeg_bio_ss_5),
    (train_ca_6_df, eeg_bio_ca_6), (train_da_6_df, eeg_bio_da_6), (train_ss_6_df, eeg_bio_ss_6),
    (train_ca_7_df, eeg_bio_ca_7), (train_da_7_df, eeg_bio_da_7), (train_ss_7_df, eeg_bio_ss_7),
    (train_ca_8_df, eeg_bio_ca_8), (train_da_8_df, eeg_bio_da_8), (train_ss_8_df, eeg_bio_ss_8),
    (train_ca_13_df, eeg_bio_ca_13), (train_da_13_df, eeg_bio_da_13), (train_ss_13_df, eeg_bio_ss_13),
]

for i, theta_column in enumerate(theta_feature):
    # column i of the "theta" output belongs to electrode eeg_features[i];
    # interpolate it onto every raw timestamp, one segment at a time, then
    # stitch the segments back together in train dataframe order
    theta = np.concatenate([
        interpolation_fn(segment_df["time"], segment_bio["ts"], segment_bio["theta"][:, i])
        for segment_df, segment_bio in theta_segments
    ])

    X_train[theta_column] = theta

#### 2.4.2. Interpolation for "alpha low" band

In [45]:
# extracting alpha_low band frequency
alpha_low_feature = [electrode + "_alpha_low" for electrode in eeg_features]

# (raw segment, BioSPPy EEG output) pairs listed crew by crew, each crew in
# CA -> DA -> SS order. This is the row order of the train dataframe (grouped
# by crew, then by experiment) and the order used for heart_rate and
# resp_rate. The previous version concatenated all CA segments, then all DA,
# then all SS, which assigned the band values to the wrong rows of X_train.
alpha_low_segments = [
    (train_ca_1_df, eeg_bio_ca_1), (train_da_1_df, eeg_bio_da_1), (train_ss_1_df, eeg_bio_ss_1),
    (train_ca_2_df, eeg_bio_ca_2), (train_da_2_df, eeg_bio_da_2), (train_ss_2_df, eeg_bio_ss_2),
    (train_ca_3_df, eeg_bio_ca_3), (train_da_3_df, eeg_bio_da_3), (train_ss_3_df, eeg_bio_ss_3),
    (train_ca_4_df, eeg_bio_ca_4), (train_da_4_df, eeg_bio_da_4), (train_ss_4_df, eeg_bio_ss_4),
    (train_ca_5_df, eeg_bio_ca_5), (train_da_5_df, eeg_bio_da_5), (train_ss_5_df, eeg_bio_ss_5),
    (train_ca_6_df, eeg_bio_ca_6), (train_da_6_df, eeg_bio_da_6), (train_ss_6_df, eeg_bio_ss_6),
    (train_ca_7_df, eeg_bio_ca_7), (train_da_7_df, eeg_bio_da_7), (train_ss_7_df, eeg_bio_ss_7),
    (train_ca_8_df, eeg_bio_ca_8), (train_da_8_df, eeg_bio_da_8), (train_ss_8_df, eeg_bio_ss_8),
    (train_ca_13_df, eeg_bio_ca_13), (train_da_13_df, eeg_bio_da_13), (train_ss_13_df, eeg_bio_ss_13),
]

for i, alpha_low_column in enumerate(alpha_low_feature):
    # column i of the "alpha_low" output belongs to electrode eeg_features[i];
    # interpolate it onto every raw timestamp, one segment at a time, then
    # stitch the segments back together in train dataframe order
    alpha_low = np.concatenate([
        interpolation_fn(segment_df["time"], segment_bio["ts"], segment_bio["alpha_low"][:, i])
        for segment_df, segment_bio in alpha_low_segments
    ])

    X_train[alpha_low_column] = alpha_low

In [46]:
# sanity check on the last electrode's array: the length should equal the number of rows in X_train
alpha_low.shape

(4867421,)

#### 2.4.3. Interpolation for "alpha high" band

In [47]:
# extracting alpha_high band frequency
alpha_high_feature = [electrode + "_alpha_high" for electrode in eeg_features]

# (raw segment, BioSPPy EEG output) pairs listed crew by crew, each crew in
# CA -> DA -> SS order. This is the row order of the train dataframe (grouped
# by crew, then by experiment) and the order used for heart_rate and
# resp_rate. The previous version concatenated all CA segments, then all DA,
# then all SS, which assigned the band values to the wrong rows of X_train.
alpha_high_segments = [
    (train_ca_1_df, eeg_bio_ca_1), (train_da_1_df, eeg_bio_da_1), (train_ss_1_df, eeg_bio_ss_1),
    (train_ca_2_df, eeg_bio_ca_2), (train_da_2_df, eeg_bio_da_2), (train_ss_2_df, eeg_bio_ss_2),
    (train_ca_3_df, eeg_bio_ca_3), (train_da_3_df, eeg_bio_da_3), (train_ss_3_df, eeg_bio_ss_3),
    (train_ca_4_df, eeg_bio_ca_4), (train_da_4_df, eeg_bio_da_4), (train_ss_4_df, eeg_bio_ss_4),
    (train_ca_5_df, eeg_bio_ca_5), (train_da_5_df, eeg_bio_da_5), (train_ss_5_df, eeg_bio_ss_5),
    (train_ca_6_df, eeg_bio_ca_6), (train_da_6_df, eeg_bio_da_6), (train_ss_6_df, eeg_bio_ss_6),
    (train_ca_7_df, eeg_bio_ca_7), (train_da_7_df, eeg_bio_da_7), (train_ss_7_df, eeg_bio_ss_7),
    (train_ca_8_df, eeg_bio_ca_8), (train_da_8_df, eeg_bio_da_8), (train_ss_8_df, eeg_bio_ss_8),
    (train_ca_13_df, eeg_bio_ca_13), (train_da_13_df, eeg_bio_da_13), (train_ss_13_df, eeg_bio_ss_13),
]

for i, alpha_high_column in enumerate(alpha_high_feature):
    # column i of the "alpha_high" output belongs to electrode eeg_features[i];
    # interpolate it onto every raw timestamp, one segment at a time, then
    # stitch the segments back together in train dataframe order
    alpha_high = np.concatenate([
        interpolation_fn(segment_df["time"], segment_bio["ts"], segment_bio["alpha_high"][:, i])
        for segment_df, segment_bio in alpha_high_segments
    ])

    X_train[alpha_high_column] = alpha_high

#### 2.4.4. Interpolation for "beta" band

In [48]:
# extracting beta band frequency
# One "<electrode>_beta" column name per entry of eeg_features.
beta_feature = [electrode + "_beta" for electrode in eeg_features]

# (train rows, EEG band result) for every experiment/crew session.
# The list order (ca, da, ss; crews 1-8 then 13 inside each experiment) is the
# order in which the per-session pieces are concatenated below. Per the
# original note it must be the same order as the rows of the train dataframe,
# because the result is attached to X_train next to the other features.
beta_sessions = [
    (train_ca_1_df, eeg_bio_ca_1),
    (train_ca_2_df, eeg_bio_ca_2),
    (train_ca_3_df, eeg_bio_ca_3),
    (train_ca_4_df, eeg_bio_ca_4),
    (train_ca_5_df, eeg_bio_ca_5),
    (train_ca_6_df, eeg_bio_ca_6),
    (train_ca_7_df, eeg_bio_ca_7),
    (train_ca_8_df, eeg_bio_ca_8),
    (train_ca_13_df, eeg_bio_ca_13),

    (train_da_1_df, eeg_bio_da_1),
    (train_da_2_df, eeg_bio_da_2),
    (train_da_3_df, eeg_bio_da_3),
    (train_da_4_df, eeg_bio_da_4),
    (train_da_5_df, eeg_bio_da_5),
    (train_da_6_df, eeg_bio_da_6),
    (train_da_7_df, eeg_bio_da_7),
    (train_da_8_df, eeg_bio_da_8),
    (train_da_13_df, eeg_bio_da_13),

    (train_ss_1_df, eeg_bio_ss_1),
    (train_ss_2_df, eeg_bio_ss_2),
    (train_ss_3_df, eeg_bio_ss_3),
    (train_ss_4_df, eeg_bio_ss_4),
    (train_ss_5_df, eeg_bio_ss_5),
    (train_ss_6_df, eeg_bio_ss_6),
    (train_ss_7_df, eeg_bio_ss_7),
    (train_ss_8_df, eeg_bio_ss_8),
    (train_ss_13_df, eeg_bio_ss_13),
]

# Iterate over the column names themselves instead of a hard-coded range(20),
# so the loop bound always agrees with the list built from eeg_features.
for i, beta_column in enumerate(beta_feature):
    # For electrode i, pass each session's "time" column, the band result's
    # "ts" and the i-th column of its "beta" values to interpolation_fn, then
    # join the per-session results into one array in session-list order.
    beta = np.concatenate(
        [
            interpolation_fn(session_df["time"], session_bio["ts"], session_bio["beta"][:, i])
            for session_df, session_bio in beta_sessions
        ]
    )

    X_train[beta_column] = beta

#### 2.4.5. Interpolation for "gamma" band

In [49]:
# extracting gamma band frequency
# One "<electrode>_gamma" column name per entry of eeg_features.
gamma_feature = [electrode + "_gamma" for electrode in eeg_features]

# (train rows, EEG band result) for every experiment/crew session.
# The list order (ca, da, ss; crews 1-8 then 13 inside each experiment) is the
# order in which the per-session pieces are concatenated below. Per the
# original note it must be the same order as the rows of the train dataframe,
# because the result is attached to X_train next to the other features.
gamma_sessions = [
    (train_ca_1_df, eeg_bio_ca_1),
    (train_ca_2_df, eeg_bio_ca_2),
    (train_ca_3_df, eeg_bio_ca_3),
    (train_ca_4_df, eeg_bio_ca_4),
    (train_ca_5_df, eeg_bio_ca_5),
    (train_ca_6_df, eeg_bio_ca_6),
    (train_ca_7_df, eeg_bio_ca_7),
    (train_ca_8_df, eeg_bio_ca_8),
    (train_ca_13_df, eeg_bio_ca_13),

    (train_da_1_df, eeg_bio_da_1),
    (train_da_2_df, eeg_bio_da_2),
    (train_da_3_df, eeg_bio_da_3),
    (train_da_4_df, eeg_bio_da_4),
    (train_da_5_df, eeg_bio_da_5),
    (train_da_6_df, eeg_bio_da_6),
    (train_da_7_df, eeg_bio_da_7),
    (train_da_8_df, eeg_bio_da_8),
    (train_da_13_df, eeg_bio_da_13),

    (train_ss_1_df, eeg_bio_ss_1),
    (train_ss_2_df, eeg_bio_ss_2),
    (train_ss_3_df, eeg_bio_ss_3),
    (train_ss_4_df, eeg_bio_ss_4),
    (train_ss_5_df, eeg_bio_ss_5),
    (train_ss_6_df, eeg_bio_ss_6),
    (train_ss_7_df, eeg_bio_ss_7),
    (train_ss_8_df, eeg_bio_ss_8),
    (train_ss_13_df, eeg_bio_ss_13),
]

# Iterate over the column names themselves instead of a hard-coded range(20),
# so the loop bound always agrees with the list built from eeg_features.
for i, gamma_column in enumerate(gamma_feature):
    # For electrode i, pass each session's "time" column, the band result's
    # "ts" and the i-th column of its "gamma" values to interpolation_fn, then
    # join the per-session results into one array in session-list order.
    gamma = np.concatenate(
        [
            interpolation_fn(session_df["time"], session_bio["ts"], session_bio["gamma"][:, i])
            for session_df, session_bio in gamma_sessions
        ]
    )

    X_train[gamma_column] = gamma

## 3. Convert output labels to integers

In [50]:
# Integer code assigned to each event letter.
event_labels = {"A": 0, "B": 1, "C": 2, "D": 3}

# NOTE(review): the first preprocessing cell drops "event" from train_df, so
# this lookup presumably relies on train_df having been reloaded in a later
# cell -- confirm this still works after Restart Kernel -> Run All.
# An event value missing from event_labels raises KeyError.
events = [event_labels[event_name] for event_name in train_df["event"]]

# A plain list is assigned by position, so the labels follow train_df's row order.
X_train["y_label"] = events

In [60]:
X_train.shape

(4867421, 138)

We have 138 columns now:
1. 100 EEG band features (5 frequency bands for each of the 20 electrodes)
2. 11 one-hot encoded categorical features
3. 2 for heart rate and respiration rate
4. 20 for the EEG electrodes
5. 4 for 'time', 'ecg', 'gsr' and 'r'
6. 1 output label

## 4. Save final data

In [None]:
# Write the fully processed training frame (features + y_label) to "final.csv"
# in the current working directory. The index is written as well (pandas default).
X_train.to_csv(path_or_buf="final.csv")