# Notebook for feature engineering

# Import necessary libraries

In [None]:
import pandas as pd
import numpy as np
import joblib
# Show every column and every row when a DataFrame is displayed.
# The fully qualified 'display.*' keys are used because pandas resolves option
# names by pattern matching: a bare 'max_columns' / 'max_rows' can match more
# than one registered option (e.g. the styler render limits) and raise
# OptionError on newer pandas releases.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.pipeline import Pipeline

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

# Import Training Data

In [None]:
# Path of the training CSV, relative to the notebook's working directory.
train_csv_path = '../input/reducing-commercial-aviation-fatalities/train.csv'
# Read the whole file into the `train` DataFrame used by every later cell.
train = pd.read_csv(train_csv_path)

In [None]:
# Preview the first rows of the training data (pandas default row count).
train.head()

In [None]:
# (number of rows, number of columns) of the training data.
train.shape

In [None]:
# Summary statistics of the training data, as produced by DataFrame.describe().
train.describe()

# Visualizations

In [None]:
# Plot every signal column over time for one crew/seat in one experiment,
# marking in red the samples labelled with the chosen event.
crew = 3
seat = 0
exp = 'DA'
ev = 'D'

# Rows for the selected crew, experiment and seat, in chronological order.
sel = (train.crew == crew) & (train.experiment == exp) & (train.seat == seat)
pilot_info = train.loc[sel,:].sort_values(by='time')

# The event filter does not depend on which column is plotted, so build the
# filtered frame once instead of re-evaluating the mask twice per subplot.
event_rows = pilot_info.loc[pilot_info.event == ev,:]

plt.figure(figsize=[16,12])
# Columns at positions 4..26 (23 columns) go on a 6x4 grid, one subplot each;
# presumably these are the sensor channels — confirm against the CSV header.
for i in range(4, 27):
    plt.subplot(6,4,i-3)
    # Full trace for this column (drawn underneath) ...
    plt.plot(pilot_info.time, pilot_info.iloc[:,i], zorder=1)
    # ... with the samples of the selected event overlaid as small red dots.
    plt.scatter(event_rows.time, event_rows.iloc[:,i], c='red', zorder=2, s=1)
    plt.title(pilot_info.columns[i])

plt.tight_layout()
plt.show()

In [None]:
# Plot every signal column over time for one crew/seat in one experiment,
# marking in red the samples labelled with the chosen event.
crew = 3
seat = 0
exp = 'CA'
ev = 'B'

# Rows for the selected crew, experiment and seat, in chronological order.
sel = (train.crew == crew) & (train.experiment == exp) & (train.seat == seat)
pilot_info = train.loc[sel,:].sort_values(by='time')

# The event filter does not depend on which column is plotted, so build the
# filtered frame once instead of re-evaluating the mask twice per subplot.
event_rows = pilot_info.loc[pilot_info.event == ev,:]

plt.figure(figsize=[16,12])
# Columns at positions 4..26 (23 columns) go on a 6x4 grid, one subplot each;
# presumably these are the sensor channels — confirm against the CSV header.
for i in range(4, 27):
    plt.subplot(6,4,i-3)
    # Full trace for this column (drawn underneath) ...
    plt.plot(pilot_info.time, pilot_info.iloc[:,i], zorder=1)
    # ... with the samples of the selected event overlaid as small red dots.
    plt.scatter(event_rows.time, event_rows.iloc[:,i], c='red', zorder=2, s=1)
    plt.title(pilot_info.columns[i])

plt.tight_layout()
plt.show()

# Feature Engineering

I am going to engineer features based on three of the montages shown in the example notebook linked below. Each new feature is the difference between two EEG channels.
https://www.kaggle.com/code/stuartbman/introduction-to-physiological-data

## Montage 1

In [None]:
# Montage 1: for each listed electrode pair (a, b), add a column 'a_b' holding
# train['eeg_a'] - train['eeg_b']. Pairs are processed in the order listed, so
# the new columns are appended to `train` in that same order.
montage1_pairs = [
    ('f7', 'f8'),
    ('f3', 'f4'),
    ('t3', 't4'),
    ('c3', 'c4'),
    ('t5', 't6'),
    ('o1', 'o2'),
]
for first, second in montage1_pairs:
    train[f'{first}_{second}'] = train[f'eeg_{first}'] - train[f'eeg_{second}']

## Montage 2

In [None]:
# Montage 2: each chain lists electrodes in sequence; every adjacent pair
# (a, b) in a chain adds a column 'a_b' holding train['eeg_a'] - train['eeg_b'].
# Chains and pairs are processed in the order written, so columns are appended
# to `train` in that same order.
montage2_chains = [
    ['fp1', 'f7', 't3', 't5', 'o1'],
    ['fp1', 'f3', 'c3', 'p3', 'o1'],
    ['fz', 'cz', 'pz'],
    ['fp2', 'f8', 't4', 't6', 'o2'],
    ['fp2', 'f4', 'c4', 'p4', 'o2'],
]
for chain in montage2_chains:
    # zip(chain, chain[1:]) walks the consecutive electrode pairs of the chain.
    for first, second in zip(chain, chain[1:]):
        train[f'{first}_{second}'] = train[f'eeg_{first}'] - train[f'eeg_{second}']

## Montage 3

In [None]:
# Montage 3: each chain lists electrodes in sequence; every adjacent pair
# (a, b) in a chain adds a column 'a_b' holding train['eeg_a'] - train['eeg_b'].
# Chains and pairs are processed in the order written. Assigning 'o1_o2' again
# when it already exists just overwrites that column with the same difference.
montage3_chains = [
    ['fp1', 'fp2'],
    ['f7', 'f3', 'fz', 'f4', 'f8'],
    ['t3', 'c3', 'cz', 'c4', 't4'],
    ['t5', 'p3', 'pz', 'p4', 't6'],
    ['o1', 'o2'],
]
for chain in montage3_chains:
    # zip(chain, chain[1:]) walks the consecutive electrode pairs of the chain.
    for first, second in zip(chain, chain[1:]):
        train[f'{first}_{second}'] = train[f'eeg_{first}'] - train[f'eeg_{second}']

In [None]:
# Named groups of column labels used to pick feature subsets out of `train`.

# Positional slices of the current column index: positions 4..26 and 24..26.
# NOTE(review): these rely on the column order of `train`; presumably 24..26
# are the "ecg", "r", "gsr" columns that the montage lists append by name —
# confirm against the CSV header.
Original_Features = train.columns[4:27].to_list()
Other_Features = train.columns[24:27].to_list()

# Each montage list is that montage's difference columns followed by
# "ecg", "r" and "gsr".
Montage2 = ['fp1_f7', 'f7_t3', 't3_t5', 't5_o1', 'fp1_f3', 'f3_c3', 'c3_p3', 'p3_o1', 'fz_cz', 'cz_pz', 'fp2_f8', 'f8_t4', 't4_t6', 't6_o2', 'fp2_f4', 'f4_c4', 'c4_p4', 'p4_o2', "ecg", "r", "gsr"]
Montage1 = ['f7_f8','f3_f4','t3_t4','c3_c4','t5_t6','o1_o2',"ecg", "r", "gsr"]
# Every Montage 3 difference column appears exactly once. 'f7_f3' must be
# included and 't3_c3' must not be repeated, otherwise train[Montage3] would
# carry one feature twice and leave another out.
Montage3 = ['fp1_fp2','f7_f3','f3_fz','fz_f4','f4_f8','t3_c3','c3_cz','cz_c4','c4_t4','t5_p3','p3_pz','pz_p4','p4_t6','o1_o2', "ecg", "r", "gsr"]