In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics import balanced_accuracy_score
from biosppy.signals import eeg
from biosppy.signals import emg
from scipy.stats import pearsonr
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb

In [3]:
# Read the three training signal tables (two EEG files, one EMG file)
# in a fixed order, then the label column `y` from the labels file.
eeg_1, eeg_2, emg_1 = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/train_eeg1.csv",
        "../data/train_eeg2.csv",
        "../data/train_emg.csv",
    )
)
y = pd.read_csv("../data/train_labels.csv")["y"]

In [31]:
# Read the test-set counterparts of the three signal tables, in the same
# EEG1 / EEG2 / EMG order as the training files.
eeg_1_test, eeg_2_test, emg_1_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/test_eeg1.csv",
        "../data/test_eeg2.csv",
        "../data/test_emg.csv",
    )
)

In [35]:
del eeg_1, eeg_2, emg_1, eeg_1_test, eeg_2_test, emg_1_test

In [34]:
eeg_1_tl = pd.concat([eeg_1,eeg_1_test])
eeg_2_tl = pd.concat([eeg_2,eeg_2_test])
emg_1_tl = pd.concat([emg_1,emg_1_test])

In [5]:
# Run the biosppy EEG pipeline on the first row of eeg_1 (columns 1 onward),
# reshaped into a single-column 2-D array, without plotting.
eeg_features1 = eeg.eeg(
    signal=np.array(eeg_1.iloc[0, 1:]).reshape(-1, 1),
    sampling_rate=128,
    show=False,
)

In [10]:
# Unpack the EEG result positionally: items 4-10 are kept as named band arrays,
# the other five are discarded. The unpacking requires exactly 12 items.
# NOTE(review): presumably this relies on a biosppy build whose eeg.eeg also
# returns `alpha` and `delta` -- confirm against the installed version.
_, _, _, theta, alpha_low , alpha_high, beta, gamma, alpha, delta, _, _ = eeg_features1

In [27]:
# Band arrays to summarise next; alpha_low / alpha_high are intentionally not in the list.
signals = [theta, alpha, beta, gamma, delta]

In [29]:
# One [mean, median, std, min, max] array per band in `signals`.
# calculate_basic_stats is defined in a cell further down this notebook, so
# that cell has to be executed before this one.
my_features = list(map(calculate_basic_stats, signals))

In [30]:
# Display the list of per-band statistic arrays built in the previous cell.
my_features

[array([3.91706364e-11, 4.03346199e-11, 1.74717498e-11, 1.33670501e-12,
        8.49887725e-11]),
 array([2.65352610e-11, 2.21582652e-11, 1.22684866e-11, 8.91189764e-12,
        4.81740407e-11]),
 array([2.82002115e-12, 2.72620134e-12, 1.48187505e-12, 5.47202108e-13,
        5.87946611e-12]),
 array([1.15948423e-12, 1.23931937e-12, 3.92539799e-13, 2.09161057e-13,
        2.18048069e-12]),
 array([1.59892814e-11, 1.90659742e-11, 9.94048083e-12, 8.23779279e-13,
        4.04599253e-11])]

In [26]:
# Row limit used by produce_features when walking the input tables.
RANGE_MAX = 64800
# Sampling rate handed to the biosppy EEG and EMG pipelines in produce_features.
SAMPLING_RATE = 128
# Value passed as `frequency=` to emg.emg in produce_features
# (presumably a filter cut-off -- TODO confirm units and meaning).
EMG_THRESHOLD_FREQ = 30

def calculate_basic_stats(signal):
    """Summarise a signal as ``[mean, median, std, min, max]``.

    Each statistic is obtained by applying the matching NumPy reduction to
    ``signal`` as a whole; the five results are returned together as one
    NumPy array, always in that order.
    """
    reducers = (np.mean, np.median, np.std, np.min, np.max)
    return np.array([reducer(signal) for reducer in reducers])

def _band_and_ratio_arrays(theta, alpha, beta, gamma, delta):
    """Return the five band arrays followed by the six derived ratio arrays."""
    return [
        theta, alpha, beta, gamma, delta,
        alpha / theta,
        beta / theta,
        alpha / delta,
        delta / theta,
        (delta * alpha) / (beta * gamma),
        np.power(theta, 2) / (delta * alpha),
    ]


def _eeg_channel_features(epoch):
    """Run eeg.eeg on one epoch and return its band and ratio arrays."""
    result = eeg.eeg(signal=np.array(epoch).reshape(-1, 1),
                     sampling_rate=SAMPLING_RATE, show=False)
    # Positional unpacking: exactly 12 items are expected from eeg.eeg.
    # NOTE(review): presumably a biosppy build that also returns alpha/delta.
    _, _, _, theta, _, _, beta, gamma, alpha, delta, _, _ = result
    return _band_and_ratio_arrays(theta, alpha, beta, gamma, delta)


def produce_features(eeg_1, eeg_2, emg_1, n_rows=None):
    """Build one flat feature row per epoch from two EEG tables and one EMG table.

    For every processed row position ``i`` the signal is taken from columns
    ``1:`` of each table (the first column is skipped). Each row of the result
    is laid out as:

    * EEG table 1: theta, alpha, beta, gamma, delta, then the ratios
      alpha/theta, beta/theta, alpha/delta, delta/theta,
      (delta*alpha)/(beta*gamma), theta**2/(delta*alpha), each flattened;
    * the same eleven arrays for EEG table 2;
    * ``[mean, median, std, min, max]`` of the EMG samples at the detected
      onsets (five zeros when no onset is found);
    * the number of EMG onsets.

    Ratios are plain element-wise divisions, so zero denominators yield
    inf/NaN values in the output.

    Parameters
    ----------
    eeg_1, eeg_2, emg_1 : pandas.DataFrame
        Tables indexed positionally; row ``i`` of each is treated as one epoch.
    n_rows : int, optional
        Number of leading rows to process. When omitted, ``RANGE_MAX`` rows
        are processed, clamped to the shortest input so that shorter tables
        no longer raise ``IndexError``.

    Returns
    -------
    pandas.DataFrame
        One row per processed epoch, with a fresh 0..n-1 index.
    """
    if n_rows is None:
        n_rows = min(RANGE_MAX, len(eeg_1), len(eeg_2), len(emg_1))

    rows = []
    for i in range(n_rows):
        channel_1 = _eeg_channel_features(eeg_1.iloc[i, 1:])
        channel_2 = _eeg_channel_features(eeg_2.iloc[i, 1:])

        emg_signal = emg_1.iloc[i, 1:]
        emg_result = emg.emg(signal=emg_signal, sampling_rate=SAMPLING_RATE,
                             frequency=EMG_THRESHOLD_FREQ, show=False)
        # Onsets are positions within the signal passed to emg.emg, so they are
        # looked up in that same slice; indexing the full row would shift every
        # position by one because of the skipped first column.
        onsets = np.asarray(emg_result['onsets'], dtype=int)
        onset_values = np.asarray(emg_signal, dtype=float)[onsets]
        count_emg = np.size(onset_values)
        # Statistics are only defined when at least one onset exists.
        emg_stats = calculate_basic_stats(onset_values) if count_emg > 0 else np.zeros(5)

        parts = channel_1 + channel_2 + [emg_stats]
        flat = np.concatenate([np.ravel(part) for part in parts])
        rows.append(np.append(flat, count_emg))

    # Build the frame once instead of concatenating inside the loop.
    return pd.DataFrame(rows)

In [None]:
# Build the feature table from the concatenated train+test signals and save it
# as CSV without the index column.
# Note: produce_features walks row positions up to RANGE_MAX, not the full
# length of the frames passed in, so check the printed shape against the
# number of rows expected in the file.
features_tr = produce_features(eeg_1_tl, eeg_2_tl, emg_1_tl)
#features_tr.head()
features_tr.to_csv("features_all_tr_test.csv",index=False)
# Last expression: (rows, columns) of the exported table.
features_tr.shape

In [None]:
# Balanced accuracy of y_pred against y_true.
# NOTE(review): neither y_true nor y_pred is assigned in the cells shown here;
# this cell only runs if they were defined elsewhere in the kernel -- confirm.
BMAC = balanced_accuracy_score(y_true, y_pred)

In [None]:
# Same EEG pipeline call as earlier, but on the first row of eeg_2.
# The result is stored under the name eeg_features1 again, replacing the
# value computed from eeg_1.
eeg_features1 = eeg.eeg(
    signal=np.array(eeg_2.iloc[0, 1:]).reshape(-1, 1),
    sampling_rate=128,
    show=False,
)

In [None]:
# Run the EMG pipeline on the row at position 1 of emg_1 (columns 1 onward),
# with plotting switched on for visual inspection.
emg_features = emg.emg(
    signal=emg_1.iloc[1, 1:],
    sampling_rate=128,
    frequency=30,
    show=True,
)

In [None]:
# Shape of the 'onsets' entry of the EMG result computed in the previous cell.
emg_features['onsets'].shape

In [None]:
# Display the 'onsets' entry itself.
emg_features['onsets']

In [None]:
# Differences between consecutive EMG samples taken at the detected onsets.
# The onsets are positions within the signal given to emg.emg
# (emg_1.iloc[1, 1:]), so they are looked up in that same slice; indexing the
# full row would shift every position by one because of the skipped first column.
np.diff(emg_1.iloc[1, 1:].iloc[emg_features['onsets']])

In [None]:
# Reload a previously exported feature table and the training labels.
# NOTE(review): the export cell in this notebook writes
# "features_all_tr_test.csv", not "features_all.csv" -- confirm which file
# this cell is meant to read.
features_df = pd.read_csv("features_all.csv")
y = pd.read_csv("../data/train_labels.csv")["y"]

In [None]:
# (rows, columns) of the reloaded feature table.
features_df.shape

In [None]:
# NaN/inf replacement is currently switched off; re-enable the line below if
# the models reject the raw features.
#features_df = pd.DataFrame(np.nan_to_num(features_df))

# Rows before this position form the fitting fold (X_sub_1_2 / y_sub_1_2);
# all remaining rows are held out for evaluation (X_sub_3 / y_sub_3).
TRAIN_FOLD_ROWS = 43200

# The split is purely positional, so features and labels must line up row for row.
assert len(features_df) == len(y), (
    f"features ({len(features_df)} rows) and labels ({len(y)} rows) are misaligned"
)

X_sub_1_2 = features_df.iloc[:TRAIN_FOLD_ROWS, :]
y_sub_1_2 = y.iloc[:TRAIN_FOLD_ROWS]
X_sub_3 = features_df.iloc[TRAIN_FOLD_ROWS:, :]
y_sub_3 = y.iloc[TRAIN_FOLD_ROWS:]

In [None]:
# Random forest on the fitting fold. random_state pins the bootstrap sampling
# and feature selection so that re-running the notebook grows the same forest.
cl = RandomForestClassifier(n_estimators=200, random_state=42)
cl.fit(X_sub_1_2, y_sub_1_2)

In [None]:
# Gradient-boosted classifier fitted on the same fold as the random forest.
model = xgb.XGBClassifier(
    n_estimators=200,
    learning_rate=0.1,
    max_depth=3,
)
model.fit(X_sub_1_2, y_sub_1_2)

In [None]:
# Predict with `model` on X_sub_3, i.e. the rows that were not part of the
# X_sub_1_2 fold it was fitted on. Despite its name, y_train_pred therefore
# holds held-out predictions.
y_train_pred = model.predict(X_sub_3)
# Last expression: balanced accuracy of those predictions against y_sub_3.
balanced_accuracy_score(y_sub_3, y_train_pred)