# EEG Data from Hand Movements

In [None]:
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings(action='ignore')
%matplotlib inline

In [None]:
from keras.utils.np_utils import to_categorical
import tensorflow as tf
import gc
gc.enable()


## Read dataset


In [None]:
# Load user A's recording; this frame is the one used for the exploration cells.
dataset = pd.read_csv('../input/eeg-data-from-hands-movement/Dataset/user_a.csv', delimiter=',')

In [None]:
# Load the recordings of users a-d, one DataFrame per user, in that order.
dataset_users = [pd.read_csv('../input/eeg-data-from-hands-movement/Dataset/user_'+user+'.csv', delimiter=',') for user in ['a','b','c','d']]

In [None]:
# Show the first rows of user A's recording.
dataset.head()

In [None]:
# Print the frame summary (columns, dtypes, non-null counts).
dataset.info()

In [None]:
# Descriptive statistics of the columns.
dataset.describe()


## Data Exploration


In [None]:

# Name of the column that holds the class label.
target = 'Class'


In [None]:
col = dataset.columns       # all column labels of the frame
# Everything except the first column is treated as a feature
# (presumably the first column is the 'Class' label -- confirm against the CSV).
features = col[1:]
print(features)

In [None]:
# Number of rows per class label.
dataset[target].value_counts()

In [None]:
# Bar chart of the number of rows per class label.
sns.countplot(x=target, data=dataset, palette="bone")
plt.show()

In [None]:
#plotScatterMatrix(dataset, 20, 10)


## Data Analysis


In [None]:

# Absolute pairwise correlation of the features, flattened to one value per
# (feature, feature) pair, sorted ascending, with repeated values removed.
list_cor = (
    dataset[features]
    .corr()
    .unstack()
    .abs()
    .sort_values()
    .drop_duplicates()
    .to_frame(name='correlation_index')
)
# Keep the 32 pairs ranked just below the final entry; presumably that final
# entry is the 1.0 self-correlation, which is why it is skipped.
list_corr_high = list(list_cor.index[-33:-1])
list_corr_high


#### Missing data


In [None]:

# Per-feature count of missing values, largest first.
total = dataset[features].isna().sum().sort_values(ascending=False)
# Same information as a percentage of the number of rows.
percent = dataset[features].isna().mean().mul(100).sort_values(ascending=False)
missing = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
missing




## Data Preprocessing


In [None]:
def preprocess_inputs(df, features, target):
    """Turn one recording into scaled train/test inputs and one-hot labels.

    Steps:
      1. Rank the pairs of ``features`` by absolute correlation and, for each
         of the 32 selected pairs, add a column ``"<a>__<b>"`` holding the
         absolute difference of the two features.
      2. Use ``df[target]`` (one-hot encoded) as the label and every other
         column, including the new difference columns, as model input.
      3. Split 70/30 with a fixed seed, then standardise the inputs with a
         scaler fitted on the training part only.

    Args:
        df: DataFrame containing the ``target`` column and the ``features``
            columns. It is left unmodified.
        features: Labels of the columns used for the correlation ranking.
        target: Label of the class column; its values must be non-negative
            integers as required by ``to_categorical``.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``: the two scaled input
        arrays followed by the two one-hot label arrays.
    """
    # Absolute correlation of every feature pair, ascending, repeated values
    # dropped (this also collapses the symmetric (a, b)/(b, a) entries).
    list_cor = pd.DataFrame(df[features].corr().unstack().abs().sort_values().drop_duplicates())
    list_cor.columns = ['correlation_index']
    # The 32 pairs ranked just below the final entry; presumably that final
    # entry is the 1.0 self-correlation, which is why it is skipped.
    list_corr_high = list(list_cor[-33:-1]['correlation_index'].index)

    # Vectorised |a - b| per selected pair instead of a Python-level row loop.
    diff_columns = {
        '__'.join((first, second)): (df[first] - df[second]).abs()
        for first, second in list_corr_high
    }
    # ``assign`` returns a new frame, so the caller's DataFrame is not mutated.
    df = df.assign(**diff_columns)

    # Select the label by name rather than by column position.
    X = df.drop(columns=[target])
    y = to_categorical(df[target].to_numpy())

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=123)

    # Fit the scaler on the training rows only so no test statistics leak in.
    transformer = StandardScaler()
    X_train_transformer = transformer.fit_transform(X_train)
    X_test_transformer = transformer.transform(X_test)

    return X_train_transformer, X_test_transformer, y_train, y_test


## Models


### RNN

In [None]:
def build_model(X, n_classes=3):
    """Build and compile a stacked-LSTM softmax classifier.

    The flat feature vector of length ``k * k`` is reshaped to a ``(k, k)``
    matrix that the LSTM layers read as ``k`` time steps of ``k`` values.
    Layers are chained as
    Reshape -> LSTM(32) -> Dropout(0.4) -> LSTM(16) -> Dropout(0.4)
    -> Flatten -> Dense(softmax).

    Args:
        X: 2-D array-like whose ``shape[1]`` is the number of features; only
            the shape is used.
        n_classes: Number of output classes (width of the softmax layer).

    Returns:
        A compiled ``tf.keras.Model`` (Adam optimiser, categorical
        cross-entropy loss, accuracy metric).

    Raises:
        ValueError: If the number of features is not a positive perfect
            square, since the ``(k, k)`` reshape would otherwise be invalid.
    """
    n_features = X.shape[1]
    k2 = int(round(n_features ** 0.5))
    if k2 < 1 or k2 * k2 != n_features:
        raise ValueError(
            "build_model needs a perfect-square number of features to form a "
            "(k, k) sequence, got {}".format(n_features)
        )

    inputs = tf.keras.Input(shape=(n_features,))
    # A Reshape layer keeps the batch axis fixed, so rows can never be mixed.
    sequence = tf.keras.layers.Reshape((k2, k2))(inputs)

    # Each layer consumes the previous layer's output so that both LSTMs and
    # both Dropout layers are actually part of the model graph.
    hidden = tf.keras.layers.LSTM(32, return_sequences=True)(sequence)
    hidden = tf.keras.layers.Dropout(.4)(hidden)
    hidden = tf.keras.layers.LSTM(16, return_sequences=True)(hidden)
    hidden = tf.keras.layers.Dropout(.4)(hidden)

    flatten = tf.keras.layers.Flatten()(hidden)
    outputs = tf.keras.layers.Dense(n_classes, activation='softmax')(flatten)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

In [None]:
def train_model(dataset, features, target, build_model=build_model):
    """Preprocess one recording, fit a classifier on it and report accuracy.

    Args:
        dataset: DataFrame handed to ``preprocess_inputs``.
        features: Feature column labels handed to ``preprocess_inputs``.
        target: Label column name handed to ``preprocess_inputs``.
        build_model: Callable that receives the training inputs and returns a
            compiled model.

    Returns:
        The history object returned by the model's ``fit`` call.
    """
    X_train, X_test, y_train, y_test = preprocess_inputs(dataset, features, target)
    class_model = build_model(X_train)

    # Stop once val_loss has not improved for 3 epochs and roll back to the
    # best weights seen.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=3,
        restore_best_weights=True
    )
    history = class_model.fit(
        X_train,
        y_train,
        validation_split=0.3,
        batch_size=32,
        epochs=30,
        verbose=0,
        callbacks=[early_stopping]
    )

    # Mean and spread of the per-epoch training accuracy.
    epoch_accuracy = history.history['accuracy']
    print("Accuracy: %.2f%% (+/- %.2f%%)" % (np.mean(epoch_accuracy)*100, np.std(epoch_accuracy)*100))

    # Index 1 of the evaluate() result is read as the accuracy value.
    class_acc = class_model.evaluate(X_test, y_test, verbose=0)[1]
    print("Test Accuracy (Class Model): {:.2f}%".format(class_acc * 100))

    # Predicted and true class indices; the report is built but not printed.
    probabilities = class_model.predict(X_test)
    y_pred = np.array([np.argmax(row) for row in probabilities])
    report = classification_report(y_test.argmax(axis=-1), y_pred)

    return history

In [None]:
def plot_accuracy_history(history):
    """Plot the per-epoch accuracy curves stored in ``history.history``.

    Draws the 'accuracy' series followed by the 'val_accuracy' series; the
    legend labels them 'train' and 'test' respectively.
    """
    for series_key in ('accuracy', 'val_accuracy'):
        plt.plot(history.history[series_key])
    plt.title('model accuracy')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

In [None]:
def plot_loss_history(history):
    """Plot the per-epoch loss curves stored in ``history.history``.

    Draws the 'loss' series followed by the 'val_loss' series; the legend
    labels them 'train' and 'test' respectively.
    """
    for series_key in ('loss', 'val_loss'):
        plt.plot(history.history[series_key])
    plt.title('model loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

In [None]:
# Train a separate model on each user's recording and plot its
# accuracy and loss curves.
for dfs in dataset_users:
    history = train_model(dfs, features, target)
    plot_accuracy_history(history)
    plot_loss_history(history)