In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_selection import mutual_info_classif
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# The approach to solving this type of problem consists of the steps below:
### 1. Explore the dataset (*check the shape* of the dataset)
### 2. Look for the object/cat datatype among the columns
### 3. Fill in all of the Null Values
### 4. Select the best features for model-building
### 5. Normalize the features
### 6. Split the datasets
### 7. Build the neural net (use 'softmax' activation for the last layer when doing multi-class classification)
### 8. Compile the model
### 9. Fit the model
### 10. Evaluate!

# Importing and Exploring dataset

In [None]:
# Location of the EEG emotions CSV inside the Kaggle input directory
EMOTIONS_CSV = '../input/eeg-brainwave-dataset-feeling-emotions/emotions.csv'
df = pd.read_csv(EMOTIONS_CSV)

In [None]:
# (number of rows, number of columns) of the loaded frame
df.shape

In [None]:
# Column labels of the loaded frame; the target column is 'label'
df.columns

In [None]:
# Number of rows per class in the target column; similar counts indicate a balanced dataset
label_counts = df['label'].value_counts()
label_counts

### Dataset is balanced!

### Checking the dataset for *missing* values

In [None]:
# Count nulls per column once, then print the name of every column that has any
null_counts = df.isnull().sum()
for col in null_counts[null_counts > 0].index:
    print(col)

### No missing value in dataset

### Checking the total number of Categorical features in dataset

In [None]:
# Print the name of every column stored with the generic 'object' dtype
col_dtypes = df.dtypes
for col in col_dtypes[col_dtypes == 'object'].index:
    print(col)

## All of the feature-columns are numeric

# Identifying the top-features related to our dataset

In [None]:
# Separating the target column ('label') from the feature columns
y = df['label']
df_fea = df.drop(columns='label')

In [None]:
# mutual_info_classif estimates the mutual information between each feature and
# the target: 0 means independent, larger values mean stronger dependence (it is
# not a percentage). The estimator is randomised, so random_state is fixed here;
# otherwise the ranking -- and the feature subset cut from it by position
# further down -- could change from run to run.
mi_score = mutual_info_classif(df_fea, y, random_state=108)

In [None]:
# One-hot encode the labels: one column per class, 1.0 for the row's class and 0.0 elsewhere.
# The dtype is pinned because get_dummies' default output dtype depends on the pandas
# version (bool in pandas 2.x, uint8 before); float32 gives literal 0/1 targets either way.
y = pd.get_dummies(df['label'], dtype='float32')

In [None]:
# Key the raw scores by feature name, multiply them by 100, and rank the
# features from highest to lowest score
mi_score = (
    pd.Series(mi_score, index=df_fea.columns)
    .mul(100)
    .sort_values(ascending=False)
)

## Selecting features with more than 10% MI-score

In [None]:
# mi_score is sorted from highest to lowest, so slicing off the tail of its index
# keeps the names of the best-ranked columns. The lowest-ranked 367 columns are
# dropped because they mostly add computation cost.
N_LOW_MI_DROPPED = 367
top_fea = mi_score.index[:-N_LOW_MI_DROPPED]

# Scaling the dataset

In [None]:
# Standardise the selected feature columns (zero mean, unit variance per column).
# NOTE(review): the scaler is fitted on every row, before any train/test split,
# so the scaling statistics include rows that later end up in the test set.
df_sc = StandardScaler().fit_transform(df_fea.loc[:, top_fea])

# Splitting the dataset into
### * X_train, Y_train
### * X_test, Y_test
### * X_val, Y_val

In [None]:
# Split the *unscaled* selected features first, then fit the scaler on the
# training rows only. df_sc was standardised with statistics from every row,
# so splitting it would leak validation/test information into training.
# train_test_split picks rows from the row count, test_size and random_state
# only, so the row assignment is the same as it would be when splitting df_sc.
X_sel = df_fea[top_fea]
Xtr_raw, xte_raw, Ytr, yte = train_test_split(X_sel, y, random_state=108, test_size=0.27)
xtr_raw, xval_raw, ytr, yval = train_test_split(Xtr_raw, Ytr, random_state=108, test_size=0.27)

# Scaling statistics come from the training rows alone and are re-used unchanged
# for the validation and test rows.
split_scaler = StandardScaler().fit(xtr_raw)
xtr = split_scaler.transform(xtr_raw)
xval = split_scaler.transform(xval_raw)
xte = split_scaler.transform(xte_raw)
Xtr = split_scaler.transform(Xtr_raw)  # train + validation rows, kept under the original name

# Building/Compiling/Fitting the Neural-Net to identify the emotion

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import callbacks,layers

In [None]:
# Building the model
# Seed TensorFlow (weight initialisation, dropout masks) with the same seed used
# for the data splits so that repeated runs are more comparable.
tf.random.set_seed(108)

# Take the layer widths at both ends from the data instead of hard-coding them:
# a hard-coded input width breaks as soon as the number of selected features changes.
n_features = xtr.shape[1]  # columns in the scaled training matrix
n_classes = ytr.shape[1]   # one output unit per one-hot label column

# (units, dropout rate) for each hidden block: Dense(relu) -> BatchNormalization -> Dropout
hidden_blocks = [
    (2181, 0.27),
    (3181, 0.3),
    (4181, 0.32),
    (2581, 0.27),
    (2381, 0.32),
    (2181, 0.27),
]

net_layers = []
for block_no, (units, drop_rate) in enumerate(hidden_blocks):
    # Only the first layer declares the input shape
    first_layer_kwargs = {'input_shape': (n_features,)} if block_no == 0 else {}
    net_layers.append(layers.Dense(units=units, activation='relu', **first_layer_kwargs))
    net_layers.append(layers.BatchNormalization())  # re-normalises the activations between layers
    net_layers.append(layers.Dropout(drop_rate))    # randomly drops units during training to reduce overfitting
# Softmax output: one probability per class for multi-class classification
net_layers.append(layers.Dense(units=n_classes, activation='softmax'))
model = keras.Sequential(net_layers)

# Compiling the model
# `adam` is defined as an alternative optimizer but is not the one passed to compile() below.
adam = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07)
adamax = keras.optimizers.Adamax(learning_rate=0.00085, beta_1=0.9, beta_2=0.999, epsilon=1e-07)
# categorical_crossentropy is the loss for one-hot encoded multi-class targets;
# accuracy reports the fraction of rows whose predicted class matches the label.
model.compile(optimizer=adamax, loss='categorical_crossentropy', metrics=['accuracy'])

# Fitting the model
# Early stopping saves time: training halts after 10 epochs without an improvement
# of at least 0.0001 in the monitored quantity, and the best weights are restored.
call = callbacks.EarlyStopping(patience=10, min_delta=0.0001, restore_best_weights=True)
history = model.fit(xtr, ytr, validation_data=(xval, yval), batch_size=28, epochs=50, callbacks=[call])

#### Time to *Evaluate*!

In [None]:
# Score the trained model on the test split (xte, yte), which was not passed to model.fit
model.evaluate(xte,yte)

# More than **98%** accuracy!