**<center><font size=5>EEG Data Analysis</font></center>**

**<center>The Aura Project</center>**


#### Table of Contents

- <a href='#import'>1. Data Import</a>
- <a href='#pp'>2. Pre-Processing</a> 
- <a href='#fe'>3. Feature Extraction</a> 
 - <a href='#s1'>3.1. Data Sample for "S1 obj" Stimulus</a>
 - <a href='#s2m'>3.2. Data Sample for "S2 match" Stimulus</a>
 - <a href='#s2nm'>3.3. Data Sample for "S2 nomatch" Stimulus</a>
- <a href='#d'>4. Decoding</a> 
 - <a href='#corr_sample'>4.1. Correlations between the Regions</a>
 - <a href='#corr_viz'>4.2. Visualization of Correlations</a>
- <a href='#export'>5. Export our model for tflite</a> 
- <a href='#further'>6. Further steps</a>

# <a id='import'>1. Data Import</a>

In [None]:
import numpy as np
import pandas as pd 
import os
import random
from tqdm import tqdm
import matplotlib
import seaborn as sns
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import Input
from tensorflow.data import Dataset


## NOTE(review): init_notebook_mode is not imported in the visible cells; it is
## presumably plotly.offline.init_notebook_mode — confirm the import exists.
init_notebook_mode(connected=True) ## plotly init

## Seed Python's random module for reproducibility.
## random.seed must be *called*; assigning to it replaces the function with an
## int and leaves the generator unseeded.
seed = 123
random.seed(seed)

# <a id='pp'>2. Pre-Processing</a>

In [None]:
## report how many entries the training directory contains
print(
    f"Total amount of files in SMNI_CMI_TRAIN directory: "
    f"{len(os.listdir('../input/SMNI_CMI_TRAIN/'))}"
)

In [None]:
## Load every CSV file of the training directory into one dataframe.
train_dir = '../input/SMNI_CMI_TRAIN/'
filenames_list = os.listdir(train_dir) ## list of file names in the directory

## Read all files first and concatenate once: DataFrame.append was removed in
## pandas 2.0 and re-copied the whole accumulated frame on every iteration.
file_frames = [pd.read_csv(train_dir + file_name) for file_name in tqdm(filenames_list)]
## pd.concat rejects an empty list, so fall back to an empty frame as before
EEG_data = pd.concat(file_frames) if file_frames else pd.DataFrame({})

EEG_data = EEG_data.drop(['Unnamed: 0'], axis=1) ## remove the unused column
## remove the trailing comma from the stimulus name
EEG_data.loc[EEG_data['matching condition'] == 'S2 nomatch,', 'matching condition'] = 'S2 nomatch'

In [None]:
## preview the first rows of the combined EEG dataframe
EEG_data.head()

# <a id='fe'>3. Feature Extraction</a>

## <a id='s1'>3.1. Data Sample for "S1 obj" Stimulus</a>

In [None]:
## select the "S1 obj" stimulus and draw a data sample for it
## NOTE(review): sample_data is not defined in the visible cells — confirm what
## random_id selects and which columns the returned dataframe carries.
stimulus = 'S1 obj'
S1_sample_df = sample_data(stimulus=stimulus, random_id=1)

In [None]:
## plot the "S1 obj" sample for group 'a'
## NOTE(review): plot_3dSurface_and_heatmap is defined outside the visible cells.
plot_3dSurface_and_heatmap(stimulus=stimulus, group='a', df=S1_sample_df)

In [None]:
## plot the "S1 obj" sample for group 'c'
## NOTE(review): plot_3dSurface_and_heatmap is defined outside the visible cells.
plot_3dSurface_and_heatmap(stimulus=stimulus, group='c', df=S1_sample_df)

## <a id='s2m'>3.2. Data Sample for "S2 match" Stimulus</a>

In [None]:
## select the "S2 match" stimulus and draw a data sample for it
## NOTE(review): sample_data is not defined in the visible cells — confirm what
## random_id selects and which columns the returned dataframe carries.
stimulus = 'S2 match'
S2_m_sample_df = sample_data(stimulus=stimulus, random_id=1)

In [None]:
## plot the "S2 match" sample for group 'a'
## NOTE(review): plot_3dSurface_and_heatmap is defined outside the visible cells.
plot_3dSurface_and_heatmap(stimulus=stimulus, group='a', df=S2_m_sample_df)

In [None]:
## plot the "S2 match" sample for group 'c'
## NOTE(review): plot_3dSurface_and_heatmap is defined outside the visible cells.
plot_3dSurface_and_heatmap(stimulus=stimulus, group='c', df=S2_m_sample_df)

A big spike happened here for the control subject. The high response values (which correspond to the front right region of the head) could be caused by eye movement or a blink.

## <a id='s2nm'>3.3. Data Sample for "S2 nomatch" Stimulus</a>

In [None]:
## select the "S2 nomatch" stimulus and draw a data sample for it
## NOTE(review): sample_data is not defined in the visible cells — confirm what
## random_id selects and which columns the returned dataframe carries.
stimulus = 'S2 nomatch'
S2_nm_sample_df = sample_data(stimulus=stimulus, random_id=1)

In [None]:
## plot the "S2 nomatch" sample for group 'a'
## NOTE(review): plot_3dSurface_and_heatmap is defined outside the visible cells.
plot_3dSurface_and_heatmap(stimulus=stimulus, group='a', df=S2_nm_sample_df)

In [None]:
## plot the "S2 nomatch" sample for group 'c'
## NOTE(review): plot_3dSurface_and_heatmap is defined outside the visible cells.
plot_3dSurface_and_heatmap(stimulus=stimulus, group='c', df=S2_nm_sample_df)



| **Freaked** | **Relaxed** |
|--|--|
| <img src="https://i.ibb.co/kBmwypK/s2-nm-a.gif" alt="s2-nm-a" border="0"> | <img src="https://i.ibb.co/R9SQG0j/s2-nm-c.gif" alt="s2-nm-c" border="0"> |

After a quick glimpse, it looks like the response values are, on average, higher for the amplitude group; this can be fed as a parameter among the features for controlling Aura devices. 

# <a id='d'>4. Decoding</a> 

## <a id='corr_sample'>4.1 Correlations between the Regions</a>

The next step is to investigate the correlations between the firing rates at each sensor position. If two brain regions have a high correlation value, that means they tend to wire together.

In [None]:
## Build every unordered pair of sensor positions, each stored as a
## one-element list ['<first>-<second>'], from the group 'a' correlation matrix
## of the "S2 nomatch" sample.
sample_corr_df = pd.pivot_table(
    S2_nm_sample_df[S2_nm_sample_df['subject identifier'] == 'a'],
    values='sensor value',
    index='sample num',
    columns='sensor position',
).corr()

## pair each column label only with the row labels that come after it, so each
## unordered pair appears exactly once
list_of_pairs = [
    [first + '-' + second]
    for start, first in enumerate(sample_corr_df.columns, start=1)
    for second in sample_corr_df.index[start:]
    if first != second
]

In [None]:
def get_correlated_pairs_sample(threshold, correlation_df, group):
    """Print the channel pairs whose correlation is at least ``threshold``.

    Each unordered pair of distinct labels is visited once (column label
    paired with every row label positioned after it) and reported as
    ``'<column>-<row>'``, in traversal order.

    The pairs are derived from ``correlation_df`` itself rather than from a
    module-level pair list, so the function works for any square correlation
    matrix and cannot raise ``KeyError`` on an unknown pair.

    Parameters
    ----------
    threshold : float
        Minimum correlation value (inclusive) for a pair to be reported.
    correlation_df : pandas.DataFrame
        Square correlation matrix whose index and columns hold the same
        labels in the same order (e.g. the result of ``DataFrame.corr()``).
    group : str
        Group name used in the printed header.

    Returns
    -------
    None
        The result is printed, not returned.
    """
    correlated_pairs = []
    for col_pos, column in enumerate(correlation_df.columns):
        ## start below the diagonal so each unordered pair is checked once
        for row_pos in range(col_pos + 1, len(correlation_df)):
            row = correlation_df.index[row_pos]
            ## .iloc is explicitly positional; plain Series[int] on a
            ## label-indexed Series is deprecated in pandas 2.x
            if column != row and correlation_df.iloc[row_pos, col_pos] >= threshold:
                correlated_pairs.append(column + '-' + row)

    print('Channel pairs that have correlation value >= ' + str(threshold) + ' (' + group + ' group):')
    print(correlated_pairs)

## <a id='corr_viz'>4.2 Visualization of Correlations</a>

The next step is to investigate the correlations between the firing rates at each sensor position. If two brain regions have a high correlation value, that means they tend to wire together.

In [None]:
def _sensor_correlations(df, subject_identifier):
    """Return the sensor-by-sensor correlation matrix for one subject group."""
    group_df = df[df['subject identifier'] == subject_identifier]
    return pd.pivot_table(group_df, values='sensor value', index='sample num',
                          columns='sensor position').corr()


def _plot_correlation_heatmap(ax, correlations, title):
    """Draw the lower triangle of a correlation matrix as a heatmap on ``ax``."""
    ax.set_title(title, fontsize=14)
    ## mask the diagonal and upper triangle: they repeat the lower triangle.
    ## Use the builtin bool; the np.bool alias was removed in NumPy 1.24.
    mask = np.zeros_like(correlations, dtype=bool)
    mask[np.triu_indices_from(mask)] = True
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    sns.heatmap(correlations, mask=mask, cmap=cmap, vmin=-1, vmax=1, center=0,
                square=True, linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)


def plot_sensors_correlation(df, threshold_value):
    """Plot sensor-position correlations per group and print the strongest pairs.

    Draws two side-by-side heatmaps (subject identifier 'a', titled
    "Abnormal", and 'c', titled "Control group"), then prints for each group
    the channel pairs whose correlation is >= ``threshold_value``.

    Parameters
    ----------
    df : pandas.DataFrame
        Sample data with the columns 'subject identifier', 'sensor value',
        'sample num', 'sensor position' and 'matching condition'.
    threshold_value : float
        Minimum correlation for a channel pair to be listed.
    """
    correlations_a = _sensor_correlations(df, 'a')
    correlations_control = _sensor_correlations(df, 'c')

    fig = plt.figure(figsize=(17,10))
    _plot_correlation_heatmap(fig.add_subplot(121), correlations_a, 'Abnormal')
    _plot_correlation_heatmap(fig.add_subplot(122), correlations_control, 'Control group')

    ## the first 'matching condition' value in df names the stimulus in the title
    plt.suptitle('Correlation between Sensor Positions for ' + df['matching condition'].unique()[0] + ' stimulus', fontsize=16)
    plt.show()

    get_correlated_pairs_sample(threshold=threshold_value, correlation_df=correlations_a, group='Abnormal')
    print('\n')
    get_correlated_pairs_sample(threshold=threshold_value, correlation_df=correlations_control, group='Control')

In [None]:
## correlation heatmaps and pairs with correlation >= 0.97 for the "S1 obj" sample
plot_sensors_correlation(df=S1_sample_df, threshold_value=.97)

In [None]:
## correlation heatmaps and pairs with correlation >= 0.97 for the "S2 match" sample
plot_sensors_correlation(df=S2_m_sample_df, threshold_value=.97)

In [None]:
## correlation heatmaps and pairs with correlation >= 0.97 for the "S2 nomatch" sample
plot_sensors_correlation(df=S2_nm_sample_df, threshold_value=.97)

# <a id='export'>5. Export our model for tflite</a> 
We need to convert our model into a TFLite model.

In [None]:
## Convert the Keras model into a quantized TFLite model and save it to disk.
## NOTE(review): `model` is not defined in the visible cells — it must be a
## Keras model created earlier in the notebook.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]


def representative_dataset_gen():
    """Yield 10000 calibration samples for the converter.

    Each sample is a one-element list holding a float32 array of two
    uniform random values in [0, 1).
    """
    ## NOTE(review): assumes the model takes two input features — confirm
    ## against the model's input shape.
    for _ in range(10000):
        yield [
            np.array(
                [np.random.uniform(), np.random.uniform()],
                dtype=np.float32,
            )
        ]


converter.representative_dataset = representative_dataset_gen
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
tflite_quant_model = converter.convert()

## write through a context manager so the file is flushed and closed
with open("converted_model.tflite", "wb") as model_file:
    model_file.write(tflite_quant_model)

# <a id='further'>6. Further Steps</a> 

## To convert to C++
We can then run this command to convert the model into a C/C++ source file.




In [None]:
xxd -i converted_model.tflite > model_data.cc