<a href="https://colab.research.google.com/github/lingelizabeth/senior-research/blob/master/Final_Notebook_RAVDESS_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **RAVDESS Training and Evaluation**
This notebook trains a neural network on analyzed RAVDESS (Ryerson Audio-Visual Database of Emotional Speech and Song) speech data to predict 8 emotion classes. It uses a tiered model structure that first trains a Random Forest to determine the positive/negative affect of a sample, then uses that predicted label as an input to a neural net. The final neural net accuracy is 81.25% across all classes.

## **Load and clean data**
The inputs are CSV files generated by openSMILE, an audio feature extraction package, and they include statistical measures of audio characteristics for each sample.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import math
import os
import io
import pandas as pd
import random
from sklearn import metrics
from google.colab import files

from scipy.interpolate import *
from scipy.signal import *
from scipy.io import *

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn import tree
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from itertools import cycle

from sklearn import svm, datasets
from sklearn.metrics import roc_curve, auc, confusion_matrix, accuracy_score
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

In [None]:
# Open Colab's upload prompt and keep the result in `uploaded`; the next cell reads
# uploaded['emobaseResults.csv'] and decodes it as UTF-8.
uploaded = files.upload() # upload CSV output of openSMILE run

Saving emobaseResults.csv to emobaseResults.csv


In [None]:
# read the openSMILE CSV export into a pandas dataframe
df = pd.read_csv(io.StringIO(uploaded['emobaseResults.csv'].decode('utf-8')))

# Use only every other line (rows 0, 2, 4, ...) and renumber the kept rows 0..n-1.
# reset_index replaces the earlier reindex(range(1403), method='backfill'):
# backfilling copied each kept row into the gap left by its dropped neighbour
# (producing duplicate samples) and discarded every row past original index 1402.
df = df.iloc[::2].reset_index(drop=True)

# drop unnecessary columns: pandas' auto-named 'Unnamed...' columns plus the
# sample name and frame time, which are not audio features
new_df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
new_df = new_df.drop([' name ', ' frameTime '], axis=1)

In [None]:
# Label each row as a "positive" (1) or "negative" (0) emotion from its actual
# emotion code: codes 1, 2, 3 and 8 count as positive, every other code as negative.
# Vectorised with isin() instead of looping over iterrows().
is_positive = df[' emotion '].astype(int).isin([1, 2, 3, 8])
positive = is_positive.astype(int).tolist()  # 1 for positive, 0 for negative

# Append this column to the dataframe for later validation. The Series carries
# df's index, so each label lands on the row it was computed from.
new_df['positive'] = is_positive.astype(int)

In [None]:
# Scale the audio features to have a mean of 0 and unit variance (standard deviation of 1).
# The label columns are removed *before* fitting so the scaler only ever sees
# features. StandardScaler works column by column, so the scaled feature values
# are the same as when the labels were scaled and then thrown away.
# NOTE(review): the scaler is still fit on all rows before the train/validation
# split, so validation statistics influence the scaling -- consider fitting on
# the training rows only.
feature_df = new_df.drop([' emotion ', 'positive'], axis=1)
scaler = StandardScaler()

# rebuild a dataframe with a 0..n-1 index and the original feature names
scaled_new_df = pd.DataFrame(
    scaler.fit_transform(feature_df),
    index = range(0, feature_df.shape[0]),
    columns = feature_df.columns.values,
)
scaled_new_df.head(10)  # preview only; doesn't include labels, so we use this as training input

Unnamed: 0,pcm_intensity_sma_max,pcm_intensity_sma_min,pcm_intensity_sma_range,pcm_intensity_sma_maxPos,pcm_intensity_sma_minPos,pcm_intensity_sma_amean,pcm_intensity_sma_linregc1,pcm_intensity_sma_linregc2,pcm_intensity_sma_linregerrA,pcm_intensity_sma_linregerrQ,pcm_intensity_sma_stddev,pcm_intensity_sma_skewness,pcm_intensity_sma_kurtosis,pcm_intensity_sma_quartile1,pcm_intensity_sma_quartile2,pcm_intensity_sma_quartile3,pcm_intensity_sma_iqr1-2,pcm_intensity_sma_iqr2-3,pcm_intensity_sma_iqr1-3,pcm_loudness_sma_max,pcm_loudness_sma_min,pcm_loudness_sma_range,pcm_loudness_sma_maxPos,pcm_loudness_sma_minPos,pcm_loudness_sma_amean,pcm_loudness_sma_linregc1,pcm_loudness_sma_linregc2,pcm_loudness_sma_linregerrA,pcm_loudness_sma_linregerrQ,pcm_loudness_sma_stddev,pcm_loudness_sma_skewness,pcm_loudness_sma_kurtosis,pcm_loudness_sma_quartile1,pcm_loudness_sma_quartile2,pcm_loudness_sma_quartile3,pcm_loudness_sma_iqr1-2,pcm_loudness_sma_iqr2-3,pcm_loudness_sma_iqr1-3,mfcc_sma[1]_max,mfcc_sma[1]_min,...,voiceProb_sma_de_iqr2-3,voiceProb_sma_de_iqr1-3,F0_sma_de_max,F0_sma_de_min,F0_sma_de_range,F0_sma_de_maxPos,F0_sma_de_minPos,F0_sma_de_amean,F0_sma_de_linregc1,F0_sma_de_linregc2,F0_sma_de_linregerrA,F0_sma_de_linregerrQ,F0_sma_de_stddev,F0_sma_de_skewness,F0_sma_de_kurtosis,F0_sma_de_quartile1,F0_sma_de_quartile2,F0_sma_de_quartile3,F0_sma_de_iqr1-2,F0_sma_de_iqr2-3,F0_sma_de_iqr1-3,F0env_sma_de_max,F0env_sma_de_min,F0env_sma_de_range,F0env_sma_de_maxPos,F0env_sma_de_minPos,F0env_sma_de_amean,F0env_sma_de_linregc1,F0env_sma_de_linregc2,F0env_sma_de_linregerrA,F0env_sma_de_linregerrQ,F0env_sma_de_stddev,F0env_sma_de_skewness,F0env_sma_de_kurtosis,F0env_sma_de_quartile1,F0env_sma_de_quartile2,F0env_sma_de_quartile3,F0env_sma_de_iqr1-2,F0env_sma_de_iqr2-3,F0env_sma_de_iqr1-3
0,-0.358004,-0.063948,-0.358004,-0.425966,-0.357600,-0.372978,-0.110233,-0.360054,-0.370920,-0.207733,-0.370229,1.541522,1.614191,-0.151970,-0.245872,-0.355657,-0.245038,-0.354900,-0.355645,-0.688046,-0.22482,-0.687917,-0.443099,-0.355088,-0.839638,-0.526969,-0.788326,-0.743815,-0.549534,-0.736192,0.643848,0.283861,-0.772470,-0.827298,-0.828167,-0.767562,-0.721929,-0.806741,0.818641,-1.720579,...,0.180522,0.189353,0.256959,1.270975,-0.581420,-0.455488,-0.427079,-0.419629,0.217784,-0.518448,-1.090196,-1.065740,-1.126694,1.592693,1.217624,0.765308,0.004354,-0.749861,-0.765465,-0.749870,-0.762834,0.820371,-0.240760,0.774369,1.051967,-0.355557,-0.831266,0.794192,-1.080115,-1.149000,0.077856,0.136447,1.663769,1.811967,0.736455,-0.134387,-0.885552,-0.733749,-0.888054,-0.855739
1,-0.361132,-0.063948,-0.361132,-1.073305,-0.357600,-0.371206,-0.114189,-0.356958,-0.368987,-0.207731,-0.369604,-0.353029,-0.548605,-0.151970,-0.245865,-0.355281,-0.245031,-0.354520,-0.355269,-0.635093,-0.22482,-0.634938,-1.084394,-0.355088,-0.791856,-0.515383,-0.738610,-0.700249,-0.527630,-0.663270,1.148368,0.694586,-0.772470,-0.824063,-0.839349,-0.763744,-0.736828,-0.818357,0.338367,-0.535794,...,-0.325261,-0.379922,-1.285977,2.629427,-2.154156,-0.446307,-0.390426,-0.419629,0.200300,-0.492623,-1.026204,-1.220737,-1.378064,2.441468,0.161574,0.765308,0.004354,-0.749861,-0.765465,-0.749870,-0.762834,-0.167529,0.528702,-0.324628,1.091382,-0.580352,-1.591666,0.990488,-1.452352,-1.002366,-0.611619,-0.535874,0.667934,0.641477,0.736455,-0.134387,-0.885552,-0.733749,-0.888054,-0.855739
2,-0.361132,-0.063948,-0.361132,-1.073305,-0.357600,-0.371206,-0.114189,-0.356958,-0.368987,-0.207731,-0.369604,-0.353029,-0.548605,-0.151970,-0.245865,-0.355281,-0.245031,-0.354520,-0.355269,-0.635093,-0.22482,-0.634938,-1.084394,-0.355088,-0.791856,-0.515383,-0.738610,-0.700249,-0.527630,-0.663270,1.148368,0.694586,-0.772470,-0.824063,-0.839349,-0.763744,-0.736828,-0.818357,0.338367,-0.535794,...,-0.325261,-0.379922,-1.285977,2.629427,-2.154156,-0.446307,-0.390426,-0.419629,0.200300,-0.492623,-1.026204,-1.220737,-1.378064,2.441468,0.161574,0.765308,0.004354,-0.749861,-0.765465,-0.749870,-0.762834,-0.167529,0.528702,-0.324628,1.091382,-0.580352,-1.591666,0.990488,-1.452352,-1.002366,-0.611619,-0.535874,0.667934,0.641477,0.736455,-0.134387,-0.885552,-0.733749,-0.888054,-0.855739
3,-0.353465,-0.063948,-0.353465,-0.955607,-0.288195,-0.370076,-0.112802,-0.356216,-0.368376,-0.207722,-0.367325,1.868230,2.095782,-0.151970,-0.245872,-0.349271,-0.245038,-0.348454,-0.349258,-0.529608,-0.22482,-0.529399,-0.967795,-0.286955,-0.733519,-0.499832,-0.678472,-0.616441,-0.517197,-0.631056,0.207418,0.074863,-0.772470,-0.834378,-0.653266,-0.775920,-0.504458,-0.625064,0.750789,-0.107583,...,-0.298794,-0.433784,-3.439340,3.418438,-3.725629,-0.207602,-0.216322,-0.419629,0.350098,-0.667709,-1.474935,-1.533535,-2.021954,-0.699495,0.268193,0.765308,0.004354,-0.749861,-0.765465,-0.749870,-0.762834,-2.316540,2.235815,-2.726869,1.051967,-0.018366,-1.771022,1.196268,-1.695392,-2.249022,-2.286712,-2.955392,0.624438,0.168367,0.736455,-0.134387,-0.885552,-0.733749,-0.888054,-0.855739
4,-0.353465,-0.063948,-0.353465,-0.955607,-0.288195,-0.370076,-0.112802,-0.356216,-0.368376,-0.207722,-0.367325,1.868230,2.095782,-0.151970,-0.245872,-0.349271,-0.245038,-0.348454,-0.349258,-0.529608,-0.22482,-0.529399,-0.967795,-0.286955,-0.733519,-0.499832,-0.678472,-0.616441,-0.517197,-0.631056,0.207418,0.074863,-0.772470,-0.834378,-0.653266,-0.775920,-0.504458,-0.625064,0.750789,-0.107583,...,-0.298794,-0.433784,-3.439340,3.418438,-3.725629,-0.207602,-0.216322,-0.419629,0.350098,-0.667709,-1.474935,-1.533535,-2.021954,-0.699495,0.268193,0.765308,0.004354,-0.749861,-0.765465,-0.749870,-0.762834,-2.316540,2.235815,-2.726869,1.051967,-0.018366,-1.771022,1.196268,-1.695392,-2.249022,-2.286712,-2.955392,0.624438,0.168367,0.736455,-0.134387,-0.885552,-0.733749,-0.888054,-0.855739
5,-0.362988,-0.063948,-0.362988,-0.975223,-0.315957,-0.373408,-0.111544,-0.360225,-0.371965,-0.207742,-0.373179,-0.350657,-0.386572,-0.151970,-0.245886,-0.348675,-0.245052,-0.347852,-0.348662,-0.776739,-0.22482,-0.776656,-1.026094,-0.314208,-0.784837,-0.479427,-0.738945,-0.691461,-0.542620,-0.712321,-0.272370,-0.485030,-0.772470,-0.834378,-0.697338,-0.775920,-0.558618,-0.670843,0.759848,0.434563,...,-0.622068,-0.451462,-2.308983,2.447618,-2.587175,-0.464669,0.544237,-0.419629,0.179490,-0.511459,-1.164215,-1.299180,-1.518313,0.777428,0.064389,0.765308,0.004354,-0.749861,-0.765465,-0.749870,-0.762834,-0.895039,0.777590,-1.023629,1.032259,-0.748947,-0.825216,1.278318,-1.529423,-1.590483,-1.424927,-1.493605,1.079000,1.008916,0.736455,-0.134387,-0.885552,-0.733749,-0.888054,-0.855739
6,-0.362988,-0.063948,-0.362988,-0.975223,-0.315957,-0.373408,-0.111544,-0.360225,-0.371965,-0.207742,-0.373179,-0.350657,-0.386572,-0.151970,-0.245886,-0.348675,-0.245052,-0.347852,-0.348662,-0.776739,-0.22482,-0.776656,-1.026094,-0.314208,-0.784837,-0.479427,-0.738945,-0.691461,-0.542620,-0.712321,-0.272370,-0.485030,-0.772470,-0.834378,-0.697338,-0.775920,-0.558618,-0.670843,0.759848,0.434563,...,-0.622068,-0.451462,-2.308983,2.447618,-2.587175,-0.464669,0.544237,-0.419629,0.179490,-0.511459,-1.164215,-1.299180,-1.518313,0.777428,0.064389,0.765308,0.004354,-0.749861,-0.765465,-0.749870,-0.762834,-0.895039,0.777590,-1.023629,1.032259,-0.748947,-0.825216,1.278318,-1.529423,-1.590483,-1.424927,-1.493605,1.079000,1.008916,0.736455,-0.134387,-0.885552,-0.733749,-0.888054,-0.855739
7,-0.366114,-0.063948,-0.366114,0.044825,-0.302076,-0.376902,-0.104166,-0.366045,-0.375122,-0.207747,-0.376191,-0.618224,-0.653243,-0.151970,-0.245851,-0.357483,-0.245017,-0.356743,-0.357471,-0.822302,-0.22482,-0.822241,0.023298,-0.300581,-0.877801,-0.327355,-0.875946,-0.837194,-0.571439,-0.818446,0.400580,0.015838,-0.772470,-0.787582,-0.939010,-0.720681,-0.872348,-0.921879,0.546205,0.668386,...,0.151907,-0.280243,-0.406671,2.037249,-1.360377,-0.418764,-0.069708,-0.419629,0.102720,-0.333421,-0.945551,-0.989294,-1.012321,2.307869,0.520749,0.765308,0.004354,-0.749861,-0.765465,-0.749870,-0.762834,0.451113,0.152947,0.326748,1.150506,0.712216,-1.498774,0.512383,-0.930507,-1.154224,-0.313801,-0.224493,1.520185,1.546403,0.736455,-0.134387,-0.885552,-0.733749,-0.888054,-0.855739
8,-0.366114,-0.063948,-0.366114,0.044825,-0.302076,-0.376902,-0.104166,-0.366045,-0.375122,-0.207747,-0.376191,-0.618224,-0.653243,-0.151970,-0.245851,-0.357483,-0.245017,-0.356743,-0.357471,-0.822302,-0.22482,-0.822241,0.023298,-0.300581,-0.877801,-0.327355,-0.875946,-0.837194,-0.571439,-0.818446,0.400580,0.015838,-0.772470,-0.787582,-0.939010,-0.720681,-0.872348,-0.921879,0.546205,0.668386,...,0.151907,-0.280243,-0.406671,2.037249,-1.360377,-0.418764,-0.069708,-0.419629,0.102720,-0.333421,-0.945551,-0.989294,-1.012321,2.307869,0.520749,0.765308,0.004354,-0.749861,-0.765465,-0.749870,-0.762834,0.451113,0.152947,0.326748,1.150506,0.712216,-1.498774,0.512383,-0.930507,-1.154224,-0.313801,-0.224493,1.520185,1.546403,0.736455,-0.134387,-0.885552,-0.733749,-0.888054,-0.855739
9,-0.364902,-0.063948,-0.364902,-1.073305,-0.357600,-0.376528,-0.107858,-0.364452,-0.374960,-0.207746,-0.375740,0.107446,0.005695,-0.151970,-0.245443,-0.355376,-0.244607,-0.354623,-0.355364,-0.761933,-0.22482,-0.761842,-1.084394,-0.355088,-0.827125,-0.326744,-0.821455,-0.804437,-0.569008,-0.808697,-0.398174,-0.320469,-0.772470,-0.698129,-0.818791,-0.615087,-0.756595,-0.797002,0.837342,1.483082,...,-0.100884,-0.361128,-1.991096,1.476945,-1.874040,0.132093,1.039058,-0.419629,-0.119403,-0.054719,-0.928249,-1.056052,-1.111167,-0.289066,0.132250,0.765308,0.004354,-0.749861,-0.765465,-0.749870,-0.762834,-0.877583,1.339674,-1.204082,1.111090,0.726266,-0.187370,1.172151,-1.216683,-1.366187,-1.545021,-1.656133,0.750519,0.748392,0.736455,-0.134387,-0.835473,-0.733749,-0.837634,-0.824847


## **Binary classification of positive and negative emotions**

My key architectural finding was that emotion classification accuracy increases significantly when the model first makes a binary classification: **is the sample "positive" or "negative"?** Emotions like happiness, surprise, neutral, and calm were considered positive, while anger, fear, sadness, and disgust were considered negative.

I use a Random Forest for binary classification for this dataset. The final accuracy is 92.88%.


In [None]:
# Grid search to determine the best Random Forest hyper-parameters.
# The train/validation split is made in this cell so that X_train / y_train exist
# when the notebook is run top to bottom on a fresh kernel; a fixed seed keeps the
# split reproducible. Any 'positive' prediction column added to scaled_new_df by a
# later cell is excluded so the forest is only ever tuned on the audio features.
X_train, X_val, y_train, y_val = train_test_split(
    scaled_new_df.drop(['positive'], axis=1, errors='ignore'),
    new_df['positive'],
    test_size = .25,
    random_state = 42
)

# candidate values tried for each hyper-parameter (every combination is evaluated)
parameters = {'min_samples_split':[6, 7, 9], 'min_samples_leaf':[3, 4], 'max_features': [18, 20, 50]}
rf = RandomForestClassifier(n_estimators = 99, criterion = 'gini', max_depth = 7)
clf = GridSearchCV(rf, parameters, cv=5)  # 5-fold cross-validation per combination
clf.fit(X_train, y_train)

GridSearchCV(cv=5, error_score='raise',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=7, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=99, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'min_samples_split': [6, 7, 9], 'min_samples_leaf': [3, 4], 'max_features': [18, 20, 50]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [None]:
# Display the hyper-parameter combination selected by the grid search above.
clf.best_params_

{'max_features': 50, 'min_samples_leaf': 3, 'min_samples_split': 6}

In [None]:
# Split into training and validation data. A fixed seed makes the split -- and
# therefore the metrics printed below -- reproducible across runs. Any 'positive'
# prediction column added to scaled_new_df by a later cell is excluded, so this
# cell can be re-run without changing the forest's input columns.
X_train, X_val, y_train, y_val = train_test_split(
    scaled_new_df.drop(['positive'], axis=1, errors='ignore'),
    new_df['positive'],
    test_size = .25,
    random_state = 42
)

# create random forest classifier (seeded for the same reason as the split)
rf = RandomForestClassifier(n_estimators=99, criterion='gini', max_depth=7, min_samples_split = 6, min_samples_leaf=3, max_features=50, random_state=42)
rf.fit(X_train, y_train)
y_predict = rf.predict(X_val)

# evaluate the model on the held-out validation rows
print(confusion_matrix(y_val, y_predict))
print(accuracy_score(y_val, y_predict))

[[171  12]
 [ 13 155]]
0.9287749287749287



### Confusion Matrix for Positive/Negative Classification


![Random Forest Confusion Matrix](https://drive.google.com/uc?export=view&id=1HMGmkUdFmXY_KYRfD0EXe0TvzJQglu5m)

## **Classifying Emotions with a Neural Net**
Now, we will predict 8 emotion classes (happy, sad, neutral, calm, fear, anger, disgust, and surprise) using a neural net, with the audio features *and* the previous positive/negative prediction as input.

In [None]:
# Add the random forest predictions as an input column.
# A 'positive' column left over from a previous run of this cell is dropped before
# predicting, so the forest always receives the feature columns it was fit on and
# the cell can safely be re-run.
predicted_pos = rf.predict(scaled_new_df.drop(['positive'], axis=1, errors='ignore'))
scaled_new_df['positive'] = predicted_pos

# Split training and testing data. The call is wrapped so the statement continues
# inside the parentheses; a bare line break after '=' is a syntax error.
train_data, val_data, train_labels, val_labels = train_test_split(
    scaled_new_df.loc[X_train.index],  # this training data excludes test data from the previous split
    new_df.loc[y_train.index, ' emotion '].astype(int) - 1,  # emotion codes shifted down by one
    test_size = .2,
    random_state = 42  # fixed seed so the split is reproducible
)
print(train_data.shape)

# reshape the data for training: append a trailing axis of length 1
train_data = np.array(train_data).reshape((train_data.shape[0], train_data.shape[1], 1))
train_data.shape

In [None]:
# Build the network as one ordered layer list, then compile and summarise it.
model = tf.keras.Sequential([
    Dense(16, input_shape = train_data[0].shape),  # input shape taken from one training sample
    Dense(16, activation='sigmoid', kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    tf.keras.layers.Dropout(.25),
    Dense(4, activation='relu'),
    tf.keras.layers.Dropout(.25),
    tf.keras.layers.Flatten(),
    Dense(8, activation = tf.nn.softmax),  # one output unit per emotion class
])

# plain SGD, trained against one-hot labels
# NOTE(review): `lr` / `decay` are legacy argument names; newer Keras releases
# expect `learning_rate` -- confirm against the installed TensorFlow version.
sgd = tf.keras.optimizers.SGD(lr=0.01, momentum=0.0, decay=0.001, nesterov=False)
model.compile(optimizer = sgd, loss = 'categorical_crossentropy', metrics =["accuracy"])
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 989, 16)           32        
_________________________________________________________________
dense_1 (Dense)              (None, 989, 16)           272       
_________________________________________________________________
dropout (Dropout)            (None, 989, 16)           0         
_________________________________________________________________
dense_2 (Dense)              (None, 989, 4)            68        
_________________________________________________________________
dropout_1 (Dropout)          (None, 989, 4)            0         
_________________________________________________________________
flatten (Flatten)    

In [None]:
# Train the network; the object returned by fit() is kept in `y`.
# num_classes is pinned to 8 so the one-hot label width always matches the 8-unit
# softmax output layer, even if the highest class id is missing from train_labels
# (to_categorical otherwise infers the width from the largest label it sees).
y = model.fit(train_data, to_categorical(train_labels, num_classes=8), verbose = True, epochs = 100, validation_split = .2)

Train on 672 samples, validate on 169 samples
Instructions for updating:
Use tf.cast instead.
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch

In [None]:
# give the validation features the same trailing length-1 axis the model was trained with
n_val_rows, n_val_cols = val_data.shape[0], val_data.shape[1]
val_data = np.array(val_data).reshape((n_val_rows, n_val_cols, 1))

# predict class probabilities, reduce them to the most likely class per row,
# then print the confusion matrix and accuracy
y_pred_proba = model.predict(val_data)
val_pred_classes = np.argmax(y_pred_proba, axis=1)
print(confusion_matrix(val_labels, val_pred_classes))
print(accuracy_score(val_labels, val_pred_classes))

[[18  2  0  1  1  0  0  0]
 [ 0 52  0  1  0  0  0  0]
 [ 0  0 42  0  4  2  0  2]
 [ 0  0  0 36  0  3  0  0]
 [ 0  0  0  0 35  0  2  1]
 [ 1  0  0  1  0 39  0  1]
 [ 0  0  0  1  0  2 50  0]
 [ 0  0  0  0  0  0  0 54]]
0.9287749287749287


### K Fold Cross Validation
The output of this section has been omitted for brevity. The K-Fold cross validation accuracy was 98.29%.

In [None]:
# get data and labels for K-fold cross validation
data = np.array(scaled_new_df.loc[X_train.index])
all_labels = new_df.loc[y_train.index, ' emotion '].astype(int) - 1
print(len(data))

# Prepare K-fold cross validation. Arguments are passed by keyword, and no
# random_state is given because it has no effect while shuffle is False.
kfold = KFold(n_splits=3, shuffle=False)
fold_accuracies = []
# enumerate splits
for train, test in kfold.split(data):
  # Train a freshly initialised copy of the architecture for every fold.
  # Re-fitting `model` itself would score each fold with weights that have already
  # been trained on that fold's test rows (model.reset_states() does not reset
  # weights), which inflates the reported accuracy.
  fold_model = tf.keras.models.clone_model(model)
  fold_model.compile(
      optimizer=type(model.optimizer).from_config(model.optimizer.get_config()),  # same optimizer settings, fresh state
      loss='categorical_crossentropy',
      metrics=["accuracy"]
  )
  # num_classes=8 keeps the one-hot width equal to the 8-unit output layer even if
  # a fold's training rows happen to miss the highest class id
  fold_model.fit(data[train].reshape(len(train), data.shape[1], 1), to_categorical(all_labels.iloc[train], num_classes=8), verbose=True, epochs = 100, validation_split=.1)
  y_pred_temp = fold_model.predict(data[test].reshape(len(test), data.shape[1], 1))
  fold_accuracy = accuracy_score(all_labels.iloc[test], np.argmax(y_pred_temp, axis=1))
  fold_accuracies.append(fold_accuracy)
  print(fold_accuracy) # print validation accuracy per split

# mean accuracy across the folds
print(np.mean(fold_accuracies))

### Final Model Evaluation
We evaluate the neural net test accuracy on data held back in the first data split, which neither the Random Forest nor the neural net has trained on.

In [None]:
# Add the forest's positive/negative prediction as an extra input column.
# The new column goes onto a separate frame built with assign(), so X_val itself is
# never modified: this avoids pandas' SettingWithCopyWarning and lets the cell be
# re-run. A 'positive' column already present on X_val is dropped first so the
# forest only receives the feature columns it was fit on.
X_val_features = X_val.drop(['positive'], axis=1, errors='ignore')
X_val_predicted_pos = rf.predict(X_val_features)
X_val_w_positive = X_val_features.assign(positive=X_val_predicted_pos)

# reshape the validation data to input into the model (trailing axis of length 1)
X_val_w_positive = np.array(X_val_w_positive).reshape((X_val_w_positive.shape[0], X_val_w_positive.shape[1], 1))

# predict on the data and print evaluation metrics
y_pred_X_val = model.predict(X_val_w_positive)
true_emotions = new_df.loc[y_val.index, ' emotion '].astype(int) - 1  # same one-step shift used for the training labels
pred_emotions = np.argmax(y_pred_X_val, axis=1)
print(confusion_matrix(true_emotions, pred_emotions))
print(accuracy_score(true_emotions, pred_emotions))

[[15  2  0  3  0  0  1  2]
 [ 3 52  0  1  0  0  0  0]
 [ 1  3 38  1  0  0  0  1]
 [ 0  3  0 35  0  0  3  2]
 [ 0  0  3  2 35  0  2  0]
 [ 0  3  1  0  0 41  0  1]
 [ 3  1  1  2  2  3 37  3]
 [ 0  1  1  3  0  3  0 37]]
0.8262108262108262


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


![alt text](https://drive.google.com/uc?export=view&id=1WdRm1pI_de0eWPIEpiRastAUfwmp5OQJ)