In [1]:
# Imports
import json, os, re, shutil, sys, time
import seaborn as sns
from importlib import reload
import collections, itertools
import unittest
from IPython.display import display, HTML
from collections import defaultdict
import xmltodict
import untangle
import xml.etree.ElementTree as ET
# NLTK for NLP utils and corpora
import nltk
from nltk.corpus import treebank
from nltk.text import Text
from nltk.tokenize import word_tokenize
from nltk.tokenize.toktok import ToktokTokenizer
from nltk.corpus import wordnet
from nltk.corpus import stopwords 
from nltk.stem import WordNetLemmatizer

# Import spacy
import spacy

import pickle

import sklearn
from sklearn.model_selection import train_test_split

# NumPy, Pandas and TensorFlow
import numpy as np
import pandas as pd
import tensorflow as tf

from bs4 import BeautifulSoup
from contractions import CONTRACTION_MAP
import unicodedata

from numpy.random import seed
from pandas import read_csv, DataFrame
from sklearn.preprocessing import minmax_scale

# Keras
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Flatten, LSTM, Dropout, Activation
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.optimizers import SGD
from keras.layers.embeddings import Embedding
import keras
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Embedding, Activation, Dropout, Conv1D, MaxPooling1D, Bidirectional, Flatten, TimeDistributed

from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence

from tensorflow.keras.models import Sequential

from sklearn.model_selection import train_test_split

## Plotly
import plotly.offline as py
import plotly.graph_objs as go
py.init_notebook_mode(connected=True)

from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import average_precision_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import cohen_kappa_score
import keras.backend as K
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from datetime import datetime


Using TensorFlow backend.


## AEG Long Essay Sentence Level Classification

In [2]:
# Read the data
aeg_long = pd.read_csv("../data-DNC/AEG/training_set_rel3.tsv",sep='\t',encoding = "latin1")
#aeg_long.head(5)

In [3]:
# Split the data into train and test. We need to do this first to ensure that when we split to 
# sentence level, we have sentences of a given essay in either training or test but not on both.

train_comb,test_comb = train_test_split(aeg_long,test_size=0.2, random_state=42)

In [4]:
# Create train and test data frames with relevant fields

x_train_df = train_comb.filter(['essay_id','essay_set','essay','domain1_score'], axis=1)
x_test_df = test_comb.filter(['essay_id','essay_set','essay','domain1_score'], axis=1)

x_train_df = x_train_df.reset_index()
x_test_df = x_test_df.reset_index()
print(x_train_df.shape)
print(x_test_df.shape)
print(x_train_df.essay_id[:5].values)
print(x_test_df.essay_id[:5].values)

(10380, 5)
(2596, 5)
[ 3567  4233  6518 15174 18855]
[ 9908  9872   305 12771  6839]


In [8]:
# Each essay set has a different scoring range. We need to normalize the scores to a standard scale for training.
def normalize_score(essay):
    """ Normalizes the domain score based on percentage"""
    score = 0
    score = float(essay.domain1_score)
    essay_set = essay.essay_set
    if essay_set == 1:
        div = 12
    elif essay_set == 2:
        div = 5
    elif essay_set == 3:
        div = 3
    elif essay_set == 4:
        div = 3
    elif essay_set == 5:
        div = 4
    elif essay_set == 6:
        div = 4
    elif essay_set == 7:
        div = 25
    elif essay_set == 8:
        div = 50
    return score/div

In [9]:
x_train_df['Norm_Score'] = x_train_df.apply(normalize_score,axis=1)
x_test_df['Norm_Score'] = x_test_df.apply(normalize_score,axis=1)

#### Split essay into sentences

In [10]:
# We will use spacy to split the essay into sentences. 
# Load spacy large english module
nlp = spacy.load('en_core_web_lg')

In [11]:
# Define a function to split essay into sentences
def create_sentences_df(df):
    """ Function to split essay data into individual sentences. Returns a dataframe"""
    start=datetime.now()
    aeg_long_sentence = pd.DataFrame(columns=['essay_id','essay_set','sentence','domain1_score','Norm_Score'])
    for i in range(len(df)):
        if i%1000 == 0:
            print("At iteration :",i)
            print("Duration: ",datetime.now()-start)
        sentence = nlp(df.essay[i])
        for s in sentence.sents:
            aeg_long_sentence = aeg_long_sentence.append({'essay_id' : df.essay_id[i],
                                                          'essay_set' : df.essay_set[i],'sentence' : s.text, 
                                                          'domain1_score' : df.domain1_score[i],
                                                          'Norm_Score' : df.Norm_Score[i]},
                                                         ignore_index=True)
    return aeg_long_sentence

In [12]:
# Split train data into sentences
x_train_sentence_df = create_sentences_df(x_train_df)

At iteration : 0
Duration:  0:00:00.000932
At iteration : 1000
Duration:  0:01:14.121585
At iteration : 2000
Duration:  0:02:45.327758
At iteration : 3000
Duration:  0:04:51.602966
At iteration : 4000
Duration:  0:07:44.236425
At iteration : 5000
Duration:  0:11:15.450161
At iteration : 6000
Duration:  0:15:42.782550
At iteration : 7000
Duration:  0:20:41.656108
At iteration : 8000
Duration:  5:19:13.096613
At iteration : 9000
Duration:  9:07:48.924929
At iteration : 10000
Duration:  9:15:03.240570


In [13]:
# Split test data into sentences
x_test_sentence_df = create_sentences_df(x_test_df)

At iteration : 0
Duration:  0:00:00.001320
At iteration : 1000
Duration:  0:01:17.885944
At iteration : 2000
Duration:  0:03:07.950674


In [50]:
x_train_sentence_df.head(5)

Unnamed: 0,essay_id,essay_set,sentence,domain1_score,Norm_Score
0,3567,2,There are many types of reading materials for ...,4,0.8
1,3567,2,"You can find things on cars, trucks, sports, a...",4,0.8
2,3567,2,There are some materials in a library though t...,4,0.8
3,3567,2,But should those materials be removed from the...,4,0.8
4,3567,2,Some think that they should and others think t...,4,0.8


In [177]:
# Store this dataset in pickle format so that we don't have to redo the above steps.
x_train_sentence_df.to_pickle("./x_train_sentence_df.pkl")
x_test_sentence_df.to_pickle("./x_test_sentence_df.pkl")

In [20]:
# Open the pickled version
x_train_sentence_df = pd.read_pickle("./x_train_sentence_df.pkl")
x_test_sentence_df = pd.read_pickle("./x_test_sentence_df.pkl")
print(x_train_sentence_df.shape)
print(x_test_sentence_df.shape)

(147123, 5)
(36778, 5)


In [9]:
#def append_sentence(sid,sentence):
#    sentence = sid_dict[sid] + sentence
#    return sentence

In [17]:
#sid_dict = {1:"One. ",2:"Two. ",3:"Three. ",4:"Four. ",5:"Five ",6:"Six. ",7:"Seven. ",8:"Eight. "}

In [18]:
#x_train_sentence_df['mod_sentence'] = np.vectorize(append_sentence)(x_train_sentence_df['essay_set'],
#                                                                    x_train_sentence_df['sentence'])

#x_test_sentence_df['mod_sentence'] = np.vectorize(append_sentence)(x_test_sentence_df['essay_set'],
#                                                                    x_test_sentence_df['sentence'])

In [6]:
#x_train_sentence_df.head(5)

In [21]:
# Create train and test text and labels

x_train = x_train_sentence_df['sentence'].values
y_train = x_train_sentence_df['Norm_Score'].values
x_test = x_test_sentence_df['sentence'].values
y_test = x_test_sentence_df['Norm_Score'].values
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(147123,)
(147123,)
(36778,)
(36778,)


In [22]:
vocabulary_size = 50000
tokenizer = Tokenizer(num_words= vocabulary_size)
tokenizer.fit_on_texts(x_train)
train_seq = tokenizer.texts_to_sequences(x_train)
train_data = pad_sequences(train_seq)

In [23]:
max_len_class = train_data.shape[1]
max_words_class = vocabulary_size

In [24]:
test_seq = tokenizer.texts_to_sequences(x_test)
test_data = pad_sequences(test_seq, maxlen=max_len_class)
test_data.shape

(36778, 175)

In [25]:
embeddings_index = dict()
f = open('/Users/kurapati/W266/data/glove.42B.300d.txt')
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()
print('Loaded %s word vectors.' % len(embeddings_index))

Loaded 1917494 word vectors.


In [26]:
embedding_matrix = np.zeros((vocabulary_size, 300))
for word, index in tokenizer.word_index.items():
    if index > vocabulary_size - 1:
        break
    else:
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[index] = embedding_vector

In [39]:
# Common Functions:

# Define function to get the multiplication factor. We can get it back from the essay set, but
# it is better to do it from the score, because there are scores with value 0, and its MF should be 0

def find_mult_factor(x):
    """ Function to find the multiplication factor for denormalizing"""
    if x[1] == 0:
        return 0
    else:
        return np.around(x[0]/x[1])
    
def denormalize(x):
    """ Function to Denormalize the score"""
    return np.around(x[2] * x[3])

def find_max_min_mean_score(df):
    """ Function to find the max, min and rounded mean of sentence scores"""
    new_df = pd.DataFrame(columns=['essay_id','essay_set','Orig_Score','Norm_Orig_Score',
                                   'Max_Score','Min_Score','Mean_Score','Norm_Mean_Score'])
    essay_ids = np.unique(df.essay_id)
    for e_id in essay_ids:
        df_temp = df[df.essay_id == e_id]
        max_score = np.max(df_temp.pred_score)
        min_score = np.min(df_temp.pred_score)
        # we need to round the mean so that kappa score doesnt complain
        mean_score = np.around(np.mean(df_temp.pred_score))
        norm_mean_score = np.mean(df_temp.norm_pred_score)
        new_df = new_df.append({'essay_id':e_id,'essay_set':int(np.unique(df_temp.essay_set)),
                                'Orig_Score':int(np.unique(df_temp.orig_score)),
                                'Norm_Orig_Score' : np.unique(df_temp.norm_orig_score),
                                'Max_Score':max_score,'Min_Score':min_score,
                                'Mean_Score':mean_score,'Norm_Mean_Score':norm_mean_score},ignore_index=True)
    return new_df

def essay_set_metrics(df):
    """ Calculate per essay set metrics"""
    set_df = pd.DataFrame(columns=['essay_set','RMSE','Kappa','Kappa_Q','Accuracy','Norm_RMSE'])
    e_sets = np.unique(df.essay_set)
    for e_s in e_sets:
        df_s = df[df.essay_set == e_s]
        original_score = df_s.Orig_Score.values.astype(int)
        norm_original_score = df_s.Norm_Orig_Score.values.astype(float)
        predicted_score = df_s.Mean_Score.values.astype(int)
        norm_predicted_score = df_s.Norm_Mean_Score.values.astype(float)
        rmse = RMSE(original_score,predicted_score)
        kappa = cohen_kappa_score(original_score,predicted_score)
        kappa_q = cohen_kappa_score(original_score,predicted_score,weights='quadratic')
        accuracy = accuracy_score(original_score,predicted_score)
        norm_rmse = RMSE(norm_original_score,norm_predicted_score)
        set_df = set_df.append({'essay_set':e_s,'RMSE':rmse,'Kappa':kappa,'Kappa_Q':kappa_q,
                                'Accuracy':accuracy,'Norm_RMSE':norm_rmse},ignore_index=True)
    return set_df


def sentence_count(df):
    """ Returns the number of sentences in an essay """
    essay_count = df.groupby('essay_id').count()
    essay_count = essay_count.drop(['sentence','domain1_score','Norm_Score'],axis=1)
    essay_count.columns = ['Number_of_Sentences']
    return essay_count

In [28]:
def RMSE(actual, predict):
    diff = actual - predict
    diff = sum(diff**2) / len(actual)
    return np.sqrt(diff)

In [36]:
#model definitions

def FF_1_NN():
    """ Simple feed forward NN"""
    model_ff = Sequential()
    model_ff.add(tf.keras.layers.Embedding(max_words_class, 300, input_length=max_len_class, weights=[embedding_matrix], trainable=False))
    model_ff.add(tf.keras.layers.Flatten())
    model_ff.add(tf.keras.layers.Dense(50,activation='tanh'))
    model_ff.add(tf.keras.layers.Dropout(0.1))
    model_ff.add(tf.keras.layers.Dense(1,activation='sigmoid'))
    model_ff.compile(optimizer=tf.train.AdamOptimizer(),loss='mse',metrics=['accuracy'])
    return model_ff

def FF_2_NN():
    """ Simple feed forward NN"""
    model_ff = Sequential()
    model_ff.add(tf.keras.layers.Embedding(max_words_class, 300, input_length=max_len_class, weights=[embedding_matrix], trainable=False))
    model_ff.add(tf.keras.layers.Flatten())
    model_ff.add(tf.keras.layers.Dense(50,activation='tanh'))
    model_ff.add(tf.keras.layers.Dense(50,activation='tanh'))
    model_ff.add(tf.keras.layers.Dropout(0.1))
    model_ff.add(tf.keras.layers.Dense(1,activation='sigmoid'))
    model_ff.compile(optimizer=tf.train.AdamOptimizer(),loss='mse',metrics=['accuracy'])
    return model_ff

def FF_3_NN():
    """ Simple feed forward NN"""
    model_ff = Sequential()
    model_ff.add(tf.keras.layers.Embedding(max_words_class, 300, input_length=max_len_class, weights=[embedding_matrix], trainable=False))
    model_ff.add(tf.keras.layers.Flatten())
    model_ff.add(tf.keras.layers.Dense(50,activation='tanh'))
    model_ff.add(tf.keras.layers.Dense(50,activation='tanh'))
    model_ff.add(tf.keras.layers.Dense(50,activation='tanh'))
    model_ff.add(tf.keras.layers.Dropout(0.1))
    model_ff.add(tf.keras.layers.Dense(1,activation='sigmoid'))
    model_ff.compile(optimizer=tf.train.AdamOptimizer(),loss='mse',metrics=['accuracy'])
    return model_ff

def GRU_32():
    """ Gated Recurrent Unit"""
    model_gru = Sequential()
    model_gru.add(tf.keras.layers.Embedding(max_words_class, 300, input_length=max_len_class, weights=[embedding_matrix], trainable=False))
    model_gru.add(tf.keras.layers.GRU(32,activation='tanh'))
    model_gru.add(tf.keras.layers.Dropout(0.1))
    model_gru.add(tf.keras.layers.Dense(1,name='out_layer'))
    model_gru.compile(optimizer=tf.train.AdamOptimizer(),loss='mse',metrics=['accuracy'])
    return model_gru

def GRU_50():
    """ Gated Recurrent Unit"""
    model_gru = Sequential()
    model_gru.add(tf.keras.layers.Embedding(max_words_class, 300, input_length=max_len_class, weights=[embedding_matrix], trainable=False))
    model_gru.add(tf.keras.layers.GRU(32,activation='tanh'))
    model_gru.add(tf.keras.layers.Dropout(0.1))
    model_gru.add(tf.keras.layers.Dense(1,name='out_layer'))
    model_gru.compile(optimizer=tf.train.AdamOptimizer(),loss='mse',metrics=['accuracy'])
    return model_gru

def CNN_FF():
    """ CNN with Feed Forward NN """
    model_conv = Sequential()
    model_conv.add(tf.keras.layers.Embedding(max_words_class, 300, input_length=max_len_class, weights=[embedding_matrix], trainable=False))
    model_conv.add(tf.keras.layers.Conv1D(64, 5, activation='relu'))
    model_conv.add(tf.keras.layers.MaxPooling1D(pool_size=4))
    model_conv.add(tf.keras.layers.Flatten())
    model_conv.add(tf.keras.layers.Dense(100))
    model_conv.add(tf.keras.layers.Dropout(0.1))
    model_conv.add(tf.keras.layers.Dense(1, kernel_initializer='normal'))
    model_conv.compile(loss = 'mse', optimizer = tf.train.AdamOptimizer(), metrics = ['accuracy'])
    return model_conv

def CNN_lstm():
    """ CNN with single layer LSTM & Feed Forward NN"""
    model_conv = Sequential()
    model_conv.add(tf.keras.layers.Embedding(max_words_class, 300, input_length=max_len_class, weights=[embedding_matrix], trainable=False))
    #model_conv.add(tf.keras.layers.Dropout(0.1))
    model_conv.add(tf.keras.layers.Conv1D(64, 5, activation='relu'))
    model_conv.add(tf.keras.layers.MaxPooling1D(pool_size=4))
    model_conv.add(tf.keras.layers.LSTM(100))
    model_conv.add(tf.keras.layers.Dropout(0.1))
    model_conv.add(tf.keras.layers.Dense(100))
    model_conv.add(tf.keras.layers.Dense(1, kernel_initializer='normal'))
    model_conv.compile(loss = 'mse', optimizer = tf.train.AdamOptimizer(), metrics = ['accuracy'])
    return model_conv

def stack_lstm():
    """ Three layered stacked LSTM."""
    model_conv = Sequential()
    model_conv.add(tf.keras.layers.Embedding(max_words_class, 300, input_length=max_len_class, weights=[embedding_matrix], trainable=False))
    model_conv.add(tf.keras.layers.LSTM(32,return_sequences=True))
    model_conv.add(tf.keras.layers.LSTM(32, return_sequences=True))
    model_conv.add(tf.keras.layers.Dropout(0.2))
    model_conv.add(tf.keras.layers.LSTM(32))
    model_conv.add(tf.keras.layers.Dense(1, kernel_initializer='normal'))
    model_conv.compile(loss = 'mse', optimizer = tf.train.AdamOptimizer(), metrics = ['accuracy'])
    return model_conv

def stateful_stacked_lstm():
    # In stateful, total samples needs to be divisible by batch size
    # we have 147026 samples, so selecting 6683 (6683*22=147026)
    # The test sample need to be a multiple of 6683 as well
    batch_size=2
    model_conv = Sequential()
    # In stateful, we have to pass batch_input_shape to the first layer
    model_conv.add(tf.keras.layers.Embedding(max_words_class, 300, input_length=max_len_class, weights=[embedding_matrix], 
                                             trainable=False,batch_input_shape=(batch_size,max_len_class)))
    model_conv.add(tf.keras.layers.LSTM(32,stateful=True,return_sequences=True))
    model_conv.add(tf.keras.layers.Dropout(0.1))
    model_conv.add(tf.keras.layers.LSTM(32))
    model_conv.add(tf.keras.layers.Dropout(0.1))
    model_conv.add(tf.keras.layers.Dense(100))
    model_conv.add(tf.keras.layers.Dense(1, kernel_initializer='normal'))
    #sgd = SGD(lr = 0.1, momentum = 0.9, decay = 0, nesterov = False)
    model_conv.compile(loss = 'mse', optimizer = tf.train.AdamOptimizer(), metrics = ['accuracy'])
    #model_conv.compile(optimizer=tf.train.AdamOptimizer(),loss='mse',metrics=['accuracy'])
    return model_conv


### Simple Feed Forward NN

#### FNN 1 Layer

In [53]:
# Train the model
estimator_nn = KerasRegressor(build_fn=FF_1_NN, epochs=20, batch_size=500)
estimator_nn.fit(train_data, y_train)

# Predict for test data
prediction_nn=estimator_nn.predict(test_data)
rmse_val = RMSE(y_test,prediction_nn)
# Find the overall RMSE value. This is not very relevant considering different scale for each set.
print("RMSE: ",rmse_val)

# Construct pandas data frame of scores of each sentences
nn_df = pd.DataFrame([x_test_sentence_df['domain1_score'].astype(np.double), y_test.astype(np.double),
                      prediction_nn.astype(np.double)]).transpose()
nn_df.columns = ['Orig_Score','Norm_Score','Pred_Score']
# Find Multiplication factor using the function we defined before
nn_df['Mult_Factor'] = nn_df.apply(find_mult_factor,axis=1)
# Find the denormalized predicted score
nn_df['Denorm_Pred_Score'] = nn_df.apply(denormalize,axis=1)

# Extract the scores of each sentences
orig_score = nn_df.Orig_Score.as_matrix(columns=None)
orig_score = orig_score.astype(np.int)
pred_score = nn_df.Denorm_Pred_Score.as_matrix(columns=None)
pred_score = pred_score.astype(np.int)

# Provide the metrics at sentence level
rmse_nn = RMSE(orig_score,pred_score)
cohen_kappa = cohen_kappa_score(orig_score,pred_score,weights='quadratic')
accuracy = accuracy_score(orig_score,pred_score)
print("**** METRICS BASED ON DENORMALIZED SCORE FOR FEED FORWARD NN****")
print("**** NOTE: THIS IS STILL AT SENTENCE LEVEL. NEXT SECTION WILL MERGE SENTENCE SCORES****")
print("RMSE: ",rmse_nn)
print("Kappa Quadratic Weighting: ",cohen_kappa)
print("Accuracy: ",accuracy)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
RMSE:  0.17892304080303573
**** METRICS BASED ON DENORMALIZED SCORE FOR FEED FORWARD NN****
**** NOTE: THIS IS STILL AT SENTENCE LEVEL. NEXT SECTION WILL MERGE SENTENCE SCORES****
RMSE:  2.9969939804064736
Kappa Quadratic Weighting:  0.9714422223685433
Accuracy:  0.30966882375333077


In [54]:
# We will combine the sentences back to essay in this section

result_df = pd.DataFrame([x_test_sentence_df.essay_id.values,x_test_sentence_df.essay_set.values,
                         x_test_sentence_df.sentence.values,nn_df.Orig_Score.values,
                          nn_df.Norm_Score,nn_df.Denorm_Pred_Score.values,
                          nn_df.Pred_Score.values]).transpose()
result_df.columns = ['essay_id','essay_set','sentence','orig_score','norm_orig_score',
                     'pred_score','norm_pred_score']
nn_test_df = find_max_min_mean_score(result_df)

#Convert the dataframe parameters into integers as they are returned as floats
nn_test_df.essay_id = nn_test_df.essay_id.astype(int)
nn_test_df.essay_set = nn_test_df.essay_set.astype(int)
nn_test_df.Orig_Score = nn_test_df.Orig_Score.astype(int)
nn_test_df.Max_Score = nn_test_df.Max_Score.astype(int)
nn_test_df.Min_Score = nn_test_df.Min_Score.astype(int)
nn_test_df.Mean_Score = nn_test_df.Mean_Score.astype(int)

# Extract the scores
orig_score = nn_test_df.Orig_Score.values
max_pred_score = nn_test_df.Max_Score.values
min_pred_score = nn_test_df.Min_Score.values
mean_pred_score = nn_test_df.Mean_Score.values

# Compare the metrics if Max score of all sentences are taken
rmse_max_nn = RMSE(orig_score,max_pred_score)
cohen_kappa_max_nn = cohen_kappa_score(orig_score,max_pred_score)
accuracy_max_nn = accuracy_score(orig_score,max_pred_score)

# Compare the metrics if Min score of all sentences are taken
rmse_min_nn = RMSE(orig_score,min_pred_score)
cohen_kappa_min_nn = cohen_kappa_score(orig_score,min_pred_score)
accuracy_min_nn = accuracy_score(orig_score,min_pred_score)

# Compare the metrics if Mean score of all sentences are taken
rmse_mean_nn = RMSE(orig_score,mean_pred_score)
cohen_kappa_mean_nn = cohen_kappa_score(orig_score,mean_pred_score)
cohen_kappa_mean_q_nn = cohen_kappa_score(orig_score,mean_pred_score,weights='quadratic')
accuracy_mean_nn = accuracy_score(orig_score,mean_pred_score)

print("### Simple FF NN : Essay level Metrics : After taking MAX score all Sentences ###")
print("RMSE: ",rmse_max_nn)
print("Cohen Kappa: ",cohen_kappa_max_nn)
print("Accuracy: ",accuracy_max_nn)

print("### Simple FF NN : Essay level Metrics : After taking Min Score of all Sentences ###")
print("RMSE: ",rmse_min_nn)
print("Cohen Kappa: ",cohen_kappa_min_nn)
print("Accuracy: ",accuracy_min_nn)

print("### Simple FF NN : Essay level Metrics : After taking Mean Score of all Sentences ###")
print("RMSE: ",rmse_mean_nn)
print("Cohen Kappa: ",cohen_kappa_mean_nn)
print("Cohen Kappa Quadratic Weight: ",cohen_kappa_mean_q_nn)
print("Accuracy: ",accuracy_mean_nn)

# Provide per-essay-set metrics based on mean score
essay_set_results = essay_set_metrics(nn_test_df)
print(essay_set_results)
print("Mean Quadratic Kappa: ",np.mean(essay_set_results.Kappa_Q))

### Simple FF NN : Essay level Metrics : After taking MAX score all Sentences ###
RMSE:  3.2677746724196335
Cohen Kappa:  0.1956655687947143
Accuracy:  0.2854391371340524
### Simple FF NN : Essay level Metrics : After taking Min Score of all Sentences ###
RMSE:  3.2258943648242147
Cohen Kappa:  0.21794020433411565
Accuracy:  0.32126348228043144
### Simple FF NN : Essay level Metrics : After taking Mean Score of all Sentences ###
RMSE:  2.13859176337009
Cohen Kappa:  0.2775055325740141
Cohen Kappa Quadratic Weight:  0.9708101984383357
Accuracy:  0.37057010785824346
   essay_set      RMSE     Kappa   Kappa_Q  Accuracy  Norm_RMSE
0        1.0  1.442902  0.042139  0.193314  0.245902   0.117325
1        2.0  0.901573  0.150354  0.184940  0.489305   0.154051
2        3.0  0.787249  0.063316  0.233452  0.398204   0.280280
3        4.0  0.651920  0.387496  0.725265  0.575000   0.323910
4        5.0  0.904097  0.120953  0.339377  0.408696   0.225493
5        6.0  0.863731  0.088229  0.448227  0

#### FNN 2 Layers

In [55]:
# Train the model
estimator_nn = KerasRegressor(build_fn=FF_2_NN, epochs=20, batch_size=500)
estimator_nn.fit(train_data, y_train)

# Predict for test data
prediction_nn=estimator_nn.predict(test_data)
rmse_val = RMSE(y_test,prediction_nn)
# Find the overall RMSE value. This is not very relevant considering different scale for each set.
print("RMSE: ",rmse_val)

# Construct pandas data frame of scores of each sentences
nn_df = pd.DataFrame([x_test_sentence_df['domain1_score'].astype(np.double), y_test.astype(np.double),
                      prediction_nn.astype(np.double)]).transpose()
nn_df.columns = ['Orig_Score','Norm_Score','Pred_Score']
# Find Multiplication factor using the function we defined before
nn_df['Mult_Factor'] = nn_df.apply(find_mult_factor,axis=1)
# Find the denormalized predicted score
nn_df['Denorm_Pred_Score'] = nn_df.apply(denormalize,axis=1)

# Extract the scores of each sentences
orig_score = nn_df.Orig_Score.as_matrix(columns=None)
orig_score = orig_score.astype(np.int)
pred_score = nn_df.Denorm_Pred_Score.as_matrix(columns=None)
pred_score = pred_score.astype(np.int)

# Provide the metrics at sentence level
rmse_nn = RMSE(orig_score,pred_score)
cohen_kappa = cohen_kappa_score(orig_score,pred_score,weights='quadratic')
accuracy = accuracy_score(orig_score,pred_score)
print("**** METRICS BASED ON DENORMALIZED SCORE FOR FEED FORWARD NN****")
print("**** NOTE: THIS IS STILL AT SENTENCE LEVEL. NEXT SECTION WILL MERGE SENTENCE SCORES****")
print("RMSE: ",rmse_nn)
print("Kappa Quadratic Weighting: ",cohen_kappa)
print("Accuracy: ",accuracy)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
RMSE:  0.1987534469357875
**** METRICS BASED ON DENORMALIZED SCORE FOR FEED FORWARD NN****
**** NOTE: THIS IS STILL AT SENTENCE LEVEL. NEXT SECTION WILL MERGE SENTENCE SCORES****
RMSE:  3.4676005580065636
Kappa Quadratic Weighting:  0.9615604623049535
Accuracy:  0.2816085703409647


In [56]:
# We will combine the sentences back to essay in this section

result_df = pd.DataFrame([x_test_sentence_df.essay_id.values,x_test_sentence_df.essay_set.values,
                         x_test_sentence_df.sentence.values,nn_df.Orig_Score.values,
                          nn_df.Norm_Score,nn_df.Denorm_Pred_Score.values,
                          nn_df.Pred_Score.values]).transpose()
result_df.columns = ['essay_id','essay_set','sentence','orig_score','norm_orig_score',
                     'pred_score','norm_pred_score']
nn_test_df = find_max_min_mean_score(result_df)

#Convert the dataframe parameters into integers as they are returned as floats
nn_test_df.essay_id = nn_test_df.essay_id.astype(int)
nn_test_df.essay_set = nn_test_df.essay_set.astype(int)
nn_test_df.Orig_Score = nn_test_df.Orig_Score.astype(int)
nn_test_df.Max_Score = nn_test_df.Max_Score.astype(int)
nn_test_df.Min_Score = nn_test_df.Min_Score.astype(int)
nn_test_df.Mean_Score = nn_test_df.Mean_Score.astype(int)

# Extract the scores
orig_score = nn_test_df.Orig_Score.values
max_pred_score = nn_test_df.Max_Score.values
min_pred_score = nn_test_df.Min_Score.values
mean_pred_score = nn_test_df.Mean_Score.values

# Compare the metrics if Max score of all sentences are taken
rmse_max_nn = RMSE(orig_score,max_pred_score)
cohen_kappa_max_nn = cohen_kappa_score(orig_score,max_pred_score)
accuracy_max_nn = accuracy_score(orig_score,max_pred_score)

# Compare the metrics if Min score of all sentences are taken
rmse_min_nn = RMSE(orig_score,min_pred_score)
cohen_kappa_min_nn = cohen_kappa_score(orig_score,min_pred_score)
accuracy_min_nn = accuracy_score(orig_score,min_pred_score)

# Compare the metrics if Mean score of all sentences are taken
rmse_mean_nn = RMSE(orig_score,mean_pred_score)
cohen_kappa_mean_nn = cohen_kappa_score(orig_score,mean_pred_score)
cohen_kappa_mean_q_nn = cohen_kappa_score(orig_score,mean_pred_score,weights='quadratic')
accuracy_mean_nn = accuracy_score(orig_score,mean_pred_score)

print("### Simple FF NN 2 Layers : Essay level Metrics : After taking MAX score all Sentences ###")
print("RMSE: ",rmse_max_nn)
print("Cohen Kappa: ",cohen_kappa_max_nn)
print("Accuracy: ",accuracy_max_nn)

print("### Simple FF NN 2 Layers : Essay level Metrics : After taking Min Score of all Sentences ###")
print("RMSE: ",rmse_min_nn)
print("Cohen Kappa: ",cohen_kappa_min_nn)
print("Accuracy: ",accuracy_min_nn)

print("### Simple FF NN 2 Layers : Essay level Metrics : After taking Mean Score of all Sentences ###")
print("RMSE: ",rmse_mean_nn)
print("Cohen Kappa: ",cohen_kappa_mean_nn)
print("Cohen Kappa Quadratic Weight: ",cohen_kappa_mean_q_nn)
print("Accuracy: ",accuracy_mean_nn)

# Provide per-essay-set metrics based on mean score
essay_set_results = essay_set_metrics(nn_test_df)
print(essay_set_results)
print("Mean Quadratic Kappa: ",np.mean(essay_set_results.Kappa_Q))

### Simple FF NN 2 Layers : Essay level Metrics : After taking MAX score all Sentences ###
RMSE:  3.765875486765087
Cohen Kappa:  0.12265187146196521
Accuracy:  0.20955315870570107
### Simple FF NN 2 Layers : Essay level Metrics : After taking Min Score of all Sentences ###
RMSE:  4.545947584970648
Cohen Kappa:  0.164573051079265
Accuracy:  0.26617873651771956
### Simple FF NN 2 Layers : Essay level Metrics : After taking Mean Score of all Sentences ###
RMSE:  2.138051328261422
Cohen Kappa:  0.29140433677059774
Cohen Kappa Quadratic Weight:  0.9704768417636869
Accuracy:  0.38366718027734975
   essay_set      RMSE     Kappa   Kappa_Q  Accuracy  Norm_RMSE
0        1.0  1.389854  0.047714  0.237458  0.281421   0.115911
1        2.0  0.860605  0.136477  0.235721  0.486631   0.151464
2        3.0  0.831635  0.063564  0.240894  0.389222   0.284691
3        4.0  0.670820  0.381893  0.722539  0.566667   0.314357
4        5.0  0.932505  0.189240  0.311011  0.455072   0.229856
5        6.0  0.88

#### FNN 3 Layers

In [57]:
# Train the model
estimator_nn = KerasRegressor(build_fn=FF_3_NN, epochs=20, batch_size=500)
estimator_nn.fit(train_data, y_train)

# Predict for test data
prediction_nn=estimator_nn.predict(test_data)
rmse_val = RMSE(y_test,prediction_nn)
# Find the overall RMSE value. This is not very relevant considering different scale for each set.
print("RMSE: ",rmse_val)

# Construct pandas data frame of scores of each sentences
nn_df = pd.DataFrame([x_test_sentence_df['domain1_score'].astype(np.double), y_test.astype(np.double),
                      prediction_nn.astype(np.double)]).transpose()
nn_df.columns = ['Orig_Score','Norm_Score','Pred_Score']
# Find Multiplication factor using the function we defined before
nn_df['Mult_Factor'] = nn_df.apply(find_mult_factor,axis=1)
# Find the denormalized predicted score
nn_df['Denorm_Pred_Score'] = nn_df.apply(denormalize,axis=1)

# Extract the scores of each sentences
orig_score = nn_df.Orig_Score.as_matrix(columns=None)
orig_score = orig_score.astype(np.int)
pred_score = nn_df.Denorm_Pred_Score.as_matrix(columns=None)
pred_score = pred_score.astype(np.int)

# Provide the metrics at sentence level
rmse_nn = RMSE(orig_score,pred_score)
cohen_kappa = cohen_kappa_score(orig_score,pred_score,weights='quadratic')
accuracy = accuracy_score(orig_score,pred_score)
print("**** METRICS BASED ON DENORMALIZED SCORE FOR FEED FORWARD NN****")
print("**** NOTE: THIS IS STILL AT SENTENCE LEVEL. NEXT SECTION WILL MERGE SENTENCE SCORES****")
print("RMSE: ",rmse_nn)
print("Kappa Quadratic Weighting: ",cohen_kappa)
print("Accuracy: ",accuracy)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
RMSE:  0.20030693721720727
**** METRICS BASED ON DENORMALIZED SCORE FOR FEED FORWARD NN****
**** NOTE: THIS IS STILL AT SENTENCE LEVEL. NEXT SECTION WILL MERGE SENTENCE SCORES****
RMSE:  3.5044970837010014
Kappa Quadratic Weighting:  0.9606625047370698
Accuracy:  0.28353907227146663


In [58]:
# We will combine the sentences back to essay in this section

result_df = pd.DataFrame([x_test_sentence_df.essay_id.values,x_test_sentence_df.essay_set.values,
                         x_test_sentence_df.sentence.values,nn_df.Orig_Score.values,
                          nn_df.Norm_Score,nn_df.Denorm_Pred_Score.values,
                          nn_df.Pred_Score.values]).transpose()
result_df.columns = ['essay_id','essay_set','sentence','orig_score','norm_orig_score',
                     'pred_score','norm_pred_score']
nn_test_df = find_max_min_mean_score(result_df)

#Convert the dataframe parameters into integers as they are returned as floats
nn_test_df.essay_id = nn_test_df.essay_id.astype(int)
nn_test_df.essay_set = nn_test_df.essay_set.astype(int)
nn_test_df.Orig_Score = nn_test_df.Orig_Score.astype(int)
nn_test_df.Max_Score = nn_test_df.Max_Score.astype(int)
nn_test_df.Min_Score = nn_test_df.Min_Score.astype(int)
nn_test_df.Mean_Score = nn_test_df.Mean_Score.astype(int)

# Extract the scores
orig_score = nn_test_df.Orig_Score.values
max_pred_score = nn_test_df.Max_Score.values
min_pred_score = nn_test_df.Min_Score.values
mean_pred_score = nn_test_df.Mean_Score.values

# Compare the metrics if Max score of all sentences are taken
rmse_max_nn = RMSE(orig_score,max_pred_score)
cohen_kappa_max_nn = cohen_kappa_score(orig_score,max_pred_score)
accuracy_max_nn = accuracy_score(orig_score,max_pred_score)

# Compare the metrics if Min score of all sentences are taken
rmse_min_nn = RMSE(orig_score,min_pred_score)
cohen_kappa_min_nn = cohen_kappa_score(orig_score,min_pred_score)
accuracy_min_nn = accuracy_score(orig_score,min_pred_score)

# Compare the metrics if Mean score of all sentences are taken
rmse_mean_nn = RMSE(orig_score,mean_pred_score)
cohen_kappa_mean_nn = cohen_kappa_score(orig_score,mean_pred_score)
cohen_kappa_mean_q_nn = cohen_kappa_score(orig_score,mean_pred_score,weights='quadratic')
accuracy_mean_nn = accuracy_score(orig_score,mean_pred_score)

print("### Simple FF NN : Essay level Metrics : After taking MAX score all Sentences ###")
print("RMSE: ",rmse_max_nn)
print("Cohen Kappa: ",cohen_kappa_max_nn)
print("Accuracy: ",accuracy_max_nn)

print("### Simple FF NN : Essay level Metrics : After taking Min Score of all Sentences ###")
print("RMSE: ",rmse_min_nn)
print("Cohen Kappa: ",cohen_kappa_min_nn)
print("Accuracy: ",accuracy_min_nn)

print("### Simple FF NN : Essay level Metrics : After taking Mean Score of all Sentences ###")
print("RMSE: ",rmse_mean_nn)
print("Cohen Kappa: ",cohen_kappa_mean_nn)
print("Cohen Kappa Quadratic Weight: ",cohen_kappa_mean_q_nn)
print("Accuracy: ",accuracy_mean_nn)

# Provide per-essay-set metrics based on mean score
essay_set_results = essay_set_metrics(nn_test_df)
print(essay_set_results)
print("Mean Quadratic Kappa: ",np.mean(essay_set_results.Kappa_Q))

### Simple FF NN : Essay level Metrics : After taking MAX score all Sentences ###
RMSE:  3.663630437354458
Cohen Kappa:  0.1387628988492543
Accuracy:  0.22573189522342066
### Simple FF NN : Essay level Metrics : After taking Min Score of all Sentences ###
RMSE:  4.738663383545806
Cohen Kappa:  0.14152613564101657
Accuracy:  0.24499229583975346
### Simple FF NN : Essay level Metrics : After taking Mean Score of all Sentences ###
RMSE:  2.107200217845796
Cohen Kappa:  0.2899175128646725
Cohen Kappa Quadratic Weight:  0.9711809537478122
Accuracy:  0.3825115562403698
   essay_set      RMSE     Kappa   Kappa_Q  Accuracy  Norm_RMSE
0        1.0  1.377017  0.041800  0.256676  0.275956   0.112741
1        2.0  0.827340  0.152816  0.290354  0.497326   0.148604
2        3.0  0.831635  0.061087  0.226526  0.389222   0.279158
3        4.0  0.662487  0.385855  0.729390  0.569444   0.307384
4        5.0  0.915249  0.160521  0.332653  0.437681   0.228587
5        6.0  0.889385  0.138091  0.457819  0.

### RNN GRU

#### RNN Dim=32

In [59]:
# Train the model
estimator = KerasRegressor(build_fn=GRU_32, epochs=20, batch_size=500)
estimator.fit(train_data, y_train)

# Predict for test data
prediction_rnn=estimator.predict(test_data)
rmse_val = RMSE(y_test,prediction_rnn)
# Find the overall RMSE value. This is not very relevant considering different scale for each set.
print("RMSE: ",rmse_val)

# Construct pandas data frame of scores of each sentences
rnn_df = pd.DataFrame([x_test_sentence_df['domain1_score'].astype(np.double), y_test.astype(np.double),
                      prediction_rnn.astype(np.double)]).transpose()
rnn_df.columns = ['Orig_Score','Norm_Score','Pred_Score']
# Find Multiplication factor using the function we defined before
rnn_df['Mult_Factor'] = rnn_df.apply(find_mult_factor,axis=1)
# Find the denormalized predicted score
rnn_df['Denorm_Pred_Score'] = rnn_df.apply(denormalize,axis=1)

# Extract the scores of each sentences
orig_score = rnn_df.Orig_Score.as_matrix(columns=None)
orig_score = orig_score.astype(np.int)
pred_score = rnn_df.Denorm_Pred_Score.as_matrix(columns=None)
pred_score = pred_score.astype(np.int)

# Provide the metrics at sentence level
rmse_rnn = RMSE(orig_score,pred_score)
cohen_kappa = cohen_kappa_score(orig_score,pred_score,weights='quadratic')
accuracy = accuracy_score(orig_score,pred_score)

print("**** METRICS BASED ON DENORMALIZED SCORE FOR SIMPLE RNN****")
print("**** NOTE: THIS IS STILL AT SENTENCE LEVEL. NEXT SECTION WILL MERGE SENTENCE SCORES****")
print("RMSE: ",rmse_rnn)
print("Kappa Quadratic Weighting: ",cohen_kappa)
print("Accuracy: ",accuracy)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
RMSE:  0.16365551566355543
**** METRICS BASED ON DENORMALIZED SCORE FOR SIMPLE RNN****
**** NOTE: THIS IS STILL AT SENTENCE LEVEL. NEXT SECTION WILL MERGE SENTENCE SCORES****
RMSE:  2.6811852130169274
Kappa Quadratic Weighting:  0.9766798798950704
Accuracy:  0.331013105660993


In [60]:
# We will combine the sentences back to essay in this section

result_df = pd.DataFrame([x_test_sentence_df.essay_id.values,x_test_sentence_df.essay_set.values,
                         x_test_sentence_df.sentence.values,rnn_df.Orig_Score.values,
                          rnn_df.Norm_Score,rnn_df.Denorm_Pred_Score.values,
                          rnn_df.Pred_Score.values]).transpose()
result_df.columns = ['essay_id','essay_set','sentence','orig_score','norm_orig_score',
                     'pred_score','norm_pred_score']
rnn_test_df = find_max_min_mean_score(result_df)

#Convert the dataframe parameters into integers as they are returned as floats
rnn_test_df.essay_id = rnn_test_df.essay_id.astype(int)
rnn_test_df.essay_set = rnn_test_df.essay_set.astype(int)
rnn_test_df.Orig_Score = rnn_test_df.Orig_Score.astype(int)
rnn_test_df.Max_Score = rnn_test_df.Max_Score.astype(int)
rnn_test_df.Min_Score = rnn_test_df.Min_Score.astype(int)
rnn_test_df.Mean_Score = rnn_test_df.Mean_Score.astype(int)

# Extract the scores
orig_score = rnn_test_df.Orig_Score.values
max_pred_score = rnn_test_df.Max_Score.values
min_pred_score = rnn_test_df.Min_Score.values
mean_pred_score = rnn_test_df.Mean_Score.values

# Compare the metrics if Max score of all sentences are taken
rmse_max_rnn = RMSE(orig_score,max_pred_score)
cohen_kappa_max_rnn = cohen_kappa_score(orig_score,max_pred_score)
accuracy_max_rnn = accuracy_score(orig_score,max_pred_score)

# Compare the metrics if Min score of all sentences are taken
rmse_min_rnn = RMSE(orig_score,min_pred_score)
cohen_kappa_min_rnn = cohen_kappa_score(orig_score,min_pred_score)
accuracy_min_rnn = accuracy_score(orig_score,min_pred_score)

# Compare the metrics if Mean score of all sentences are taken
rmse_mean_rnn = RMSE(orig_score,mean_pred_score)
cohen_kappa_mean_rnn = cohen_kappa_score(orig_score,mean_pred_score)
cohen_kappa_mean_q_rnn = cohen_kappa_score(orig_score,mean_pred_score,weights='quadratic')
accuracy_mean_rnn = accuracy_score(orig_score,mean_pred_score)

print("### RNN GRU Results : MAX Score of all Sentences ###")
print("RMSE: ",rmse_max_rnn)
print("Cohen Kappa: ",cohen_kappa_max_rnn)
print("Accuracy: ",accuracy_max_rnn)

print("### RNN GRU Results : Min Score of all Sentences ###")
print("RMSE: ",rmse_min_rnn)
print("Cohen Kappa: ",cohen_kappa_min_rnn)
print("Accuracy: ",accuracy_min_rnn)

print("### RNN GRU Results : Mean Score of all Sentences ###")
print("RMSE: ",rmse_mean_rnn)
print("Cohen Kappa: ",cohen_kappa_mean_rnn)
print("Cohen Kappa Quadratic: ",cohen_kappa_mean_q_rnn)
print("Accuracy: ",accuracy_mean_rnn)

# Provide per-essay-set metrics based on mean score
essay_set_results = essay_set_metrics(rnn_test_df)
print(essay_set_results)
print("Mean Quadratic Kappa: ",np.mean(essay_set_results.Kappa_Q))

### RNN GRU Results : MAX Score of all Sentences ###
RMSE:  2.6864953599349906
Cohen Kappa:  0.23309685332559504
Accuracy:  0.32550077041602465
### RNN GRU Results : Min Score of all Sentences ###
RMSE:  2.5121430822999735
Cohen Kappa:  0.2906816168095623
Accuracy:  0.38751926040061635
### RNN GRU Results : Mean Score of all Sentences ###
RMSE:  2.050123074963098
Cohen Kappa:  0.2888152272008384
Cohen Kappa Quadratic:  0.972806765680875
Accuracy:  0.38212634822804314
   essay_set      RMSE     Kappa   Kappa_Q  Accuracy  Norm_RMSE
0        1.0  1.413247  0.068420  0.207540  0.289617   0.112577
1        2.0  0.814311  0.200855  0.318971  0.524064   0.144942
2        3.0  0.777683  0.071328  0.245712  0.404192   0.262931
3        4.0  0.649786  0.394100  0.732489  0.577778   0.278861
4        5.0  0.905699  0.103213  0.337147  0.397101   0.218914
5        6.0  0.834127  0.097415  0.479666  0.478836   0.221532
6        7.0  4.157894 -0.015868  0.269119  0.062914   0.166398
7        8.0  5.

#### RNN Dim=50

In [61]:
# Train the model
estimator = KerasRegressor(build_fn=GRU_50, epochs=20, batch_size=500)
estimator.fit(train_data, y_train)

# Predict for test data
prediction_rnn=estimator.predict(test_data)
rmse_val = RMSE(y_test,prediction_rnn)
# Find the overall RMSE value. This is not very relevant considering different scale for each set.
print("RMSE: ",rmse_val)

# Construct pandas data frame of scores of each sentences
rnn_df = pd.DataFrame([x_test_sentence_df['domain1_score'].astype(np.double), y_test.astype(np.double),
                      prediction_rnn.astype(np.double)]).transpose()
rnn_df.columns = ['Orig_Score','Norm_Score','Pred_Score']
# Find Multiplication factor using the function we defined before
rnn_df['Mult_Factor'] = rnn_df.apply(find_mult_factor,axis=1)
# Find the denormalized predicted score
rnn_df['Denorm_Pred_Score'] = rnn_df.apply(denormalize,axis=1)

# Extract the scores of each sentences
orig_score = rnn_df.Orig_Score.as_matrix(columns=None)
orig_score = orig_score.astype(np.int)
pred_score = rnn_df.Denorm_Pred_Score.as_matrix(columns=None)
pred_score = pred_score.astype(np.int)

# Provide the metrics at sentence level
rmse_rnn = RMSE(orig_score,pred_score)
cohen_kappa = cohen_kappa_score(orig_score,pred_score,weights='quadratic')
accuracy = accuracy_score(orig_score,pred_score)

print("**** METRICS BASED ON DENORMALIZED SCORE FOR SIMPLE RNN****")
print("**** NOTE: THIS IS STILL AT SENTENCE LEVEL. NEXT SECTION WILL MERGE SENTENCE SCORES****")
print("RMSE: ",rmse_rnn)
print("Kappa Quadratic Weighting: ",cohen_kappa)
print("Accuracy: ",accuracy)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
RMSE:  0.16643325815132692
**** METRICS BASED ON DENORMALIZED SCORE FOR SIMPLE RNN****
**** NOTE: THIS IS STILL AT SENTENCE LEVEL. NEXT SECTION WILL MERGE SENTENCE SCORES****
RMSE:  2.6847727623524444
Kappa Quadratic Weighting:  0.9772108742704733
Accuracy:  0.32399804230790147


In [62]:
# We will combine the sentences back to essay in this section

result_df = pd.DataFrame([x_test_sentence_df.essay_id.values,x_test_sentence_df.essay_set.values,
                         x_test_sentence_df.sentence.values,rnn_df.Orig_Score.values,
                          rnn_df.Norm_Score,rnn_df.Denorm_Pred_Score.values,
                          rnn_df.Pred_Score.values]).transpose()
result_df.columns = ['essay_id','essay_set','sentence','orig_score','norm_orig_score',
                     'pred_score','norm_pred_score']
rnn_test_df = find_max_min_mean_score(result_df)

#Convert the dataframe parameters into integers as they are returned as floats
rnn_test_df.essay_id = rnn_test_df.essay_id.astype(int)
rnn_test_df.essay_set = rnn_test_df.essay_set.astype(int)
rnn_test_df.Orig_Score = rnn_test_df.Orig_Score.astype(int)
rnn_test_df.Max_Score = rnn_test_df.Max_Score.astype(int)
rnn_test_df.Min_Score = rnn_test_df.Min_Score.astype(int)
rnn_test_df.Mean_Score = rnn_test_df.Mean_Score.astype(int)

# Extract the scores
orig_score = rnn_test_df.Orig_Score.values
max_pred_score = rnn_test_df.Max_Score.values
min_pred_score = rnn_test_df.Min_Score.values
mean_pred_score = rnn_test_df.Mean_Score.values

# Compare the metrics if Max score of all sentences are taken
rmse_max_rnn = RMSE(orig_score,max_pred_score)
cohen_kappa_max_rnn = cohen_kappa_score(orig_score,max_pred_score)
accuracy_max_rnn = accuracy_score(orig_score,max_pred_score)

# Compare the metrics if Min score of all sentences are taken
rmse_min_rnn = RMSE(orig_score,min_pred_score)
cohen_kappa_min_rnn = cohen_kappa_score(orig_score,min_pred_score)
accuracy_min_rnn = accuracy_score(orig_score,min_pred_score)

# Compare the metrics if Mean score of all sentences are taken
rmse_mean_rnn = RMSE(orig_score,mean_pred_score)
cohen_kappa_mean_rnn = cohen_kappa_score(orig_score,mean_pred_score)
cohen_kappa_mean_q_rnn = cohen_kappa_score(orig_score,mean_pred_score,weights='quadratic')
accuracy_mean_rnn = accuracy_score(orig_score,mean_pred_score)

print("### RNN GRU Results : MAX Score of all Sentences ###")
print("RMSE: ",rmse_max_rnn)
print("Cohen Kappa: ",cohen_kappa_max_rnn)
print("Accuracy: ",accuracy_max_rnn)

print("### RNN GRU Results : Min Score of all Sentences ###")
print("RMSE: ",rmse_min_rnn)
print("Cohen Kappa: ",cohen_kappa_min_rnn)
print("Accuracy: ",accuracy_min_rnn)

print("### RNN GRU Results : Mean Score of all Sentences ###")
print("RMSE: ",rmse_mean_rnn)
print("Cohen Kappa: ",cohen_kappa_mean_rnn)
print("Cohen Kappa Quadratic: ",cohen_kappa_mean_q_rnn)
print("Accuracy: ",accuracy_mean_rnn)

# Provide per-essay-set metrics based on mean score
essay_set_results = essay_set_metrics(rnn_test_df)
print(essay_set_results)
print("Mean Quadratic Kappa: ",np.mean(essay_set_results.Kappa_Q))

### RNN GRU Results : MAX Score of all Sentences ###
RMSE:  2.8784438742372678
Cohen Kappa:  0.22072116917554718
Accuracy:  0.31240369799691836
### RNN GRU Results : Min Score of all Sentences ###
RMSE:  2.3368983697959584
Cohen Kappa:  0.3229052481778496
Accuracy:  0.41486902927580893
### RNN GRU Results : Mean Score of all Sentences ###
RMSE:  2.155546112864239
Cohen Kappa:  0.264022917076957
Cohen Kappa Quadratic:  0.9707118844163327
Accuracy:  0.3578582434514638
   essay_set      RMSE     Kappa   Kappa_Q  Accuracy  Norm_RMSE
0        1.0  1.483976  0.030455  0.161689  0.224044   0.119711
1        2.0  0.922099  0.054183  0.161778  0.427807   0.156690
2        3.0  0.783436  0.056695  0.230449  0.395210   0.272872
3        4.0  0.636832  0.417011  0.740798  0.594444   0.284176
4        5.0  0.930949  0.062430  0.297812  0.368116   0.228753
5        6.0  0.854493  0.100703  0.450928  0.484127   0.232807
6        7.0  4.415131 -0.000082  0.222391  0.069536   0.176454
7        8.0  5.4

#### CNN FF

In [63]:
# Train the model
estimator = KerasRegressor(build_fn=CNN_FF, epochs=20, batch_size=500)
estimator.fit(train_data, y_train)

# Precit for test data
prediction_cnn_glove=estimator.predict(test_data)
rmse_val = RMSE(y_test,prediction_cnn_glove)
print("RMSE BEFORE DENORMALIZING: ",rmse_val)

# Construct data frame for denormalizing the score
cnn_df = pd.DataFrame([x_test_sentence_df['domain1_score'].astype(np.double), y_test.astype(np.double),
                      prediction_cnn_glove.astype(np.double)]).transpose()
cnn_df.columns = ['Orig_Score','Norm_Score','Pred_Score']
cnn_df['Mult_Factor'] = cnn_df.apply(find_mult_factor,axis=1)
cnn_df['Denorm_Pred_Score'] = cnn_df.apply(denormalize,axis=1)

# Extract the scores
orig_score = cnn_df.Orig_Score.as_matrix(columns=None)
orig_score = orig_score.astype(np.int)
pred_score = cnn_df.Denorm_Pred_Score.as_matrix(columns=None)
pred_score = pred_score.astype(np.int)
rmse_cnn = RMSE(orig_score,pred_score)
cohen_kappa = cohen_kappa_score(orig_score,pred_score,weights='quadratic')
accuracy = accuracy_score(orig_score,pred_score)

print("**** METRICS BASED ON DENORMALIZED SCORE FOR SIMPLE CNN****")
print("**** NOTE: THIS IS STILL AT SENTENCE LEVEL. NEXT SECTION WILL MERGE SENTENCE SCORES****")
print("RMSE: ",rmse_cnn)
print("Kappa Quadratic Weighting: ",cohen_kappa)
print("Accuracy: ",accuracy)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
RMSE BEFORE DENORMALIZING:  0.1758869827106889
**** METRICS BASED ON DENORMALIZED SCORE FOR SIMPLE CNN****
**** NOTE: THIS IS STILL AT SENTENCE LEVEL. NEXT SECTION WILL MERGE SENTENCE SCORES****
RMSE:  2.8358339918405613
Kappa Quadratic Weighting:  0.9737685459253101
Accuracy:  0.2984392843547773


In [64]:
# We will combine the sentences back to essay in this section

result_df = pd.DataFrame([x_test_sentence_df.essay_id.values,x_test_sentence_df.essay_set.values,
                         x_test_sentence_df.sentence.values,cnn_df.Orig_Score.values,
                          cnn_df.Norm_Score,cnn_df.Denorm_Pred_Score.values,
                          cnn_df.Pred_Score.values]).transpose()
result_df.columns = ['essay_id','essay_set','sentence','orig_score','norm_orig_score',
                     'pred_score','norm_pred_score']
cnn_test_df = find_max_min_mean_score(result_df)

#Convert the dataframe parameters into integers as they are returned as floats
cnn_test_df.essay_id = cnn_test_df.essay_id.astype(int)
cnn_test_df.essay_set = cnn_test_df.essay_set.astype(int)
cnn_test_df.Orig_Score = cnn_test_df.Orig_Score.astype(int)
cnn_test_df.Max_Score = cnn_test_df.Max_Score.astype(int)
cnn_test_df.Min_Score = cnn_test_df.Min_Score.astype(int)
cnn_test_df.Mean_Score = cnn_test_df.Mean_Score.astype(int)

# Extract the scores
orig_score = cnn_test_df.Orig_Score.values
max_pred_score = cnn_test_df.Max_Score.values
min_pred_score = cnn_test_df.Min_Score.values
mean_pred_score = cnn_test_df.Mean_Score.values

# Compare the metrics if Max score of all sentences are taken
rmse_max_cnn = RMSE(orig_score,max_pred_score)
cohen_kappa_max_cnn = cohen_kappa_score(orig_score,max_pred_score)
accuracy_max_cnn = accuracy_score(orig_score,max_pred_score)

# Compare the metrics if Min score of all sentences are taken
rmse_min_cnn = RMSE(orig_score,min_pred_score)
cohen_kappa_min_cnn = cohen_kappa_score(orig_score,min_pred_score)
accuracy_min_cnn = accuracy_score(orig_score,min_pred_score)

# Compare the metrics if Mean score of all sentences are taken
rmse_mean_cnn = RMSE(orig_score,mean_pred_score)
cohen_kappa_mean_cnn = cohen_kappa_score(orig_score,mean_pred_score)
cohen_kappa_mean_q_cnn = cohen_kappa_score(orig_score,mean_pred_score,weights='quadratic')
accuracy_mean_cnn = accuracy_score(orig_score,mean_pred_score)

print("### CNN FF Results : MAX Score of all Sentences ###")
print("RMSE: ",rmse_max_cnn)
print("Cohen Kappa: ",cohen_kappa_max_cnn)
print("Accuracy: ",accuracy_max_cnn)

print("### CNN FF Results : Min Score of all Sentences ###")
print("RMSE: ",rmse_min_cnn)
print("Cohen Kappa: ",cohen_kappa_min_cnn)
print("Accuracy: ",accuracy_min_cnn)

print("### CNN FF Results : Mean Score of all Sentences ###")
print("RMSE: ",rmse_mean_cnn)
print("Cohen Kappa: ",cohen_kappa_mean_cnn)
print("Cohen Kappa Quadratic: ",cohen_kappa_mean_q_cnn)
print("Accuracy: ",accuracy_mean_cnn)

# Provide per-essay-set metrics based on mean score
essay_set_results = essay_set_metrics(cnn_test_df)
print(essay_set_results)
print("Mean Quadratic Kappa: ",np.mean(essay_set_results.Kappa_Q))

### CNN FF Results : MAX Score of all Sentences ###
RMSE:  3.058683866531078
Cohen Kappa:  0.21222482149594712
Accuracy:  0.3050847457627119
### CNN FF Results : Min Score of all Sentences ###
RMSE:  2.7251468345110523
Cohen Kappa:  0.22569578709087035
Accuracy:  0.3320493066255778
### CNN FF Results : Mean Score of all Sentences ###
RMSE:  2.2783059421644176
Cohen Kappa:  0.24075520769163195
Cohen Kappa Quadratic:  0.9664854808356727
Accuracy:  0.33705701078582434
   essay_set      RMSE     Kappa   Kappa_Q  Accuracy  Norm_RMSE
0        1.0  1.545308  0.005206  0.041260  0.191257   0.120954
1        2.0  1.015916  0.004141  0.001950  0.393048   0.162239
2        3.0  0.777683  0.055626  0.228426  0.395210   0.290574
3        4.0  0.674949  0.344481  0.708150  0.544444   0.359828
4        5.0  1.025755  0.026208  0.147858  0.339130   0.249424
5        6.0  0.863731  0.098845  0.436751  0.484127   0.245049
6        7.0  4.736431  0.011247  0.081560  0.079470   0.188964
7        8.0  5.68

#### CNN_LSTM

In [77]:
# Train the model
estimator = KerasRegressor(build_fn=CNN_lstm, epochs=20, batch_size=500)
estimator.fit(train_data, y_train)

# Precit for test data
prediction_cnn_lstm=estimator.predict(test_data)
rmse_val = RMSE(y_test,prediction_cnn_lstm)
print("RMSE BEFORE DENORMALIZING: ",rmse_val)

# Construct data frame for denormalizing the score
cnn_df = pd.DataFrame([x_test_sentence_df['domain1_score'].astype(np.double), y_test.astype(np.double),
                      prediction_cnn_lstm.astype(np.double)]).transpose()
cnn_df.columns = ['Orig_Score','Norm_Score','Pred_Score']
cnn_df['Mult_Factor'] = cnn_df.apply(find_mult_factor,axis=1)
cnn_df['Denorm_Pred_Score'] = cnn_df.apply(denormalize,axis=1)

# Extract the scores
orig_score = cnn_df.Orig_Score.as_matrix(columns=None)
orig_score = orig_score.astype(np.int)
pred_score = cnn_df.Denorm_Pred_Score.as_matrix(columns=None)
pred_score = pred_score.astype(np.int)
rmse_cnn = RMSE(orig_score,pred_score)
cohen_kappa = cohen_kappa_score(orig_score,pred_score,weights='quadratic')
accuracy = accuracy_score(orig_score,pred_score)

print("**** METRICS BASED ON DENORMALIZED SCORE FOR CNN+LSTM****")
print("**** NOTE: THIS IS STILL AT SENTENCE LEVEL. NEXT SECTION WILL MERGE SENTENCE SCORES****")
print("RMSE: ",rmse_cnn)
print("Kappa Quadratic Weighting: ",cohen_kappa)
print("Accuracy: ",accuracy)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
RMSE BEFORE DENORMALIZING:  0.1806041868547733
**** METRICS BASED ON DENORMALIZED SCORE FOR CNN+LSTM****
**** NOTE: THIS IS STILL AT SENTENCE LEVEL. NEXT SECTION WILL MERGE SENTENCE SCORES****
RMSE:  3.110587926129043
Kappa Quadratic Weighting:  0.9678594490860415
Accuracy:  0.30618848224482026


In [78]:
# We will combine the sentences back to essay in this section

result_df = pd.DataFrame([x_test_sentence_df.essay_id.values,x_test_sentence_df.essay_set.values,
                         x_test_sentence_df.sentence.values,cnn_df.Orig_Score.values,
                          cnn_df.Norm_Score,cnn_df.Denorm_Pred_Score.values,
                          cnn_df.Pred_Score.values]).transpose()
result_df.columns = ['essay_id','essay_set','sentence','orig_score','norm_orig_score',
                     'pred_score','norm_pred_score']
cnn_test_df = find_max_min_mean_score(result_df)

#Convert the dataframe parameters into integers as they are returned as floats
cnn_test_df.essay_id = cnn_test_df.essay_id.astype(int)
cnn_test_df.essay_set = cnn_test_df.essay_set.astype(int)
cnn_test_df.Orig_Score = cnn_test_df.Orig_Score.astype(int)
cnn_test_df.Max_Score = cnn_test_df.Max_Score.astype(int)
cnn_test_df.Min_Score = cnn_test_df.Min_Score.astype(int)
cnn_test_df.Mean_Score = cnn_test_df.Mean_Score.astype(int)

# Extract the scores
orig_score = cnn_test_df.Orig_Score.values
max_pred_score = cnn_test_df.Max_Score.values
min_pred_score = cnn_test_df.Min_Score.values
mean_pred_score = cnn_test_df.Mean_Score.values

# Compare the metrics if Max score of all sentences are taken
rmse_max_cnn = RMSE(orig_score,max_pred_score)
cohen_kappa_max_cnn = cohen_kappa_score(orig_score,max_pred_score)
accuracy_max_cnn = accuracy_score(orig_score,max_pred_score)

# Compare the metrics if Min score of all sentences are taken
rmse_min_cnn = RMSE(orig_score,min_pred_score)
cohen_kappa_min_cnn = cohen_kappa_score(orig_score,min_pred_score)
accuracy_min_cnn = accuracy_score(orig_score,min_pred_score)

# Compare the metrics if Mean score of all sentences are taken
rmse_mean_cnn = RMSE(orig_score,mean_pred_score)
cohen_kappa_mean_cnn = cohen_kappa_score(orig_score,mean_pred_score)
cohen_kappa_mean_q_cnn = cohen_kappa_score(orig_score,mean_pred_score,weights='quadratic')
accuracy_mean_cnn = accuracy_score(orig_score,mean_pred_score)

print("### CNN LSTM Results : MAX Score of all Sentences ###")
print("RMSE: ",rmse_max_cnn)
print("Cohen Kappa: ",cohen_kappa_max_cnn)
print("Accuracy: ",accuracy_max_cnn)

print("### CNN LSTM Results : Min Score of all Sentences ###")
print("RMSE: ",rmse_min_cnn)
print("Cohen Kappa: ",cohen_kappa_min_cnn)
print("Accuracy: ",accuracy_min_cnn)

print("### CNN LSTM Results : Mean Score of all Sentences ###")
print("RMSE: ",rmse_mean_cnn)
print("Cohen Kappa: ",cohen_kappa_mean_cnn)
print("Cohen Kappa Quadratic: ",cohen_kappa_mean_q_cnn)
print("Accuracy: ",accuracy_mean_cnn)

# Provide per-essay-set metrics based on mean score
essay_set_results = essay_set_metrics(cnn_test_df)
print(essay_set_results)
print("Mean Quadratic Kappa: ",np.mean(essay_set_results.Kappa_Q))

### CNN LSTM Results : MAX Score of all Sentences ###
RMSE:  3.2244013789688863
Cohen Kappa:  0.2216361717035582
Accuracy:  0.31278890600924497
### CNN LSTM Results : Min Score of all Sentences ###
RMSE:  3.7567094574390114
Cohen Kappa:  0.18549105763328888
Accuracy:  0.2862095531587057
### CNN LSTM Results : Mean Score of all Sentences ###
RMSE:  2.0855176017430965
Cohen Kappa:  0.3079471136540092
Cohen Kappa Quadratic:  0.971148842517859
Accuracy:  0.40177195685670264
   essay_set      RMSE     Kappa   Kappa_Q  Accuracy  Norm_RMSE
0        1.0  1.362054  0.039019  0.247982  0.333333   0.112168
1        2.0  0.745754  0.257717  0.385655  0.569519   0.143538
2        3.0  0.785345  0.065146  0.229398  0.401198   0.262401
3        4.0  0.625833  0.448674  0.749393  0.616667   0.285012
4        5.0  0.947919  0.093976  0.270812  0.394203   0.223022
5        6.0  0.788475  0.157477  0.548532  0.497354   0.219715
6        7.0  4.272583 -0.012110  0.207912  0.066225   0.170708
7        8.0 

#### Stacked LSTM

In [79]:
# Train the model
estimator_lstm = KerasRegressor(build_fn=stack_lstm, epochs=20, batch_size=500)
estimator_lstm.fit(train_data, y_train)

# Predict for test data
prediction_lstm=estimator_lstm.predict(test_data)
rmse_val = RMSE(y_test,prediction_lstm)
# Find the overall RMSE value. This is not very relevant considering different scale for each set.
print("RMSE: ",rmse_val)

# Construct pandas data frame of scores of each sentences
lstm_df = pd.DataFrame([x_test_sentence_df['domain1_score'].astype(np.double), y_test.astype(np.double),
                      prediction_lstm.astype(np.double)]).transpose()
lstm_df.columns = ['Orig_Score','Norm_Score','Pred_Score']
# Find Multiplication factor using the function we defined before
lstm_df['Mult_Factor'] = lstm_df.apply(find_mult_factor,axis=1)
# Find the denormalized predicted score
lstm_df['Denorm_Pred_Score'] = lstm_df.apply(denormalize,axis=1)

# Extract the scores of each sentences
orig_score = lstm_df.Orig_Score.as_matrix(columns=None)
orig_score = orig_score.astype(np.int)
pred_score = lstm_df.Denorm_Pred_Score.as_matrix(columns=None)
pred_score = pred_score.astype(np.int)

# Provide the metrics at sentence level
rmse_lstm = RMSE(orig_score,pred_score)
cohen_kappa = cohen_kappa_score(orig_score,pred_score,weights='quadratic')
accuracy = accuracy_score(orig_score,pred_score)

print("**** METRICS BASED ON DENORMALIZED SCORE FOR SIMPLE CNN****")
print("**** NOTE: THIS IS STILL AT SENTENCE LEVEL. NEXT SECTION WILL MERGE SENTENCE SCORES****")
print("RMSE: ",rmse_lstm)
print("Kappa Quadratic Weighting: ",cohen_kappa)
print("Accuracy: ",accuracy)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
RMSE:  0.1667374396558812
**** METRICS BASED ON DENORMALIZED SCORE FOR SIMPLE CNN****
**** NOTE: THIS IS STILL AT SENTENCE LEVEL. NEXT SECTION WILL MERGE SENTENCE SCORES****
RMSE:  2.670780538038357
Kappa Quadratic Weighting:  0.977294121470357
Accuracy:  0.32943607591494917


In [80]:
# We will combine the sentences back to essay in this section

result_df = pd.DataFrame([x_test_sentence_df.essay_id.values,x_test_sentence_df.essay_set.values,
                         x_test_sentence_df.sentence.values,lstm_df.Orig_Score.values,
                          lstm_df.Norm_Score,lstm_df.Denorm_Pred_Score.values,
                          lstm_df.Pred_Score.values]).transpose()
result_df.columns = ['essay_id','essay_set','sentence','orig_score','norm_orig_score',
                     'pred_score','norm_pred_score']
lstm_test_df = find_max_min_mean_score(result_df)

#Convert the dataframe parameters into integers as they are returned as floats
lstm_test_df.essay_id = lstm_test_df.essay_id.astype(int)
lstm_test_df.essay_set = lstm_test_df.essay_set.astype(int)
lstm_test_df.Orig_Score = lstm_test_df.Orig_Score.astype(int)
lstm_test_df.Max_Score = lstm_test_df.Max_Score.astype(int)
lstm_test_df.Min_Score = lstm_test_df.Min_Score.astype(int)
lstm_test_df.Mean_Score = lstm_test_df.Mean_Score.astype(int)

# Extract the scores
orig_score = lstm_test_df.Orig_Score.values
max_pred_score = lstm_test_df.Max_Score.values
min_pred_score = lstm_test_df.Min_Score.values
mean_pred_score = lstm_test_df.Mean_Score.values

# Compare the metrics if Max score of all sentences are taken
rmse_max_lstm = RMSE(orig_score,max_pred_score)
cohen_kappa_max_lstm = cohen_kappa_score(orig_score,max_pred_score)
accuracy_max_lstm = accuracy_score(orig_score,max_pred_score)

# Compare the metrics if Min score of all sentences are taken
rmse_min_lstm = RMSE(orig_score,min_pred_score)
cohen_kappa_min_lstm = cohen_kappa_score(orig_score,min_pred_score)
accuracy_min_lstm = accuracy_score(orig_score,min_pred_score)

# Compare the metrics if Mean score of all sentences are taken
rmse_mean_lstm = RMSE(orig_score,mean_pred_score)
cohen_kappa_mean_lstm = cohen_kappa_score(orig_score,mean_pred_score)
cohen_kappa_mean_q_lstm = cohen_kappa_score(orig_score,mean_pred_score,weights='quadratic')
accuracy_mean_lstm = accuracy_score(orig_score,mean_pred_score)

print("### Stacked LSTM Results : MAX Score of all Sentences ###")
print("RMSE: ",rmse_max_lstm)
print("Cohen Kappa: ",cohen_kappa_max_lstm)
print("Accuracy: ",accuracy_max_lstm)

print("### Stacked LSTM Results : Min Score of all Sentences ###")
print("RMSE: ",rmse_min_lstm)
print("Cohen Kappa: ",cohen_kappa_min_lstm)
print("Accuracy: ",accuracy_min_lstm)

print("### Stacked LSTM Results : Mean Score of all Sentences ###")
print("RMSE: ",rmse_mean_lstm)
print("Cohen Kappa: ",cohen_kappa_mean_lstm)
print("Cohen Kappa Quadratic: ",cohen_kappa_mean_q_lstm)
print("Accuracy: ",accuracy_mean_lstm)

# Provide per-essay-set metrics based on mean score
essay_set_results = essay_set_metrics(lstm_test_df)
print(essay_set_results)
print("Mean Quadratic Kappa: ",np.mean(essay_set_results.Kappa_Q))

### Stacked LSTM Results : MAX Score of all Sentences ###
RMSE:  2.827950413977182
Cohen Kappa:  0.20716939224632758
Accuracy:  0.3020030816640986
### Stacked LSTM Results : Min Score of all Sentences ###
RMSE:  2.4956047650414077
Cohen Kappa:  0.30119786959949246
Accuracy:  0.39599383667180277
### Stacked LSTM Results : Mean Score of all Sentences ###
RMSE:  2.0995083436495148
Cohen Kappa:  0.2767664946759322
Cohen Kappa Quadratic:  0.9720147097476838
Accuracy:  0.3701848998459168
   essay_set      RMSE     Kappa   Kappa_Q  Accuracy  Norm_RMSE
0        1.0  1.442902  0.037363  0.186567  0.243169   0.115721
1        2.0  0.862157  0.140598  0.252677  0.483957   0.150902
2        3.0  0.769944  0.094725  0.293567  0.416168   0.269017
3        4.0  0.664580  0.371431  0.730797  0.558333   0.308396
4        5.0  0.915249  0.128036  0.319211  0.414493   0.219347
5        6.0  0.843588  0.099913  0.474886  0.478836   0.236111
6        7.0  4.285352 -0.010010  0.252494  0.062914   0.169518
7

In [67]:
ff_1 = FF_1_NN()
ff_2 = FF_2_NN()
ff_3 = FF_3_NN()
ff_1.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_17 (Embedding)     (None, 175, 300)          15000000  
_________________________________________________________________
flatten_13 (Flatten)         (None, 52500)             0         
_________________________________________________________________
dense_33 (Dense)             (None, 50)                2625050   
_________________________________________________________________
dropout_17 (Dropout)         (None, 50)                0         
_________________________________________________________________
dense_34 (Dense)             (None, 1)                 51        
Total params: 17,625,101
Trainable params: 2,625,101
Non-trainable params: 15,000,000
_________________________________________________________________


In [69]:
ff_3.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_19 (Embedding)     (None, 175, 300)          15000000  
_________________________________________________________________
flatten_15 (Flatten)         (None, 52500)             0         
_________________________________________________________________
dense_38 (Dense)             (None, 50)                2625050   
_________________________________________________________________
dense_39 (Dense)             (None, 50)                2550      
_________________________________________________________________
dense_40 (Dense)             (None, 50)                2550      
_________________________________________________________________
dropout_19 (Dropout)         (None, 50)                0         
_________________________________________________________________
dense_41 (Dense)             (None, 1)                 51        
Total para

In [70]:
gru_32 = GRU_32()
gru_50 = GRU_50()


In [71]:
gru_32.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_20 (Embedding)     (None, 175, 300)          15000000  
_________________________________________________________________
gru_4 (GRU)                  (None, 32)                31968     
_________________________________________________________________
dropout_20 (Dropout)         (None, 32)                0         
_________________________________________________________________
out_layer (Dense)            (None, 1)                 33        
Total params: 15,032,001
Trainable params: 32,001
Non-trainable params: 15,000,000
_________________________________________________________________


In [72]:
gru_50.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_21 (Embedding)     (None, 175, 300)          15000000  
_________________________________________________________________
gru_5 (GRU)                  (None, 32)                31968     
_________________________________________________________________
dropout_21 (Dropout)         (None, 32)                0         
_________________________________________________________________
out_layer (Dense)            (None, 1)                 33        
Total params: 15,032,001
Trainable params: 32,001
Non-trainable params: 15,000,000
_________________________________________________________________


In [73]:
cnn_ff = CNN_FF()
cnn_lstm = CNN_lstm()
s_lstm = stack_lstm()

In [74]:
cnn_ff.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_22 (Embedding)     (None, 175, 300)          15000000  
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 171, 64)           96064     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 42, 64)            0         
_________________________________________________________________
flatten_16 (Flatten)         (None, 2688)              0         
_________________________________________________________________
dense_42 (Dense)             (None, 100)               268900    
_________________________________________________________________
dropout_22 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_43 (Dense)             (None, 1)                 101       
Total para

In [75]:
cnn_lstm.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_23 (Embedding)     (None, 175, 300)          15000000  
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 171, 64)           96064     
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 42, 64)            0         
_________________________________________________________________
lstm (LSTM)                  (None, 100)               66000     
_________________________________________________________________
dropout_23 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_44 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_45 (Dense)             (None, 1)                 101       
Total para

In [76]:
s_lstm.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_24 (Embedding)     (None, 175, 300)          15000000  
_________________________________________________________________
lstm_1 (LSTM)                (None, 175, 32)           42624     
_________________________________________________________________
lstm_2 (LSTM)                (None, 175, 32)           8320      
_________________________________________________________________
dropout_24 (Dropout)         (None, 175, 32)           0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 32)                8320      
_________________________________________________________________
dense_46 (Dense)             (None, 1)                 33        
Total params: 15,059,297
Trainable params: 59,297
Non-trainable params: 15,000,000
___________________________________________________________