In [1]:
# Imports
import json, os, re, shutil, sys, time
import seaborn as sns
from importlib import reload
import collections, itertools
import unittest
from IPython.display import display, HTML
from collections import defaultdict

# NLTK for NLP utils and corpora
import nltk
from nltk.corpus import treebank
from nltk.text import Text
from nltk.tokenize import word_tokenize
from nltk.tokenize.toktok import ToktokTokenizer
from nltk.corpus import wordnet
from nltk.corpus import stopwords 
from nltk.stem import WordNetLemmatizer

# Import spacy
import spacy

import sklearn
from sklearn.model_selection import train_test_split

# NumPy, Pandas and TensorFlow
import numpy as np
import pandas as pd
import tensorflow as tf

import unicodedata

from numpy.random import seed
from pandas import read_csv, DataFrame
from sklearn.preprocessing import minmax_scale

# Keras
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Flatten, LSTM, Dropout, Activation
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.optimizers import SGD
from keras.layers.embeddings import Embedding
import keras
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Embedding, Activation, Dropout, Conv1D, MaxPooling1D, Bidirectional, Flatten, TimeDistributed

from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence

from tensorflow.keras.models import Sequential

from sklearn.model_selection import train_test_split

## Plotly
import plotly.offline as py
import plotly.graph_objs as go
py.init_notebook_mode(connected=True)

from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import average_precision_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import cohen_kappa_score
import keras.backend as K
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from datetime import datetime


Using TensorFlow backend.


## AEG Long Essay Sentence Level Classification

In [2]:
# Read the data
aeg_long = pd.read_csv("../data-DNC/AEG/training_set_rel3.tsv",sep='\t',encoding = "latin1")
aeg_long.head(5)

Unnamed: 0,essay_id,essay_set,essay,rater1_domain1,rater2_domain1,rater3_domain1,domain1_score,rater1_domain2,rater2_domain2,domain2_score,...,rater2_trait3,rater2_trait4,rater2_trait5,rater2_trait6,rater3_trait1,rater3_trait2,rater3_trait3,rater3_trait4,rater3_trait5,rater3_trait6
0,1,1,"Dear local newspaper, I think effects computer...",4,4,,8,,,,...,,,,,,,,,,
1,2,1,"Dear @CAPS1 @CAPS2, I believe that using compu...",5,4,,9,,,,...,,,,,,,,,,
2,3,1,"Dear, @CAPS1 @CAPS2 @CAPS3 More and more peopl...",4,3,,7,,,,...,,,,,,,,,,
3,4,1,"Dear Local Newspaper, @CAPS1 I have found that...",5,5,,10,,,,...,,,,,,,,,,
4,5,1,"Dear @LOCATION1, I know having computers has a...",4,4,,8,,,,...,,,,,,,,,,


In [3]:
# Split the data into train and test. We need to do this first to ensure that when we split to 
# sentence level, we have sentences of a given essay in either training or test but not on both.

tr_essay,ts_essay,tr_domain_score,ts_domain_score,tr_essay_id,ts_essay_id,tr_essay_set,ts_essay_set=train_test_split(
    np.asarray(aeg_long.essay),np.asarray(aeg_long.domain1_score),
    np.asarray(aeg_long.essay_id),np.asarray(aeg_long.essay_set),test_size=0.2)

In [4]:
# Check the shapes of training and test datasets
print(tr_essay.shape)
print(ts_essay.shape)

(10380,)
(2596,)


In [5]:
# Create train and test data frames with relevant fields

x_train_df = pd.DataFrame([tr_essay_id,tr_essay_set,tr_essay,tr_domain_score.astype(np.double)]).transpose()
x_train_df.columns = ['essay_id','essay_set','essay','domain1_score']
x_test_df = pd.DataFrame([ts_essay_id,ts_essay_set,ts_essay,ts_domain_score.astype(np.double)]).transpose()
x_test_df.columns = ['essay_id','essay_set','essay','domain1_score']
#print(x_train_df.head(5))
#print(x_test_df.head(5))

#### Split essay into sentences

In [6]:
# We will use spacy to split the essay into sentences. 
# Load spacy large english module
nlp = spacy.load('en_core_web_lg')

In [7]:
# Define a function to split essay into sentences
def create_sentences_df(df):
    """ Function to split essay data into individual sentences. Returns a dataframe"""
    start=datetime.now()
    aeg_long_sentence = pd.DataFrame(columns=['essay_id','essay_set','sentence','domain1_score'])
    for i in range(len(df)):
        if i%1000 == 0:
            print("At iteration :",i)
            print("Duration: ",datetime.now()-start)
        sentence = nlp(df.essay[i])
        for s in sentence.sents:
            aeg_long_sentence = aeg_long_sentence.append({'essay_id' : df.essay_id[i],
                                                          'essay_set' : df.essay_set[i],'sentence' : s.text, 
                                                          'domain1_score' : df.domain1_score[i]},
                                                         ignore_index=True)
    return aeg_long_sentence

In [8]:
# Split train data into sentences
x_train_sentence_df = create_sentences_df(x_train_df)

At iteration : 0
Duration:  0:00:00.004551
At iteration : 1000
Duration:  0:01:45.409653
At iteration : 2000
Duration:  0:04:20.453870
At iteration : 3000
Duration:  0:07:43.922045
At iteration : 4000
Duration:  0:11:32.393720
At iteration : 5000
Duration:  0:16:11.581768
At iteration : 6000
Duration:  0:21:47.227340
At iteration : 7000
Duration:  0:28:03.155920
At iteration : 8000
Duration:  0:35:43.530792
At iteration : 9000
Duration:  0:44:05.430469
At iteration : 10000
Duration:  0:53:15.997550


In [9]:
# Split test data into sentences
x_test_sentence_df = create_sentences_df(x_test_df)

At iteration : 0
Duration:  0:00:00.003777
At iteration : 1000
Duration:  0:01:42.543824
At iteration : 2000
Duration:  0:04:14.396282


#### Normalize the scores

In [10]:
# Each essay set has a different scoring range. We need to normalize the scores to a standard scale for training.
def normalize_score(essay):
    """ Normalizes the domain score based on percentage"""
    score = 0
    score = float(essay[3])
    essay_set = essay[1]
    if essay_set == 1:
        div = 12
    elif essay_set == 2:
        div = 5
    elif essay_set == 3:
        div = 3
    elif essay_set == 4:
        div = 3
    elif essay_set == 5:
        div = 4
    elif essay_set == 6:
        div = 4
    elif essay_set == 7:
        div = 25
    elif essay_set == 8:
        div = 50
    return score/div

In [11]:
x_train_sentence_df['Norm_Score'] = x_train_sentence_df.apply(normalize_score,axis=1)
x_test_sentence_df['Norm_Score'] = x_test_sentence_df.apply(normalize_score,axis=1)

In [12]:
x_train_sentence_df.head(5)

Unnamed: 0,essay_id,essay_set,sentence,domain1_score,Norm_Score
0,159,1,"Imagine standing outside, the warm sun dancing...",10.0,0.833333
1,159,1,"But no, that won't happen.",10.0,0.833333
2,159,1,Because children and their parents are spendin...,10.0,0.833333
3,159,1,"People aren't getting enough excercise, it can...",10.0,0.833333
4,159,1,I think people need to realize that computers ...,10.0,0.833333


In [14]:
# Store this dataset in pickle format so that we don't have to redo the above steps.
import pickle
x_train_sentence_df.to_pickle("./x_train_sentence_df.pkl")
x_test_sentence_df.to_pickle("./x_test_sentence_df.pkl")

In [15]:
# Create train and test text and labels

x_train = x_train_sentence_df['sentence'].values
y_train = x_train_sentence_df['Norm_Score'].values
x_test = x_test_sentence_df['sentence'].values
y_test = x_test_sentence_df['Norm_Score'].values
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(147026,)
(147026,)
(36875,)
(36875,)


In [16]:
vocabulary_size = 50000
tokenizer = Tokenizer(num_words= vocabulary_size)
tokenizer.fit_on_texts(x_train)
train_seq = tokenizer.texts_to_sequences(x_train)
train_data = pad_sequences(train_seq)

In [17]:
max_len_class = train_data.shape[1]
max_words_class = vocabulary_size

In [18]:
test_seq = tokenizer.texts_to_sequences(x_test)
test_data = pad_sequences(test_seq, maxlen=max_len_class)
test_data.shape

(36875, 175)

In [20]:
embeddings_index = dict()
f = open('/home/pkurapati/W266-NLP-Project/data-DNC/glove.42B.300d.txt')
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()
print('Loaded %s word vectors.' % len(embeddings_index))

Loaded 1917494 word vectors.


In [21]:
embedding_matrix = np.zeros((vocabulary_size, 300))
for word, index in tokenizer.word_index.items():
    if index > vocabulary_size - 1:
        break
    else:
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[index] = embedding_vector

In [22]:
def RMSE(actual, predict):
    diff = actual - predict
    diff = sum(diff**2) / len(actual)
    return np.sqrt(diff)

In [23]:
#Cohen Kappa score as defined by the kaggle challenge/wikipedia
def CohenKappa(actual, predict):
    CohenDF = pd.DataFrame([actual.astype(np.double).round(), np.around(predict.astype(np.double))]).transpose()
    count = len(CohenDF)
    CohenDF.columns = ['actual','predict']
    correct = len(CohenDF[CohenDF.actual==CohenDF.predict])
    acc = correct / count
    pe = 0
    for value in CohenDF.actual.unique():
        pe += len(CohenDF[CohenDF.actual == value]) * len(CohenDF[CohenDF.predict == value])
    pe = pe / np.square(count)
    return(1 - (1-acc)/(1-pe))

In [67]:
#model definitions

def CNN_lstm():
    """ CNN with single layer LSTM"""
    model_conv = Sequential()
    model_conv.add(tf.keras.layers.Embedding(max_words_class, 300, input_length=max_len_class, weights=[embedding_matrix], trainable=False))
    model_conv.add(tf.keras.layers.Dropout(0.1))
    model_conv.add(tf.keras.layers.Conv1D(64, 5, activation='relu'))
    model_conv.add(tf.keras.layers.MaxPooling1D(pool_size=4))
    model_conv.add(tf.keras.layers.LSTM(100))
    model_conv.add(tf.keras.layers.Dense(100))
    model_conv.add(tf.keras.layers.Dense(1, kernel_initializer='normal'))
    #sgd = SGD(lr = 0.1, momentum = 0.9, decay = 0, nesterov = False)
    model_conv.compile(loss = 'mse', optimizer = tf.train.AdamOptimizer(), metrics = ['accuracy'])
    #model_conv.compile(optimizer=tf.train.AdamOptimizer(),loss='mse',metrics=['accuracy'])
    return model_conv

def stack_lstm():
    """ Stacked LSTM"""
    model_conv = Sequential()
    model_conv.add(tf.keras.layers.Embedding(max_words_class, 300, input_length=max_len_class, weights=[embedding_matrix], trainable=False))
    #model_conv.add(tf.keras.layers.Dropout(0.1))
    #model_conv.add(tf.keras.layers.Conv1D(64, 5, activation='relu'))
    #model_conv.add(tf.keras.layers.MaxPooling1D(pool_size=4))
    #model_conv.add(tf.keras.layers.Dropout(0.2))
    model_conv.add(tf.keras.layers.LSTM(32,return_sequences=True))
    model_conv.add(tf.keras.layers.LSTM(32, return_sequences=True))
    model_conv.add(tf.keras.layers.Dropout(0.2))
    model_conv.add(tf.keras.layers.LSTM(32))
    model_conv.add(tf.keras.layers.Dense(1, kernel_initializer='normal'))
    #sgd = SGD(lr = 0.1, momentum = 0.9, decay = 0, nesterov = False)
    model_conv.compile(loss = 'mse', optimizer = tf.train.AdamOptimizer(), metrics = ['accuracy'])
    #model_conv.compile(optimizer=tf.train.AdamOptimizer(),loss='mse',metrics=['accuracy'])
    return model_conv

def stateful_stacked_lstm():
    # In stateful, total samples needs to be divisible by batch size
    # we have 147026 samples, so selecting 6683 (6683*22=147026)
    batch_size=6683
    model_conv = Sequential()
    # In stateful, we have to pass batch_input_shape to the first layer
    model_conv.add(tf.keras.layers.Embedding(max_words_class, 300, input_length=max_len_class, weights=[embedding_matrix], 
                                             trainable=False,batch_input_shape=(batch_size,max_len_class)))
    model_conv.add(tf.keras.layers.LSTM(32,stateful=True,return_sequences=True))
    model_conv.add(tf.keras.layers.Dropout(0.1))
    model_conv.add(tf.keras.layers.LSTM(32))
    model_conv.add(tf.keras.layers.Dropout(0.1))
    model_conv.add(tf.keras.layers.Dense(100))
    model_conv.add(tf.keras.layers.Dense(1, kernel_initializer='normal'))
    #sgd = SGD(lr = 0.1, momentum = 0.9, decay = 0, nesterov = False)
    model_conv.compile(loss = 'mse', optimizer = tf.train.AdamOptimizer(), metrics = ['accuracy'])
    #model_conv.compile(optimizer=tf.train.AdamOptimizer(),loss='mse',metrics=['accuracy'])
    return model_conv

#bidirectional RNN is screwed up atm
def Bidirectional_RNN():
    inputs = Input(name='inputs',shape=[max_len_class])
    layer = Embedding(max_words,200,input_length=max_len_class)(inputs)
    layer = Bidirectional(LSTM(10))(layer)
    layer = Dense(32,name='FC1')(layer)
    layer = Activation('relu')(layer)
    layer = Dropout(0.5)(layer)
    layer = Dense(1,name='out_layer')(layer)
    model = Model(inputs=inputs,outputs=layer)
    return model

def feedforward_NN():
    model_ff = Sequential()
    model_ff.add(tf.keras.layers.Embedding(max_words_class,200,input_length=max_len_class))
    model_ff.add(tf.keras.layers.Flatten())
    model_ff.add(tf.keras.layers.Dense(50,activation='tanh'))
    model_ff.add(tf.keras.layers.Dense(1,activation='sigmoid'))
    model_ff.compile(optimizer=tf.train.AdamOptimizer(),loss='mse',metrics=['accuracy'])
    return model_ff

def RNN():
    model_rnn = Sequential()
    model_rnn.add(tf.keras.layers.Embedding(max_words_class,200,input_length=max_len_class))
    model_rnn.add(tf.keras.layers.LSTM(10))
    model_rnn.add(tf.keras.layers.Dense(1,name='out_layer'))
    model_rnn.compile(optimizer=tf.train.AdamOptimizer(),loss='mse',metrics=['accuracy'])
    return model_rnn

In [26]:
estimator = KerasRegressor(build_fn=CNN_lstm, epochs=20, batch_size=500)
#kfold = KFold(n_splits=5, random_state=43)
#results = np.sqrt(-1*cross_val_score(estimator, train_data, train_y_sentence,scoring= "neg_mean_squared_error", cv=kfold))
#print("Training RMSE mean and std from CV: {} {}".format(results.mean(),results.std()))

In [27]:
estimator.fit(train_data, y_train)
prediction_cnn_glove=estimator.predict(test_data)
rmse_val = RMSE(y_test,prediction_cnn_glove)
print("RMSE: ",rmse_val)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
RMSE:  0.17643804092942159


In [29]:
# Define function to get the multiplication factor. We can get it back from the essay set, but
# it is better to do it from the score, because there are scores with value 0, and its MF should be 0

def find_mult_factor(x):
    """ Function to find the multiplication factor for denormalizing"""
    if x[1] == 0:
        return 0
    else:
        return np.around(x[0]/x[1])

In [30]:
cnn_df = pd.DataFrame([x_test_sentence_df['domain1_score'].astype(np.double), y_test.astype(np.double),
                      prediction_cnn_glove.astype(np.double)]).transpose()
cnn_df.columns = ['Orig_Score','Norm_Score','Pred_Score']
cnn_df.head(10)

Unnamed: 0,Orig_Score,Norm_Score,Pred_Score
0,9.0,0.75,0.709597
1,9.0,0.75,0.732755
2,9.0,0.75,0.73898
3,9.0,0.75,0.665324
4,9.0,0.75,0.712178
5,9.0,0.75,0.695024
6,9.0,0.75,0.660436
7,9.0,0.75,0.75196
8,9.0,0.75,0.727825
9,9.0,0.75,0.757067


In [31]:
cnn_df['Mult_Factor'] = cnn_df.apply(find_mult_factor,axis=1)
cnn_df.head(10)

Unnamed: 0,Orig_Score,Norm_Score,Pred_Score,Mult_Factor
0,9.0,0.75,0.709597,12.0
1,9.0,0.75,0.732755,12.0
2,9.0,0.75,0.73898,12.0
3,9.0,0.75,0.665324,12.0
4,9.0,0.75,0.712178,12.0
5,9.0,0.75,0.695024,12.0
6,9.0,0.75,0.660436,12.0
7,9.0,0.75,0.75196,12.0
8,9.0,0.75,0.727825,12.0
9,9.0,0.75,0.757067,12.0


In [32]:
def denormalize(x):
    return np.around(x[2] * x[3])
cnn_df['Denorm_Pred_Score'] = cnn_df.apply(denormalize,axis=1)
cnn_df.head(10)

Unnamed: 0,Orig_Score,Norm_Score,Pred_Score,Mult_Factor,Denorm_Pred_Score
0,9.0,0.75,0.709597,12.0,9.0
1,9.0,0.75,0.732755,12.0,9.0
2,9.0,0.75,0.73898,12.0,9.0
3,9.0,0.75,0.665324,12.0,8.0
4,9.0,0.75,0.712178,12.0,9.0
5,9.0,0.75,0.695024,12.0,8.0
6,9.0,0.75,0.660436,12.0,8.0
7,9.0,0.75,0.75196,12.0,9.0
8,9.0,0.75,0.727825,12.0,9.0
9,9.0,0.75,0.757067,12.0,9.0


In [33]:
orig_score = cnn_df.Orig_Score.as_matrix(columns=None)
orig_score = orig_score.astype(np.int)
pred_score = cnn_df.Denorm_Pred_Score.as_matrix(columns=None)
pred_score = pred_score.astype(np.int)
rmse_cnn = RMSE(orig_score,pred_score)
print("RMSE: ",rmse_cnn)

RMSE:  3.215597580016842



Method .as_matrix will be removed in a future version. Use .values instead.


Method .as_matrix will be removed in a future version. Use .values instead.



In [34]:
cohen_kappa = cohen_kappa_score(orig_score,pred_score)
print("Cohen Kappa: ",cohen_kappa)

Cohen Kappa:  0.24919366467431903


In [35]:
accuracy = accuracy_score(orig_score,pred_score)
print("Accuracy: ",accuracy)

Accuracy:  0.3097220338983051


In [46]:
estimator_lstm = KerasRegressor(build_fn=stack_lstm, epochs=100, batch_size=5000)
estimator_lstm.fit(train_data, y_train)
prediction_lstm=estimator_lstm.predict(test_data)
rmse_val_lstm = RMSE(y_test,prediction_lstm)
print("RMSE: ",rmse_val_lstm)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
RMSE:  0.16791693140939992


In [47]:
s_lstm_df = pd.DataFrame([x_test_sentence_df['domain1_score'].astype(np.double), y_test.astype(np.double),
                      prediction_lstm.astype(np.double)]).transpose()
s_lstm_df.columns = ['Orig_Score','Norm_Score','Pred_Score']

In [48]:
s_lstm_df['Mult_Factor'] = s_lstm_df.apply(find_mult_factor,axis=1)
s_lstm_df.head(10)

Unnamed: 0,Orig_Score,Norm_Score,Pred_Score,Mult_Factor
0,9.0,0.75,0.697532,12.0
1,9.0,0.75,0.775524,12.0
2,9.0,0.75,0.740411,12.0
3,9.0,0.75,0.737734,12.0
4,9.0,0.75,0.691238,12.0
5,9.0,0.75,0.743685,12.0
6,9.0,0.75,0.722204,12.0
7,9.0,0.75,0.779992,12.0
8,9.0,0.75,0.707823,12.0
9,9.0,0.75,0.765121,12.0


In [49]:
s_lstm_df['Denorm_Pred_Score'] = cnn_df.apply(denormalize,axis=1)
s_lstm_df.head(10)

Unnamed: 0,Orig_Score,Norm_Score,Pred_Score,Mult_Factor,Denorm_Pred_Score
0,9.0,0.75,0.697532,12.0,9.0
1,9.0,0.75,0.775524,12.0,9.0
2,9.0,0.75,0.740411,12.0,9.0
3,9.0,0.75,0.737734,12.0,8.0
4,9.0,0.75,0.691238,12.0,9.0
5,9.0,0.75,0.743685,12.0,8.0
6,9.0,0.75,0.722204,12.0,8.0
7,9.0,0.75,0.779992,12.0,9.0
8,9.0,0.75,0.707823,12.0,9.0
9,9.0,0.75,0.765121,12.0,9.0


In [54]:
orig_score = s_lstm_df.Orig_Score.as_matrix(columns=None)
orig_score = orig_score.astype(np.int)
pred_score = s_lstm_df.Denorm_Pred_Score.as_matrix(columns=None)
pred_score = pred_score.astype(np.int)
rmse_lstm = RMSE(orig_score,pred_score)
cohen_kappa = cohen_kappa_score(orig_score,pred_score)
accuracy = accuracy_score(orig_score,pred_score)
print("### Stacked LSTM ###")
print("RMSE : ",rmse_lstm)
print("Cohen Kappa: ",cohen_kappa)
print("Accuracy: ",accuracy)

### Stacked LSTM ###
RMSE :  3.215597580016842
Cohen Kappa:  0.24919366467431903
Accuracy:  0.3097220338983051



Method .as_matrix will be removed in a future version. Use .values instead.


Method .as_matrix will be removed in a future version. Use .values instead.



In [51]:
cohen_kappa = cohen_kappa_score(orig_score,pred_score)
print("Cohen Kappa: ",cohen_kappa)

Cohen Kappa:  0.24919366467431903


In [52]:
accuracy = accuracy_score(orig_score,pred_score)
print("Accuracy: ",accuracy)

Accuracy:  0.3097220338983051


In [68]:
# Ignore this.. For stateful LSTM, our test sample should be a multiple of batch_size
# Need to run this again

estimator_stateful_lstm = KerasRegressor(build_fn=stateful_stacked_lstm, epochs=100, batch_size=6683)
estimator_stateful_lstm.fit(train_data, y_train)
prediction_stateful_lstm=estimator_stateful_lstm.predict(test_data)
rmse_stateful_lstm = RMSE(y_test,prediction_stateful_lstm)
print("RMSE: ",rmse_stateful_lstm)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


InvalidArgumentError: Incompatible shapes: [3460,32] vs. [6683,32]
	 [[{{node lstm_14/while/add_3}} = Add[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"](lstm_14/while/BiasAdd_1, lstm_14/while/MatMul_5)]]
	 [[{{node dense_9/BiasAdd/_857}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_536_dense_9/BiasAdd", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

### Calculating results per essay

#### Stacked LSTM

In [70]:
result_df = pd.DataFrame([x_test_sentence_df.essay_id.values,x_test_sentence_df.essay_set.values,
                         x_test_sentence_df.sentence.values,s_lstm_df.Orig_Score.values,
                          s_lstm_df.Denorm_Pred_Score.values]).transpose()
result_df.columns = ['essay_id','essay_set','sentence','orig_score','pred_score']
result_df.head(10)

Unnamed: 0,essay_id,essay_set,sentence,orig_score,pred_score
0,1722,1,"Dear local newspaper, @CAPS1 people are talkin...",9,9
1,1722,1,People believe that using the computer is a wa...,9,9
2,1722,1,I believe that computers aren't a bad thing.,9,9
3,1722,1,Using the computers can help us learn about ot...,9,8
4,1722,1,It can also show us the lives about people fro...,9,9
5,1722,1,We can also talk to our friends online.,9,8
6,1722,1,"If you want to know why computers are useful, ...",9,8
7,1722,1,People say that computers waste our lives.,9,9
8,1722,1,I disagree with that opinion.,9,9
9,1722,1,The internet is a great way to learn about dif...,9,9


In [71]:
def find_max_min_mean_score(df):
    new_df = pd.DataFrame(columns=['essay_id','essay_set','Orig_Score','Max_Score','Min_Score','Mean_Score'])
    essay_ids = np.unique(df.essay_id)
    for e_id in essay_ids:
        df_temp = df[df.essay_id == e_id]
        max_score = np.max(df_temp.pred_score)
        min_score = np.min(df_temp.pred_score)
        # we need to round the mean so that kappa score doesnt complain
        mean_score = np.around(np.mean(df_temp.pred_score))
        new_df = new_df.append({'essay_id':e_id,'essay_set':int(np.unique(df_temp.essay_set)),
                                'Orig_Score':int(np.unique(df_temp.orig_score)),'Max_Score':max_score,
                                'Min_Score':min_score,'Mean_Score':mean_score},ignore_index=True)
    return new_df

In [72]:
s_lstm_test_df = find_max_min_mean_score(result_df)
s_lstm_test_df.essay_id = s_lstm_test_df.essay_id.astype(int)
s_lstm_test_df.essay_set = s_lstm_test_df.essay_set.astype(int)
s_lstm_test_df.Orig_Score = s_lstm_test_df.Orig_Score.astype(int)
s_lstm_test_df.Max_Score = s_lstm_test_df.Max_Score.astype(int)
s_lstm_test_df.Min_Score = s_lstm_test_df.Min_Score.astype(int)
s_lstm_test_df.Mean_Score = s_lstm_test_df.Mean_Score.astype(int)

print(s_lstm_test_df.shape)
s_lstm_test_df.head(10)

(2596, 6)


Unnamed: 0,essay_id,essay_set,Orig_Score,Max_Score,Min_Score,Mean_Score
0,2,1,9,10,7,9
1,5,1,8,11,6,9
2,9,1,9,10,7,9
3,14,1,6,9,6,8
4,25,1,8,10,8,9
5,31,1,10,10,8,9
6,33,1,6,9,7,8
7,38,1,8,10,8,9
8,39,1,10,10,7,8
9,41,1,2,9,8,9


In [77]:
orig_score = s_lstm_test_df.Orig_Score.values
max_pred_score = s_lstm_test_df.Max_Score.values
min_pred_score = s_lstm_test_df.Min_Score.values
mean_pred_score = s_lstm_test_df.Mean_Score.values

rmse_max_s_lstm = RMSE(orig_score,max_pred_score)
cohen_kappa_max_s_lstm = cohen_kappa_score(orig_score,max_pred_score)
accuracy_max_s_lstm = accuracy_score(orig_score,max_pred_score)

rmse_min_s_lstm = RMSE(orig_score,min_pred_score)
cohen_kappa_min_s_lstm = cohen_kappa_score(orig_score,min_pred_score)
accuracy_min_s_lstm = accuracy_score(orig_score,min_pred_score)

rmse_mean_s_lstm = RMSE(orig_score,mean_pred_score)
cohen_kappa_mean_s_lstm = cohen_kappa_score(orig_score,mean_pred_score)
accuracy_mean_s_lstm = accuracy_score(orig_score,mean_pred_score)

print("### Stacked_LSTM Results : MAX Score for all Sentence ###")
print("RMSE: ",rmse_max_s_lstm)
print("Cohen Kappa: ",cohen_kappa_max_s_lstm)
print("Accuracy: ",accuracy_max_s_lstm)

print("### Stacked_LSTM Results : Min Score for all Sentence ###")
print("RMSE: ",rmse_min_s_lstm)
print("Cohen Kappa: ",cohen_kappa_min_s_lstm)
print("Accuracy: ",accuracy_min_s_lstm)

print("### Stacked_LSTM Results : Mean Score for all Sentence ###")
print("RMSE: ",rmse_mean_s_lstm)
print("Cohen Kappa: ",cohen_kappa_mean_s_lstm)
print("Accuracy: ",accuracy_mean_s_lstm)

### Stacked_LSTM Results : MAX Score for all Sentence ###
RMSE:  3.1274913027854225
Cohen Kappa:  0.19453306187010388
Accuracy:  0.2842835130970724
### Stacked_LSTM Results : Min Score for all Sentence ###
RMSE:  3.4656580651219975
Cohen Kappa:  0.211439054050241
Accuracy:  0.31163328197226503
### Stacked_LSTM Results : Mean Score for all Sentence ###
RMSE:  2.2545109719516185
Cohen Kappa:  0.2981747205219023
Accuracy:  0.3882896764252696


In [75]:
# Since Mean Score is giving us best result, let us consider this as the main parameter 
# and calculate the per essay set scores

def essay_set_metrics(df):
    set_df = pd.DataFrame(columns=['essay_set','RMSE','Kappa','Accuracy'])
    e_sets = np.unique(df.essay_set)
    for e_s in e_sets:
        df_s = df[df.essay_set == e_s]
        original_score = df_s.Orig_Score.values.astype(int)
        predicted_score = df_s.Mean_Score.values.astype(int)
        rmse = RMSE(original_score,predicted_score)
        kappa = cohen_kappa_score(original_score,predicted_score)
        accuracy = accuracy_score(original_score,predicted_score)
        set_df = set_df.append({'essay_set':e_s,'RMSE':rmse,'Kappa':kappa,'Accuracy':accuracy},
                              ignore_index=True)
    return set_df

In [76]:
essay_set_results = essay_set_metrics(s_lstm_test_df)
essay_set_results

Unnamed: 0,essay_set,RMSE,Kappa,Accuracy
0,1.0,1.434505,0.082459,0.300578
1,2.0,0.877058,0.124204,0.46978
2,3.0,0.771269,0.112052,0.424437
3,4.0,0.628014,0.410937,0.605598
4,5.0,0.931051,0.121352,0.413598
5,6.0,0.833238,0.130024,0.511429
6,7.0,4.714671,-0.004672,0.084375
7,8.0,5.140174,0.021376,0.069182


#### CNN+LSTM

In [78]:
result_df = pd.DataFrame([x_test_sentence_df.essay_id.values,x_test_sentence_df.essay_set.values,
                         x_test_sentence_df.sentence.values,cnn_df.Orig_Score.values,
                          cnn_df.Denorm_Pred_Score.values]).transpose()
result_df.columns = ['essay_id','essay_set','sentence','orig_score','pred_score']
result_df.head(10)

Unnamed: 0,essay_id,essay_set,sentence,orig_score,pred_score
0,1722,1,"Dear local newspaper, @CAPS1 people are talkin...",9,9
1,1722,1,People believe that using the computer is a wa...,9,9
2,1722,1,I believe that computers aren't a bad thing.,9,9
3,1722,1,Using the computers can help us learn about ot...,9,8
4,1722,1,It can also show us the lives about people fro...,9,9
5,1722,1,We can also talk to our friends online.,9,8
6,1722,1,"If you want to know why computers are useful, ...",9,8
7,1722,1,People say that computers waste our lives.,9,9
8,1722,1,I disagree with that opinion.,9,9
9,1722,1,The internet is a great way to learn about dif...,9,9


In [79]:
cnn_test_df = find_max_min_mean_score(result_df)
cnn_test_df.essay_id = cnn_test_df.essay_id.astype(int)
cnn_test_df.essay_set = cnn_test_df.essay_set.astype(int)
cnn_test_df.Orig_Score = cnn_test_df.Orig_Score.astype(int)
cnn_test_df.Max_Score = cnn_test_df.Max_Score.astype(int)
cnn_test_df.Min_Score = cnn_test_df.Min_Score.astype(int)
cnn_test_df.Mean_Score = cnn_test_df.Mean_Score.astype(int)

print(cnn_test_df.shape)
cnn_test_df.head(10)

(2596, 6)


Unnamed: 0,essay_id,essay_set,Orig_Score,Max_Score,Min_Score,Mean_Score
0,2,1,9,10,7,9
1,5,1,8,11,6,9
2,9,1,9,10,7,9
3,14,1,6,9,6,8
4,25,1,8,10,8,9
5,31,1,10,10,8,9
6,33,1,6,9,7,8
7,38,1,8,10,8,9
8,39,1,10,10,7,8
9,41,1,2,9,8,9


In [80]:
orig_score = cnn_test_df.Orig_Score.values
max_pred_score = cnn_test_df.Max_Score.values
min_pred_score = cnn_test_df.Min_Score.values
mean_pred_score = cnn_test_df.Mean_Score.values

rmse_max_cnn = RMSE(orig_score,max_pred_score)
cohen_kappa_max_cnn = cohen_kappa_score(orig_score,max_pred_score)
accuracy_max_cnn = accuracy_score(orig_score,max_pred_score)

rmse_min_cnn = RMSE(orig_score,min_pred_score)
cohen_kappa_min_cnn = cohen_kappa_score(orig_score,min_pred_score)
accuracy_min_cnn = accuracy_score(orig_score,min_pred_score)

rmse_mean_cnn = RMSE(orig_score,mean_pred_score)
cohen_kappa_mean_cnn = cohen_kappa_score(orig_score,mean_pred_score)
accuracy_mean_cnn = accuracy_score(orig_score,mean_pred_score)

print("### CNN_LSTM Results : MAX Score of all Sentences ###")
print("RMSE: ",rmse_max_cnn)
print("Cohen Kappa: ",cohen_kappa_max_cnn)
print("Accuracy: ",accuracy_max_cnn)

print("### CNN_LSTM Results : Min Score of all Sentences ###")
print("RMSE: ",rmse_min_cnn)
print("Cohen Kappa: ",cohen_kappa_min_cnn)
print("Accuracy: ",accuracy_min_cnn)

print("### CNN_LSTM Results : Mean Score of all Sentences ###")
print("RMSE: ",rmse_mean_cnn)
print("Cohen Kappa: ",cohen_kappa_mean_cnn)
print("Accuracy: ",accuracy_mean_cnn)

### CNN_LSTM Results : MAX Score of all Sentences ###
RMSE:  3.1274913027854225
Cohen Kappa:  0.19453306187010388
Accuracy:  0.2842835130970724
### CNN_LSTM Results : Min Score of all Sentences ###
RMSE:  3.4656580651219975
Cohen Kappa:  0.211439054050241
Accuracy:  0.31163328197226503
### CNN_LSTM Results : Mean Score of all Sentences ###
RMSE:  2.2545109719516185
Cohen Kappa:  0.2981747205219023
Accuracy:  0.3882896764252696


In [81]:
essay_set_results = essay_set_metrics(cnn_test_df)
essay_set_results

Unnamed: 0,essay_set,RMSE,Kappa,Accuracy
0,1.0,1.434505,0.082459,0.300578
1,2.0,0.877058,0.124204,0.46978
2,3.0,0.771269,0.112052,0.424437
3,4.0,0.628014,0.410937,0.605598
4,5.0,0.931051,0.121352,0.413598
5,6.0,0.833238,0.130024,0.511429
6,7.0,4.714671,-0.004672,0.084375
7,8.0,5.140174,0.021376,0.069182
