In [33]:
# Commented out IPython magic to ensure Python compatibility.
# %cd /content/ease/src/nltk
import nltk

# Commented out IPython magic to ensure Python compatibility.
# %cd /content/ease/ease

## IMPORTS ##
from essay_set import EssaySet

from feature_extractor import FeatureExtractor
from predictor_set import PredictorSet
from predictor_extractor import PredictorExtractor
from sklearn.svm import SVR
import pickle
import pandas as pd
import csv

import numpy as np
from sklearn.metrics import confusion_matrix

import nltk
#nltk.download('punkt')
#nltk.download('averaged_perceptron_tagger')

# The following 3 functions have been taken from Ben Hamner's github repository
# https://github.com/benhamner/Metrics
def Cmatrix(rater_a, rater_b, min_rating=None, max_rating=None):
    """
    Returns the confusion matrix between rater's ratings
    """
    assert(len(rater_a) == len(rater_b))
    if min_rating is None:
        min_rating = min(rater_a + rater_b)
    if max_rating is None:
        max_rating = max(rater_a + rater_b)
    num_ratings = int(max_rating - min_rating + 1)
    conf_mat = [[0 for i in range(num_ratings)]
                for j in range(num_ratings)]
    for a, b in zip(rater_a, rater_b):
        conf_mat[a - min_rating][b - min_rating] += 1
    return conf_mat


def histogram(ratings, min_rating=None, max_rating=None):
    """
    Returns the counts of each type of rating that a rater made
    """
    if min_rating is None:
        min_rating = min(ratings)
    if max_rating is None:
        max_rating = max(ratings)
    num_ratings = int(max_rating - min_rating + 1)
    hist_ratings = [0 for x in range(num_ratings)]
    for r in ratings:
        hist_ratings[r - min_rating] += 1
    return hist_ratings


def quadratic_weighted_kappa(y, y_pred):
    """
    Calculates the quadratic weighted kappa
    axquadratic_weighted_kappa calculates the quadratic weighted kappa
    value, which is a measure of inter-rater agreement between two raters
    that provide discrete numeric ratings.  Potential values range from -1
    (representing complete disagreement) to 1 (representing complete
    agreement).  A kappa value of 0 is expected if all agreement is due to
    chance.
    quadratic_weighted_kappa(rater_a, rater_b), where rater_a and rater_b
    each correspond to a list of integer ratings.  These lists must have the
    same length.
    The ratings should be integers, and it is assumed that they contain
    the complete range of possible ratings.
    quadratic_weighted_kappa(X, min_rating, max_rating), where min_rating
    is the minimum possible rating, and max_rating is the maximum possible
    rating
    """
    rater_a = y
    rater_b = y_pred
    min_rating=None
    max_rating=None
    rater_a = np.array(rater_a, dtype=int)
    rater_b = np.array(rater_b, dtype=int)
    assert(len(rater_a) == len(rater_b))
    if min_rating is None:
        min_rating = min(min(rater_a), min(rater_b))
    if max_rating is None:
        max_rating = max(max(rater_a), max(rater_b))
    conf_mat = Cmatrix(rater_a, rater_b,
                                min_rating, max_rating)
    num_ratings = len(conf_mat)
    num_scored_items = float(len(rater_a))

    hist_rater_a = histogram(rater_a, min_rating, max_rating)
    hist_rater_b = histogram(rater_b, min_rating, max_rating)

    numerator = 0.0
    denominator = 0.0

    for i in range(num_ratings):
        for j in range(num_ratings):
            expected_count = (hist_rater_a[i] * hist_rater_b[j]
                              / num_scored_items)
            d = pow(i - j, 2.0) / pow(num_ratings - 1, 2.0)
            numerator += d * conf_mat[i][j] / num_scored_items
            denominator += d * expected_count / num_scored_items

    return (1.0 - numerator / denominator)

train_set = pd.read_csv('/home/mehar/github/ease/ease/train_adv/prompt8/nocontractions_8_train_valid.csv', sep=',')
x = train_set.to_numpy()
tester = x.tolist()
print(len(tester))
essaylist = []
scorelist = []
for i in range(0, len(tester)):
    z = tester[i]
#     print(z)
#     y = z[0].split(', ', 1)
# #     #print(y)
    scorelist.append(float(z[0]))
    essaylist.append(z[1])

train = EssaySet()
print("Done1")
for i in range(0, len(essaylist)):
    train.add_essay(essaylist[i], scorelist[i])

print("Done2")
features=FeatureExtractor()
features.initialize_dictionaries(train)
X = features.gen_feats(train)
print("features train", X)
print("Done3")



577
Done1
Done2
features train [[1.853e+03 2.950e+02 6.000e+00 ... 0.000e+00 0.000e+00 0.000e+00]
 [3.226e+03 6.880e+02 1.400e+01 ... 3.000e+00 0.000e+00 0.000e+00]
 [2.369e+03 4.990e+02 1.600e+01 ... 9.000e+00 0.000e+00 0.000e+00]
 ...
 [3.882e+03 8.170e+02 5.000e+01 ... 5.000e+00 1.000e+00 0.000e+00]
 [3.960e+03 9.050e+02 4.500e+01 ... 4.000e+00 0.000e+00 0.000e+00]
 [3.706e+03 8.270e+02 2.400e+01 ... 6.000e+00 1.000e+00 0.000e+00]]
Done3


In [34]:
## TESTING FEATURES ##
test_set = pd.read_csv('/home/mehar/github/ease/aes_data/essay8/fold_0/test.txt', sep='\t')
x = test_set.to_numpy()
tester = x.tolist()

test_scorelist = []
test_essaylist = []

for i in range(0, len(tester)):
    z = tester[i]
    y = z[0].split(', ', 1)
    test_scorelist.append(float(y[0]))
    test_essaylist.append(y[1])
count = 0

test = EssaySet(essaytype="test")
for i in range(0, len(test_essaylist)):
    test.add_essay(test_essaylist[i], test_scorelist[i])
    
Y = features.gen_feats(test)

## SCALING
scaled_train = []
for i in range(0, len(scorelist)):
    scaled_train.append(float((np.clip((scorelist[i]), a_min=0, a_max=60)/60)))

## TRAINING & PREDICTING
clf = SVR(C=1, gamma=0.00001,kernel='rbf') 
clf.fit(X, scaled_train)

final = clf.predict(Y)   
## INVERSE_SCALING
finals = (np.clip(final,a_min=0,a_max=1)*60)
finals_list = finals.tolist()
## QWK Score
print("QWK", quadratic_weighted_kappa(test_scorelist,finals))

df_pred = pd.DataFrame(finals_list)
df_org = pd.DataFrame(test_scorelist)
# print(df_pred.head(5))
#     print("org", df_org.head(5))

df_org["diff"] = abs(df_org[0] - df_pred[0])
mean_diff = df_org["diff"].mean()
print(""+"{:.3f}".format(mean_diff))

df_pred.to_csv('adv_test_scores/prompt8/NoContractionsTest8.csv', index = False, header=None)



QWK 0.18099371558309574
4.718


### No Change

In [35]:
train_set = pd.read_csv('/home/mehar/github/ease/ease/train_adv/prompt8/all_nochange_8_train_valid.csv', sep=',')
x = train_set.to_numpy()
tester = x.tolist()
print(len(tester))
essaylist = []
scorelist = []
for i in range(0, len(tester)):
    z = tester[i]
#     print(z)
#     y = z[0].split(', ', 1)
# #     #print(y)
    scorelist.append(float(z[0]))
    essaylist.append(z[1])

train = EssaySet()
print("Done1")
for i in range(0, len(essaylist)):
    train.add_essay(essaylist[i], scorelist[i])

print("Done2")
features=FeatureExtractor()
features.initialize_dictionaries(train)
X = features.gen_feats(train)
print("features train", X)
print("Done3")

test_set = pd.read_csv('/home/mehar/github/ease/aes_data/essay8/fold_0/test.txt', sep='\t')
x = test_set.to_numpy()
tester = x.tolist()

test_scorelist = []
test_essaylist = []

for i in range(0, len(tester)):
    z = tester[i]
    y = z[0].split(', ', 1)
    test_scorelist.append(float(y[0]))
    test_essaylist.append(y[1])
count = 0

test = EssaySet(essaytype="test")
for i in range(0, len(test_essaylist)):
    test.add_essay(test_essaylist[i], test_scorelist[i])
    
Y = features.gen_feats(test)

## SCALING
scaled_train = []
for i in range(0, len(scorelist)):
    scaled_train.append(float((np.clip((scorelist[i]), a_min=0, a_max=60)/60)))

## TRAINING & PREDICTING
clf = SVR(C=1, gamma=0.00001,kernel='rbf') 
clf.fit(X, scaled_train)

final = clf.predict(Y)   
## INVERSE_SCALING
finals = (np.clip(final,a_min=0,a_max=1)*60)
finals_list = finals.tolist()
## QWK Score
print("QWK", quadratic_weighted_kappa(test_scorelist,finals))

df_pred = pd.DataFrame(finals_list)
df_org = pd.DataFrame(test_scorelist)
# print(df_pred.head(5))
#     print("org", df_org.head(5))

df_org["diff"] = abs(df_org[0] - df_pred[0])
mean_diff = df_org["diff"].mean()
print(""+"{:.3f}".format(mean_diff))

df_pred.to_csv('adv_test_scores/prompt8/ContractionsAndSynonymsTest8.csv', index = False, header=None)

577
Done1
Done2
features train [[2.884e+03 6.000e+02 1.800e+01 ... 1.000e+00 0.000e+00 0.000e+00]
 [1.224e+03 2.780e+02 1.300e+01 ... 0.000e+00 0.000e+00 0.000e+00]
 [1.851e+03 3.840e+02 5.000e+00 ... 1.000e+00 0.000e+00 0.000e+00]
 ...
 [2.634e+03 4.090e+02 0.000e+00 ... 0.000e+00 0.000e+00 0.000e+00]
 [9.370e+02 2.140e+02 0.000e+00 ... 0.000e+00 0.000e+00 0.000e+00]
 [3.925e+03 8.130e+02 5.000e+00 ... 1.000e+00 0.000e+00 0.000e+00]]
Done3
QWK 0.5001161047898505
3.525


# mixture - Disfluency, Incorrect Grammar, Shuffle

In [36]:
train_set = pd.read_csv('/home/mehar/github/ease/ease/train_adv/prompt8/mixture_all_8_train_valid_reduce.csv', sep=',')
x = train_set.to_numpy()
tester = x.tolist()
print(len(tester))
essaylist = []
scorelist = []
for i in range(0, len(tester)):
    z = tester[i]
#     print(z)
#     y = z[0].split(', ', 1)
# #     #print(y)
    scorelist.append(float(z[0]))
    essaylist.append(z[1])

train = EssaySet()
print("Done1")
for i in range(0, len(essaylist)):
    train.add_essay(essaylist[i], scorelist[i])

print("Done2")
features=FeatureExtractor()
features.initialize_dictionaries(train)
X = features.gen_feats(train)
print("features train", X)
print("Done3")

test_set = pd.read_csv('/home/mehar/github/ease/aes_data/essay8/fold_0/test.txt', sep='\t')
x = test_set.to_numpy()
tester = x.tolist()

test_scorelist = []
test_essaylist = []

for i in range(0, len(tester)):
    z = tester[i]
    y = z[0].split(', ', 1)
    test_scorelist.append(float(y[0]))
    test_essaylist.append(y[1])
count = 0

test = EssaySet(essaytype="test")
for i in range(0, len(test_essaylist)):
    test.add_essay(test_essaylist[i], test_scorelist[i])
    
Y = features.gen_feats(test)

## SCALING
scaled_train = []
for i in range(0, len(scorelist)):
    scaled_train.append(float((np.clip((scorelist[i]), a_min=0, a_max=60)/60)))

## TRAINING & PREDICTING
clf = SVR(C=1, gamma=0.00001,kernel='rbf') 
clf.fit(X, scaled_train)

final = clf.predict(Y)   
## INVERSE_SCALING
finals = (np.clip(final,a_min=0,a_max=1)*60)
finals_list = finals.tolist()
## QWK Score
print("QWK", quadratic_weighted_kappa(test_scorelist,finals))

df_pred = pd.DataFrame(finals_list)
df_org = pd.DataFrame(test_scorelist)
# print(df_pred.head(5))
#     print("org", df_org.head(5))

df_org["diff"] = abs(df_org[0] - df_pred[0])
mean_diff = df_org["diff"].mean()
print(""+"{:.3f}".format(mean_diff))

df_pred.to_csv('adv_test_scores/prompt8/MixtureTest8.csv', index = False, header=None)

577
Done1
Done2
features train [[3.268e+03 7.200e+02 9.000e+00 ... 0.000e+00 0.000e+00 0.000e+00]
 [3.903e+03 8.420e+02 2.700e+01 ... 1.000e+00 0.000e+00 8.000e+00]
 [4.365e+03 9.190e+02 3.200e+01 ... 0.000e+00 0.000e+00 5.000e+00]
 ...
 [4.530e+03 9.290e+02 3.400e+01 ... 0.000e+00 1.000e+00 0.000e+00]
 [3.061e+03 6.530e+02 4.000e+00 ... 0.000e+00 0.000e+00 6.000e+00]
 [3.970e+03 9.060e+02 1.100e+01 ... 0.000e+00 0.000e+00 2.000e+00]]
Done3
QWK 0.5481830759239916
3.344


# mixture - Incorrect Grammar, Shuffle; No Disfluency

In [39]:
train_set = pd.read_csv('/home/mehar/github/ease/ease/train_adv/prompt8/noDisfluency_8_train_valid_reduce.csv', sep=',')
x = train_set.to_numpy()
tester = x.tolist()
print(len(tester))
essaylist = []
scorelist = []
for i in range(0, len(tester)):
    z = tester[i]
#     print(z)
#     y = z[0].split(', ', 1)
# #     #print(y)
    scorelist.append(float(z[0]))
    essaylist.append(z[1])

train = EssaySet()
print("Done1")
for i in range(0, len(essaylist)):
    train.add_essay(essaylist[i], scorelist[i])

print("Done2")
features=FeatureExtractor()
features.initialize_dictionaries(train)
X = features.gen_feats(train)
print("features train", X)
print("Done3")

test_set = pd.read_csv('/home/mehar/github/ease/aes_data/essay8/fold_0/test.txt', sep='\t')
x = test_set.to_numpy()
tester = x.tolist()

test_scorelist = []
test_essaylist = []

for i in range(0, len(tester)):
    z = tester[i]
    y = z[0].split(', ', 1)
    test_scorelist.append(float(y[0]))
    test_essaylist.append(y[1])
count = 0

test = EssaySet(essaytype="test")
for i in range(0, len(test_essaylist)):
    test.add_essay(test_essaylist[i], test_scorelist[i])
    
Y = features.gen_feats(test)

## SCALING
scaled_train = []
for i in range(0, len(scorelist)):
    scaled_train.append(float((np.clip((scorelist[i]), a_min=0, a_max=60)/60)))

## TRAINING & PREDICTING
clf = SVR(C=1, gamma=0.00001,kernel='rbf') 
clf.fit(X, scaled_train)

final = clf.predict(Y)   
## INVERSE_SCALING
finals = (np.clip(final,a_min=0,a_max=1)*0)
finals_list = finals.tolist()
## QWK Score
print("QWK", quadratic_weighted_kappa(test_scorelist,finals))

df_pred = pd.DataFrame(finals_list)
df_org = pd.DataFrame(test_scorelist)
# print(df_pred.head(5))
#     print("org", df_org.head(5))

df_org["diff"] = abs(df_org[0] - df_pred[0])
mean_diff = df_org["diff"].mean()
print(""+"{:.3f}".format(mean_diff))

df_pred.to_csv('adv_test_scores/prompt8/MixtureNoDisfluencyTest8.csv', index = False, header=None)

577
Done1
Done2
features train [[3.957e+03 8.830e+02 1.200e+01 ... 0.000e+00 9.000e+00 3.000e+00]
 [2.765e+03 5.720e+02 1.000e+01 ... 0.000e+00 0.000e+00 0.000e+00]
 [3.408e+03 6.890e+02 4.200e+01 ... 0.000e+00 4.000e+00 1.000e+00]
 ...
 [3.903e+03 8.420e+02 2.700e+01 ... 0.000e+00 8.000e+00 0.000e+00]
 [3.358e+03 7.590e+02 1.300e+01 ... 0.000e+00 0.000e+00 0.000e+00]
 [2.478e+03 5.610e+02 1.300e+01 ... 1.000e+00 3.000e+00 0.000e+00]]
Done3
QWK 0.0
37.271


# mixture - Shuffle; No grammar, disfluency

In [38]:
train_set = pd.read_csv('/home/mehar/github/ease/ease/train_adv/prompt8/noDisfluency&grammar_8_train_valid_reduce.csv', sep=',')
x = train_set.to_numpy()
tester = x.tolist()
print(len(tester))
essaylist = []
scorelist = []
for i in range(0, len(tester)):
    z = tester[i]
#     print(z)
#     y = z[0].split(', ', 1)
# #     #print(y)
    scorelist.append(float(z[0]))
    essaylist.append(z[1])

train = EssaySet()
print("Done1")
for i in range(0, len(essaylist)):
    train.add_essay(essaylist[i], scorelist[i])

print("Done2")
features=FeatureExtractor()
features.initialize_dictionaries(train)
X = features.gen_feats(train)
print("features train", X)
print("Done3")

test_set = pd.read_csv('/home/mehar/github/ease/aes_data/essay8/fold_0/test.txt', sep='\t')
x = test_set.to_numpy()
tester = x.tolist()

test_scorelist = []
test_essaylist = []

for i in range(0, len(tester)):
    z = tester[i]
    y = z[0].split(', ', 1)
    test_scorelist.append(float(y[0]))
    test_essaylist.append(y[1])
count = 0

test = EssaySet(essaytype="test")
for i in range(0, len(test_essaylist)):
    test.add_essay(test_essaylist[i], test_scorelist[i])
    
Y = features.gen_feats(test)

## SCALING
scaled_train = []
for i in range(0, len(scorelist)):
    scaled_train.append(float((np.clip((scorelist[i]), a_min=0, a_max=60)/60)))

## TRAINING & PREDICTING
clf = SVR(C=1, gamma=0.00001,kernel='rbf') 
clf.fit(X, scaled_train)

final = clf.predict(Y)   
## INVERSE_SCALING
finals = (np.clip(final,a_min=0,a_max=1)*60)
finals_list = finals.tolist()
## QWK Score
print("QWK", quadratic_weighted_kappa(test_scorelist,finals))

df_pred = pd.DataFrame(finals_list)
df_org = pd.DataFrame(test_scorelist)
# print(df_pred.head(5))
#     print("org", df_org.head(5))

df_org["diff"] = abs(df_org[0] - df_pred[0])
mean_diff = df_org["diff"].mean()
print(""+"{:.3f}".format(mean_diff))

df_pred.to_csv('adv_test_scores/prompt8/MixtureNoDisfluencyNoGrammarTest8.csv', index = False, header=None)

577
Done1
Done2
features train [[3.911e+03 8.450e+02 2.500e+01 ... 1.000e+00 0.000e+00 0.000e+00]
 [2.205e+03 4.550e+02 2.700e+01 ... 4.000e+00 1.000e+00 0.000e+00]
 [1.980e+03 4.190e+02 2.100e+01 ... 3.000e+00 0.000e+00 0.000e+00]
 ...
 [1.907e+03 3.790e+02 9.000e+00 ... 3.000e+00 0.000e+00 1.000e+00]
 [3.782e+03 8.190e+02 4.900e+01 ... 2.000e+00 0.000e+00 1.000e+00]
 [2.163e+03 4.460e+02 1.000e+01 ... 5.000e+00 0.000e+00 1.000e+00]]
Done3
QWK 0.584511319134678
3.274


In [12]:
finals

array([11.,  9., 10.,  8.,  7.,  3., 10.,  9.,  9.,  9.,  8.,  9.,  9.,
        9.,  7., 10.,  9.,  8.,  9.,  9.,  8.,  9.,  4.,  8.,  8.,  9.,
        9., 10., 10., 10.,  9.,  8.,  8.,  8.,  5.,  7.,  6.,  9.,  9.,
        7.,  9., 10.,  9.,  9.,  8., 10.,  9.,  8.,  9.,  8.,  8.,  9.,
        7.,  8.,  4.,  7.,  9., 11., 10., 10.,  9.,  9.,  9.,  8.,  9.,
        7.,  9.,  9.,  8., 11.,  7.,  8.,  6.,  9., 11.,  7.,  9.,  9.,
        9.,  9.,  9.,  7., 10., 10., 10., 10.,  9.,  8.,  7.,  7.,  8.,
        8., 10.,  9.,  9.,  8.,  7.,  9.,  9., 10.,  8.,  9.,  8.,  9.,
       10., 10., 11.,  8.,  8.,  7.,  9., 10.,  6.,  7.,  7.,  9.,  7.,
        8.,  7.,  9.,  8.,  7.,  7.,  9.,  7.,  7.,  8.,  9., 10.,  8.,
        9.,  7.,  9.,  9.,  8.,  9.,  7.,  9.,  7.,  9.,  9.,  9., 10.,
        7.,  7.,  8.,  9.,  9., 10., 10.,  9.,  7.,  9.,  7.,  7.,  9.,
        9.,  9.,  8.,  9.,  8.,  9.,  9.,  8.,  9.,  8.,  8., 11., 10.,
        6., 10.,  9.,  8.,  9.,  9.,  7.,  8.,  7.,  7.,  9.,  7