In [1]:
import nltk

# Commented out IPython magic to ensure Python compatibility.
# %cd /content/ease/ease

## IMPORTS ##
from essay_set import EssaySet

from feature_extractor import FeatureExtractor
from predictor_set import PredictorSet
from predictor_extractor import PredictorExtractor
from sklearn.svm import SVR
import pickle
import pandas as pd
import csv

import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import learning_curve,GridSearchCV
#from sklearn import svm, grid_search

import nltk
#nltk.download('punkt')
#nltk.download('averaged_perceptron_tagger')

# The following 3 functions have been taken from Ben Hamner's github repository
# https://github.com/benhamner/Metrics
def Cmatrix(rater_a, rater_b, min_rating=None, max_rating=None):
    """
    Returns the confusion matrix between rater's ratings.

    Parameters
    ----------
    rater_a, rater_b : sequence of int
        Ratings given by each rater; both sequences must have the same
        length. Lists, tuples and 1-D integer numpy arrays are accepted.
    min_rating, max_rating : int, optional
        Inclusive bounds of the rating scale. When omitted they are taken
        from the smallest / largest rating found in either sequence.

    Returns
    -------
    list of list of int
        Square matrix where entry ``[i][j]`` counts the items rated
        ``min_rating + i`` by rater_a and ``min_rating + j`` by rater_b.

    Raises
    ------
    AssertionError
        If the two sequences differ in length.
    """
    assert(len(rater_a) == len(rater_b))
    # Take the extremes of each rater separately. `rater_a + rater_b`
    # concatenates lists but adds numpy arrays element-wise, which produced
    # wrong bounds (and a wrong matrix) for array inputs.
    if min_rating is None:
        min_rating = min(min(rater_a), min(rater_b))
    if max_rating is None:
        max_rating = max(max(rater_a), max(rater_b))
    num_ratings = int(max_rating - min_rating + 1)
    conf_mat = [[0] * num_ratings for _ in range(num_ratings)]
    for a, b in zip(rater_a, rater_b):
        # Shift by min_rating so the lowest rating maps to row/column 0.
        conf_mat[a - min_rating][b - min_rating] += 1
    return conf_mat


def histogram(ratings, min_rating=None, max_rating=None):
    """
    Count how often each rating value occurs.

    The result has one slot per value in ``[min_rating, max_rating]``;
    slot ``k`` holds the number of ratings equal to ``min_rating + k``.
    Bounds that are not supplied are taken from the ratings themselves.
    """
    lowest = min(ratings) if min_rating is None else min_rating
    highest = max(ratings) if max_rating is None else max_rating
    counts = [0] * int(highest - lowest + 1)
    for rating in ratings:
        # Offset by the lowest rating so it lands in slot 0.
        counts[rating - lowest] += 1
    return counts


def quadratic_weighted_kappa(y, y_pred, min_rating=None, max_rating=None):
    """
    Calculates the quadratic weighted kappa.

    quadratic_weighted_kappa calculates the quadratic weighted kappa
    value, which is a measure of inter-rater agreement between two raters
    that provide discrete numeric ratings.  Potential values range from -1
    (representing complete disagreement) to 1 (representing complete
    agreement).  A kappa value of 0 is expected if all agreement is due to
    chance.

    Parameters
    ----------
    y, y_pred : sequence of numbers
        Ratings of the two raters; both must have the same length. Values
        are converted to ``int`` via ``np.array(..., dtype=int)`` before
        being compared.
    min_rating, max_rating : int, optional
        Minimum / maximum possible rating. When omitted they are taken from
        the smallest / largest value present in either sequence, i.e. it is
        assumed that the data contain the complete range of ratings.

    Returns
    -------
    float
        The quadratic weighted kappa. ``1.0`` is returned when the expected
        disagreement is zero (both raters gave one and the same rating to
        every item), because the ratio is undefined there and the raters
        agree completely.

    Raises
    ------
    AssertionError
        If the two sequences differ in length.
    """
    rater_a = np.array(y, dtype=int)
    rater_b = np.array(y_pred, dtype=int)
    assert(len(rater_a) == len(rater_b))
    if min_rating is None:
        min_rating = min(min(rater_a), min(rater_b))
    if max_rating is None:
        max_rating = max(max(rater_a), max(rater_b))
    conf_mat = Cmatrix(rater_a, rater_b,
                       min_rating, max_rating)
    num_ratings = len(conf_mat)
    num_scored_items = float(len(rater_a))

    # A single rating category means every item got the same rating from
    # both raters; the weight normalisation below would divide 0.0 by 0.0.
    if num_ratings <= 1:
        return 1.0

    hist_rater_a = histogram(rater_a, min_rating, max_rating)
    hist_rater_b = histogram(rater_b, min_rating, max_rating)

    numerator = 0.0
    denominator = 0.0

    for i in range(num_ratings):
        for j in range(num_ratings):
            # Count expected for cell (i, j) if the raters were independent.
            expected_count = (hist_rater_a[i] * hist_rater_b[j]
                              / num_scored_items)
            # Quadratic disagreement weight, normalised to [0, 1].
            d = pow(i - j, 2.0) / pow(num_ratings - 1, 2.0)
            numerator += d * conf_mat[i][j] / num_scored_items
            denominator += d * expected_count / num_scored_items

    # With explicit bounds both raters can still be constant and equal,
    # which makes the expected disagreement zero.
    if denominator == 0.0:
        return 1.0

    return (1.0 - numerator / denominator)

In [21]:
# Load the prompt-7 training fold. The first column of every row is parsed
# as "<score>, <essay text>".
# NOTE(review): read_csv treats the first line as a header by default --
# confirm that train.txt really starts with a header row.
train_set = pd.read_csv('/home/mehar/github/ease/aes_data/essay7/fold_0/train.txt', sep='\t')
train_rows = train_set.to_numpy().tolist()
print(len(train_rows))

# Split each row once, on the first ', ', into the score and the essay.
essaylist = []
scorelist = []
for row in train_rows:
    parts = row[0].split(', ', 1)
    scorelist.append(float(parts[0]))
    essaylist.append(parts[1])

# Register every (essay, score) pair with the training essay set.
train = EssaySet()
print("Done1")
for essay_text, essay_score in zip(essaylist, scorelist):
    train.add_essay(essay_text, essay_score)

print("Done2")
# Initialise the extractor from the training set, then generate the
# training feature matrix X.
features = FeatureExtractor()
features.initialize_dictionaries(train)
X = features.gen_feats(train)
print("features train", X)
print("Done3")


941
Done1
Done2
features train [[4.460e+02 1.020e+02 2.000e+00 ... 0.000e+00 0.000e+00 1.000e+00]
 [8.320e+02 1.930e+02 0.000e+00 ... 0.000e+00 0.000e+00 0.000e+00]
 [9.140e+02 2.080e+02 3.000e+00 ... 0.000e+00 0.000e+00 0.000e+00]
 ...
 [5.170e+02 1.180e+02 2.000e+00 ... 0.000e+00 0.000e+00 0.000e+00]
 [7.920e+02 1.760e+02 2.000e+00 ... 0.000e+00 0.000e+00 0.000e+00]
 [1.188e+03 2.400e+02 4.000e+00 ... 0.000e+00 0.000e+00 0.000e+00]]
Done3


In [22]:
# Load the reference scores for the test essays.
# Canonical codec name; the previous 'utf=8' spelling was a typo.
# NOTE(review): read_csv treats the first line as a header by default --
# confirm that test7.csv really starts with a header row.
test_score_set = pd.read_csv('scores/test7.csv', encoding='utf-8')
x_2 = test_score_set.to_numpy()
tester_2 = x_2.tolist()

# The score is read from the first column of every row.
test_scorelist = [float(row[0]) for row in tester_2]


In [23]:
import os
import statistics

# Directory containing the prompt-7 test-case files.
file_path = "AES_testcases/prompt7/"

# Keep only the CSV files, in sorted order. The previous code removed
# whatever entry happened to sort first with `files.pop(0)` (presumably a
# stray non-CSV entry such as `.ipynb_checkpoints` -- TODO confirm), which
# silently discards a real test case whenever no such entry is present.
files = sorted(
    name for name in os.listdir(file_path)
    if name.lower().endswith(".csv")
)

print(len(files), files)


53 ['contractions_test_prompt7.csv', 'disfluency_7.csv', 'entities_beg_bound_7.csv', 'entities_beg_unbound_7.csv', 'entities_end_bound_7.csv', 'entities_end_unbound_7.csv', 'entities_mid_bound_7.csv', 'entities_mid_unbound_7.csv', 'incorrect_grammar_7.csv', 'para_P_7.csv', 'prompt 7 babel - Sheet1.csv', 'repeat_test_conc1.csv', 'repeat_test_conc2.csv', 'repeat_test_into1.csv', 'repeat_test_into2.csv', 'repeat_test_middle1.csv', 'repeat_test_middle2.csv', 'repeat_test_middle3.csv', 'songs_test_beg.csv', 'songs_test_end.csv', 'speeches_test_beg.csv', 'speeches_test_end.csv', 'test7.csv', 'uf_beg_bound_7.csv', 'uf_beg_unbound_7.csv', 'uf_end_bound_7.csv', 'uf_end_unbound_7.csv', 'uf_mid_bound_7.csv', 'uf_mid_unbound_7.csv', 'ut_beg_bound_7.csv', 'ut_beg_unbound_7.csv', 'ut_end_bound_7.csv', 'ut_end_unbound_7.csv', 'ut_mid_bound_7.csv', 'ut_mid_unbound_7.csv', 'wiki_beg_bound_7.csv', 'wiki_beg_unbound_7.csv', 'wiki_end_bound_7.csv', 'wiki_end_unbound_7.csv', 'wiki_mid_bound_7.csv', 'wiki_m

In [24]:
# Evaluate the trained scorer on every test-case file and print, per file,
# the mean difference between the reference score and the predicted score.

## SCALING
# The training targets do not depend on the test case: clip the scores to
# [0, MAX_SCORE] and rescale them to [0, 1] once, outside the loop.
MAX_SCORE = 30
scaled_train = (
    np.clip(np.asarray(scorelist, dtype=float), a_min=0, a_max=MAX_SCORE)
    / MAX_SCORE
).tolist()

# The regressor only depends on the training features `X` and on
# `scaled_train`, so it is fitted once here instead of being refitted
# (with identical inputs) for every test-case file.
# NOTE(review): the original comment labelled these values "for prompt1"
# although this notebook reads prompt-7 data -- confirm they are intended.
Cs = [100]
gammas = [0.000001]
param_grid = {'C': Cs, 'gamma': gammas}
clf = GridSearchCV(SVR(kernel='rbf'), param_grid, cv=5)
clf.fit(X, scaled_train)

for testcase in files:
    test_set = pd.read_csv(file_path + testcase, encoding="latin1")
    x = test_set.to_numpy()
    tester = x.tolist()

    # NOTE(review): `split(', ', 1)[0]` keeps only the text BEFORE the first
    # ', ' of the first column, whereas the training cell keeps the part
    # after it. Confirm that the test-case essays are not being truncated.
    test_essaylist = [row[0].split(', ', 1)[0] for row in tester]

    # Row i of the test case is paired with reference score i; indexing
    # (rather than zip) keeps the IndexError when a file has more rows than
    # there are reference scores.
    test = EssaySet(essaytype="test")
    for i, essay in enumerate(test_essaylist):
        test.add_essay(essay, test_scorelist[i])
    Y = features.gen_feats(test)

    # Predict on the [0, 1] scale, then map back to integer scores.
    final = clf.predict(Y)
    finals = np.rint(np.clip(final, a_min=0, a_max=1) * MAX_SCORE)
    finals_list = finals.tolist()

    # Signed difference: reference score minus predicted score.
    diff_list = [test_scorelist[i] - predicted
                 for i, predicted in enumerate(finals_list)]
    mean1 = statistics.mean(diff_list)
    print("{:.3f}".format(mean1))





10.385




9.125




10.586




10.602




10.389




10.373




10.334




10.354




9.188




9.920




9.214




10.366




10.357




10.306




10.334




10.363




10.360




10.344




10.787




10.389




10.729




10.385




10.390




10.062




10.090




10.855


KeyboardInterrupt: 

In [21]:
import statistics

# Show the raw (scaled) predictions left over from the last evaluated test case.
print(final)

# Signed difference per essay: reference score minus predicted score.
diff_list = [test_scorelist[idx] - predicted
             for idx, predicted in enumerate(finals_list)]
print("Mean", statistics.mean(diff_list))

[0.28583632 0.29144578 0.29469176 0.3010146  0.29678894 0.27565501
 0.29401789 0.29813644 0.27248337 0.28798591 0.2515703  0.29745256
 0.29813644 0.29678894 0.29546236 0.28513957 0.25656519 0.29545356
 0.29813644 0.29546236 0.24918369 0.28227529 0.29813644 0.26942308
 0.2554716  0.29545356 0.29144578 0.250061   0.29677625 0.25821334
 0.29545356 0.29745256 0.29545356 0.2800879  0.29144578 0.29745256
 0.29020527 0.29545356 0.28750589 0.29469176 0.29813644 0.29262794
 0.29813644 0.29813644 0.29532778 0.28821227 0.2514389  0.29545356
 0.24887221 0.29545356 0.29144578 0.29813644 0.29323807 0.27718626
 0.29144578 0.29599281 0.25790319 0.29813644 0.29286886 0.29813644
 0.2500658  0.29745256 0.29535372 0.29813644 0.29469176 0.26363276
 0.29545356 0.29144578 0.29144578 0.28897303 0.33530783 0.60255487
 0.25303503 0.28787152 0.29813644 0.2908179  0.27687227 0.2946723
 0.29469176 0.29469176 0.29813644 0.29144578 0.28631821 0.29469176
 0.29145309 0.28062391 0.24882527 0.29894694 0.29745256 0.26119