In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
import tensorflow as tf
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
from spellchecker import SpellChecker
from tqdm import tqdm
from tensorflow.keras.utils import to_categorical
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, cohen_kappa_score
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from transformers import TFGPT2Model, GPT2Tokenizer

import warnings
warnings.filterwarnings("ignore")

In [2]:
df = pd.read_csv('ASAP Dataset/Preprocessed_df.csv')

In [3]:
# Removing any missing values from the data
df = df.dropna(axis = 1, how = 'any')

In [4]:
drop_columns = ['essay_id', 'pos_ratios', 'essay', 'rater1_domain1', 'rater2_domain1']
df.drop(drop_columns, axis = 1, inplace = True)

In [5]:
def calc_precision(y_true, y_pred, average='macro'):
    """
    Calculates the precision score between the true and predicted values
    """
    precision = precision_score(y_true, y_pred, average=average)
    return precision

def calc_recall(y_true, y_pred, average='macro'):
    """
    Calculates the recall score between the true and predicted values
    """
    recall = recall_score(y_true, y_pred, average=average)
    return recall

def calc_f1_score(y_true, y_pred, average='macro'):
    """
    Calculates the f1-score between the true and predicted values
    """
    f1 = f1_score(y_true, y_pred, average=average)
    return f1

def calc_cohen_kappa_score(y_true, y_pred):
    """
    Calculates the cohen kappa score between the true and predicted values
    """
    kappa_score = cohen_kappa_score(y_true, y_pred, weights = 'quadratic')
    return kappa_score

def calc_accuracy(y_true, y_pred):
    """
    Calculates the accuracy score between the true and predicted values
    """
    accuracy = accuracy_score(y_true, y_pred)
    return accuracy

In [6]:
def print_metrics_function(y_actual, y_predictions):
    
    # Calculate and print accuracy
    accuracy = calc_accuracy(y_actual, y_predictions)
    print("Accuracy:", accuracy)
    
    # Calculate and print precision
    precision = calc_precision(y_actual, y_predictions)
    print("Precision:", precision)

    # Calculate and print recall
    recall = calc_recall(y_actual, y_predictions)
    print("Recall:", recall)

    # Calculate and print f1-score
    f1 = calc_f1_score(y_actual, y_predictions)
    print("F1-Score:", f1)

    # Calculate and print Cohen Kappa Score
    kappa_score = calc_cohen_kappa_score(y_actual, y_predictions)
    print("Cohen Kappa Score:", kappa_score)

    return accuracy, precision, recall, f1, kappa_score

In [7]:
def dataset_preparation(data, target = 'domain1_score'):
    
    X = data.drop([target], axis = 1)
    y = data[target]
    
    return X, y

In [8]:
def choose_classifiers(classifier_name = "logistic_regression"):
    """
    Takes a regressor as input and returns a corresponding classifier object
    """
    
    if classifier_name == 'logistic_regression':
        return LogisticRegression()
    elif classifier_name == 'decision_tree_classifier':
        return DecisionTreeClassifier()
    elif classifier_name == 'random_forest_classifier':
        return RandomForestClassifier()
    elif classifier_name == 'gradient_boosting_classifier':
        return GradientBoostingClassifier()
    elif classifier_name == 'adaboost_classifier':
        return AdaBoostClassifier()
    elif classifier_name == 'k_neighbors_classifier':
        return KNeighborsClassifier()
    elif classifier_name == 'support_vector_classifier':
        return SVC()
    elif classifier_name == 'xgboost_classifier':
        return XGBClassifier()
    elif classifier_name == 'gaussian_naive_bayes_classifier':
        return GaussianNB()
    else:
        raise ValueError(f"Classifier {classifier_name} not supported for this problem.")

In [16]:
def spell_corrector(text):
    spell_checker = SpellChecker()
    correct_tokens = []
    for token in tqdm(text.split()):
        if spell_checker.correction(token.lower()):
            correct_tokens.append(spell_checker.correction(token.lower()))
        else:
            correct_tokens.append(token.lower())
    
    return ' '.join(correct_tokens)

In [17]:
df_essay_set = df[df.essay_set == 1]
X, y = dataset_preparation(df_essay_set)
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle = True, 
                                                    random_state = 101, test_size = 0.2)

In [18]:
X_train['preprocessed_text']

1346    dear local people using computer year good hea...
1349    dear newspaper believe computer positive affec...
7       people agree computer make life le complicated...
1251    dear world changed much better technology beco...
661     technology growing changing rapidly look apple...
                              ...                        
599     dear know becoming reaching computer helpful m...
1599    although many people love computer palying vid...
1361    dear local newspaper think computer bad effect...
1547    dear local newspaper agree expert said compute...
863     dear local newspaper opinion computer everythi...
Name: preprocessed_text, Length: 1426, dtype: object

In [None]:
X_train['preprocessed_text'] = X_train['preprocessed_text'].apply(spell_corrector)

100%|█████████████████████████████████████████████████████████████████████████████| 170/170 [00:00<00:00, 21256.61it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 129/129 [00:00<00:00, 279.22it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 249/249 [00:02<00:00, 83.59it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 194/194 [00:00<00:00, 16142.50it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 211/211 [00:02<00:00, 76.20it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 191/191 [00:00<00:00, 645.24it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 140/140 [00:01<00:00, 72.95it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<?, ?it/s]
100%|███████████████████████████████████

100%|████████████████████████████████████████████████████████████████████████████████| 237/237 [00:02<00:00, 96.30it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 150/150 [00:00<00:00, 331.12it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 132/132 [00:01<00:00, 105.85it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 196/196 [00:03<00:00, 60.57it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 252.94it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 218/218 [00:00<00:00, 359.71it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 147/147 [00:00<00:00, 449.51it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 105/105 [00:00<00:00, 314.35it/s]
100%|███████████████████████████████████

100%|███████████████████████████████████████████████████████████████████████████████| 235/235 [00:01<00:00, 154.40it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 234/234 [00:00<00:00, 287.81it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 277/277 [00:00<00:00, 13850.51it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 145/145 [00:00<00:00, 152.15it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 294/294 [00:01<00:00, 209.55it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 124/124 [00:00<00:00, 327.18it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 181/181 [00:00<00:00, 485.22it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 178/178 [00:01<00:00, 170.66it/s]
100%|███████████████████████████████████

100%|█████████████████████████████████████████████████████████████████████████████| 191/191 [00:00<00:00, 47369.45it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 239/239 [00:00<00:00, 409.92it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 336/336 [00:00<00:00, 442.69it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 117/117 [00:00<00:00, 263.49it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 154/154 [00:00<00:00, 30579.12it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 174/174 [00:01<00:00, 138.86it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 109/109 [00:00<00:00, 293.01it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 264/264 [00:03<00:00, 66.60it/s]
100%|███████████████████████████████████

100%|███████████████████████████████████████████████████████████████████████████████| 189/189 [00:00<00:00, 354.61it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 155/155 [00:00<00:00, 277.28it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 196/196 [00:04<00:00, 45.92it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 281/281 [00:00<00:00, 975.69it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 168/168 [00:00<00:00, 27988.68it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 163/163 [00:00<00:00, 178.53it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 58/58 [00:00<00:00, 71.96it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 272/272 [00:01<00:00, 245.49it/s]
100%|███████████████████████████████████

100%|███████████████████████████████████████████████████████████████████████████████| 192/192 [00:00<00:00, 755.81it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 163/163 [00:01<00:00, 136.63it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 133/133 [00:01<00:00, 119.06it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 238/238 [00:00<00:00, 363.68it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 35/35 [00:00<00:00, 34994.19it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 164/164 [00:00<00:00, 683.34it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 88/88 [00:00<00:00, 8797.07it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 217/217 [00:01<00:00, 151.33it/s]
100%|███████████████████████████████████

100%|████████████████████████████████████████████████████████████████████████████████| 319/319 [00:03<00:00, 88.66it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 160/160 [00:00<00:00, 26515.81it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 206/206 [00:03<00:00, 64.74it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 12447.11it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 225/225 [00:01<00:00, 179.99it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 234/234 [00:00<00:00, 947.24it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 259/259 [00:01<00:00, 132.21it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 183/183 [00:00<00:00, 720.46it/s]
100%|███████████████████████████████████

100%|███████████████████████████████████████████████████████████████████████████████| 212/212 [00:01<00:00, 180.88it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 286/286 [00:00<00:00, 375.33it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 201/201 [00:01<00:00, 105.90it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 138/138 [00:00<00:00, 295.50it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 170/170 [00:00<00:00, 885.42it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 183/183 [00:00<00:00, 256.30it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 208/208 [00:02<00:00, 99.38it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 212/212 [00:00<00:00, 432.65it/s]
100%|███████████████████████████████████

100%|██████████████████████████████████████████████████████████████████████████████████| 75/75 [00:00<00:00, 82.51it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 198/198 [00:01<00:00, 107.37it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 106/106 [00:01<00:00, 86.04it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 177/177 [00:00<00:00, 16038.19it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 149/149 [00:00<00:00, 543.73it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 236/236 [00:00<00:00, 763.67it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 92/92 [00:01<00:00, 82.00it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 154/154 [00:00<00:00, 38159.32it/s]
100%|███████████████████████████████████

In [13]:
# This downloads the pre-trained weights from the huggingface website 
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token
gpt_model = TFGPT2Model.from_pretrained('gpt2')
print(f"Total number of parameters: {gpt_model.count_params()}")

All model checkpoint layers were used when initializing TFGPT2Model.

All the layers of TFGPT2Model were initialized from the model checkpoint at gpt2.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2Model for predictions without further training.


Total number of parameters: 124439808


### GPT-2 Architecture

#### Extracting GPT - 2 Embeddings

In [14]:
# This code can take about 5 - 10 minutes to run depending on the speed of the system
BATCH_SIZE = 16
MAX_LENGTH = 512

train_encodings = tokenizer(list(X_train['preprocessed_text']), truncation=True, padding=True, max_length=MAX_LENGTH, return_tensors='tf')
train_dataset = tf.data.Dataset.from_tensor_slices((dict(train_encodings), y_train)).batch(BATCH_SIZE)

test_encodings = tokenizer(list(X_test['preprocessed_text']), truncation=True, padding=True, max_length=MAX_LENGTH, return_tensors='tf')
test_dataset = tf.data.Dataset.from_tensor_slices((dict(test_encodings), y_test)).batch(BATCH_SIZE)

embeddings_train = []
for batch in tqdm(train_dataset):
    embeddings_train.append(gpt_model(batch[0]['input_ids'])[0][:, -1, :])
embeddings_train = tf.concat(embeddings_train, axis=0)

embeddings_test = []
for batch in tqdm(test_dataset):
    embeddings_test.append(gpt_model(batch[0]['input_ids'])[0][:, -1, :])
embeddings_test = tf.concat(embeddings_test, axis=0)

100%|██████████████████████████████████████████████████████████████████████████████████| 90/90 [00:31<00:00,  2.86it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 23/23 [00:08<00:00,  2.85it/s]


In [15]:
print("-----------------------Logistic Regression-----------------------")
model = choose_classifiers("logistic_regression")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_logistic_reg, precision_logistic_reg, recall_logistic_reg, f1_logistic_reg, kappa_score_logistic_reg = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------Random Forest Classifier-----------------------")
model = choose_classifiers("random_forest_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_random_forest, precision_random_forest, recall_random_forest, f1_random_forest, kappa_score_random_forest = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------Adaboost Classifier-----------------------")
model = choose_classifiers("adaboost_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_adaboost, precision_adaboost, recall_adaboost, f1_adaboost, kappa_score_adaboost = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------K Neibhors Classifier-----------------------")
model = choose_classifiers("k_neighbors_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_k_neighbors, precision_k_neighbors, recall_k_neighbors, f1_k_neighbors, kappa_score_k_neighbors = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------Support Vector Classifier-----------------------")
model = choose_classifiers("support_vector_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_svc, precision_svc, recall_svc, f1_svc, kappa_score_svc = print_metrics_function(y_test, y_predictions)


-----------------------Logistic Regression-----------------------
Accuracy: 0.49299719887955185
Precision: 0.23054102996944667
Recall: 0.2082108315072025
F1-Score: 0.20492996525170745
Cohen Kappa Score: 0.6590213968822238


-----------------------Random Forest Classifier-----------------------
Accuracy: 0.5126050420168067
Precision: 0.4506009842682005
Recall: 0.3833604701749863
F1-Score: 0.3921587458782141
Cohen Kappa Score: 0.7765472144800646


-----------------------Adaboost Classifier-----------------------
Accuracy: 0.45938375350140054
Precision: 0.2185760539183296
Recall: 0.3278125
F1-Score: 0.25365074963488804
Cohen Kappa Score: 0.6326407276805726


-----------------------K Neibhors Classifier-----------------------
Accuracy: 0.47619047619047616
Precision: 0.26849220112242855
Recall: 0.26487336252457216
F1-Score: 0.2603777231685342
Cohen Kappa Score: 0.7124564050804945


-----------------------Support Vector Classifier-----------------------
Accuracy: 0.3641456582633053
Precision

### Model with Metrics (Essay Set - 2)

In [16]:
df_essay_set = df[df.essay_set == 2]
X, y = dataset_preparation(df_essay_set)
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle = True, 
                                                    random_state = 101, test_size = 0.2)

In [17]:
# This code can take about 5 - 10 minutes to run depending on the speed of the system
BATCH_SIZE = 16
MAX_LENGTH = 512

train_encodings = tokenizer(list(X_train['preprocessed_text']), truncation=True, padding=True, max_length=MAX_LENGTH, return_tensors='tf')
train_dataset = tf.data.Dataset.from_tensor_slices((dict(train_encodings), y_train)).batch(BATCH_SIZE)

test_encodings = tokenizer(list(X_test['preprocessed_text']), truncation=True, padding=True, max_length=MAX_LENGTH, return_tensors='tf')
test_dataset = tf.data.Dataset.from_tensor_slices((dict(test_encodings), y_test)).batch(BATCH_SIZE)

embeddings_train = []
for batch in tqdm(train_dataset):
    embeddings_train.append(gpt_model(batch[0]['input_ids'])[0][:, -1, :])
embeddings_train = tf.concat(embeddings_train, axis=0)

embeddings_test = []
for batch in tqdm(test_dataset):
    embeddings_test.append(gpt_model(batch[0]['input_ids'])[0][:, -1, :])
embeddings_test = tf.concat(embeddings_test, axis=0)

100%|██████████████████████████████████████████████████████████████████████████████████| 90/90 [00:40<00:00,  2.20it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 23/23 [00:10<00:00,  2.12it/s]


In [18]:
print("-----------------------Logistic Regression-----------------------")
model = choose_classifiers("logistic_regression")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_logistic_reg, precision_logistic_reg, recall_logistic_reg, f1_logistic_reg, kappa_score_logistic_reg = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------Random Forest Classifier-----------------------")
model = choose_classifiers("random_forest_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_random_forest, precision_random_forest, recall_random_forest, f1_random_forest, kappa_score_random_forest = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------Adaboost Classifier-----------------------")
model = choose_classifiers("adaboost_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_adaboost, precision_adaboost, recall_adaboost, f1_adaboost, kappa_score_adaboost = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------K Neibhors Classifier-----------------------")
model = choose_classifiers("k_neighbors_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_k_neighbors, precision_k_neighbors, recall_k_neighbors, f1_k_neighbors, kappa_score_k_neighbors = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------Support Vector Classifier-----------------------")
model = choose_classifiers("support_vector_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_svc, precision_svc, recall_svc, f1_svc, kappa_score_svc = print_metrics_function(y_test, y_predictions)


-----------------------Logistic Regression-----------------------
Accuracy: 0.6194444444444445
Precision: 0.41094814241486066
Recall: 0.36667658703559713
F1-Score: 0.37984665961887876
Cohen Kappa Score: 0.5450316337923334


-----------------------Random Forest Classifier-----------------------
Accuracy: 0.6694444444444444
Precision: 0.5061645779563455
Recall: 0.4837492738468689
F1-Score: 0.4685032399312341
Cohen Kappa Score: 0.6926229508196722


-----------------------Adaboost Classifier-----------------------
Accuracy: 0.6
Precision: 0.41165107971860415
Recall: 0.3218745028643948
F1-Score: 0.32709890668700736
Cohen Kappa Score: 0.37589285714285714


-----------------------K Neibhors Classifier-----------------------
Accuracy: 0.5777777777777777
Precision: 0.34395278167367926
Recall: 0.36121922227882497
F1-Score: 0.34850386276522294
Cohen Kappa Score: 0.5543478260869565


-----------------------Support Vector Classifier-----------------------
Accuracy: 0.43333333333333335
Precision: 0.

### Model with Metrics (Essay Set - 3)

In [19]:
df_essay_set = df[df.essay_set == 3]
X, y = dataset_preparation(df_essay_set)
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle = True, 
                                                    random_state = 101, test_size = 0.2)

In [20]:
# This code can take about 5 - 10 minutes to run depending on the speed of the system
BATCH_SIZE = 16
MAX_LENGTH = 512

train_encodings = tokenizer(list(X_train['preprocessed_text']), truncation=True, padding=True, max_length=MAX_LENGTH, return_tensors='tf')
train_dataset = tf.data.Dataset.from_tensor_slices((dict(train_encodings), y_train)).batch(BATCH_SIZE)

test_encodings = tokenizer(list(X_test['preprocessed_text']), truncation=True, padding=True, max_length=MAX_LENGTH, return_tensors='tf')
test_dataset = tf.data.Dataset.from_tensor_slices((dict(test_encodings), y_test)).batch(BATCH_SIZE)

embeddings_train = []
for batch in tqdm(train_dataset):
    embeddings_train.append(gpt_model(batch[0]['input_ids'])[0][:, -1, :])
embeddings_train = tf.concat(embeddings_train, axis=0)

embeddings_test = []
for batch in tqdm(test_dataset):
    embeddings_test.append(gpt_model(batch[0]['input_ids'])[0][:, -1, :])
embeddings_test = tf.concat(embeddings_test, axis=0)

100%|██████████████████████████████████████████████████████████████████████████████████| 87/87 [00:16<00:00,  5.28it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 22/22 [00:03<00:00,  6.82it/s]


In [21]:
print("-----------------------Logistic Regression-----------------------")
model = choose_classifiers("logistic_regression")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_logistic_reg, precision_logistic_reg, recall_logistic_reg, f1_logistic_reg, kappa_score_logistic_reg = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------Random Forest Classifier-----------------------")
model = choose_classifiers("random_forest_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_random_forest, precision_random_forest, recall_random_forest, f1_random_forest, kappa_score_random_forest = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------Adaboost Classifier-----------------------")
model = choose_classifiers("adaboost_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_adaboost, precision_adaboost, recall_adaboost, f1_adaboost, kappa_score_adaboost = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------K Neibhors Classifier-----------------------")
model = choose_classifiers("k_neighbors_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_k_neighbors, precision_k_neighbors, recall_k_neighbors, f1_k_neighbors, kappa_score_k_neighbors = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------Support Vector Classifier-----------------------")
model = choose_classifiers("support_vector_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_svc, precision_svc, recall_svc, f1_svc, kappa_score_svc = print_metrics_function(y_test, y_predictions)


-----------------------Logistic Regression-----------------------
Accuracy: 0.43641618497109824
Precision: 0.37673337438423643
Recall: 0.37037280062214445
F1-Score: 0.3330595090183158
Cohen Kappa Score: 0.4345963792930363


-----------------------Random Forest Classifier-----------------------
Accuracy: 0.3872832369942196
Precision: 0.3157647871634095
Recall: 0.27538734474430016
F1-Score: 0.2596099368499933
Cohen Kappa Score: 0.23280317428561903


-----------------------Adaboost Classifier-----------------------
Accuracy: 0.407514450867052
Precision: 0.33401437853128707
Recall: 0.3434983287345492
F1-Score: 0.30227710337222224
Cohen Kappa Score: 0.2941236775198883


-----------------------K Neibhors Classifier-----------------------
Accuracy: 0.5086705202312138
Precision: 0.4049070847851336
Recall: 0.4052164419618488
F1-Score: 0.3918480470338056
Cohen Kappa Score: 0.5226543479617953


-----------------------Support Vector Classifier-----------------------
Accuracy: 0.37572254335260113
P

### Model with Metrics (Essay Set - 4)

In [22]:
df_essay_set = df[df.essay_set == 4]
X, y = dataset_preparation(df_essay_set)
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle = True, 
                                                    random_state = 101, test_size = 0.2)

In [23]:
# This code can take about 5 - 10 minutes to run depending on the speed of the system
BATCH_SIZE = 16
MAX_LENGTH = 512

train_encodings = tokenizer(list(X_train['preprocessed_text']), truncation=True, padding=True, max_length=MAX_LENGTH, return_tensors='tf')
train_dataset = tf.data.Dataset.from_tensor_slices((dict(train_encodings), y_train)).batch(BATCH_SIZE)

test_encodings = tokenizer(list(X_test['preprocessed_text']), truncation=True, padding=True, max_length=MAX_LENGTH, return_tensors='tf')
test_dataset = tf.data.Dataset.from_tensor_slices((dict(test_encodings), y_test)).batch(BATCH_SIZE)

embeddings_train = []
for batch in tqdm(train_dataset):
    embeddings_train.append(gpt_model(batch[0]['input_ids'])[0][:, -1, :])
embeddings_train = tf.concat(embeddings_train, axis=0)

embeddings_test = []
for batch in tqdm(test_dataset):
    embeddings_test.append(gpt_model(batch[0]['input_ids'])[0][:, -1, :])
embeddings_test = tf.concat(embeddings_test, axis=0)

100%|██████████████████████████████████████████████████████████████████████████████████| 89/89 [00:16<00:00,  5.26it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 23/23 [00:03<00:00,  6.23it/s]


In [24]:
print("-----------------------Logistic Regression-----------------------")
model = choose_classifiers("logistic_regression")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_logistic_reg, precision_logistic_reg, recall_logistic_reg, f1_logistic_reg, kappa_score_logistic_reg = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------Random Forest Classifier-----------------------")
model = choose_classifiers("random_forest_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_random_forest, precision_random_forest, recall_random_forest, f1_random_forest, kappa_score_random_forest = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------Adaboost Classifier-----------------------")
model = choose_classifiers("adaboost_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_adaboost, precision_adaboost, recall_adaboost, f1_adaboost, kappa_score_adaboost = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------K Neibhors Classifier-----------------------")
model = choose_classifiers("k_neighbors_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_k_neighbors, precision_k_neighbors, recall_k_neighbors, f1_k_neighbors, kappa_score_k_neighbors = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------Support Vector Classifier-----------------------")
model = choose_classifiers("support_vector_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_svc, precision_svc, recall_svc, f1_svc, kappa_score_svc = print_metrics_function(y_test, y_predictions)


-----------------------Logistic Regression-----------------------
Accuracy: 0.556497175141243
Precision: 0.5724331759425006
Recall: 0.5359502914182354
F1-Score: 0.5263625256090638
Cohen Kappa Score: 0.6405354947406767


-----------------------Random Forest Classifier-----------------------
Accuracy: 0.5254237288135594
Precision: 0.4816507465963987
Recall: 0.5017769143163395
F1-Score: 0.46883739608454017
Cohen Kappa Score: 0.6446624411540384


-----------------------Adaboost Classifier-----------------------
Accuracy: 0.3785310734463277
Precision: 0.41673526917128173
Recall: 0.39499778924097273
F1-Score: 0.3289425000781391
Cohen Kappa Score: 0.33141457055576073


-----------------------K Neibhors Classifier-----------------------
Accuracy: 0.519774011299435
Precision: 0.5154162439981065
Recall: 0.49972800964694847
F1-Score: 0.5001461660123753
Cohen Kappa Score: 0.623744309417209


-----------------------Support Vector Classifier-----------------------
Accuracy: 0.3531073446327684
Precis

### Model with Metrics (Essay Set - 5)

In [25]:
df_essay_set = df[df.essay_set == 5]
X, y = dataset_preparation(df_essay_set)
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle = True, 
                                                    random_state = 101, test_size = 0.2)

In [26]:
# This code can take about 5 - 10 minutes to run depending on the speed of the system
BATCH_SIZE = 16
MAX_LENGTH = 512

train_encodings = tokenizer(list(X_train['preprocessed_text']), truncation=True, padding=True, max_length=MAX_LENGTH, return_tensors='tf')
train_dataset = tf.data.Dataset.from_tensor_slices((dict(train_encodings), y_train)).batch(BATCH_SIZE)

test_encodings = tokenizer(list(X_test['preprocessed_text']), truncation=True, padding=True, max_length=MAX_LENGTH, return_tensors='tf')
test_dataset = tf.data.Dataset.from_tensor_slices((dict(test_encodings), y_test)).batch(BATCH_SIZE)

embeddings_train = []
for batch in tqdm(train_dataset):
    embeddings_train.append(gpt_model(batch[0]['input_ids'])[0][:, -1, :])
embeddings_train = tf.concat(embeddings_train, axis=0)

embeddings_test = []
for batch in tqdm(test_dataset):
    embeddings_test.append(gpt_model(batch[0]['input_ids'])[0][:, -1, :])
embeddings_test = tf.concat(embeddings_test, axis=0)

100%|██████████████████████████████████████████████████████████████████████████████████| 91/91 [00:20<00:00,  4.49it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 23/23 [00:04<00:00,  5.49it/s]


In [27]:
print("-----------------------Logistic Regression-----------------------")
model = choose_classifiers("logistic_regression")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_logistic_reg, precision_logistic_reg, recall_logistic_reg, f1_logistic_reg, kappa_score_logistic_reg = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------Random Forest Classifier-----------------------")
model = choose_classifiers("random_forest_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_random_forest, precision_random_forest, recall_random_forest, f1_random_forest, kappa_score_random_forest = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------Adaboost Classifier-----------------------")
model = choose_classifiers("adaboost_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_adaboost, precision_adaboost, recall_adaboost, f1_adaboost, kappa_score_adaboost = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------K Neibhors Classifier-----------------------")
model = choose_classifiers("k_neighbors_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_k_neighbors, precision_k_neighbors, recall_k_neighbors, f1_k_neighbors, kappa_score_k_neighbors = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------Support Vector Classifier-----------------------")
model = choose_classifiers("support_vector_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_svc, precision_svc, recall_svc, f1_svc, kappa_score_svc = print_metrics_function(y_test, y_predictions)


-----------------------Logistic Regression-----------------------
Accuracy: 0.4404432132963989
Precision: 0.4194384832593788
Recall: 0.38678125557879994
F1-Score: 0.3484857873247363
Cohen Kappa Score: 0.5515029948847507


-----------------------Random Forest Classifier-----------------------
Accuracy: 0.5373961218836565
Precision: 0.4398766930743675
Recall: 0.44080553104975906
F1-Score: 0.4043604655992235
Cohen Kappa Score: 0.6872329329430369


-----------------------Adaboost Classifier-----------------------
Accuracy: 0.3961218836565097
Precision: 0.2828282828282828
Recall: 0.2431957368965243
F1-Score: 0.19296081277213353
Cohen Kappa Score: 0.259543185599758


-----------------------K Neibhors Classifier-----------------------
Accuracy: 0.49584487534626037
Precision: 0.6141106389161309
Recall: 0.44044296273443473
F1-Score: 0.4437849682170759
Cohen Kappa Score: 0.6353022034894793


-----------------------Support Vector Classifier-----------------------
Accuracy: 0.3656509695290859
Prec

### Model with Metrics (Essay Set - 6)

In [28]:
df_essay_set = df[df.essay_set == 6]
X, y = dataset_preparation(df_essay_set)
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle = True, 
                                                    random_state = 101, test_size = 0.2)

In [29]:
# This code can take about 5 - 10 minutes to run depending on the speed of the system
BATCH_SIZE = 16
MAX_LENGTH = 512

train_encodings = tokenizer(list(X_train['preprocessed_text']), truncation=True, padding=True, max_length=MAX_LENGTH, return_tensors='tf')
train_dataset = tf.data.Dataset.from_tensor_slices((dict(train_encodings), y_train)).batch(BATCH_SIZE)

test_encodings = tokenizer(list(X_test['preprocessed_text']), truncation=True, padding=True, max_length=MAX_LENGTH, return_tensors='tf')
test_dataset = tf.data.Dataset.from_tensor_slices((dict(test_encodings), y_test)).batch(BATCH_SIZE)

embeddings_train = []
for batch in tqdm(train_dataset):
    embeddings_train.append(gpt_model(batch[0]['input_ids'])[0][:, -1, :])
embeddings_train = tf.concat(embeddings_train, axis=0)

embeddings_test = []
for batch in tqdm(test_dataset):
    embeddings_test.append(gpt_model(batch[0]['input_ids'])[0][:, -1, :])
embeddings_test = tf.concat(embeddings_test, axis=0)

100%|██████████████████████████████████████████████████████████████████████████████████| 90/90 [00:23<00:00,  3.79it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 23/23 [00:05<00:00,  3.89it/s]


In [30]:
print("-----------------------Logistic Regression-----------------------")
model = choose_classifiers("logistic_regression")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_logistic_reg, precision_logistic_reg, recall_logistic_reg, f1_logistic_reg, kappa_score_logistic_reg = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------Random Forest Classifier-----------------------")
model = choose_classifiers("random_forest_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_random_forest, precision_random_forest, recall_random_forest, f1_random_forest, kappa_score_random_forest = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------Adaboost Classifier-----------------------")
model = choose_classifiers("adaboost_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_adaboost, precision_adaboost, recall_adaboost, f1_adaboost, kappa_score_adaboost = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------K Neibhors Classifier-----------------------")
model = choose_classifiers("k_neighbors_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_k_neighbors, precision_k_neighbors, recall_k_neighbors, f1_k_neighbors, kappa_score_k_neighbors = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------Support Vector Classifier-----------------------")
model = choose_classifiers("support_vector_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_svc, precision_svc, recall_svc, f1_svc, kappa_score_svc = print_metrics_function(y_test, y_predictions)


-----------------------Logistic Regression-----------------------
Accuracy: 0.325
Precision: 0.31789825840246005
Recall: 0.34965930986162197
F1-Score: 0.2508616525070798
Cohen Kappa Score: 0.4982526210683974


-----------------------Random Forest Classifier-----------------------
Accuracy: 0.5277777777777778
Precision: 0.3860048093838232
Recall: 0.41225746315341694
F1-Score: 0.3937333161177591
Cohen Kappa Score: 0.6431919381298337


-----------------------Adaboost Classifier-----------------------
Accuracy: 0.3
Precision: 0.21571137508168992
Recall: 0.277430373095113
F1-Score: 0.17919703832329753
Cohen Kappa Score: 0.38911613017961943


-----------------------K Neibhors Classifier-----------------------
Accuracy: 0.39166666666666666
Precision: 0.3215442880097593
Recall: 0.33740172159247306
F1-Score: 0.3048868533924695
Cohen Kappa Score: 0.5017269402681837


-----------------------Support Vector Classifier-----------------------
Accuracy: 0.48055555555555557
Precision: 0.096111111111111

### Model with Metrics (Essay Set - 7)

In [31]:
df_essay_set = df[df.essay_set == 7]
X, y = dataset_preparation(df_essay_set)
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle = True, 
                                                    random_state = 101, test_size = 0.2)

In [32]:
# This code can take about 5 - 10 minutes to run depending on the speed of the system
BATCH_SIZE = 16
MAX_LENGTH = 512

train_encodings = tokenizer(list(X_train['preprocessed_text']), truncation=True, padding=True, max_length=MAX_LENGTH, return_tensors='tf')
train_dataset = tf.data.Dataset.from_tensor_slices((dict(train_encodings), y_train)).batch(BATCH_SIZE)

test_encodings = tokenizer(list(X_test['preprocessed_text']), truncation=True, padding=True, max_length=MAX_LENGTH, return_tensors='tf')
test_dataset = tf.data.Dataset.from_tensor_slices((dict(test_encodings), y_test)).batch(BATCH_SIZE)

embeddings_train = []
for batch in tqdm(train_dataset):
    embeddings_train.append(gpt_model(batch[0]['input_ids'])[0][:, -1, :])
embeddings_train = tf.concat(embeddings_train, axis=0)

embeddings_test = []
for batch in tqdm(test_dataset):
    embeddings_test.append(gpt_model(batch[0]['input_ids'])[0][:, -1, :])
embeddings_test = tf.concat(embeddings_test, axis=0)

100%|██████████████████████████████████████████████████████████████████████████████████| 79/79 [00:27<00:00,  2.89it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:06<00:00,  3.02it/s]


In [33]:
print("-----------------------Logistic Regression-----------------------")
model = choose_classifiers("logistic_regression")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_logistic_reg, precision_logistic_reg, recall_logistic_reg, f1_logistic_reg, kappa_score_logistic_reg = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------Random Forest Classifier-----------------------")
model = choose_classifiers("random_forest_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_random_forest, precision_random_forest, recall_random_forest, f1_random_forest, kappa_score_random_forest = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------Adaboost Classifier-----------------------")
model = choose_classifiers("adaboost_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_adaboost, precision_adaboost, recall_adaboost, f1_adaboost, kappa_score_adaboost = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------K Neibhors Classifier-----------------------")
model = choose_classifiers("k_neighbors_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_k_neighbors, precision_k_neighbors, recall_k_neighbors, f1_k_neighbors, kappa_score_k_neighbors = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------Support Vector Classifier-----------------------")
model = choose_classifiers("support_vector_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_svc, precision_svc, recall_svc, f1_svc, kappa_score_svc = print_metrics_function(y_test, y_predictions)


-----------------------Logistic Regression-----------------------
Accuracy: 0.054140127388535034
Precision: 0.016046205227196127
Recall: 0.07114367114367115
F1-Score: 0.022080132946922564
Cohen Kappa Score: 0.14571914799797436


-----------------------Random Forest Classifier-----------------------
Accuracy: 0.14012738853503184
Precision: 0.06272694039694061
Recall: 0.10255954332043979
F1-Score: 0.06288810719561673
Cohen Kappa Score: 0.5675649674665229


-----------------------Adaboost Classifier-----------------------
Accuracy: 0.14331210191082802
Precision: 0.028508753963247285
Recall: 0.13197492163009403
F1-Score: 0.04677015286315846
Cohen Kappa Score: 0.6142216068810089


-----------------------K Neibhors Classifier-----------------------
Accuracy: 0.08280254777070063
Precision: 0.017302274445131587
Recall: 0.07261997261997262
F1-Score: 0.02749636476192022
Cohen Kappa Score: 0.36737588539623656


-----------------------Support Vector Classifier-----------------------
Accuracy: 0.09

### Model with Metrics (Essay Set - 8)

In [34]:
df_essay_set = df[df.essay_set == 8]
X, y = dataset_preparation(df_essay_set)
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle = True, 
                                                    random_state = 101, test_size = 0.2)

In [35]:
# This code can take about 5 - 10 minutes to run depending on the speed of the system
BATCH_SIZE = 16
MAX_LENGTH = 512

train_encodings = tokenizer(list(X_train['preprocessed_text']), truncation=True, padding=True, max_length=MAX_LENGTH, return_tensors='tf')
train_dataset = tf.data.Dataset.from_tensor_slices((dict(train_encodings), y_train)).batch(BATCH_SIZE)

test_encodings = tokenizer(list(X_test['preprocessed_text']), truncation=True, padding=True, max_length=MAX_LENGTH, return_tensors='tf')
test_dataset = tf.data.Dataset.from_tensor_slices((dict(test_encodings), y_test)).batch(BATCH_SIZE)

embeddings_train = []
for batch in tqdm(train_dataset):
    embeddings_train.append(gpt_model(batch[0]['input_ids'])[0][:, -1, :])
embeddings_train = tf.concat(embeddings_train, axis=0)

embeddings_test = []
for batch in tqdm(test_dataset):
    embeddings_test.append(gpt_model(batch[0]['input_ids'])[0][:, -1, :])
embeddings_test = tf.concat(embeddings_test, axis=0)

100%|██████████████████████████████████████████████████████████████████████████████████| 37/37 [00:25<00:00,  1.44it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:10<00:00,  1.05s/it]


In [36]:
print("-----------------------Logistic Regression-----------------------")
model = choose_classifiers("logistic_regression")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_logistic_reg, precision_logistic_reg, recall_logistic_reg, f1_logistic_reg, kappa_score_logistic_reg = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------Random Forest Classifier-----------------------")
model = choose_classifiers("random_forest_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_random_forest, precision_random_forest, recall_random_forest, f1_random_forest, kappa_score_random_forest = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------Adaboost Classifier-----------------------")
model = choose_classifiers("adaboost_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_adaboost, precision_adaboost, recall_adaboost, f1_adaboost, kappa_score_adaboost = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------K Neibhors Classifier-----------------------")
model = choose_classifiers("k_neighbors_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_k_neighbors, precision_k_neighbors, recall_k_neighbors, f1_k_neighbors, kappa_score_k_neighbors = print_metrics_function(y_test, y_predictions)

print("\n")
print("-----------------------Support Vector Classifier-----------------------")
model = choose_classifiers("support_vector_classifier")
model.fit(embeddings_train, y_train)
y_predictions = model.predict(embeddings_test)
accuracy_svc, precision_svc, recall_svc, f1_svc, kappa_score_svc = print_metrics_function(y_test, y_predictions)


-----------------------Logistic Regression-----------------------
Accuracy: 0.18620689655172415
Precision: 0.011843270365997638
Recall: 0.036740558292282434
F1-Score: 0.017006802721088437
Cohen Kappa Score: 0.26708986312239635


-----------------------Random Forest Classifier-----------------------
Accuracy: 0.18620689655172415
Precision: 0.02910135841170324
Recall: 0.0432063477545047
F1-Score: 0.030471743295019157
Cohen Kappa Score: 0.4567809978816424


-----------------------Adaboost Classifier-----------------------
Accuracy: 0.19310344827586207
Precision: 0.0097985347985348
Recall: 0.03622742200328407
F1-Score: 0.014774882699411001
Cohen Kappa Score: 0.3125753314584171


-----------------------K Neibhors Classifier-----------------------
Accuracy: 0.11724137931034483
Precision: 0.04152399324813118
Recall: 0.04279983129566959
F1-Score: 0.04066094510119073
Cohen Kappa Score: 0.3170703293097833


-----------------------Support Vector Classifier-----------------------
Accuracy: 0.2
Pre