In [74]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
from prettytable import PrettyTable

import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dropout, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.optimizers import RMSprop, SGD, Adam
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import accuracy_score
from tensorflow.keras.layers import GlobalMaxPooling1D
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import cohen_kappa_score, f1_score

import warnings
warnings.filterwarnings("ignore")

%matplotlib inline

In [23]:
# Load the preprocessed ASAP essays (path is relative to the notebook's working directory).
df = pd.read_csv(filepath_or_buffer = 'ASAP Dataset/Preprocessed_df.csv')

In [24]:
# Preview the first five rows of the loaded frame.
df.head(n = 5)

Unnamed: 0,essay_id,essay_set,essay,rater1_domain1,rater2_domain1,rater3_domain1,domain1_score,rater1_domain2,rater2_domain2,domain2_score,...,word_len,chars_len,avg_word_length,avg_sentence_length,pos_ratios,num_sentences,num_paragraphs,sentiment_polariy,sentiment_subjectivity,preprocessed_text
0,1,1,"Dear local newspaper, I think effects computer...",4,4,,8,,,,...,386,1875,3.984456,1.0,"{'NNP': 0.031088082901554404, 'JJ': 0.05181347...",16,1,0.310471,0.385613,dear local newspaper think effect computer peo...
1,2,1,"Dear @CAPS1 @CAPS2, I believe that using compu...",5,4,,9,,,,...,464,2288,4.030172,1.0,"{'NNP': 0.03879310344827586, ',': 0.0258620689...",20,1,0.274,0.613167,dear believe using computer benefit u many way...
2,3,1,"Dear, @CAPS1 @CAPS2 @CAPS3 More and more peopl...",4,3,,7,,,,...,313,1541,4.035144,1.0,"{'NNP': 0.04153354632587859, ',': 0.0287539936...",14,1,0.340393,0.498657,dear people use computer everyone agrees benef...
3,4,1,"Dear Local Newspaper, @CAPS1 I have found that...",5,5,,10,,,,...,611,3165,4.328969,1.0,"{'NNP': 0.11620294599018004, ',': 0.0212765957...",27,1,0.266828,0.441795,dear local newspaper found many expert say com...
4,5,1,"Dear @LOCATION1, I know having computers has a...",4,4,,8,,,,...,517,2569,4.071567,1.0,"{'NNP': 0.017408123791102514, ',': 0.025145067...",30,1,0.199684,0.485814,dear know computer positive effect people comp...


In [25]:
# Discard every column that contains at least one missing value.
df = df.dropna(axis = 'columns', how = 'any')

In [26]:
# Identifier, raw-text, POS-ratio and per-rater columns to remove
# (presumably not wanted as model inputs - confirm).
drop_columns = ['essay_id', 'pos_ratios', 'essay', 'rater1_domain1', 'rater2_domain1']
# Rebind instead of mutating in place, and skip columns that are already gone,
# so re-running this cell does not raise a KeyError.
df = df.drop(columns = drop_columns, errors = 'ignore')

In [27]:
def calculate_precision(y_true, y_pred, average='macro'):
    """Return the precision of `y_pred` measured against `y_true`.

    Thin wrapper around `precision_score`; `average` is forwarded unchanged
    and defaults to 'macro'.
    """
    return precision_score(y_true, y_pred, average=average)

def calculate_recall(y_true, y_pred, average='macro'):
    """Return the recall of `y_pred` measured against `y_true`.

    Thin wrapper around `recall_score`; `average` is forwarded unchanged
    and defaults to 'macro'.
    """
    return recall_score(y_true, y_pred, average=average)

def calculate_f1_score(y_true, y_pred, average='macro'):
    """Return the F1 score of `y_pred` measured against `y_true`.

    Thin wrapper around `f1_score`; `average` is forwarded unchanged
    and defaults to 'macro'.
    """
    return f1_score(y_true, y_pred, average=average)

def calculate_cohen_kappa_score(y_true, y_pred):
    """Return Cohen's kappa between `y_true` and `y_pred`.

    Delegates to `cohen_kappa_score` with quadratic weighting.
    """
    return cohen_kappa_score(y_true, y_pred, weights = 'quadratic')

def calculate_accuracy(y_true, y_pred):
    """Return the accuracy of `y_pred` against `y_true` via `accuracy_score`."""
    return accuracy_score(y_true, y_pred)

def print_metrics_function(y_actual, y_predictions):
    """Compute, print and return the evaluation metrics for one set of predictions.

    Args:
        y_actual: True class labels.
        y_predictions: Predicted class labels, aligned with `y_actual`.

    Returns:
        Tuple `(accuracy, precision, recall, f1, kappa_score)` as produced by
        the `calculate_*` helpers; precision, recall and F1 use those helpers'
        default averaging.
    """
    accuracy = calculate_accuracy(y_actual, y_predictions)
    precision = calculate_precision(y_actual, y_predictions)
    recall = calculate_recall(y_actual, y_predictions)
    f1 = calculate_f1_score(y_actual, y_predictions)
    kappa_score = calculate_cohen_kappa_score(y_actual, y_predictions)

    # Report each metric on its own line, as the function name promises,
    # before handing the raw values back to the caller.
    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1-Score: {f1}")
    print(f"Cohen Kappa Score: {kappa_score}")

    return accuracy, precision, recall, f1, kappa_score

In [29]:
def dataset_preparation(data, target = 'domain1_score'):
    """Split a frame into features and target.

    Args:
        data: DataFrame that contains the `target` column.
        target: Name of the column to predict.

    Returns:
        `(X, y)` where `X` is `data` without the `target` column and `y` is
        the `target` column itself. `data` is not modified.
    """
    return data.drop(columns = [target]), data[target]

In [1]:
def choose_classifiers(classifier_name = "logistic_regression"):
    """Return a new scikit-learn classifier with default settings.

    The estimator classes are imported inside each branch because the
    notebook's import cell does not bring them into scope; without these
    imports every supported name would fail with a NameError.

    Args:
        classifier_name: One of 'logistic_regression',
            'random_forest_classifier', 'adaboost_classifier',
            'k_neighbors_classifier' or 'support_vector_classifier'.

    Returns:
        An unfitted estimator instance for the requested classifier.

    Raises:
        ValueError: If `classifier_name` is not one of the supported names.
    """
    if classifier_name == 'logistic_regression':
        from sklearn.linear_model import LogisticRegression
        return LogisticRegression()
    elif classifier_name == 'random_forest_classifier':
        from sklearn.ensemble import RandomForestClassifier
        return RandomForestClassifier()
    elif classifier_name == 'adaboost_classifier':
        from sklearn.ensemble import AdaBoostClassifier
        return AdaBoostClassifier()
    elif classifier_name == 'k_neighbors_classifier':
        from sklearn.neighbors import KNeighborsClassifier
        return KNeighborsClassifier()
    elif classifier_name == 'support_vector_classifier':
        from sklearn.svm import SVC
        return SVC()
    else:
        raise ValueError(f"Classifier {classifier_name} not supported for this problem.")

### Model with Metrics (Essay Set - 1)

In [31]:
# Essay set 1: select its rows, one-hot encode the scores and hold out 20% for testing.
df_essay_set = df[df['essay_set'] == 1]
X, y = dataset_preparation(df_essay_set)
y = to_categorical(y)
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size = 0.2,
    random_state = 101,
    shuffle = True,
)

In [34]:
# Build the word index from the training essays only, then encode both splits with it.
vocab_size = 15000
tokenizer = Tokenizer(oov_token = "<OOV>", num_words = vocab_size)
tokenizer.fit_on_texts(X_train['preprocessed_text'])
sequences_train, sequences_test = (
    tokenizer.texts_to_sequences(split['preprocessed_text'])
    for split in (X_train, X_test)
)

In [55]:
# Bring every encoded essay to exactly max_sequence_len tokens:
# shorter ones are padded at the end, longer ones are cut at the end.
max_sequence_len = 750
padded_sequences_train, padded_sequences_test = (
    pad_sequences(encoded, maxlen = max_sequence_len, padding = 'post', truncating = 'post')
    for encoded in (sequences_train, sequences_test)
)

In [100]:
# Hyperparameters for the embedding + LSTM classifier.
embedding_dim = 512
lstm_units = 4
epochs = 10
batch_size = 64
# One output unit per column of the one-hot encoded targets.
num_classes = len(y_train[0])

# Seed TensorFlow's global RNG so re-running this cell starts from the same
# random state (the train/test split is already seeded with 101).
# NOTE(review): GPU kernels may still introduce small run-to-run differences.
tf.random.set_seed(101)

model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length = max_sequence_len),
    # return_sequences=True keeps the per-timestep outputs that the pooling layer reduces.
    LSTM(lstm_units, dropout = 0.2, return_sequences = True),
    GlobalMaxPooling1D(),
    Dense(10, activation = "relu"),
    Dense(num_classes, activation = "softmax"),
])

# `learning_rate` is the supported argument name; the legacy `lr` alias is
# deprecated and is rejected by newer Keras releases.
optimizer = Adam(learning_rate = 0.01)
model.compile(optimizer = optimizer, loss = 'categorical_crossentropy', metrics = ['accuracy'])

# The held-out test split is passed as validation data, so the per-epoch
# validation metrics are computed on the same rows that are scored afterwards.
history = model.fit(
    padded_sequences_train, y_train,
    batch_size = batch_size,
    epochs = epochs,
    validation_data = (padded_sequences_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [101]:
# Pick the most probable class per test essay and compare with the true class indices.
y_predictions = model.predict(padded_sequences_test).argmax(axis = 1)
print_metrics_function(y_test.argmax(axis = 1), y_predictions)

Accuracy: 0.35014005602240894
Precision: 0.20081637532026358
Recall: 0.16341059142470432
F1-Score: 0.16172301284509846
Cohen Kappa Score: 0.40699451586802116


(0.35014005602240894,
 0.20081637532026358,
 0.16341059142470432,
 0.16172301284509846,
 0.40699451586802116)

### Model with Metrics (Essay Set - 2)

In [103]:
# Essay set 2: select its rows, one-hot encode the scores and hold out 20% for testing.
df_essay_set = df[df['essay_set'] == 2]
X, y = dataset_preparation(df_essay_set)
y = to_categorical(y)
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size = 0.2,
    random_state = 101,
    shuffle = True,
)

In [104]:
# Build the word index from the training essays only, then encode both splits with it.
vocab_size = 15000
tokenizer = Tokenizer(oov_token = "<OOV>", num_words = vocab_size)
tokenizer.fit_on_texts(X_train['preprocessed_text'])
sequences_train, sequences_test = (
    tokenizer.texts_to_sequences(split['preprocessed_text'])
    for split in (X_train, X_test)
)

In [105]:
# Bring every encoded essay to exactly max_sequence_len tokens:
# shorter ones are padded at the end, longer ones are cut at the end.
max_sequence_len = 750
padded_sequences_train, padded_sequences_test = (
    pad_sequences(encoded, maxlen = max_sequence_len, padding = 'post', truncating = 'post')
    for encoded in (sequences_train, sequences_test)
)

In [106]:
# Hyperparameters for the embedding + LSTM classifier.
embedding_dim = 512
lstm_units = 4
epochs = 10
batch_size = 64
# One output unit per column of the one-hot encoded targets.
num_classes = len(y_train[0])

# Seed TensorFlow's global RNG so re-running this cell starts from the same
# random state (the train/test split is already seeded with 101).
# NOTE(review): GPU kernels may still introduce small run-to-run differences.
tf.random.set_seed(101)

model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length = max_sequence_len),
    # return_sequences=True keeps the per-timestep outputs that the pooling layer reduces.
    LSTM(lstm_units, dropout = 0.2, return_sequences = True),
    GlobalMaxPooling1D(),
    Dense(10, activation = "relu"),
    Dense(num_classes, activation = "softmax"),
])

# `learning_rate` is the supported argument name; the legacy `lr` alias is
# deprecated and is rejected by newer Keras releases.
optimizer = Adam(learning_rate = 0.01)
model.compile(optimizer = optimizer, loss = 'categorical_crossentropy', metrics = ['accuracy'])

# The held-out test split is passed as validation data, so the per-epoch
# validation metrics are computed on the same rows that are scored afterwards.
history = model.fit(
    padded_sequences_train, y_train,
    batch_size = batch_size,
    epochs = epochs,
    validation_data = (padded_sequences_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [107]:
# Pick the most probable class per test essay and compare with the true class indices.
y_predictions = model.predict(padded_sequences_test).argmax(axis = 1)
print_metrics_function(y_test.argmax(axis = 1), y_predictions)

Accuracy: 0.49722222222222223
Precision: 0.201593827082411
Recall: 0.23339276617422314
F1-Score: 0.21575757575757573
Cohen Kappa Score: 0.2920835145715528


(0.49722222222222223,
 0.201593827082411,
 0.23339276617422314,
 0.21575757575757573,
 0.2920835145715528)

### Model with Metrics (Essay Set - 3)

In [108]:
# Essay set 3: select its rows, one-hot encode the scores and hold out 20% for testing.
df_essay_set = df[df['essay_set'] == 3]
X, y = dataset_preparation(df_essay_set)
y = to_categorical(y)
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size = 0.2,
    random_state = 101,
    shuffle = True,
)

In [109]:
# Build the word index from the training essays only, then encode both splits with it.
vocab_size = 15000
tokenizer = Tokenizer(oov_token = "<OOV>", num_words = vocab_size)
tokenizer.fit_on_texts(X_train['preprocessed_text'])
sequences_train, sequences_test = (
    tokenizer.texts_to_sequences(split['preprocessed_text'])
    for split in (X_train, X_test)
)

In [110]:
# Bring every encoded essay to exactly max_sequence_len tokens:
# shorter ones are padded at the end, longer ones are cut at the end.
max_sequence_len = 750
padded_sequences_train, padded_sequences_test = (
    pad_sequences(encoded, maxlen = max_sequence_len, padding = 'post', truncating = 'post')
    for encoded in (sequences_train, sequences_test)
)

In [111]:
# Hyperparameters for the embedding + LSTM classifier.
embedding_dim = 512
lstm_units = 4
epochs = 10
batch_size = 64
# One output unit per column of the one-hot encoded targets.
num_classes = len(y_train[0])

# Seed TensorFlow's global RNG so re-running this cell starts from the same
# random state (the train/test split is already seeded with 101).
# NOTE(review): GPU kernels may still introduce small run-to-run differences.
tf.random.set_seed(101)

model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length = max_sequence_len),
    # return_sequences=True keeps the per-timestep outputs that the pooling layer reduces.
    LSTM(lstm_units, dropout = 0.2, return_sequences = True),
    GlobalMaxPooling1D(),
    Dense(10, activation = "relu"),
    Dense(num_classes, activation = "softmax"),
])

# `learning_rate` is the supported argument name; the legacy `lr` alias is
# deprecated and is rejected by newer Keras releases.
optimizer = Adam(learning_rate = 0.01)
model.compile(optimizer = optimizer, loss = 'categorical_crossentropy', metrics = ['accuracy'])

# The held-out test split is passed as validation data, so the per-epoch
# validation metrics are computed on the same rows that are scored afterwards.
history = model.fit(
    padded_sequences_train, y_train,
    batch_size = batch_size,
    epochs = epochs,
    validation_data = (padded_sequences_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [112]:
# Pick the most probable class per test essay and compare with the true class indices.
y_predictions = model.predict(padded_sequences_test).argmax(axis = 1)
print_metrics_function(y_test.argmax(axis = 1), y_predictions)

Accuracy: 0.3786127167630058
Precision: 0.2841512551990493
Recall: 0.25842119628208865
F1-Score: 0.2026565961986403
Cohen Kappa Score: 0.0727168973036868


(0.3786127167630058,
 0.2841512551990493,
 0.25842119628208865,
 0.2026565961986403,
 0.0727168973036868)

### Model with Metrics (Essay Set - 4)

In [113]:
# Essay set 4: select its rows, one-hot encode the scores and hold out 20% for testing.
df_essay_set = df[df['essay_set'] == 4]
X, y = dataset_preparation(df_essay_set)
y = to_categorical(y)
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size = 0.2,
    random_state = 101,
    shuffle = True,
)

In [114]:
# Build the word index from the training essays only, then encode both splits with it.
vocab_size = 15000
tokenizer = Tokenizer(oov_token = "<OOV>", num_words = vocab_size)
tokenizer.fit_on_texts(X_train['preprocessed_text'])
sequences_train, sequences_test = (
    tokenizer.texts_to_sequences(split['preprocessed_text'])
    for split in (X_train, X_test)
)

In [115]:
# Bring every encoded essay to exactly max_sequence_len tokens:
# shorter ones are padded at the end, longer ones are cut at the end.
max_sequence_len = 750
padded_sequences_train, padded_sequences_test = (
    pad_sequences(encoded, maxlen = max_sequence_len, padding = 'post', truncating = 'post')
    for encoded in (sequences_train, sequences_test)
)

In [116]:
# Hyperparameters for the embedding + LSTM classifier.
embedding_dim = 512
lstm_units = 4
epochs = 10
batch_size = 64
# One output unit per column of the one-hot encoded targets.
num_classes = len(y_train[0])

# Seed TensorFlow's global RNG so re-running this cell starts from the same
# random state (the train/test split is already seeded with 101).
# NOTE(review): GPU kernels may still introduce small run-to-run differences.
tf.random.set_seed(101)

model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length = max_sequence_len),
    # return_sequences=True keeps the per-timestep outputs that the pooling layer reduces.
    LSTM(lstm_units, dropout = 0.2, return_sequences = True),
    GlobalMaxPooling1D(),
    Dense(10, activation = "relu"),
    Dense(num_classes, activation = "softmax"),
])

# `learning_rate` is the supported argument name; the legacy `lr` alias is
# deprecated and is rejected by newer Keras releases.
optimizer = Adam(learning_rate = 0.01)
model.compile(optimizer = optimizer, loss = 'categorical_crossentropy', metrics = ['accuracy'])

# The held-out test split is passed as validation data, so the per-epoch
# validation metrics are computed on the same rows that are scored afterwards.
history = model.fit(
    padded_sequences_train, y_train,
    batch_size = batch_size,
    epochs = epochs,
    validation_data = (padded_sequences_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [117]:
# Pick the most probable class per test essay and compare with the true class indices.
y_predictions = model.predict(padded_sequences_test).argmax(axis = 1)
print_metrics_function(y_test.argmax(axis = 1), y_predictions)

Accuracy: 0.5084745762711864
Precision: 0.3962862318840579
Recall: 0.44466403162055335
F1-Score: 0.41455028425474094
Cohen Kappa Score: 0.617012644533099


(0.5084745762711864,
 0.3962862318840579,
 0.44466403162055335,
 0.41455028425474094,
 0.617012644533099)

### Model with Metrics (Essay Set - 5)

In [118]:
# Essay set 5: select its rows, one-hot encode the scores and hold out 20% for testing.
df_essay_set = df[df['essay_set'] == 5]
X, y = dataset_preparation(df_essay_set)
y = to_categorical(y)
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size = 0.2,
    random_state = 101,
    shuffle = True,
)

In [119]:
# Build the word index from the training essays only, then encode both splits with it.
vocab_size = 15000
tokenizer = Tokenizer(oov_token = "<OOV>", num_words = vocab_size)
tokenizer.fit_on_texts(X_train['preprocessed_text'])
sequences_train, sequences_test = (
    tokenizer.texts_to_sequences(split['preprocessed_text'])
    for split in (X_train, X_test)
)

In [120]:
# Bring every encoded essay to exactly max_sequence_len tokens:
# shorter ones are padded at the end, longer ones are cut at the end.
max_sequence_len = 750
padded_sequences_train, padded_sequences_test = (
    pad_sequences(encoded, maxlen = max_sequence_len, padding = 'post', truncating = 'post')
    for encoded in (sequences_train, sequences_test)
)

In [121]:
# Hyperparameters for the embedding + LSTM classifier.
embedding_dim = 512
lstm_units = 4
epochs = 10
batch_size = 64
# One output unit per column of the one-hot encoded targets.
num_classes = len(y_train[0])

# Seed TensorFlow's global RNG so re-running this cell starts from the same
# random state (the train/test split is already seeded with 101).
# NOTE(review): GPU kernels may still introduce small run-to-run differences.
tf.random.set_seed(101)

model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length = max_sequence_len),
    # return_sequences=True keeps the per-timestep outputs that the pooling layer reduces.
    LSTM(lstm_units, dropout = 0.2, return_sequences = True),
    GlobalMaxPooling1D(),
    Dense(10, activation = "relu"),
    Dense(num_classes, activation = "softmax"),
])

# `learning_rate` is the supported argument name; the legacy `lr` alias is
# deprecated and is rejected by newer Keras releases.
optimizer = Adam(learning_rate = 0.01)
model.compile(optimizer = optimizer, loss = 'categorical_crossentropy', metrics = ['accuracy'])

# The held-out test split is passed as validation data, so the per-epoch
# validation metrics are computed on the same rows that are scored afterwards.
history = model.fit(
    padded_sequences_train, y_train,
    batch_size = batch_size,
    epochs = epochs,
    validation_data = (padded_sequences_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [122]:
# Pick the most probable class per test essay and compare with the true class indices.
y_predictions = model.predict(padded_sequences_test).argmax(axis = 1)
print_metrics_function(y_test.argmax(axis = 1), y_predictions)

Accuracy: 0.44321329639889195
Precision: 0.3655787735683871
Recall: 0.362267803966723
F1-Score: 0.35922234068950837
Cohen Kappa Score: 0.5484591182886587


(0.44321329639889195,
 0.3655787735683871,
 0.362267803966723,
 0.35922234068950837,
 0.5484591182886587)

### Model with Metrics (Essay Set - 6)

In [123]:
# Essay set 6: select its rows, one-hot encode the scores and hold out 20% for testing.
df_essay_set = df[df['essay_set'] == 6]
X, y = dataset_preparation(df_essay_set)
y = to_categorical(y)
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size = 0.2,
    random_state = 101,
    shuffle = True,
)

In [124]:
# Build the word index from the training essays only, then encode both splits with it.
vocab_size = 15000
tokenizer = Tokenizer(oov_token = "<OOV>", num_words = vocab_size)
tokenizer.fit_on_texts(X_train['preprocessed_text'])
sequences_train, sequences_test = (
    tokenizer.texts_to_sequences(split['preprocessed_text'])
    for split in (X_train, X_test)
)

In [125]:
# Bring every encoded essay to exactly max_sequence_len tokens:
# shorter ones are padded at the end, longer ones are cut at the end.
max_sequence_len = 750
padded_sequences_train, padded_sequences_test = (
    pad_sequences(encoded, maxlen = max_sequence_len, padding = 'post', truncating = 'post')
    for encoded in (sequences_train, sequences_test)
)

In [126]:
# Hyperparameters for the embedding + LSTM classifier.
embedding_dim = 512
lstm_units = 4
epochs = 10
batch_size = 64
# One output unit per column of the one-hot encoded targets.
num_classes = len(y_train[0])

# Seed TensorFlow's global RNG so re-running this cell starts from the same
# random state (the train/test split is already seeded with 101).
# NOTE(review): GPU kernels may still introduce small run-to-run differences.
tf.random.set_seed(101)

model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length = max_sequence_len),
    # return_sequences=True keeps the per-timestep outputs that the pooling layer reduces.
    LSTM(lstm_units, dropout = 0.2, return_sequences = True),
    GlobalMaxPooling1D(),
    Dense(10, activation = "relu"),
    Dense(num_classes, activation = "softmax"),
])

# `learning_rate` is the supported argument name; the legacy `lr` alias is
# deprecated and is rejected by newer Keras releases.
optimizer = Adam(learning_rate = 0.01)
model.compile(optimizer = optimizer, loss = 'categorical_crossentropy', metrics = ['accuracy'])

# The held-out test split is passed as validation data, so the per-epoch
# validation metrics are computed on the same rows that are scored afterwards.
history = model.fit(
    padded_sequences_train, y_train,
    batch_size = batch_size,
    epochs = epochs,
    validation_data = (padded_sequences_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [127]:
# Pick the most probable class per test essay and compare with the true class indices.
y_predictions = model.predict(padded_sequences_test).argmax(axis = 1)
print_metrics_function(y_test.argmax(axis = 1), y_predictions)

Accuracy: 0.4444444444444444
Precision: 0.22929596719070405
Recall: 0.24350686885368966
F1-Score: 0.2341949175676111
Cohen Kappa Score: 0.2466666666666667


(0.4444444444444444,
 0.22929596719070405,
 0.24350686885368966,
 0.2341949175676111,
 0.2466666666666667)

### Model with Metrics (Essay Set - 7)

In [128]:
# Essay set 7: select its rows, one-hot encode the scores and hold out 20% for testing.
df_essay_set = df[df['essay_set'] == 7]
X, y = dataset_preparation(df_essay_set)
y = to_categorical(y)
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size = 0.2,
    random_state = 101,
    shuffle = True,
)

In [129]:
# Build the word index from the training essays only, then encode both splits with it.
vocab_size = 15000
tokenizer = Tokenizer(oov_token = "<OOV>", num_words = vocab_size)
tokenizer.fit_on_texts(X_train['preprocessed_text'])
sequences_train, sequences_test = (
    tokenizer.texts_to_sequences(split['preprocessed_text'])
    for split in (X_train, X_test)
)

In [130]:
# Bring every encoded essay to exactly max_sequence_len tokens:
# shorter ones are padded at the end, longer ones are cut at the end.
max_sequence_len = 750
padded_sequences_train, padded_sequences_test = (
    pad_sequences(encoded, maxlen = max_sequence_len, padding = 'post', truncating = 'post')
    for encoded in (sequences_train, sequences_test)
)

In [131]:
# Hyperparameters for the embedding + LSTM classifier.
embedding_dim = 512
lstm_units = 4
epochs = 10
batch_size = 64
# One output unit per column of the one-hot encoded targets.
num_classes = len(y_train[0])

# Seed TensorFlow's global RNG so re-running this cell starts from the same
# random state (the train/test split is already seeded with 101).
# NOTE(review): GPU kernels may still introduce small run-to-run differences.
tf.random.set_seed(101)

model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length = max_sequence_len),
    # return_sequences=True keeps the per-timestep outputs that the pooling layer reduces.
    LSTM(lstm_units, dropout = 0.2, return_sequences = True),
    GlobalMaxPooling1D(),
    Dense(10, activation = "relu"),
    Dense(num_classes, activation = "softmax"),
])

# `learning_rate` is the supported argument name; the legacy `lr` alias is
# deprecated and is rejected by newer Keras releases.
optimizer = Adam(learning_rate = 0.01)
model.compile(optimizer = optimizer, loss = 'categorical_crossentropy', metrics = ['accuracy'])

# The held-out test split is passed as validation data, so the per-epoch
# validation metrics are computed on the same rows that are scored afterwards.
history = model.fit(
    padded_sequences_train, y_train,
    batch_size = batch_size,
    epochs = epochs,
    validation_data = (padded_sequences_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [132]:
# Pick the most probable class per test essay and compare with the true class indices.
y_predictions = model.predict(padded_sequences_test).argmax(axis = 1)
print_metrics_function(y_test.argmax(axis = 1), y_predictions)

Accuracy: 0.09872611464968153
Precision: 0.01921364667907223
Recall: 0.04673705759287174
F1-Score: 0.026198926200366585
Cohen Kappa Score: -0.0004539280368134335


(0.09872611464968153,
 0.01921364667907223,
 0.04673705759287174,
 0.026198926200366585,
 -0.0004539280368134335)

### Model with Metrics (Essay Set - 8)

In [133]:
# Essay set 8: select its rows, one-hot encode the scores and hold out 20% for testing.
df_essay_set = df[df['essay_set'] == 8]
X, y = dataset_preparation(df_essay_set)
y = to_categorical(y)
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size = 0.2,
    random_state = 101,
    shuffle = True,
)

In [134]:
# Build the word index from the training essays only, then encode both splits with it.
vocab_size = 15000
tokenizer = Tokenizer(oov_token = "<OOV>", num_words = vocab_size)
tokenizer.fit_on_texts(X_train['preprocessed_text'])
sequences_train, sequences_test = (
    tokenizer.texts_to_sequences(split['preprocessed_text'])
    for split in (X_train, X_test)
)

In [135]:
# Bring every encoded essay to exactly max_sequence_len tokens:
# shorter ones are padded at the end, longer ones are cut at the end.
max_sequence_len = 750
padded_sequences_train, padded_sequences_test = (
    pad_sequences(encoded, maxlen = max_sequence_len, padding = 'post', truncating = 'post')
    for encoded in (sequences_train, sequences_test)
)

In [136]:
# Hyperparameters for the embedding + LSTM classifier.
embedding_dim = 512
lstm_units = 4
epochs = 10
batch_size = 64
# One output unit per column of the one-hot encoded targets.
num_classes = len(y_train[0])

# Seed TensorFlow's global RNG so re-running this cell starts from the same
# random state (the train/test split is already seeded with 101).
# NOTE(review): GPU kernels may still introduce small run-to-run differences.
tf.random.set_seed(101)

model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length = max_sequence_len),
    # return_sequences=True keeps the per-timestep outputs that the pooling layer reduces.
    LSTM(lstm_units, dropout = 0.2, return_sequences = True),
    GlobalMaxPooling1D(),
    Dense(10, activation = "relu"),
    Dense(num_classes, activation = "softmax"),
])

# `learning_rate` is the supported argument name; the legacy `lr` alias is
# deprecated and is rejected by newer Keras releases.
optimizer = Adam(learning_rate = 0.01)
model.compile(optimizer = optimizer, loss = 'categorical_crossentropy', metrics = ['accuracy'])

# The held-out test split is passed as validation data, so the per-epoch
# validation metrics are computed on the same rows that are scored afterwards.
history = model.fit(
    padded_sequences_train, y_train,
    batch_size = batch_size,
    epochs = epochs,
    validation_data = (padded_sequences_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [137]:
# Pick the most probable class per test essay and compare with the true class indices.
y_predictions = model.predict(padded_sequences_test).argmax(axis = 1)
print_metrics_function(y_test.argmax(axis = 1), y_predictions)

Accuracy: 0.14482758620689656
Precision: 0.010169977081741789
Recall: 0.03807471264367816
F1-Score: 0.015683962264150943
Cohen Kappa Score: 0.21483088294368113


(0.14482758620689656,
 0.010169977081741789,
 0.03807471264367816,
 0.015683962264150943,
 0.21483088294368113)