In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from google.colab import drive
from keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense, Dropout, Embedding
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras import metrics
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix
import tensorflow as tf
import seaborn as sns
from tensorflow.keras.utils import to_categorical
from gc import callbacks
import os
from tensorflow import keras
from keras.models import Sequential, load_model
from sklearn.metrics import classification_report

In [None]:
drive.mount('/content/drive')

X_train = np.genfromtxt('/content/drive/My Drive/thesis/data/processed_data/tokenized/X_train.csv', delimiter=',')
X_val = np.genfromtxt('/content/drive/My Drive/thesis/data/processed_data/tokenized/X_val.csv', delimiter=',')
X_test = np.genfromtxt('/content/drive/My Drive/thesis/data/processed_data/tokenized/X_test.csv', delimiter=',')

Y_train = np.genfromtxt('/content/drive/My Drive/thesis/data/processed_data/tokenized/Y_train.csv', delimiter=',')
Y_val = np.genfromtxt('/content/drive/My Drive/thesis/data/processed_data/tokenized/Y_val.csv', delimiter=',')
Y_test = np.genfromtxt('/content/drive/My Drive/thesis/data/processed_data/tokenized/Y_test.csv', delimiter=',')

print(X_train.shape,Y_train.shape)
print(X_val.shape,Y_val.shape)
print(X_test.shape,Y_test.shape)

Mounted at /content/drive
(291, 47) (291, 3)
(98, 47) (98, 3)
(130, 47) (130, 3)


In [None]:
from sklearn.utils import class_weight
labels =  [np.where(r==1)[0][0] for r in Y_train]
class_weights = compute_class_weight(
                                        class_weight = "balanced",
                                        classes = np.unique(labels),
                                        y = labels                                                 
                                    )
class_weights = dict(zip(np.unique(labels), class_weights))
print(class_weights)

{0: 0.7293233082706767, 1: 3.3448275862068964, 2: 0.751937984496124}


In [None]:
checkpoint_path = "/content/drive/My Drive/thesis/code/saved_detailed_model/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

def fit(classifier,epochs, batch_size,monitor = 'val_loss', verbose = 'auto'):
  history = classifier.fit(
    x = X_train, 
    y = Y_train, 
    epochs=epochs, 
    shuffle = False,
    batch_size=batch_size,
    validation_data=(X_val, Y_val),
    verbose = verbose,
    class_weight=class_weights, 
    callbacks=[ModelCheckpoint(filepath=checkpoint_path, monitor=monitor,mode='max',save_best_only=True)]
    )
  return history

def evaluate(classifier):
  accr = classifier.evaluate(X_test,Y_test)
  print('Test set\n  Loss: {:0.3f}\n  Accuracy: {:0.3f}'.format(accr[0],accr[1]))
  return accr

# d1

In [None]:
classifier = Sequential()
classifier.add(Embedding(332, 128, input_length = X_train.shape[1]))
classifier.add(Dropout(0.4))
classifier.add(LSTM(64, dropout=0.2, recurrent_dropout=0.2))
classifier.add(Dropout(0.4))
classifier.add(Dense(32))
classifier.add(Dropout(0.4))
classifier.add(Dense(3, activation='softmax')) 
classifier.compile(loss='binary_crossentropy', optimizer='adam', metrics=[metrics.categorical_accuracy])  

epochs = 50
batch_size = 128
history = fit(classifier, epochs, batch_size, 'val_categorical_accuracy',  0)

In [None]:
best_cassifier = load_model(checkpoint_path)
evaluate(best_cassifier)

Y_predict = classifier.predict(X_test)
Y_predict = to_categorical(np.argmax(Y_predict, axis=1), 3)

target_names = ["negative" , "neutral" , "positive"]
print(classification_report(Y_test, Y_predict, target_names = target_names))

Test set
  Loss: 0.667
  Accuracy: 0.677
              precision    recall  f1-score   support

    negative       0.72      0.57      0.64        60
     neutral       0.31      0.31      0.31        13
    positive       0.66      0.81      0.72        57

   micro avg       0.65      0.65      0.65       130
   macro avg       0.56      0.56      0.56       130
weighted avg       0.65      0.65      0.64       130
 samples avg       0.65      0.65      0.65       130



# d 2

In [None]:
classifier = Sequential()
classifier.add(Embedding(332, 64, input_length = X_train.shape[1]))
classifier.add(Dropout(0.4))
classifier.add(LSTM(64, dropout=0.2, recurrent_dropout=0.2))
classifier.add(Dropout(0.4))
classifier.add(Dense(32))
classifier.add(Dropout(0.4))
classifier.add(Dense(3, activation='softmax')) 
classifier.compile(loss='binary_crossentropy', optimizer='adam', metrics=[metrics.categorical_accuracy])  

epochs = 50
batch_size = 128
history = fit(classifier, epochs, batch_size, 'val_categorical_accuracy',  0)

In [None]:
best_cassifier = load_model(checkpoint_path)
evaluate(best_cassifier)

Y_predict = classifier.predict(X_test)
Y_predict = to_categorical(np.argmax(Y_predict, axis=1), 3)

target_names = ["negative" , "neutral" , "positive"]
print(classification_report(Y_test, Y_predict, target_names = target_names))

Test set
  Loss: 0.749
  Accuracy: 0.700
              precision    recall  f1-score   support

    negative       0.76      0.65      0.70        60
     neutral       0.42      0.38      0.40        13
    positive       0.69      0.81      0.74        57

   micro avg       0.69      0.69      0.69       130
   macro avg       0.62      0.61      0.61       130
weighted avg       0.70      0.69      0.69       130
 samples avg       0.69      0.69      0.69       130



#d 3

In [None]:
classifier = Sequential()
classifier.add(Embedding(332, 64, input_length = X_train.shape[1]))
classifier.add(Dropout(0.4))
classifier.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
classifier.add(Dropout(0.4))
classifier.add(Dense(32))
classifier.add(Dropout(0.4))
classifier.add(Dense(3, activation='softmax')) 
classifier.compile(loss='binary_crossentropy', optimizer='adam', metrics=[metrics.categorical_accuracy])  

epochs = 50
batch_size = 128
history = fit(classifier, epochs, batch_size, 'val_categorical_accuracy',  0)

In [None]:
best_cassifier = load_model(checkpoint_path)
evaluate(best_cassifier)

Y_predict = classifier.predict(X_test)
Y_predict = to_categorical(np.argmax(Y_predict, axis=1), 3)

target_names = ["negative" , "neutral" , "positive"]
print(classification_report(Y_test, Y_predict, target_names = target_names))

Test set
  Loss: 0.823
  Accuracy: 0.677
              precision    recall  f1-score   support

    negative       0.76      0.63      0.69        60
     neutral       0.22      0.15      0.18        13
    positive       0.68      0.84      0.75        57

   micro avg       0.68      0.68      0.68       130
   macro avg       0.55      0.54      0.54       130
weighted avg       0.67      0.68      0.67       130
 samples avg       0.68      0.68      0.68       130



#d 4

In [None]:
classifier = Sequential()
classifier.add(Embedding(332, 64, input_length = X_train.shape[1]))
classifier.add(LSTM(128, recurrent_dropout=0.2))
classifier.add(Dense(32))
classifier.add(Dense(3, activation='softmax')) 
classifier.compile(loss='binary_crossentropy', optimizer='adam', metrics=[metrics.categorical_accuracy])  

epochs = 50
batch_size = 128
history = fit(classifier, epochs, batch_size, 'val_categorical_accuracy',  0)

In [None]:
best_cassifier = load_model(checkpoint_path)
evaluate(best_cassifier)

Y_predict = classifier.predict(X_test)
Y_predict = to_categorical(np.argmax(Y_predict, axis=1), 3)

target_names = ["negative" , "neutral" , "positive"]
print(classification_report(Y_test, Y_predict, target_names = target_names))

Test set
  Loss: 0.677
  Accuracy: 0.654
              precision    recall  f1-score   support

    negative       0.80      0.53      0.64        60
     neutral       0.29      0.31      0.30        13
    positive       0.62      0.82      0.71        57

   micro avg       0.64      0.64      0.64       130
   macro avg       0.57      0.56      0.55       130
weighted avg       0.67      0.64      0.63       130
 samples avg       0.64      0.64      0.64       130



#d 5

In [None]:
classifier = Sequential()
classifier.add(Embedding(332, 64, input_length = X_train.shape[1]))
classifier.add(Dropout(0.4))
classifier.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
classifier.add(Dropout(0.4))
classifier.add(Dense(64))
classifier.add(Dropout(0.4))
classifier.add(Dense(32))
classifier.add(Dropout(0.4))
classifier.add(Dense(3, activation='softmax')) 
classifier.compile(loss='binary_crossentropy', optimizer='adam', metrics=[metrics.categorical_accuracy])  

epochs = 50
batch_size = 128
history = fit(classifier, epochs, batch_size, 'val_categorical_accuracy',  0)

In [None]:
best_cassifier = load_model(checkpoint_path)
evaluate(best_cassifier)

Y_predict = classifier.predict(X_test)
Y_predict = to_categorical(np.argmax(Y_predict, axis=1), 3)

target_names = ["negative" , "neutral" , "positive"]
print(classification_report(Y_test, Y_predict, target_names = target_names))

Test set
  Loss: 0.506
  Accuracy: 0.692
              precision    recall  f1-score   support

    negative       0.73      0.55      0.63        60
     neutral       0.30      0.46      0.36        13
    positive       0.71      0.81      0.75        57

   micro avg       0.65      0.65      0.65       130
   macro avg       0.58      0.61      0.58       130
weighted avg       0.68      0.65      0.66       130
 samples avg       0.65      0.65      0.65       130



#d 6

In [25]:
classifier = Sequential()
classifier.add(Embedding(332, 64, input_length = X_train.shape[1]))
classifier.add(Dropout(0.4))
classifier.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
classifier.add(Dropout(0.4))
classifier.add(Dense(3, activation='softmax')) 
classifier.compile(loss='binary_crossentropy', optimizer='adam', metrics=[metrics.categorical_accuracy])  

epochs = 50
batch_size = 128
history = fit(classifier, epochs, batch_size, 'val_categorical_accuracy',  0)

In [26]:
best_cassifier = load_model(checkpoint_path)
evaluate(best_cassifier)

Y_predict = classifier.predict(X_test)
Y_predict = to_categorical(np.argmax(Y_predict, axis=1), 3)

target_names = ["negative" , "neutral" , "positive"]
print(classification_report(Y_test, Y_predict, target_names = target_names))

Test set
  Loss: 0.492
  Accuracy: 0.685
              precision    recall  f1-score   support

    negative       0.76      0.58      0.66        60
     neutral       0.21      0.23      0.22        13
    positive       0.67      0.82      0.74        57

   micro avg       0.65      0.65      0.65       130
   macro avg       0.55      0.55      0.54       130
weighted avg       0.67      0.65      0.65       130
 samples avg       0.65      0.65      0.65       130

