<h3>Model tests</h3>

>KNN test

In [1]:
import pickle
import pandas as pd
from sklearn.metrics import classification_report
import numpy as np
from keras.models import model_from_json
import os
from keras.models import load_model
from tensorflow.keras.layers import TextVectorization
from keras.preprocessing.sequence import pad_sequences

In [2]:
# Load the preprocessed test split once; every evaluation cell below reuses
# the same feature column (X_test) and label column (Y_test).
test_data = pd.read_csv('dataset/test_preprocessed.csv')
X_test, Y_test = test_data['text'], test_data['class']

In [3]:
# Evaluate the pickled KNN model on the held-out test set.
# The model is fed the raw text column, so the pickled object presumably
# bundles its own vectorization step -- TODO confirm against the training notebook.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted model files.
with open("models/KNN.sav", 'rb') as model_file:  # context manager closes the handle
    loaded_model = pickle.load(model_file)

print(classification_report(Y_test, loaded_model.predict(X_test)))

              precision    recall  f1-score   support

           0       0.63      0.49      0.55       744
           1       0.81      0.80      0.80       139
           2       0.80      0.90      0.85      2329
           3       0.70      0.50      0.58       105
           4       0.67      0.80      0.73       172
           5       0.79      0.82      0.81      1381
           6       0.71      0.58      0.64       477
           7       0.78      0.71      0.74       205
           8       0.86      0.70      0.77       236
           9       0.94      0.85      0.89        20

    accuracy                           0.77      5808
   macro avg       0.77      0.71      0.74      5808
weighted avg       0.77      0.77      0.77      5808



In [4]:
# Evaluate the pickled SVM model: transform the test text with the saved
# vectorizer, then report per-class metrics for the model's predictions.
# Both files are opened through context managers so the handles are closed
# as soon as unpickling finishes (the original left them open).
# NOTE(review): pickle.load can execute arbitrary code; only load trusted model files.
with open("models/SVM_Vectorizer.pk", 'rb') as vectorizer_file:
    svm_vectorizer = pickle.load(vectorizer_file)
SVM_X_test = svm_vectorizer.transform(X_test)

with open("models/SVM.sav", 'rb') as model_file:
    loaded_model = pickle.load(model_file)

print(classification_report(Y_test, loaded_model.predict(SVM_X_test)))

  SVM_X_test=pickle.load(open("models/SVM_Vectorizer.pk", 'rb')).transform(X_test)
  loaded_model = pickle.load(open("models/SVM.sav", 'rb'))


              precision    recall  f1-score   support

           0       0.71      0.61      0.66       744
           1       0.88      0.76      0.81       139
           2       0.82      0.93      0.87      2329
           3       0.77      0.50      0.61       105
           4       0.82      0.78      0.80       172
           5       0.85      0.86      0.85      1381
           6       0.78      0.65      0.71       477
           7       0.88      0.71      0.79       205
           8       0.92      0.75      0.83       236
           9       1.00      1.00      1.00        20

    accuracy                           0.82      5808
   macro avg       0.84      0.76      0.79      5808
weighted avg       0.82      0.82      0.81      5808



In [5]:
# Evaluate the saved LSTM model.
# Sequences are padded to this many tokens; presumably this must match the
# length used at training time -- TODO confirm against the training notebook.
MAX_SEQUENCE_LENGTH = 100

with open('models/LSTM_Tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

# Text -> integer token sequences -> fixed-length padded array.
X_test_transformed = pad_sequences(
    tokenizer.texts_to_sequences(X_test.values),
    maxlen=MAX_SEQUENCE_LENGTH,
)

loaded_model = load_model('models/LSTM.h5')
predicted = loaded_model.predict(X_test_transformed)

# Pick the highest-scoring class index for every test row.
Y_predict = np.argmax(predicted, axis=1)

print(classification_report(Y_test, Y_predict))

              precision    recall  f1-score   support

           0       0.66      0.59      0.62       744
           1       0.85      0.78      0.82       139
           2       0.85      0.86      0.86      2329
           3       0.62      0.48      0.54       105
           4       0.75      0.73      0.74       172
           5       0.81      0.84      0.82      1381
           6       0.65      0.70      0.68       477
           7       0.76      0.80      0.78       205
           8       0.83      0.76      0.79       236
           9       0.94      0.75      0.83        20

    accuracy                           0.79      5808
   macro avg       0.77      0.73      0.75      5808
weighted avg       0.79      0.79      0.79      5808



In [6]:
# Evaluate the saved LSTM + GloVe model.
# The TextVectorization layer is stored as a pickled dict with 'config' and
# 'weights' entries and is rebuilt from those two pieces here.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open("models/LSTM_GloVe_Vectorizer.pkl", "rb") as vectorizer_file:  # closes the handle
    from_disk = pickle.load(vectorizer_file)
vectorizer = TextVectorization.from_config(from_disk['config'])
vectorizer.set_weights(from_disk['weights'])

# Wrap each document in its own one-element row before vectorizing.
X_test_transformed = vectorizer(np.array([[s] for s in X_test])).numpy()

loaded_model = load_model('models/LSTM_GloVe_keras.h5')

predicted = loaded_model.predict(X_test_transformed)
# Pick the highest-scoring class index for every test row.
Y_predict = np.argmax(predicted, axis=1)

print(classification_report(Y_test, Y_predict))

              precision    recall  f1-score   support

           0       0.69      0.58      0.63       744
           1       0.88      0.74      0.80       139
           2       0.80      0.88      0.84      2329
           3       0.75      0.40      0.52       105
           4       0.84      0.80      0.82       172
           5       0.79      0.83      0.81      1381
           6       0.71      0.67      0.69       477
           7       0.81      0.71      0.75       205
           8       0.86      0.69      0.76       236
           9       1.00      0.70      0.82        20

    accuracy                           0.79      5808
   macro avg       0.81      0.70      0.75      5808
weighted avg       0.78      0.79      0.78      5808



In [7]:
# Evaluate the pickled BERT classifier on the test set.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted model files.
with open("models/BERT.sav", 'rb') as model_file:  # context manager closes the handle
    loaded_model = pickle.load(model_file)
predicted = loaded_model.predict(list(X_test))

# predict() returns an indexable result; element 1 is treated as per-class
# scores (presumably the raw model outputs -- TODO confirm with the library
# that produced BERT.sav) and reduced to a class index per row.
Y_predict = np.array(predicted[1])
Y_predict = np.argmax(Y_predict, axis=1)

print(classification_report(Y_test, Y_predict))

  0%|          | 0/5808 [00:00<?, ?it/s]

  0%|          | 0/726 [00:00<?, ?it/s]

              precision    recall  f1-score   support

           0       0.71      0.68      0.70       744
           1       0.89      0.79      0.84       139
           2       0.85      0.90      0.88      2329
           3       0.82      0.62      0.71       105
           4       0.85      0.77      0.80       172
           5       0.85      0.86      0.85      1381
           6       0.75      0.69      0.72       477
           7       0.89      0.79      0.84       205
           8       0.88      0.81      0.84       236
           9       1.00      0.95      0.97        20

    accuracy                           0.83      5808
   macro avg       0.85      0.79      0.81      5808
weighted avg       0.83      0.83      0.83      5808

