In [9]:
from MyNLPToolBox import FilePickling as FP
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
import numpy as np

So in this experiment we're gonna fine-tune the best model we got from the experiment 3, which Logistic Regression with Augmented TFIDF Embedder, without removing special characters & stopwords in the corpus.

# Load the pre-embedded dataset

In [3]:
x_train = FP.load_obj('x_train_aug')
x_test = FP.load_obj('x_test_aug')
y_train = FP.load_obj('y_train')
y_test = FP.load_obj('y_test')

loaded obj/x_train_best.pkl
loaded obj/x_test_best.pkl
loaded obj/y_train.pkl
loaded obj/y_test.pkl


# Re-Perform Logistic Regression

In [6]:
model_lr = LogisticRegression().fit(x_train,y_train)
y_test_pred = model_lr.predict(x_test)
print('====\nLOGISTIC REGRESSION')
print('Accuracy: ', accuracy_score(y_test,y_test_pred))
print('Precision Score: ', precision_score(y_test,y_test_pred))
print('Recall Score: ', recall_score(y_test,y_test_pred))
print('F1 Score: ', f1_score(y_test,y_test_pred))

====
LOGISTIC REGRESSION
Accuracy:  0.8373826203607095
Precision Score:  0.829688060236644
Recall Score:  0.7897610921501707
F1 Score:  0.8092323832837909


Alright! The accuracy and recall score are quite high. However, now we're gonna finetune this model to make it even better!

# Hyperparameter Tuning
As sklearn use the threshold of 0.5, now we're gonna try different threshold to see what if the perfomance is gained on the test set

In [44]:
thresholds = np.arange(0.1,0.9,0.1) # All threshold from 0.1 -> 0.9 with step of 0.1
for thresh in thresholds:
    print('THRESHOLD ',thresh)
    y_test_pred = [1  if prob[1] > thresh else 0 for prob in model_lr.predict_proba(x_test)]
    print('Accuracy: ', accuracy_score(y_test,y_test_pred))
    print('Precision Score: ', precision_score(y_test,y_test_pred))
    print('Recall Score: ', recall_score(y_test,y_test_pred))
    print('F1 Score: ', f1_score(y_test,y_test_pred))
    print('---')

THRESHOLD  0.1
Accuracy:  0.8193471456252794
Precision Score:  0.7440340909090909
Recall Score:  0.8938566552901024
F1 Score:  0.812093023255814
---
THRESHOLD  0.2
Accuracy:  0.8303771053808318
Precision Score:  0.7760320394331485
Recall Score:  0.8597269624573379
F1 Score:  0.8157383419689119
---
THRESHOLD  0.30000000000000004
Accuracy:  0.8339543896258756
Precision Score:  0.7948051948051948
Recall Score:  0.8354948805460751
F1 Score:  0.8146422628951748
---
THRESHOLD  0.4
Accuracy:  0.8367864063198688
Precision Score:  0.8128196385952949
Recall Score:  0.8136518771331058
F1 Score:  0.8132355449428619
---
THRESHOLD  0.5
Accuracy:  0.8373826203607095
Precision Score:  0.829688060236644
Recall Score:  0.7897610921501707
F1 Score:  0.8092323832837909
---
THRESHOLD  0.6
Accuracy:  0.834252496646296
Precision Score:  0.8414725770097671
Recall Score:  0.764505119453925
F1 Score:  0.8011444921316166
---
THRESHOLD  0.7000000000000001
Accuracy:  0.8339543896258756
Precision Score:  0.85917721

As we can see, the hyperparam is somewhere between 0.4 -> 0.5, we're going to investigate this range more specifically :D 

In [45]:
thresholds = np.arange(0.4,0.5,0.01) # All threshold from 0.1 -> 0.9 with step of 0.1
for thresh in thresholds:
    print('THRESHOLD ',thresh)
    y_test_pred = [1  if prob[1] > thresh else 0 for prob in model_lr.predict_proba(x_test)]
    print('Accuracy: ', accuracy_score(y_test,y_test_pred))
    print('Precision Score: ', precision_score(y_test,y_test_pred))
    print('Recall Score: ', recall_score(y_test,y_test_pred))
    print('F1 Score: ', f1_score(y_test,y_test_pred))
    print('---')

THRESHOLD  0.4
Accuracy:  0.8367864063198688
Precision Score:  0.8128196385952949
Recall Score:  0.8136518771331058
F1 Score:  0.8132355449428619
---
THRESHOLD  0.41000000000000003
Accuracy:  0.8367864063198688
Precision Score:  0.8147512864493996
Recall Score:  0.810580204778157
F1 Score:  0.8126603934987169
---
THRESHOLD  0.42000000000000004
Accuracy:  0.836935459830079
Precision Score:  0.8161157024793388
Recall Score:  0.8088737201365188
F1 Score:  0.8124785738772712
---
THRESHOLD  0.43000000000000005
Accuracy:  0.8367864063198688
Precision Score:  0.8173642338291248
Recall Score:  0.8064846416382253
F1 Score:  0.8118879917539942
---
THRESHOLD  0.44000000000000006
Accuracy:  0.8372335668504993
Precision Score:  0.8193189715079917
Recall Score:  0.8047781569965871
F1 Score:  0.8119834710743801
---
THRESHOLD  0.45000000000000007
Accuracy:  0.8373826203607095
Precision Score:  0.8209424083769633
Recall Score:  0.8027303754266212
F1 Score:  0.8117342536669543
---
THRESHOLD  0.460000000

As we can see, the hyperparam is somewhere between 0.46 -> 0.48, we're going to investigate this range more specifically :D 

In [46]:
thresholds = np.arange(0.46,0.48,0.001) # All threshold from 0.1 -> 0.9 with step of 0.1
for thresh in thresholds:
    print('THRESHOLD ',thresh)
    y_test_pred = [1  if prob[1] > thresh else 0 for prob in model_lr.predict_proba(x_test)]
    print('Accuracy: ', accuracy_score(y_test,y_test_pred))
    print('Precision Score: ', precision_score(y_test,y_test_pred))
    print('Recall Score: ', recall_score(y_test,y_test_pred))
    print('F1 Score: ', f1_score(y_test,y_test_pred))
    print('---')

THRESHOLD  0.46
Accuracy:  0.8376807273811299
Precision Score:  0.8240056318197818
Recall Score:  0.798976109215017
F1 Score:  0.8112978686536129
---
THRESHOLD  0.461
Accuracy:  0.8379788344015502
Precision Score:  0.8245861218738992
Recall Score:  0.798976109215017
F1 Score:  0.81157912983186
---
THRESHOLD  0.462
Accuracy:  0.8379788344015502
Precision Score:  0.8245861218738992
Recall Score:  0.798976109215017
F1 Score:  0.81157912983186
---
THRESHOLD  0.463
Accuracy:  0.83782978089134
Precision Score:  0.8245243128964059
Recall Score:  0.7986348122866894
F1 Score:  0.8113730929264911
---
THRESHOLD  0.464
Accuracy:  0.83782978089134
Precision Score:  0.8245243128964059
Recall Score:  0.7986348122866894
F1 Score:  0.8113730929264911
---
THRESHOLD  0.465
Accuracy:  0.8379788344015502
Precision Score:  0.824814945364822
Recall Score:  0.7986348122866894
F1 Score:  0.8115137853303278
---
THRESHOLD  0.466
Accuracy:  0.83782978089134
Precision Score:  0.82475317348378
Recall Score:  0.7982

As we can see, the hyperparam is somewhere between 0.47 -> 0.472, we're going to investigate this range more specifically :D 

In [48]:
thresholds = np.arange(0.47,0.472,0.0001) # All threshold from 0.1 -> 0.9 with step of 0.1
for thresh in thresholds:
    print('THRESHOLD ',thresh)
    y_test_pred = [1  if prob[1] > thresh else 0 for prob in model_lr.predict_proba(x_test)]
    print('Accuracy: ', accuracy_score(y_test,y_test_pred))
    print('Precision Score: ', precision_score(y_test,y_test_pred))
    print('Recall Score: ', recall_score(y_test,y_test_pred))
    print('F1 Score: ', f1_score(y_test,y_test_pred))
    print('---')

THRESHOLD  0.47
Accuracy:  0.8379788344015502
Precision Score:  0.8252735615954818
Recall Score:  0.7979522184300342
F1 Score:  0.8113829602637516
---
THRESHOLD  0.47009999999999996
Accuracy:  0.8379788344015502
Precision Score:  0.8252735615954818
Recall Score:  0.7979522184300342
F1 Score:  0.8113829602637516
---
THRESHOLD  0.47019999999999995
Accuracy:  0.8381278879117603
Precision Score:  0.8255649717514124
Recall Score:  0.7979522184300342
F1 Score:  0.8115237764665046
---
THRESHOLD  0.47029999999999994
Accuracy:  0.8382769414219705
Precision Score:  0.8258565877781703
Recall Score:  0.7979522184300342
F1 Score:  0.8116646415552856
---
THRESHOLD  0.47039999999999993
Accuracy:  0.8382769414219705
Precision Score:  0.8258565877781703
Recall Score:  0.7979522184300342
F1 Score:  0.8116646415552856
---
THRESHOLD  0.4704999999999999
Accuracy:  0.8382769414219705
Precision Score:  0.8258565877781703
Recall Score:  0.7979522184300342
F1 Score:  0.8116646415552856
---
THRESHOLD  0.4705999

So at the end our best threshold is 0.47 which gave us accuracy **83.83%** and F1 score of **0.813** :D