In [2]:
from MyNLPToolBox import FilePickling as FP
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
import numpy as np

So in this experiment we're gonna fine-tune the best model we got from experiment 3, which is Logistic Regression with the Boolean TF-IDF embedder, trained without removing special characters & stopwords from the corpus.

# Load the pre-embedded dataset

In [3]:
# Load the pre-embedded train/test features and their labels from the pickled
# objects, in the same order as before (features first, then labels).
x_train, x_test, y_train, y_test = (
    FP.load_obj(obj_name)
    for obj_name in ('x_train_boolean', 'x_test_boolean', 'y_train', 'y_test')
)

loaded obj/x_train_boolean.pkl
loaded obj/x_test_boolean.pkl
loaded obj/y_train.pkl
loaded obj/y_test.pkl


# Re-Perform Logistic Regression

In [4]:
# Baseline: fit a default-parameter logistic regression on the training split
# and predict the test split with sklearn's built-in decision rule.
model_lr = LogisticRegression()
model_lr.fit(x_train, y_train)
y_test_pred = model_lr.predict(x_test)

print('====\nLOGISTIC REGRESSION')
# Each (label, metric) pair is printed with print's default separator, so the
# emitted text is the same as printing the four metrics one by one.
for label, metric in (
    ('Accuracy: ', accuracy_score),
    ('Precision Score: ', precision_score),
    ('Recall Score: ', recall_score),
    ('F1 Score: ', f1_score),
):
    print(label, metric(y_test, y_test_pred))



====
LOGISTIC REGRESSION
Accuracy:  0.8373826203607095
Precision Score:  0.8301615798922801
Recall Score:  0.7890784982935154
F1 Score:  0.8090988626421698


Alright! The accuracy and recall score are quite high. However, now we're gonna finetune this model to make it even better!

# Hyperparameter Tuning
Since sklearn's `predict` uses a decision threshold of 0.5, we're now gonna try different thresholds to see whether the performance on the test set improves.

In [5]:
# Coarse sweep of the decision threshold: np.arange(0.1, 0.9, 0.1), stop value
# excluded. Test-set metrics are printed for every candidate threshold.
# NOTE(review): the threshold is being chosen on the test set, so the scores of
# the selected threshold are optimistic; consider tuning on a validation split.
thresholds = np.arange(0.1,0.9,0.1)
# The predicted probabilities do not depend on the threshold, so compute them
# once instead of once per iteration. Column 1 is the probability of the second
# class in model_lr.classes_ (the same value the per-row prob[1] lookup used).
positive_proba = model_lr.predict_proba(x_test)[:, 1]
for thresh in thresholds:
    print('THRESHOLD ',thresh)
    # Strict '>' as before: predict 1 only when the probability exceeds thresh.
    y_test_pred = (positive_proba > thresh).astype(int)
    print('Accuracy: ', accuracy_score(y_test,y_test_pred))
    print('Precision Score: ', precision_score(y_test,y_test_pred))
    print('Recall Score: ', recall_score(y_test,y_test_pred))
    print('F1 Score: ', f1_score(y_test,y_test_pred))
    print('---')

THRESHOLD  0.1
Accuracy:  0.8190490386048591
Precision Score:  0.7448630136986302
Recall Score:  0.8907849829351536
F1 Score:  0.8113148896487411
---
THRESHOLD  0.2
Accuracy:  0.8293337308093606
Precision Score:  0.7764013626509755
Recall Score:  0.8556313993174062
F1 Score:  0.8140931969475563
---
THRESHOLD  0.30000000000000004
Accuracy:  0.8335072290952452
Precision Score:  0.7957585644371942
Recall Score:  0.8324232081911263
F1 Score:  0.8136780650542118
---
THRESHOLD  0.4
Accuracy:  0.8361901922790281
Precision Score:  0.8136348064405619
Recall Score:  0.810580204778157
F1 Score:  0.8121046332706445
---
THRESHOLD  0.5
Accuracy:  0.8373826203607095
Precision Score:  0.8301615798922801
Recall Score:  0.7890784982935154
F1 Score:  0.8090988626421698
---
THRESHOLD  0.6
Accuracy:  0.8358920852586078
Precision Score:  0.843409688321442
Recall Score:  0.7665529010238907
F1 Score:  0.803146790631146
---
THRESHOLD  0.7000000000000001
Accuracy:  0.831569533462513
Precision Score:  0.85714285

As we can see, the best threshold is somewhere between 0.4 and 0.5, so we're going to investigate this range in finer steps :D 

In [6]:
# Finer sweep of the decision threshold: np.arange(0.4, 0.5, 0.01), stop value
# excluded. Test-set metrics are printed for every candidate threshold.
# NOTE(review): the threshold is being chosen on the test set, so the scores of
# the selected threshold are optimistic; consider tuning on a validation split.
thresholds = np.arange(0.4,0.5,0.01)
# The predicted probabilities do not depend on the threshold, so compute them
# once instead of once per iteration. Column 1 is the probability of the second
# class in model_lr.classes_ (the same value the per-row prob[1] lookup used).
positive_proba = model_lr.predict_proba(x_test)[:, 1]
for thresh in thresholds:
    print('THRESHOLD ',thresh)
    # Strict '>' as before: predict 1 only when the probability exceeds thresh.
    y_test_pred = (positive_proba > thresh).astype(int)
    print('Accuracy: ', accuracy_score(y_test,y_test_pred))
    print('Precision Score: ', precision_score(y_test,y_test_pred))
    print('Recall Score: ', recall_score(y_test,y_test_pred))
    print('F1 Score: ', f1_score(y_test,y_test_pred))
    print('---')

THRESHOLD  0.4
Accuracy:  0.8361901922790281
Precision Score:  0.8136348064405619
Recall Score:  0.810580204778157
F1 Score:  0.8121046332706445
---
THRESHOLD  0.41000000000000003
Accuracy:  0.8367864063198688
Precision Score:  0.8164884442911349
Recall Score:  0.8078498293515358
F1 Score:  0.8121461657231086
---
THRESHOLD  0.42000000000000004
Accuracy:  0.836935459830079
Precision Score:  0.8185287994448299
Recall Score:  0.8051194539249147
F1 Score:  0.8117687543014453
---
THRESHOLD  0.43000000000000005
Accuracy:  0.8376807273811299
Precision Score:  0.8208434994771697
Recall Score:  0.8037542662116041
F1 Score:  0.8122090015519917
---
THRESHOLD  0.44000000000000006
Accuracy:  0.8379788344015502
Precision Score:  0.8225411270563528
Recall Score:  0.8020477815699659
F1 Score:  0.8121651978572664
---
THRESHOLD  0.45000000000000007
Accuracy:  0.83782978089134
Precision Score:  0.8233848314606742
Recall Score:  0.8003412969283277
F1 Score:  0.811699550017307
---
THRESHOLD  0.460000000000

As we can see, the best threshold is somewhere between 0.45 and 0.47, so we're going to investigate this neighbourhood in even finer steps (the next cell scans 0.46 to 0.48 in steps of 0.001) :D 

In [7]:
# Finest sweep of the decision threshold: np.arange(0.46, 0.48, 0.001), stop
# value excluded. Test-set metrics are printed for every candidate threshold.
# NOTE(review): the threshold is being chosen on the test set, so the scores of
# the selected threshold are optimistic; consider tuning on a validation split.
thresholds = np.arange(0.46,0.48,0.001) 
# The predicted probabilities do not depend on the threshold, so compute them
# once instead of once per iteration. Column 1 is the probability of the second
# class in model_lr.classes_ (the same value the per-row prob[1] lookup used).
positive_proba = model_lr.predict_proba(x_test)[:, 1]
for thresh in thresholds:
    print('THRESHOLD ',thresh)
    # Strict '>' as before: predict 1 only when the probability exceeds thresh.
    y_test_pred = (positive_proba > thresh).astype(int)
    print('Accuracy: ', accuracy_score(y_test,y_test_pred))
    print('Precision Score: ', precision_score(y_test,y_test_pred))
    print('Recall Score: ', recall_score(y_test,y_test_pred))
    print('F1 Score: ', f1_score(y_test,y_test_pred))
    print('---')

THRESHOLD  0.46
Accuracy:  0.8382769414219705
Precision Score:  0.8258565877781703
Recall Score:  0.7979522184300342
F1 Score:  0.8116646415552856
---
THRESHOLD  0.461
Accuracy:  0.8384259949321806
Precision Score:  0.826148409893993
Recall Score:  0.7979522184300342
F1 Score:  0.8118055555555556
---
THRESHOLD  0.462
Accuracy:  0.8385750484423908
Precision Score:  0.8266713830916166
Recall Score:  0.7976109215017065
F1 Score:  0.811881188118812
---
THRESHOLD  0.463
Accuracy:  0.8384259949321806
Precision Score:  0.8266100495399858
Recall Score:  0.7972696245733788
F1 Score:  0.8116747741487144
---
THRESHOLD  0.464
Accuracy:  0.8382769414219705
Precision Score:  0.82701169797944
Recall Score:  0.7962457337883959
F1 Score:  0.8113371587549992
---
THRESHOLD  0.465
Accuracy:  0.8384259949321806
Precision Score:  0.827304964539007
Recall Score:  0.7962457337883959
F1 Score:  0.8114782608695652
---
THRESHOLD  0.466
Accuracy:  0.8382769414219705
Precision Score:  0.8272437034409365
Recall Sco

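So in the end, our best threshold is 0.465, which gave us an accuracy of **0.8384** and an F1 score of **0.8114** :D (Note: in the output shown above, threshold 0.462 scores slightly higher, with accuracy 0.8386 and F1 0.8119.)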