In [30]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report,accuracy_score

# Load the persisted inputs (presumably the feature matrix and its labels,
# going by the file names — confirm against the notebook that wrote them).
# NOTE(review): joblib.load unpickles arbitrary objects; only load trusted files.
X = joblib.load('../data/feature.pkl')
y = joblib.load('../data/label.pkl')

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=44,
)

# Baseline classifier: logistic regression with scikit-learn's default settings.
model = LogisticRegression().fit(X_train, y_train)

# Score the baseline on the held-out split.
pred = model.predict(X_test)
print(accuracy_score(y_test, pred))
print(classification_report(y_test, pred))

0.9861649142224682
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       925
           1       0.99      0.99      0.99       882

    accuracy                           0.99      1807
   macro avg       0.99      0.99      0.99      1807
weighted avg       0.99      0.99      0.99      1807



In [31]:
from sklearn.naive_bayes import MultinomialNB

# Second baseline: multinomial Naive Bayes fitted on the same training split
# that the logistic-regression model used, so the two reports are comparable.
nb = MultinomialNB().fit(X_train, y_train)

# Evaluate on the same held-out split.
prd = nb.predict(X_test)
print(accuracy_score(y_test, prd))
print(classification_report(y_test, prd))

0.9601549529607084
              precision    recall  f1-score   support

           0       1.00      0.92      0.96       925
           1       0.93      1.00      0.96       882

    accuracy                           0.96      1807
   macro avg       0.96      0.96      0.96      1807
weighted avg       0.96      0.96      0.96      1807



In [32]:
# Overfitting check for the baseline logistic regression: compare accuracy on
# the data it was fitted on with accuracy on the held-out split.
train_pred = model.predict(X_train)
print('train accuracy', accuracy_score(y_train, train_pred))

test_pred = model.predict(X_test)
print('test accuracy', accuracy_score(y_test, test_pred))

train accuracy 0.9880968858131488
test accuracy 0.9861649142224682


In [33]:
from sklearn.model_selection import GridSearchCV

# Search space: only C varies (five values, 0.01 to 100); the penalty and
# solver are each pinned to a single option.
param_grid = dict(
    C=[0.01, 0.1, 1, 10, 100],
    penalty=['l2'],
    solver=['liblinear'],
)

# 5-fold cross-validated grid search on the training split, ranked by accuracy.
# The test split is not touched here.
grid = GridSearchCV(
    estimator=LogisticRegression(),
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
grid.fit(X_train, y_train)

# Report the winning combination and its mean cross-validated accuracy.
print('best parameter', grid.best_params_)
print('best score', grid.best_score_)

best parameter {'C': 100, 'penalty': 'l2', 'solver': 'liblinear'}
best score 0.994878892733564


In [34]:
# Build the final classifier from the hyper-parameters the grid search selected
# rather than re-typing them by hand. Hard-coded values silently go stale when
# the data or the search grid changes; unpacking grid.best_params_ keeps this
# cell in sync with the search on every Restart & Run All.
lastmodel = LogisticRegression(**grid.best_params_)
lastmodel.fit(X_train, y_train)

# Final evaluation on the held-out split, which was not used during the search.
pred = lastmodel.predict(X_test)

print(accuracy_score(y_test, pred))
print(classification_report(y_test, pred))

0.9983397897066962
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       925
           1       1.00      1.00      1.00       882

    accuracy                           1.00      1807
   macro avg       1.00      1.00      1.00      1807
weighted avg       1.00      1.00      1.00      1807



In [35]:
# Persist the tuned classifier to disk. The call is left as the cell's last
# expression so the notebook still displays the list of written file names.
joblib.dump(value=lastmodel, filename='../data/finalmodel.pkl')

['../data/finalmodel.pkl']