# Simple Logistic Regression Model

In [None]:
import pandas as pd

# Read the pre-computed feature tables for both splits.
# Column 0 of each CSV is used as the DataFrame index.
loaded = []
for src in ('data/training_data_features.csv',
            'data/validation_data_features.csv'):
    loaded.append(pd.read_csv(src, index_col=0))
training_data, validation_data = loaded

### LogReg model

In [None]:
# Code reference: https://www.freecodecamp.org/news/how-to-build-and-train-linear-and-logistic-regression-ml-models-in-python/

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV


# Columns of the feature tables that are fed to the model as inputs.
features = [
    'date_count',
    'url_count',
    'exclm_count',
    'content_word_freq',
    'stop_word_freq',
    'stem_word_freq',
    'stop_reduction_rate',
    'stem_reduction_rate',
    'average_sentence_length',
]

# Input matrices: the same column subset taken from each split.
X_training, X_validation = (
    frame[features] for frame in (training_data, validation_data)
)

# Target vectors: the 'reliable' column of each split.
y_training, y_validation = (
    frame['reliable'] for frame in (training_data, validation_data)
)

# Base estimator; the grid search below varies its solver and C.
model = LogisticRegression(max_iter=100)

# Candidate hyper-parameters: every solver is tried with every C value.
params = {
    'solver': ('lbfgs', 'sag'),
    'C': [1, 10],
}

# Search the grid on the training split only, using GridSearchCV's default
# cross-validation and scoring (none are overridden here).
clf = GridSearchCV(estimator=model, param_grid=params)
clf.fit(X_training, y_training)

# Show which result fields the search recorded (cell output).
sorted(clf.cv_results_)



In [None]:
# Display the hyper-parameter combination the grid search picked as best
# among the candidates it was given.
clf.best_params_


In [None]:
# Score the winning estimator from the grid search on the held-out
# validation split and print a per-class performance summary.
best_model = clf.best_estimator_
predictions = best_model.predict(X_validation)

print(
    'LOGISTIC REGRESSION (SIMPLE)',
    classification_report(y_validation, predictions),
    sep='\n',
)

In [None]:
# Map each input feature name to its learned coefficient.
# Only the first row of coef_ is used (presumably 'reliable' is a binary
# target, so there is a single row -- confirm against the data).
best_model = clf.best_estimator_
dict(zip(best_model.feature_names_in_, best_model.coef_[0]))