# Simple Models with Baseline and Meta-Data Features

In [None]:
import pandas as pd

# locations of the feature tables, one CSV file per data split
feature_files = {
    'training': 'data/training_data_features.csv',
    'validation': 'data/validation_data_features.csv',
}

# read each split into its own DataFrame;
# index_col=0 uses the first CSV column as the row index
src = feature_files['training']
training_data = pd.read_csv(src, index_col=0)

src = feature_files['validation']
validation_data = pd.read_csv(src, index_col=0)

### Logistic Regression

In [None]:
# Code reference: https://www.freecodecamp.org/news/how-to-build-and-train-linear-and-logistic-regression-ml-models-in-python/

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# input columns handed to the model: the baseline features
# plus the 'has_author' meta-data feature
features = [
    'date_count', 'url_count', 'exclm_count',
    'content_word_freq', 'stop_word_freq', 'stem_word_freq',
    'stop_reduction_rate', 'stem_reduction_rate',
    'average_sentence_length',
    'has_author',  # meta-data
]

# per data split: model inputs (X) and the 'reliable' target column (y)
X_training, y_training = training_data[features], training_data['reliable']
X_validation, y_validation = validation_data[features], validation_data['reliable']

# build the logistic regression model and train it in one step
# (fit() hands back the fitted estimator itself)
model = LogisticRegression(max_iter=1000).fit(X_training, y_training)

# predict on the validation split, then report the performance
predictions = model.predict(X_validation)

print('LOGISTIC REGRESSION w/ BASELINE, META-DATA [VALIDATION]')
print(classification_report(y_true=y_validation, y_pred=predictions))

In [None]:
# map every input feature name to the coefficient the model learned for it
# (coef_[0] is the first row of the fitted coefficient matrix)
{
    name: weight
    for name, weight in zip(model.feature_names_in_, model.coef_[0])
}

### Naive Bayes

In [None]:
# NAIVE BAYES
# REF: https://scikit-learn.org/stable/tutorial/text_analytics/working_with_text_data.html

from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, mean_squared_error

# input columns handed to the model: the baseline features
# plus the 'has_author' meta-data feature
features = [
    'date_count', 'url_count', 'exclm_count',
    'content_word_freq', 'stop_word_freq', 'stem_word_freq',
    'stop_reduction_rate', 'stem_reduction_rate',
    'average_sentence_length',
    'has_author',  # meta-data
]

# per data split: model inputs (X) and the 'reliable' target column (y)
X_training, y_training = training_data[features], training_data['reliable']
X_validation, y_validation = validation_data[features], validation_data['reliable']

# build the multinomial naive bayes model and train it in one step
# (fit() hands back the fitted estimator itself)
# NOTE(review): MultinomialNB rejects negative feature values -- confirm that
# none of the selected columns can drop below zero
nb_model = MultinomialNB().fit(X_training, y_training)

# predict on the validation split, then report the performance
y_pred = nb_model.predict(X_validation)

print('NAIVE BAYES w/ BASELINE, META-DATA [VALIDATION]')
print(classification_report(y_true=y_validation, y_pred=y_pred))