# Logistic Regression

The main purpose of this file is to run hyperparameter tuning to find the best settings for the chosen model, which will then be transferred to the main `models.ipynb` file.

In [33]:
# IMPORTS
from utils import *
from sklearn.linear_model import LogisticRegression

In [34]:
# LOAD THE DATASET AND BUILD THE TRAIN/TEST SPLITS
df = pd.read_pickle("../../datasets/pickle/processed_action_movie_data.pkl")

# "rating" is the prediction target; every remaining column is a feature.
y = df['rating']
X = df.drop(columns="rating")

# test_size=0.2 reserves 20% of the rows for evaluation; the fixed
# random_state keeps the split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Pass all four splits through the project's catboost_encoding helper and keep
# the returned versions.
# NOTE(review): presumably this encodes categorical features using statistics
# from the training split only -- confirm against utils.
encoded_splits = catboost_encoding(X_train, X_test, y_train, y_test)
X_train, X_test, y_train, y_test = encoded_splits

In [None]:
# HYPERPARAMETER SEARCH
# Not every solver supports every penalty: 'l1' is only available with the
# 'liblinear' and 'saga' solvers, while 'l2' is accepted by all five solvers
# listed here. A single dictionary would expand to the full cross-product and
# include 'l1' + 'newton-cg' / 'lbfgs' / 'sag', which can never be fitted and
# only add failed fits (warnings and NaN scores) to the search. Listing one
# dictionary per penalty keeps the search to valid combinations.
c_values = [0.01, 0.1, 1, 10, 100]
param_grid = [
    {
        'C': c_values,
        'penalty': ['l1'],
        'solver': ['liblinear', 'saga'],
    },
    {
        'C': c_values,
        'penalty': ['l2'],
        'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
    },
]

# 10 stratified folds repeated 3 times -> 30 fits per candidate setting.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
# Create the logistic regression estimator to be tuned
clf = LogisticRegression()
# Create a GridSearchCV object that ranks candidates by macro-averaged F1
grid = GridSearchCV(clf, param_grid, cv=cv, scoring="f1_macro")
# Fit the GridSearchCV object to the (encoded) training data
grid.fit(X_train, y_train)


In [41]:
# Report the winning hyperparameter combination and the score it achieved
# during the grid search.
for label, value in (
    ("Best parameters: ", grid.best_params_),
    ("Best score: ", grid.best_score_),
):
    print(label, value)

Best parameters:  {'C': 10, 'penalty': 'l2', 'solver': 'newton-cg'}
Best score:  0.574081626758661
