In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [2]:
dataset = pd.read_csv("./../Social_Network_Ads.csv")

In [3]:
# to convert categorical data into binary format(one-hot encoding)
dataset = pd.get_dummies(dataset, drop_first=True)

In [4]:
dataset = dataset.drop("User ID", axis=1)

In [5]:
independent = dataset[["Age", "EstimatedSalary", "Gender_Male"]]
dependent = dataset["Purchased"]

In [6]:
x_train, x_test, y_train, y_test = train_test_split(independent, dependent, test_size=1/3, random_state=0)

In [17]:
sc=StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

In [21]:
classification = LogisticRegression()
classification.fit(x_train, y_train)
model_predict = classification.predict(x_test)
report = classification_report(y_test, model_predict)
print(report)


              precision    recall  f1-score   support

           0       0.89      0.93      0.91        85
           1       0.87      0.80      0.83        49

    accuracy                           0.88       134
   macro avg       0.88      0.86      0.87       134
weighted avg       0.88      0.88      0.88       134



In [47]:
param_grid = {
    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'saga', 'newton-cholesky', 'sag'],
    'penalty': ['l1', 'l2', 'elasticnet', 'None'],
    'C': [0.1, 1, 10]  # Regularization strength
}

grid = GridSearchCV(LogisticRegression(), param_grid, refit=True, verbose = 3, n_jobs=-1, scoring='f1_weighted')
grid.fit(x_train, y_train)

Fitting 5 folds for each of 72 candidates, totalling 360 fits


240 fits failed out of a total of 360.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
15 fits failed with the following error:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/opt/anaconda3/lib/python3.12/site-packages/sklearn/base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1194, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

In [49]:
best_params = grid.best_params_
print("Best Parameters:", best_params)

best_score = grid.best_score_
print("Best F1 Score:", best_score)

best_model = grid.best_estimator_
print("Best Model:", best_model)

# results = grid.cv_results_
# print(results)


Best Parameters: {'C': 0.1, 'penalty': 'l1', 'solver': 'liblinear'}
Best F1 Score: 0.8465331313893326
Best Model: LogisticRegression(C=0.1, penalty='l1', solver='liblinear')


In [51]:
y_predict = grid.predict(x_test)

In [53]:
confusion_matrix(y_test, y_predict)

array([[79,  6],
       [ 9, 40]])

In [55]:
report = classification_report(y_test, y_predict)
print(report)

              precision    recall  f1-score   support

           0       0.90      0.93      0.91        85
           1       0.87      0.82      0.84        49

    accuracy                           0.89       134
   macro avg       0.88      0.87      0.88       134
weighted avg       0.89      0.89      0.89       134

