DAY 1 – Load Dataset & Baseline Model

In [1]:
from sklearn.datasets import load_wine
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

wine = load_wine(as_frame=True)
df = wine.frame

X = df.drop('target', axis=1)
y = df['target']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

baseline = LogisticRegression(max_iter=1000)
baseline.fit(X_train, y_train)
pred = baseline.predict(X_test)

print("Baseline Accuracy:", accuracy_score(y_test, pred))


Baseline Accuracy: 0.9722222222222222


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


DAY 2 – Feature Scaling Impact

In [2]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

scaled_model = LogisticRegression(max_iter=1000)
scaled_model.fit(X_train_scaled, y_train)

scaled_pred = scaled_model.predict(X_test_scaled)
print("Scaled Accuracy:", accuracy_score(y_test, scaled_pred))


Scaled Accuracy: 1.0


DAY 3 – Polynomial Feature Engineering

In [3]:
from sklearn.preprocessing import PolynomialFeatures

poly = PolynomialFeatures(degree=2, include_bias=False)
X_train_poly = poly.fit_transform(X_train_scaled)
X_test_poly = poly.transform(X_test_scaled)

poly_model = LogisticRegression(max_iter=2000)
poly_model.fit(X_train_poly, y_train)

poly_pred = poly_model.predict(X_test_poly)
print("Polynomial Accuracy:", accuracy_score(y_test, poly_pred))


Polynomial Accuracy: 1.0


DAY 4 – Feature Selection (SelectKBest)

In [4]:
from sklearn.feature_selection import SelectKBest, f_classif

selector = SelectKBest(score_func=f_classif, k=8)
X_train_sel = selector.fit_transform(X_train_scaled, y_train)
X_test_sel = selector.transform(X_test_scaled)

sel_model = LogisticRegression(max_iter=1000)
sel_model.fit(X_train_sel, y_train)

sel_pred = sel_model.predict(X_test_sel)
print("Selected Features Accuracy:", accuracy_score(y_test, sel_pred))


Selected Features Accuracy: 0.9722222222222222


DAY 5 – Cross Validation

In [5]:
from sklearn.model_selection import cross_val_score

scores = cross_val_score(
    LogisticRegression(max_iter=1000),
    X_train_scaled,
    y_train,
    cv=5
)

print("Cross-validation scores:", scores)
print("Mean CV Accuracy:", scores.mean())


Cross-validation scores: [0.96551724 0.96551724 1.         0.96428571 1.        ]
Mean CV Accuracy: 0.9790640394088671


DAY 6 – Hyperparameter Tuning (GridSearchCV)

In [6]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'C': [0.01, 0.1, 1, 10],
    'penalty': ['l2']
}

grid = GridSearchCV(
    LogisticRegression(max_iter=1000),
    param_grid,
    cv=5
)

grid.fit(X_train_scaled, y_train)

print("Best Parameters:", grid.best_params_)


Best Parameters: {'C': 0.1, 'penalty': 'l2'}


DAY 7 – Final Optimized Model Evaluation

In [7]:
best_model = grid.best_estimator_

final_pred = best_model.predict(X_test_scaled)
print("Final Accuracy:", accuracy_score(y_test, final_pred))


Final Accuracy: 1.0
