DAY 1 – Load Dataset & Split

In [1]:
from sklearn.datasets import load_breast_cancer
import pandas as pd
from sklearn.model_selection import train_test_split

data = load_breast_cancer(as_frame=True)
df = data.frame

X = df.drop('target', axis=1)
y = df['target']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


DAY 2 – Feature Scaling

In [2]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


DAY 3 – Baseline Model (Logistic Regression)

In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

model = LogisticRegression(max_iter=1000)
model.fit(X_train_scaled, y_train)

pred = model.predict(X_test_scaled)
print("Baseline Accuracy:", accuracy_score(y_test, pred))


Baseline Accuracy: 0.9736842105263158


DAY 4 – Cross-Validation

In [4]:
from sklearn.model_selection import cross_val_score

scores = cross_val_score(
    LogisticRegression(max_iter=1000),
    X_train_scaled, y_train, cv=5
)
print("Cross-validation scores:", scores)
print("Mean CV Accuracy:", scores.mean())


Cross-validation scores: [0.97802198 0.96703297 1.         0.97802198 0.94505495]
Mean CV Accuracy: 0.9736263736263737


DAY 5 – GridSearchCV for Hyperparameter Tuning

In [5]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'C': [0.01, 0.1, 1, 10, 100],
    'penalty': ['l2'],
    'solver': ['lbfgs']
}

grid = GridSearchCV(
    LogisticRegression(max_iter=1000),
    param_grid,
    cv=5
)

grid.fit(X_train_scaled, y_train)
print("Best Parameters:", grid.best_params_)


Best Parameters: {'C': 10, 'penalty': 'l2', 'solver': 'lbfgs'}


DAY 6 – Evaluate Optimized Model

In [6]:
best_model = grid.best_estimator_
best_pred = best_model.predict(X_test_scaled)

from sklearn.metrics import classification_report, confusion_matrix

print("Confusion Matrix:\n", confusion_matrix(y_test, best_pred))
print("\nClassification Report:\n", classification_report(y_test, best_pred))


Confusion Matrix:
 [[42  1]
 [ 2 69]]

Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.98      0.97        43
           1       0.99      0.97      0.98        71

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114



DAY 7 – Save Final Model for Deployment

In [7]:
import joblib

joblib.dump(best_model, "week9_best_model.pkl")
print("Model saved successfully!")


Model saved successfully!
