In [1]:
import pandas as pd
import numpy as np
import os
import joblib
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# 1. Pull the bundled breast-cancer dataset from scikit-learn
raw_data = load_breast_cancer()

# 2. Translate the dataset's column labels into the required names
column_mapping = {
    'mean radius': 'radius_mean',
    'mean texture': 'texture_mean',
    'mean perimeter': 'perimeter_mean',
    'mean area': 'area_mean',
    'mean smoothness': 'smoothness_mean',
}
df = (
    pd.DataFrame(raw_data.data, columns=raw_data.feature_names)
    .rename(columns=column_mapping)
)

# 3. Build the diagnosis target (Malignant=1, Benign=0):
#    rows whose original target is 0 are flagged 1, every other row 0
df['diagnosis'] = (raw_data.target == 0).astype(int)

# 4. Feature selection: exactly the 5 renamed columns, in mapping order
features = list(column_mapping.values())
X = df.loc[:, features]
y = df.loc[:, 'diagnosis']

print("Cell 1 Success: Data loaded and 5 features selected.")
X.head()

Cell 1 Success: Data loaded and 5 features selected.


Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean
0,17.99,10.38,122.8,1001.0,0.1184
1,20.57,17.77,132.9,1326.0,0.08474
2,19.69,21.25,130.0,1203.0,0.1096
3,11.42,20.38,77.58,386.1,0.1425
4,20.29,14.34,135.1,1297.0,0.1003


In [2]:
# 1. Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 2. Feature scaling: learn the scaling statistics from the training rows only,
#    then apply that same fitted scaler to both subsets
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 3. Fit a default-configured logistic regression on the scaled training data
model = LogisticRegression().fit(X_train_scaled, y_train)

print("Cell 2 Success: Model trained with scaled data.")

Cell 2 Success: Model trained with scaled data.


In [3]:
# 1. Generate predictions for the held-out (scaled) test rows
y_pred = model.predict(X_test_scaled)

# 2. Calculate required metrics.
#    Precision, recall and F1 are computed with scikit-learn's default positive
#    label (1), which is the Malignant class in this notebook's `diagnosis` encoding.
print("--- MODEL EVALUATION METRICS ---")
print(f"Accuracy:  {accuracy_score(y_test, y_pred):.4f}")
print(f"Precision: {precision_score(y_test, y_pred):.4f}")
print(f"Recall:    {recall_score(y_test, y_pred):.4f}")
print(f"F1-score:  {f1_score(y_test, y_pred):.4f}")

# 3. Save to disk.
#    exist_ok=True creates the folder when it is missing and is a no-op when it
#    already exists, so there is no gap between "check" and "create".
os.makedirs('model', exist_ok=True)

# The scaler is persisted alongside the model so the same scaling can be
# applied to new inputs before prediction.
joblib.dump(model, 'model/breast_cancer_model.pkl')
joblib.dump(scaler, 'model/scaler.pkl')

print("\nCell 3 Success: Model and Scaler saved for deployment.")

--- MODEL EVALUATION METRICS ---
Accuracy:  0.9386
Precision: 0.9286
Recall:    0.9070
F1-score:  0.9176

Cell 3 Success: Model and Scaler saved for deployment.


In [4]:
# 1. Reload the objects written by the save step.
#    NOTE: joblib.load unpickles the file, so only load artifacts from a trusted source.
reloaded_model = joblib.load('model/breast_cancer_model.pkl')
reloaded_scaler = joblib.load('model/scaler.pkl')

# 2. Mock input for validation (radius, texture, perimeter, area, smoothness).
#    The scaler was fitted on a DataFrame, so it recorded its column names.
#    Feeding it a bare ndarray triggers scikit-learn's
#    "X does not have valid feature names" warning and skips name checking,
#    so the sample is labelled with the scaler's own feature names instead.
sample = pd.DataFrame(
    [[17.99, 10.38, 122.8, 1001.0, 0.1184]],
    columns=reloaded_scaler.feature_names_in_,
)
sample_scaled = reloaded_scaler.transform(sample)
prediction = reloaded_model.predict(sample_scaled)

# Class 1 is Malignant, class 0 is Benign (see the `diagnosis` encoding).
result = "Malignant" if prediction[0] == 1 else "Benign"
print(f"Validation Prediction: {result}")

Validation Prediction: Malignant




In [5]:
# NOTE(review): this cell repeats the save already performed at the end of the
# evaluation cell; it rewrites the same two files with the same objects.

# Create the model directory if it doesn't exist.
# exist_ok=True makes this a single, race-free call that is a no-op when the
# folder is already present.
os.makedirs('model', exist_ok=True)

# Save the trained model and the scaler (both are needed at prediction time)
joblib.dump(model, 'model/breast_cancer_model.pkl')
joblib.dump(scaler, 'model/scaler.pkl')

print("Model and Scaler saved successfully in /model/ folder!")

Model and Scaler saved successfully in /model/ folder!


In [6]:
# Load the saved objects.
# NOTE: joblib.load unpickles the file, so only load artifacts from a trusted source.
loaded_model = joblib.load('model/breast_cancer_model.pkl')
loaded_scaler = joblib.load('model/scaler.pkl')

# Mock input for testing: these values are row 0 of the feature table shown by
# X.head() (radius, texture, perimeter, area, smoothness), not column means.
# The scaler was fitted on a DataFrame, so the sample is labelled with the
# scaler's recorded feature names; a bare ndarray would trigger scikit-learn's
# "X does not have valid feature names" warning.
sample_input = pd.DataFrame(
    [[17.99, 10.38, 122.8, 1001.0, 0.1184]],
    columns=loaded_scaler.feature_names_in_,
)
sample_scaled = loaded_scaler.transform(sample_input)

# Class 1 is Malignant, class 0 is Benign (see the `diagnosis` encoding).
prediction = loaded_model.predict(sample_scaled)
result = "Malignant" if prediction[0] == 1 else "Benign"

print(f"Reloaded model test prediction: {result}")

Reloaded model test prediction: Malignant


