In [11]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.layers import Conv1D, MaxPool1D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam

print(tf.__version__)

2.20.0


In [12]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import datasets, metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
import pickle
    

In [15]:
cancer = datasets.load_breast_cancer()

In [16]:
X = pd.DataFrame(data=cancer.data, columns=cancer.feature_names)
X.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [17]:
y = cancer.target

In [18]:
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,

In [19]:
cancer.target_names

array(['malignant', 'benign'], dtype='<U9')

In [20]:
X_train_full, X_test_full, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0, stratify=y
)


In [21]:
print("--- Applying Recursive Feature Elimination (RFE) ---")
# Use a simple model (Logistic Regression) as the base estimator for RFE
logreg = LogisticRegression(solver='liblinear', random_state=0)
rfe = RFE(estimator=logreg, n_features_to_select=15) # Targeting 15 features
rfe.fit(X_train_full, y_train)

--- Applying Recursive Feature Elimination (RFE) ---


In [22]:
selected_feature_indices = rfe.support_
global SELECTED_FEATURE_NAMES 
SELECTED_FEATURE_NAMES = X_train_full.columns[selected_feature_indices].tolist()
print(f"Selected {len(SELECTED_FEATURE_NAMES)} Features:\n{SELECTED_FEATURE_NAMES}")

Selected 15 Features:
['mean radius', 'mean texture', 'mean perimeter', 'mean concavity', 'mean concave points', 'mean symmetry', 'texture error', 'perimeter error', 'area error', 'worst radius', 'worst texture', 'worst perimeter', 'worst compactness', 'worst concavity', 'worst concave points']


In [23]:
X_train = X_train_full[SELECTED_FEATURE_NAMES]
X_test = X_test_full[SELECTED_FEATURE_NAMES]

In [24]:
N_FEATURES = 15

In [25]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [26]:
X_train_reshaped = X_train_scaled.reshape(X_train_scaled.shape[0], N_FEATURES, 1)
X_test_reshaped = X_test_scaled.reshape(X_test_scaled.shape[0], N_FEATURES, 1)

print(f"X_train shape after RFE and reshape: {X_train_reshaped.shape}")

X_train shape after RFE and reshape: (455, 15, 1)


In [27]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
epochs = 50
model = Sequential()

# IMPORTANT: Update input_shape to the new number of features (15)
model.add(Conv1D(filters=32, kernel_size=2, activation="relu", input_shape=(N_FEATURES, 1)))
model.add(BatchNormalization())
model.add(Dropout(0.2))

model.add(Conv1D(filters=64, kernel_size=2, activation="relu"))
model.add(BatchNormalization())
model.add(Dropout(0.5))

model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile(
    optimizer=Adam(learning_rate=0.001), 
    loss='binary_crossentropy', 
    metrics=['accuracy']
)

early_stopping = EarlyStopping(
    monitor='val_loss', 
    patience=10, 
    restore_best_weights=True,
    verbose=1
)
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss', 
    factor=0.5, 
    patience=5, 
    min_lr=0.00001,
    verbose=1
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [30]:
print("\n--- Starting Model Training with Callbacks ---")

# Train the model, passing the callbacks list
history = model.fit(
    X_train_reshaped, 
    y_train, 
    epochs=100, # Increase max epochs, as EarlyStopping will handle when to stop
    batch_size=32,
    verbose=1,
    validation_data=(X_test_reshaped, y_test),
    callbacks=[early_stopping, reduce_lr] # <-- The crucial addition
)


--- Starting Model Training with Callbacks ---
Epoch 1/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.9824 - loss: 0.0496 - val_accuracy: 0.9649 - val_loss: 0.1071 - learning_rate: 6.2500e-05
Epoch 2/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.9626 - loss: 0.0726 - val_accuracy: 0.9649 - val_loss: 0.1084 - learning_rate: 6.2500e-05
Epoch 3/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.9692 - loss: 0.0659 - val_accuracy: 0.9561 - val_loss: 0.1091 - learning_rate: 6.2500e-05
Epoch 4/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.9846 - loss: 0.0444 - val_accuracy: 0.9561 - val_loss: 0.1102 - learning_rate: 6.2500e-05
Epoch 5/100
[1m 1/15[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 28ms/step - accuracy: 0.9375 - loss: 0.1061
Epoch 5: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.

In [31]:
model.save("Breast_model.keras") 
print("\nModel saved as Breast_model.keras")

# Save the feature list and the scaler object for the Flask app
with open("feature_artifacts.pkl", "wb") as f:
    pickle.dump({
        'feature_names': SELECTED_FEATURE_NAMES,
        'scaler': scaler
    }, f)
print("Feature names and scaler saved to feature_artifacts.pkl")


Model saved as Breast_model.keras
Feature names and scaler saved to feature_artifacts.pkl
