In [26]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.ensemble import VotingClassifier
import seaborn as sns
import matplotlib.pyplot as plt
import scikeras
from scikeras.wrappers import KerasClassifier, KerasRegressor
from sklearn.base import BaseEstimator, ClassifierMixin



In [13]:
url = 'https://raw.githubusercontent.com/richardcornall/Tensorflow-Pytorch-Ensemble-Machine-Learning-Model-for-Breast-Cancer-Detection-/main/data.csv'
# Load and check data
data = pd.read_csv(url)
data.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [14]:
# Drop columns with missing values
data = data.dropna(axis=1)
# Encode 'Diagnosis' column
labelencoder = LabelEncoder()
data['diagnosis'] = labelencoder.fit_transform(data['diagnosis'])
# Separate features and target
X = data.drop(['diagnosis'], axis=1)
y = data['diagnosis']
# Feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [15]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y)


In [16]:
def create_tf_model():
    model = Sequential()
    model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
    model.add(Dropout(0.2))
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(1, activation='sigmoid'))
    
    # Compile model
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model


In [17]:
tf_model = create_tf_model()

# Fit model
tf_model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=0)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<keras.src.callbacks.history.History at 0x21a8cefb770>

In [18]:
tf_loss, tf_accuracy = tf_model.evaluate(X_test, y_test, verbose=0)
print(f'TensorFlow Model Test Accuracy: {tf_accuracy*100:.2f}%')


TensorFlow Model Test Accuracy: 96.49%


In [19]:
class PyTorchModel(nn.Module):
    def __init__(self, input_size):
        super(PyTorchModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.dropout1 = nn.Dropout(0.2)
        self.fc2 = nn.Linear(64, 32)
        self.dropout2 = nn.Dropout(0.2)
        self.output = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout1(x)
        x = torch.relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.sigmoid(self.output(x))
        return x


In [20]:
# Convert data to PyTorch tensors
X_train_torch = torch.from_numpy(X_train.astype(np.float32))
y_train_torch = torch.from_numpy(y_train.values.astype(np.float32)).unsqueeze(1)
X_test_torch = torch.from_numpy(X_test.astype(np.float32))
y_test_torch = torch.from_numpy(y_test.values.astype(np.float32)).unsqueeze(1)

# Create datasets and loaders
train_dataset = TensorDataset(X_train_torch, y_train_torch)
test_dataset = TensorDataset(X_test_torch, y_test_torch)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)


In [21]:
# Initialize model, loss function, optimizer 
input_size = X_train.shape[1]
pytorch_model = PyTorchModel(input_size)
criterion = nn.BCELoss()
optimizer = optim.Adam(pytorch_model.parameters(), lr=0.001)

In [22]:
epochs = 50
for epoch in range(epochs):
    pytorch_model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = pytorch_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()


In [23]:
pytorch_model.eval()
with torch.no_grad():
    outputs = pytorch_model(X_test_torch)
    predicted = (outputs.numpy() > 0.5).astype(int)
    y_test_np = y_test_torch.numpy().astype(int)
    pytorch_accuracy = accuracy_score(y_test_np, predicted)
    print(f'PyTorch Model Test Accuracy: {pytorch_accuracy*100:.2f}%')


PyTorch Model Test Accuracy: 97.37%


In [25]:

def create_tf_model_wrapper():
    model = create_tf_model()
    return model

tf_model_wrapper = KerasClassifier(build_fn=create_tf_model_wrapper, epochs=50, batch_size=32, verbose=0)


In [28]:
class PyTorchClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, epochs=50, batch_size=32, learning_rate=0.001):
        self.epochs = epochs
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.model = None
    
    def fit(self, X, y):
        X_tensor = torch.from_numpy(X.astype(np.float32))
        y_tensor = torch.from_numpy(y.astype(np.float32)).unsqueeze(1)
        dataset = TensorDataset(X_tensor, y_tensor)
        loader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True)
        
        input_size = X.shape[1]
        self.model = PyTorchModel(input_size)
        criterion = nn.BCELoss()
        optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        
        for epoch in range(self.epochs):
            self.model.train()
            for inputs, labels in loader:
                optimizer.zero_grad()
                outputs = self.model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
        return self
    
    def predict(self, X):
        X_tensor = torch.from_numpy(X.astype(np.float32))
        self.model.eval()
        with torch.no_grad():
            outputs = self.model(X_tensor)
            return (outputs.numpy() > 0.5).astype(int).flatten()
    
    def predict_proba(self, X):
        X_tensor = torch.from_numpy(X.astype(np.float32))
        self.model.eval()
        with torch.no_grad():
            outputs = self.model(X_tensor)
            probs = outputs.numpy()
            return np.hstack((1 - probs, probs))
