In [1]:
import pandas as pd

# Fetch the Wisconsin Diagnostic Breast Cancer file (wdbc.data) from the UCI repository.
# The file is read without a header row, so names are supplied explicitly:
# an ID, the diagnosis label, then 30 generically named feature columns.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data'
column_names = ['ID', 'Diagnosis', *(f'feature_{n}' for n in range(1, 31))]

# The ID column is not useful for the model, so it is discarded as part of
# the load instead of mutating the frame afterwards.
data = pd.read_csv(url, header=None, names=column_names).drop(columns='ID')


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Replace the diagnosis labels with integer class codes. The fitted encoder is
# kept in `label_encoder` so codes can be mapped back to labels later.
label_encoder = LabelEncoder()
data['Diagnosis'] = label_encoder.fit_transform(data['Diagnosis'])

# Features are every column except the target; the target is stored as a
# single row of shape (1, n_samples).
X = data.drop(columns='Diagnosis').values
Y = data['Diagnosis'].values[None, :]

# Hold out 20% of the samples for testing (fixed seed for reproducibility).
# Y is transposed to (n_samples, 1) so that it is split along the same
# sample axis as X; Y_train and Y_test therefore come back as column vectors.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y.T,
    test_size=0.2,
    random_state=1,
)


In [3]:
# Standardise the features. The scaler is fitted on the training split only,
# and the same fitted scaler is then applied to both splits.
# Each result is transposed, so samples end up along the columns.
# NOTE: X_train / X_test are overwritten here, so re-running this cell without
# re-running the split cell first would fit the scaler on already-transposed data.
scaler = StandardScaler().fit(X_train)
X_train, X_test = (scaler.transform(split).T for split in (X_train, X_test))


In [4]:
from cleaned_nn import DeepNeuralNetwork

# Layer sizes, input first. The first entry should match the number of
# features (30 here); the remaining entries are an example architecture
# ending in a single output unit.
layer_dims = [30, 20, 10, 1]

# Build the network with its training hyperparameters; print_cost=True makes
# it report the cost periodically while fitting.
model = DeepNeuralNetwork(
    layer_dims,
    learning_rate=0.0075,
    num_iterations=3000,
    print_cost=True,
)

# Y_train comes out of the split as a column vector, so it is transposed back
# to a row to line up with the transposed X_train.
parameters, costs = model.fit(X_train, Y_train.T)


Cost after iteration 0: 0.6931429371780813
Cost after iteration 100: 0.6831340584606894
Cost after iteration 200: 0.6762564408806359
Cost after iteration 300: 0.6715212316790682
Cost after iteration 400: 0.668253910788261
Cost after iteration 500: 0.6659941941146602
Cost after iteration 600: 0.6644276503529021
Cost after iteration 700: 0.6633390971705494
Cost after iteration 800: 0.6625809154122346
Cost after iteration 900: 0.6620515554950178
Cost after iteration 1000: 0.6616809666398035
Cost after iteration 1100: 0.6614206909309853
Cost after iteration 1200: 0.6612371460838903
Cost after iteration 1300: 0.6611069926861264
Cost after iteration 1400: 0.6610139846928905
Cost after iteration 1500: 0.6609467328018039
Cost after iteration 1600: 0.6608972862907662
Cost after iteration 1700: 0.6608600914974896
Cost after iteration 1800: 0.660831101028124
Cost after iteration 1900: 0.6608075777400757
Cost after iteration 2000: 0.6607875793555544
Cost after iteration 2100: 0.6607696014182056
Co

In [5]:
import numpy as np

# Forward propagation on the test set. Only the output activations are needed
# here; the second return value is discarded.
AL_test, _ = model.L_model_forward(X_test)

# Convert probabilities to binary predictions (threshold at 0.5)
predictions = (AL_test > 0.5).astype(int)

# Calculate accuracy.
# Y_test is a column vector of shape (n_samples, 1) because the split was done
# on Y.T, while the network output is presumably a row of shape (1, n_samples)
# (training is fed Y_train.T) -- TODO confirm against DeepNeuralNetwork.
# Comparing the two directly would broadcast to an (n_samples, n_samples)
# matrix and average over every prediction/label pair instead of matched
# pairs. Flattening both sides makes the comparison strictly element-wise
# regardless of which orientation each array has.
predicted_labels = predictions.ravel()
true_labels = Y_test.ravel()
assert predicted_labels.shape == true_labels.shape, (
    f"prediction/label count mismatch: {predicted_labels.shape} vs {true_labels.shape}"
)
accuracy = np.mean(predicted_labels == true_labels)

print(f"Test Accuracy: {accuracy}")


Test Accuracy: 0.631578947368421
