# CS5720: Neural Networks and Deep Learning (Spring 2025)
## ICP 4 - Problem 1b
**Student Name:** Nidhin Ninan

**School:** University of Central Missouri

**Video Link:** https://drive.google.com/file/d/1WjSHpy91Yn-WzhPcIszfnTF46LuW9O3q/view?usp=sharing

In [30]:
# Import necessary libraries
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, PReLU
from keras.layers import LeakyReLU  # Import LeakyReLU
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [3]:
# For reproducibility
np.random.seed(155)

The CSV file contains a header row with the following columns:
id, diagnosis, radius_mean, texture_mean, perimeter_mean, area_mean, ... , fractal_dimension_worst

We will:
- Remove the "id" column (not useful for prediction)
- Map the "diagnosis" column to a binary target:
    'M' (malignant) -> 1, and 'B' (benign) -> 0
- Use the remaining 30 feature columns for training.

In [48]:
# Load the dataset (reading header from file)
df = pd.read_csv("Breas Cancer.csv")

# Display first few rows to inspect the data
print("First 5 rows of the dataset:")
print(df.head())

# Drop the 'id' column; keep 'diagnosis' for target and all the rest as features
df = df.drop(['id'], axis=1)

# Map the 'diagnosis' column to binary values
df['diagnosis'] = df['diagnosis'].str.strip().map({'M': 1, 'B': 0})

print(df['diagnosis'].value_counts())

# Separate features and target
X = df.drop(['diagnosis'], axis=1).values  # shape should be (n_samples, 30)
X = X[:,:-1]
y = df['diagnosis'].values

print("\nFeature matrix shape: ", X.shape)
print("Target vector shape: ", y.shape)
print(X[0:5,:])

# Split the dataset into training and testing sets (75% train, 25% test)
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.25,
                                                    random_state=87)

First 5 rows of the dataset:
         id diagnosis  radius_mean  texture_mean  perimeter_mean  area_mean  \
0    842302         M        17.99         10.38          122.80     1001.0   
1    842517         M        20.57         17.77          132.90     1326.0   
2  84300903         M        19.69         21.25          130.00     1203.0   
3  84348301         M        11.42         20.38           77.58      386.1   
4  84358402         M        20.29         14.34          135.10     1297.0   

   smoothness_mean  compactness_mean  concavity_mean  concave points_mean  \
0          0.11840           0.27760          0.3001              0.14710   
1          0.08474           0.07864          0.0869              0.07017   
2          0.10960           0.15990          0.1974              0.12790   
3          0.14250           0.28390          0.2414              0.10520   
4          0.10030           0.13280          0.1980              0.10430   

   ...  texture_worst  perimeter_

In [42]:
# This model uses one hidden layer with 20 neurons (as in the provided snippet)
print("\n---- Training Baseline Model (1 hidden layer, no normalization) ----")
model_baseline = Sequential()
model_baseline.add(Dense(20, input_dim=X_train.shape[1], activation='relu'))
model_baseline.add(Dense(1, activation='sigmoid'))  # binary classification

# Compile the model
model_baseline.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model_baseline.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)

# Show model summary and evaluate on the test set
print("\nBaseline Model Summary:")
print(model_baseline.summary())
loss_base, acc_base = model_baseline.evaluate(X_test, y_test, verbose=0)
print("Baseline Model -- Test Loss: {:.4f}, Test Accuracy: {:.4f}".format(loss_base, acc_base))



---- Training Baseline Model (1 hidden layer, no normalization) ----


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Baseline Model Summary:


None
Baseline Model -- Test Loss: 0.1836, Test Accuracy: 0.9371


In [43]:
# Here we add additional hidden layers to the network to see how accuracy changes.
print("\n---- Training Expanded Model (More hidden layers, no normalization) ----")
model_expanded = Sequential()
# First hidden layer: 20 neurons
model_expanded.add(Dense(20, input_dim=X_train.shape[1], activation='relu'))
# Second hidden layer: 15 neurons
model_expanded.add(Dense(15, activation='relu'))
# Third hidden layer: 10 neurons
model_expanded.add(Dense(10, activation='relu'))
# Output layer: single neuron with sigmoid activation
model_expanded.add(Dense(1, activation='sigmoid'))

# Compile the model
model_expanded.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model_expanded.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)

# Show model summary and evaluate on the test set
print("\nExpanded Model Summary:")
print(model_expanded.summary())
loss_exp, acc_exp = model_expanded.evaluate(X_test, y_test, verbose=0)
print("Expanded Model -- Test Loss: {:.4f}, Test Accuracy: {:.4f}".format(loss_exp, acc_exp))


---- Training Expanded Model (More hidden layers, no normalization) ----

Expanded Model Summary:


None
Expanded Model -- Test Loss: 0.3272, Test Accuracy: 0.9371


In [44]:
# Normalizing the input features can help the model learn more efficiently.
print("\n---- Training Expanded Model with Data Normalization ----")

# Normalize the data using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create the same expanded model architecture as before
model_normalized = Sequential()
model_normalized.add(Dense(20, input_dim=X_train_scaled.shape[1], activation='relu'))
model_normalized.add(Dense(15, activation='relu'))
model_normalized.add(Dense(10, activation='relu'))
model_normalized.add(Dense(1, activation='sigmoid'))

# Compile the model
model_normalized.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model on the normalized data
model_normalized.fit(X_train_scaled, y_train, epochs=100, batch_size=32, verbose=0)

# Show model summary and evaluate on the test set
print("\nNormalized Data Model Summary:")
print(model_normalized.summary())
loss_norm, acc_norm = model_normalized.evaluate(X_test_scaled, y_test, verbose=0)
print("Normalized Data Model -- Test Loss: {:.4f}, Test Accuracy: {:.4f}".format(loss_norm, acc_norm))



---- Training Expanded Model with Data Normalization ----


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Normalized Data Model Summary:


None
Normalized Data Model -- Test Loss: 0.3199, Test Accuracy: 0.9650


In [45]:
# Normalizing the input features can help the model learn more efficiently.
print("\n---- Training Expanded Model with Data Normalization ----")

# Normalize the data using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create the same expanded model architecture as before
model_normalized = Sequential()
model_normalized.add(Dense(64, input_dim=X_train_scaled.shape[1], activation='tanh'))
model_normalized.add(Dense(128, activation='tanh'))
model_normalized.add(Dense(128, activation='tanh'))
model_normalized.add(Dense(1, activation='sigmoid'))

# Compile the model
model_normalized.compile(loss='binary_crossentropy', optimizer='adagrad', metrics=['accuracy'])

# Train the model on the normalized data
model_normalized.fit(X_train_scaled, y_train, epochs=100, batch_size=32, verbose=0)

# Show model summary and evaluate on the test set
print("\nNormalized Data Model Summary:")
print(model_normalized.summary())
loss_norm, acc_norm = model_normalized.evaluate(X_test_scaled, y_test, verbose=0)
print("Normalized Data Model -- Test Loss: {:.4f}, Test Accuracy: {:.4f}".format(loss_norm, acc_norm))



---- Training Expanded Model with Data Normalization ----


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Normalized Data Model Summary:


None
Normalized Data Model -- Test Loss: 0.0963, Test Accuracy: 0.9860
