In [2]:
#Importing Libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, roc_auc_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Layer
from sklearn.metrics import confusion_matrix, classification_report

In [3]:
#Reading the cleaned dataset from assignment 1
df = pd.read_csv("scaled_data.csv")
df

Unnamed: 0,age,bp,sg,al,su,rbc,pc,pcc,ba,bgr,...,pcv,wbcc,rbcc,htn,dm,cad,appet,pe,ane,class
0,0.522727,0.230769,0.75,0.2,0.0,1.0,1.0,0.0,0.0,0.211538,...,0.777778,0.231405,0.525424,1.0,1.0,0.0,0.0,0.0,0.0,0.0
1,0.056818,0.000000,0.75,0.8,0.0,1.0,1.0,0.0,0.0,0.269309,...,0.644444,0.157025,0.441938,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.681818,0.230769,0.25,0.4,0.6,1.0,1.0,0.0,0.0,0.856838,...,0.488889,0.219008,0.441938,0.0,1.0,0.0,1.0,0.0,1.0,0.0
3,0.522727,0.153846,0.00,0.8,0.0,1.0,0.0,1.0,0.0,0.202991,...,0.511111,0.185950,0.305085,1.0,0.0,0.0,1.0,1.0,1.0,0.0
4,0.556818,0.230769,0.25,0.4,0.0,1.0,1.0,0.0,0.0,0.179487,...,0.577778,0.210744,0.423729,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394,0.602273,0.230769,0.75,0.0,0.0,1.0,1.0,0.0,0.0,0.252137,...,0.844444,0.185950,0.474576,0.0,0.0,0.0,0.0,0.0,0.0,1.0
395,0.454545,0.153846,1.00,0.0,0.0,1.0,1.0,0.0,0.0,0.113248,...,1.000000,0.231405,0.694915,0.0,0.0,0.0,0.0,0.0,0.0,1.0
396,0.113636,0.230769,0.75,0.0,0.0,1.0,1.0,0.0,0.0,0.166667,...,0.888889,0.181818,0.559322,0.0,0.0,0.0,0.0,0.0,0.0,1.0
397,0.170455,0.076923,1.00,0.0,0.0,1.0,1.0,0.0,0.0,0.196581,...,0.933333,0.206612,0.644068,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [4]:
X = df.drop(columns=['class']) 
y = df['class'] 

In [5]:
# Naive Bayes classifier
nb_classifier = GaussianNB()

r = 5  #5 times repeat
test_size = 0.3
accuracies = []

# repeated random sampling
for i in range(r):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=None)
    nb_classifier.fit(X_train, y_train)
    y_pred = nb_classifier.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    accuracies.append(acc)

# average accuracy
average_accuracy = np.mean(accuracies)

print(f"Accuracies over {r} iterations: {accuracies}")
print(f"Average Accuracy: {average_accuracy * 100:.2f}%")

Accuracies over 5 iterations: [0.9416666666666667, 0.975, 0.9416666666666667, 0.9416666666666667, 0.9583333333333334]
Average Accuracy: 95.17%


In [6]:
# Convert X (DataFrame) to a NumPy array before creating a PyTorch tensor
X_tensor = torch.tensor(X.values, dtype=torch.float32) 
y_tensor = torch.tensor(y.values, dtype=torch.float32)  


# Define the MLP structure
class MLP(nn.Module):
    def __init__(self, input_size):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, 16)  # First hidden layer
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(16, 1)  # Output layer
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.sigmoid(self.fc2(x))
        return x

# Hyperparameters
input_size = 24
num_epochs = 20
batch_size = 16
learning_rate = 0.001
k_folds = 5

# Cross-validation setup
kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)
accuracies = []
roc_aucs = []

# K-Fold Cross-Validation
for fold, (train_idx, test_idx) in enumerate(kf.split(X_tensor)):
    print(f"Fold {fold + 1}/{k_folds}")
    
    # Split data
    X_train, X_test = X_tensor[train_idx], X_tensor[test_idx]
    y_train, y_test = y_tensor[train_idx], y_tensor[test_idx]
    
    # Create DataLoader for batching
    train_data = torch.utils.data.TensorDataset(X_train, y_train)
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
    
    # Initialize model, loss, and optimizer
    model = MLP(input_size=input_size)
    criterion = nn.BCELoss()  # Binary Cross-Entropy Loss
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    # Train the model
    model.train()
    for epoch in range(num_epochs):
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X).squeeze()  # Forward pass
            loss = criterion(outputs, batch_y)
            loss.backward()  # Backward pass
            optimizer.step()  # Optimize
    
    # Evaluate the model
    model.eval()
    with torch.no_grad():
        y_pred = model(X_test).squeeze()  # Predictions
        y_pred_class = (y_pred >= 0.5).float()  # Threshold for binary classification
        
        # Accuracy
        acc = accuracy_score(y_test, y_pred_class)
        accuracies.append(acc)
        
        # ROC AUC
        roc_auc = roc_auc_score(y_test, y_pred)
        roc_aucs.append(roc_auc)

    print(f"Fold {fold + 1}: Accuracy = {acc:.4f}, ROC AUC = {roc_auc:.4f}")

# Report average results
print("\nCross-Validation Results:")
print(f"Average Accuracy: {np.mean(accuracies) * 100:.2f}%")
print(f"Average ROC AUC: {np.mean(roc_aucs):.4f}")


Fold 1/5
Fold 1: Accuracy = 1.0000, ROC AUC = 1.0000
Fold 2/5
Fold 2: Accuracy = 0.8750, ROC AUC = 0.9986
Fold 3/5
Fold 3: Accuracy = 0.9250, ROC AUC = 0.9987
Fold 4/5
Fold 4: Accuracy = 0.9750, ROC AUC = 1.0000
Fold 5/5
Fold 5: Accuracy = 0.9620, ROC AUC = 0.9981

Cross-Validation Results:
Average Accuracy: 94.74%
Average ROC AUC: 0.9991


In [7]:
# Split data: 50% training, 50% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

# Define a custom RBF Layer
class RBFLayer(Layer):
    def __init__(self, num_centers, gamma=1.0, **kwargs):
        super(RBFLayer, self).__init__(**kwargs)
        self.num_centers = num_centers
        self.gamma = gamma

    def build(self, input_shape):
        self.centers = self.add_weight(
            shape=(self.num_centers, input_shape[-1]),
            initializer="random_normal",
            trainable=True,
        )
        super().build(input_shape)

    def call(self, inputs):
        diff = tf.expand_dims(inputs, axis=1) - self.centers
        l2 = tf.reduce_sum(tf.square(diff), axis=-1)
        return tf.exp(-self.gamma * l2)

# Build the RBFNN model
model = Sequential([
    RBFLayer(num_centers=10, gamma=0.5, input_shape=(X_train.shape[1],)),
    Dense(1, activation='sigmoid')  # Output layer for binary classification
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=1)

# Evaluate the model
y_pred = (model.predict(X_test) > 0.5).astype(int)

# Confusion Matrix and Metrics
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(class_report)


Epoch 1/50


  super(RBFLayer, self).__init__(**kwargs)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 630us/step - accuracy: 0.3648 - loss: 0.6982
Epoch 2/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 325us/step - accuracy: 0.6660 - loss: 0.6923
Epoch 3/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 379us/step - accuracy: 0.7555 - loss: 0.6910
Epoch 4/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 344us/step - accuracy: 0.8487 - loss: 0.6874
Epoch 5/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 350us/step - accuracy: 0.7647 - loss: 0.6855
Epoch 6/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 384us/step - accuracy: 0.6418 - loss: 0.6822
Epoch 7/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 305us/step - accuracy: 0.5499 - loss: 0.6814
Epoch 8/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 347us/step - accuracy: 0.6105 - loss: 0.6758
Epoch 9/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━