In [185]:
import pandas as pd
import torch
import joblib
import numpy as np
import math
from torch import nn
from torchviz import make_dot
from torchview import draw_graph
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.preprocessing import StandardScaler

In [186]:
df = pd.read_csv('diabetes_data.csv')
df.dropna(inplace=True)
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [187]:
#Count the binary values in the 'Outcome' column
outcome_counts = df['Outcome'].value_counts()

#Print the counts
print(outcome_counts)

#Insight: Unbalanced dataset

Outcome
0    500
1    268
Name: count, dtype: int64


In [188]:
x = df.drop(columns=['Outcome'])
y = df['Outcome']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=1)

x_train_tensor = torch.tensor(x_train.values).to(torch.float32)
x_test_tensor = torch.tensor(x_test.values).to(torch.float32)

y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

In [189]:
class NeuralNetwork(nn.Module):
  def __init__(self):
    super().__init__()
    #input layer
    self.layer1 = nn.Linear(8,70)
    self.relu1 = nn.ReLU()

    self.layer2 = nn.Linear(70,70)
    self.relu2 = nn.ReLU()

    self.layer3 = nn.Linear(70,1)
    self.sigmoid = nn.Sigmoid()

  def forward(self, x):
    x = self.layer1(x)
    x = self.relu1(x)

    x = self.layer2(x)
    x = self.relu2(x)

    x = self.layer3(x)
    pred = self.sigmoid(x)
    return pred #returns prediction

In [190]:
model = NeuralNetwork()
loss_fn = nn.BCELoss()

#BACK PROPAGATION, optimizers (gradient descent optimizer)
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)# parameters that will be updated

In [191]:
#set the model into training mode
model.train()

for epoch in range(100):
  for i in range(0, len(x_train), 5):
    #Calculate how far from true value
    pred = model(x_train_tensor[i:i+1])
    loss = loss_fn(pred, y_train_tensor[i:i+1]) #generate a loss, error score

    #Update the weights
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

  print(f"Epoch {epoch+1}, Loss: {loss.item()}")

Epoch 1, Loss: 0.17084036767482758
Epoch 2, Loss: 0.3622669279575348
Epoch 3, Loss: 0.4232748746871948
Epoch 4, Loss: 0.4780923128128052
Epoch 5, Loss: 0.43978846073150635
Epoch 6, Loss: 0.46493294835090637
Epoch 7, Loss: 0.4467869997024536
Epoch 8, Loss: 0.44705966114997864
Epoch 9, Loss: 0.4557293653488159
Epoch 10, Loss: 0.4539615511894226
Epoch 11, Loss: 0.44533100724220276
Epoch 12, Loss: 0.4411555826663971
Epoch 13, Loss: 0.4803052544593811
Epoch 14, Loss: 0.46577396988868713
Epoch 15, Loss: 0.4624632000923157
Epoch 16, Loss: 0.45768725872039795
Epoch 17, Loss: 0.4015481472015381
Epoch 18, Loss: 0.3752038776874542
Epoch 19, Loss: 0.34946680068969727
Epoch 20, Loss: 0.3827776610851288
Epoch 21, Loss: 0.3640471398830414
Epoch 22, Loss: 0.3641899824142456
Epoch 23, Loss: 0.4068203270435333
Epoch 24, Loss: 0.33815616369247437
Epoch 25, Loss: 0.35540857911109924
Epoch 26, Loss: 0.35383713245391846
Epoch 27, Loss: 0.33001846075057983
Epoch 28, Loss: 0.3345237076282501
Epoch 29, Loss: 0

In [192]:
model.eval()  #Set model to evaluation mode
predictions = []

for i in range(len(x_test)):
    with torch.no_grad():
        pred = model(x_test_tensor[i]).round()
        predictions.append(pred.item())  # Append the prediction as a scalar

# Convert predictions to a NumPy array
y_pred = np.array(predictions)
y_true = y_test_tensor.numpy().flatten()  # Convert y_test_tensor to NumPy array

In [193]:
acc_nn1 = accuracy_score(y_true, y_pred)
precision_nn1 = precision_score(y_true, y_pred)
recall_nn1 = recall_score(y_true, y_pred)
f1_score_nn1 = f1_score(y_true, y_pred)

# Print results
print(f'Accuracy: {acc_nn1}')
print(f'Precision: {precision_nn1}')
print(f'Recall: {recall_nn1}')
print(f"F1-score: {f1_score_nn1}")

Accuracy: 0.6406926406926406
Precision: 0.5217391304347826
Recall: 0.2823529411764706
F1-score: 0.366412213740458


In [194]:
torch.save(model.state_dict(), 'classicalnn-model.pth')

In [195]:
# Create a sample input tensor that matches your input shape
sample_input = torch.randn(1, 8)  # batch size = 1, features = 8

# Pass the sample input through the model to get the computation graph
output = model(sample_input)

# Create and render the visualization
dot = make_dot(output, params=dict(model.named_parameters()))
dot.format = 'png'
dot.render('neural_network_architecture')

'neural_network_architecture.png'

In [196]:
logistic_model = LogisticRegression()
logistic_model.fit(x_train, y_train)
logistic_predicted = logistic_model.predict(x_test)
print(f"This is the model's coef_ {logistic_model.coef_}.\nThis is the model's intercepts {logistic_model.intercept_}.\n These are the model's predictions {logistic_predicted}.")

This is the model's coef_ [[ 0.10107846  0.03357831 -0.01570481 -0.00135947 -0.00069553  0.08937806
   0.55938328  0.01707619]].
This is the model's intercepts [-7.86410566].
 These are the model's predictions [0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 1 0 1 0
 0 0 1 0 0 0 0 0 0 1 1 0 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 0 0 1 1 1 1 1 0
 1 0 1 0 0 1 1 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 1 0 0 0
 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 0 1 0 0 1 1 0 0 0
 0 0 0 1 0 0 1 1 0 0 0 0 1 1 0 0 0 0 0 1 1 1 1 1 0 0 1 0 1 0 0 0 0 1 0 1 0
 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 1 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0
 0 1 0 0 0 0 0 1 0].


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
