In [1]:
from pathlib import Path

import pandas as pd
import torch
from mlxtend.preprocessing import minmax_scaling
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from torch import nn
from torch.functional import F

print(torch.__version__)

# Pick the best available backend: CUDA first, then Apple MPS, otherwise CPU.
# NOTE(review): the visible cells never move the model or the tensors to
# `device`, so this value is informational only as far as this notebook shows.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

# Weights & Biases is used for experiment tracking in the cells below.
import wandb
# Authenticate this session; the call's return value is the cell's output.
wandb.login()


  from .autonotebook import tqdm as notebook_tqdm


1.12.1
Using mps device


[34m[1mwandb[0m: Currently logged in as: [33mmhrnciar[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [2]:
# Load the dataset from CSV, using the file's first column as the row index.
df = pd.read_csv('Data/cleaned.csv', index_col=0)
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148.0,72.0,35.0,169.5,33.6,0.627,50,1
1,1,85.0,66.0,29.0,102.5,26.6,0.351,31,0
2,8,183.0,64.0,32.0,169.5,23.3,0.672,32,1
3,1,89.0,66.0,23.0,94.0,28.1,0.167,21,0
4,0,137.0,40.0,35.0,168.0,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101.0,76.0,48.0,180.0,32.9,0.171,63,0
764,2,122.0,70.0,27.0,102.5,36.8,0.340,27,0
765,5,121.0,72.0,23.0,112.0,26.2,0.245,30,0
766,1,126.0,60.0,32.0,169.5,30.1,0.349,47,1


In [3]:
# Min-max scale every predictor column in place; 'Outcome' is left untouched.
# NOTE(review): the scaling is computed over the whole frame before the
# train/test split performed in a later cell, so held-out rows influence the
# scaling parameters — consider fitting on the training rows only.
feature_cols = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
]
df[feature_cols] = minmax_scaling(df, feature_cols)
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,0.352941,0.670968,0.489796,0.304348,0.186899,0.314928,0.234415,0.483333,1
1,0.058824,0.264516,0.428571,0.239130,0.106370,0.171779,0.116567,0.166667,0
2,0.470588,0.896774,0.408163,0.271739,0.186899,0.104294,0.253629,0.183333,1
3,0.058824,0.290323,0.428571,0.173913,0.096154,0.202454,0.038002,0.000000,0
4,0.000000,0.600000,0.163265,0.304348,0.185096,0.509202,0.943638,0.200000,1
...,...,...,...,...,...,...,...,...,...
763,0.588235,0.367742,0.530612,0.445652,0.199519,0.300613,0.039710,0.700000,0
764,0.117647,0.503226,0.469388,0.217391,0.106370,0.380368,0.111870,0.100000,0
765,0.294118,0.496774,0.489796,0.173913,0.117788,0.163599,0.071307,0.150000,0
766,0.058824,0.529032,0.367347,0.271739,0.186899,0.243354,0.115713,0.433333,1


In [4]:
# Separate the predictor matrix from the 'Outcome' target column.
X = df.drop('Outcome', axis=1).values
y = df.Outcome.values

# A fixed seed makes the 80/20 partition (and therefore every metric reported
# below) reproducible across Restart & Run All.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, shuffle=True, random_state=SPLIT_SEED
)

# float32 inputs for the linear layers; int64 class indices for CrossEntropyLoss.
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)

In [11]:
class NeuralNetwork(nn.Module):
    """Feed-forward classifier with three ReLU hidden layers and a linear head.

    Args:
        input_features: width of each input row.
        hidden1: width of the first hidden layer.
        hidden2: width of the second hidden layer.
        hidden3: width of the third hidden layer.
        out_features: number of output scores per row.
    """

    def __init__(self, input_features=8, hidden1=20, hidden2=10, hidden3= 5, out_features=2):
        super().__init__()
        # Attribute names double as state_dict keys, so they are kept as-is.
        self.f_connected1 = nn.Linear(in_features=input_features, out_features=hidden1)
        self.f_connected2 = nn.Linear(in_features=hidden1, out_features=hidden2)
        self.f_connected3 = nn.Linear(in_features=hidden2, out_features=hidden3)
        self.out = nn.Linear(in_features=hidden3, out_features=out_features)

    def forward(self,x):
        """Return the un-normalised output scores for ``x``.

        Each hidden layer is followed by ReLU; the final layer applies no
        activation.
        """
        hidden_layers = (self.f_connected1, self.f_connected2, self.f_connected3)
        for layer in hidden_layers:
            x = F.relu(layer(x))
        return self.out(x)

# Hyperparameters shared by the optimizer, the training loop and the W&B run.
config = dict(lr=0.01, epochs=500)

# Build the network with its default layer sizes and show its structure.
model = NeuralNetwork()
print(model)

# Cross-entropy over the model's raw output scores, optimised with Adam.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=config['lr'])

# NOTE(review): the run id is hard-coded and resume is enabled, so executing
# this cell appears to attach to that existing run rather than start a new
# one — confirm this is intended.
run = wandb.init(
    project="basic-nn-torch",
    id="vivid-serenity-10",
    resume=True,
)
wandb.config.update(config)
# Kept as the last expression: its return value is the cell's output.
wandb.watch(model)

NeuralNetwork(
  (f_connected1): Linear(in_features=8, out_features=20, bias=True)
  (f_connected2): Linear(in_features=20, out_features=10, bias=True)
  (f_connected3): Linear(in_features=10, out_features=5, bias=True)
  (out): Linear(in_features=5, out_features=2, bias=True)
)


[]

In [12]:
# Per-epoch loss history; entries are 0-dim tensors so later cells can call
# `.item()` on them.
train_losses, val_losses = [], []

# Full-batch training: each epoch is one optimisation step on all of X_train,
# followed by an evaluation pass on X_test. Epochs are numbered from 1.
for epoch in range(1, config['epochs'] + 1):
    model.train()
    # Call the module itself (not .forward) so registered hooks are honoured.
    train_logits = model(X_train)
    train_loss = loss_fn(train_logits, y_train)
    # Detach before storing: keeping the attached tensor would hold every
    # epoch's autograd graph alive for the lifetime of the list.
    train_losses.append(train_loss.detach())

    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()

    model.eval()
    with torch.no_grad():
        val_logits = model(X_test)
        val_loss = loss_fn(val_logits, y_test)
        val_losses.append(val_loss)
        # Predicted class per row, computed once and reused by every metric.
        val_preds = val_logits.argmax(1)

    f1_per_class = f1_score(y_test, val_preds, average=None)
    f1_macro = f1_score(y_test, val_preds, average='macro')
    accuracy = accuracy_score(y_test, val_preds)

    # One log call per epoch so all metrics share the same W&B step.
    metrics = {
        'training_loss': train_loss.item(),
        'validation_loss': val_loss.item(),
        'f1_macro': f1_macro,
        'accuracy': accuracy,
    }
    metrics.update({'f1_none/' + str(cls): value for cls, value in enumerate(f1_per_class)})
    wandb.log(metrics)

    if epoch % 10 == 0:
        print(f'Epoch {epoch}')
        print('-' * 25)
        print(f'Training loss: {train_loss.item()}, validation loss: {val_loss.item()}', end='\n\n')

Epoch 10
-------------------------
Training loss: 0.6310485601425171, validation loss: 0.6139008402824402

Epoch 20
-------------------------
Training loss: 0.5488553643226624, validation loss: 0.5247406363487244

Epoch 30
-------------------------
Training loss: 0.450631320476532, validation loss: 0.4262317717075348

Epoch 40
-------------------------
Training loss: 0.4329376220703125, validation loss: 0.41661137342453003

Epoch 50
-------------------------
Training loss: 0.4080664813518524, validation loss: 0.40236449241638184

Epoch 60
-------------------------
Training loss: 0.3904975652694702, validation loss: 0.39290910959243774

Epoch 70
-------------------------
Training loss: 0.3746865391731262, validation loss: 0.3917069733142853

Epoch 80
-------------------------
Training loss: 0.36757832765579224, validation loss: 0.3845846652984619

Epoch 90
-------------------------
Training loss: 0.36121049523353577, validation loss: 0.3777238726615906

Epoch 100
-----------------------

In [13]:
import plotly.graph_objects as go

# Plot the recorded training and validation loss against the epoch index
# (0-based, one point per configured epoch).
epoch_axis = list(range(config['epochs']))

fig = go.Figure()
for label, history in (('train', train_losses), ('validation', val_losses)):
    # Each history entry is a 0-dim tensor; convert it to a plain float.
    fig.add_scatter(x=epoch_axis, y=[loss.item() for loss in history], name=label)
# Last expression returns the figure so the notebook renders it.
fig.update_layout(xaxis_title='Epoch', yaxis_title='Loss')

In [14]:
# Create the checkpoint directory first: torch.save fails if the parent
# directory is missing, which would break a run on a fresh checkout.
Path("models").mkdir(parents=True, exist_ok=True)
torch.save(model.state_dict(), "models/model.pth")

# NOTE(review): this path is not written by any visible cell — confirm it
# exists, or whether the checkpoint saved above was the intended upload.
wandb.save('runs/pima_run_2023-03-22')
# Close the W&B run; the run summary is printed as the cell's output.
wandb.finish()

0,1
accuracy,▁▁▅▅▆▇▇▇▇▇▇████▇██████▇████▇▇▇▇▇▇▇▇▇█▇▇▇
f1_macro,▁▁▆▆▇▇▇██▇███████████████████████▇██████
f1_none/0,▁▁▄▄▅▆▆▇▆▆▇█▇▇▇▇▇██▇▇█▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
f1_none/1,▁▁▆▇▇███████████████████████████████████
training_loss,█▇▅▅▄▄▄▃▃▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
validation_loss,█▇▄▄▃▃▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▁▁▂▁▂▁▁▁▁▁▁

0,1
accuracy,0.88312
f1_macro,0.87054
f1_none/0,0.91089
f1_none/1,0.83019
training_loss,0.19919
validation_loss,0.29754


In [15]:
# Score the whole test set in one batched forward pass instead of looping
# row by row; argmax over dim 1 gives the predicted class for each row.
with torch.no_grad():
    predictions = model(X_test).argmax(dim=1).tolist()

# Fraction of test rows whose predicted class matches the label.
score = accuracy_score(y_test, predictions)
score

0.8831168831168831