In [33]:
# Import torch and neural network library:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
# import sklearn model_selection, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# import numpy, pandas, matplotlib, seaborn
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sea
# Setup device either gpu or cpu

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device used: ', device)

Device used:  cpu


In [34]:
# read the dataset framingham.csv and display the first 5 rows.
data = pd.read_csv('./framingham.csv')

In [35]:
# display the shape, null values
# shape of data
data.head()

# null values
missing_values = data.isnull()
print(missing_values)


       male    age  education  currentSmoker  cigsPerDay  BPMeds  \
0     False  False      False          False       False   False   
1     False  False      False          False       False   False   
2     False  False      False          False       False   False   
3     False  False      False          False       False   False   
4     False  False      False          False       False   False   
...     ...    ...        ...            ...         ...     ...   
4235  False  False      False          False       False    True   
4236  False  False      False          False       False   False   
4237  False  False      False          False       False   False   
4238  False  False      False          False       False   False   
4239  False  False      False          False       False   False   

      prevalentStroke  prevalentHyp  diabetes  totChol  sysBP  diaBP    BMI  \
0               False         False     False    False  False  False  False   
1               False    

In [36]:
# Fill null values with either median or mean.
# using the mean:
data = data.fillna(data.mean())
data.head()

Unnamed: 0,male,age,education,currentSmoker,cigsPerDay,BPMeds,prevalentStroke,prevalentHyp,diabetes,totChol,sysBP,diaBP,BMI,heartRate,glucose,TenYearCHD
0,1,39,4.0,0,0.0,0.0,0,0,0,195.0,106.0,70.0,26.97,80.0,77.0,0
1,0,46,2.0,0,0.0,0.0,0,0,0,250.0,121.0,81.0,28.73,95.0,76.0,0
2,1,48,1.0,1,20.0,0.0,0,0,0,245.0,127.5,80.0,25.34,75.0,70.0,0
3,0,61,3.0,1,30.0,0.0,0,1,0,225.0,150.0,95.0,28.58,65.0,103.0,1
4,0,46,3.0,1,23.0,0.0,0,0,0,285.0,130.0,84.0,23.1,85.0,85.0,0


In [37]:
# get the features as X and the target column as y.
# get all the features except the target column
X = data.drop('TenYearCHD', axis=1)
# only the target column
y = data['TenYearCHD']  

In [38]:
# Split the data into train and test:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [39]:
# Scale the features X_train and X_test using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)
# Convert the X_train, X_test, y_train, y_test to torch tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)


In [40]:
# Build your neural network
# Define a simple neural network (Linear Regression with 1 neuron)
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.linear = nn.Linear(15, 1) # One input, one output neuron
    def forward(self, x):
        return self.linear(x) # Forward pass (prediction)

In [41]:
# Instantiate the model, define the loss function and the optimizer
# Create model
model = SimpleNN()
# Define loss function (Mean Squared Error) for regression
criterion = nn.MSELoss() # BCELoss() for binary classification, CrossEntropyLoss() for classification.
# Define optimizer (Stochastic Gradient Descent)
optimizer = optim.SGD(model.parameters(), lr=0.01) # optim.Adam(model.parameters(), lr=0.01)

In [42]:
# Train the model
# Forward and Backward Propagation
for epoch in range(100): # Train for 100 iterations
    optimizer.zero_grad() # Reset gradients before new calculation
    # Forward pass
    y_pred = model(X_train_tensor) # Compute predictions
    # Compute loss
    train_loss = criterion(y_pred, y_train_tensor)
    # Backward pass (compute gradients)
    train_loss.backward() # Backpropagate error
    # Update weights
    optimizer.step() # Apply weight updates
    # Print loss every 20 epochs
    if epoch % 20 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item():.4f}')

Epoch 0, Loss: 0.1304
Epoch 20, Loss: 0.1304
Epoch 40, Loss: 0.1304
Epoch 60, Loss: 0.1304
Epoch 80, Loss: 0.1304


In [43]:
# Evaluate the model
model.eval()
with torch.no_grad():
    train_output = model(X_train_tensor)
    test_output = model(X_test_tensor)
    train_loss = criterion(train_output, y_train_tensor)
    test_loss = criterion(test_output, y_test_tensor)
print("Train loss=", train_loss.item(), "test loss=", test_loss.item())


Train loss= 0.12657220661640167 test loss= 0.13057295978069305
