project requires python version above than 3.7

In [14]:
import sys
assert sys.version_info >= (3, 7)

In [15]:
IS_COLAB = 'google.colab' in sys.modules

# Get the Data
*In this project you task is to predict the heart patient whether their is chances that he will be cancer patient or not*

## Download the Data

In [16]:
from pathlib import Path
import pandas as pd
import urllib.request


def load_heart_data():
  csv_file_path = Path('datasets/heart.csv')
  if not csv_file_path.is_file():
    Path('datasets').mkdir(parents=True, exist_ok=True)
    # Changed URL to raw content and removed tarfile operations as it's a CSV file
    url = "https://raw.githubusercontent.com/khalidkhankakar/Hands-on-Machine-Learning/master/datasets/heart.csv"
    urllib.request.urlretrieve(url, csv_file_path)
  return pd.read_csv(csv_file_path)

heart_data = load_heart_data()

In [17]:
heart_data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


## Split the dataset

In [18]:
X = heart_data.drop('target', axis=1)
y = heart_data['target'].copy()

In [19]:
from sklearn.model_selection import train_test_split

X_train_full, X_test, y_train_full, y_test, = train_test_split(X, y, test_size=0.30, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, random_state=42)


# Because we can't covert dataframe into tensor directly
X_train, X_valid, X_test, y_train, y_valid, y_test = X_train.values, X_valid.values, X_test.values, y_train.values, y_valid.values, y_test.values

## Scale the data

In [20]:
# Method One: By us StandardScaler
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.fit_transform(X_valid)
X_test = scaler.fit_transform(X_test)

In [21]:
# Method Two: Calculate the Manual method

means = X_train.mean(axis=0, keepdims=True)
stds = X_train.std(axis=0, keepdims=True)

X_train = (X_train - means) / stds
X_valid = (X_valid - means) / stds
X_test = (X_test - means) / stds

In [22]:
y_train = torch.FloatTensor(y_train.reshape(-1, 1))
y_valid = torch.FloatTensor(y_valid.reshape(-1, 1))
x_test = torch.FloatTensor(X_test)

## Convert the data into pytorch tensor

In [23]:
import torch
X_train = torch.FloatTensor(X_train)
X_valid = torch.FloatTensor(X_valid)
X_test = torch.FloatTensor(X_test)

# Make the Model

In [24]:
torch.manual_seed(42)
n_features = X_train.shape[1]
weights = torch.randn(n_features, 1, requires_grad=True)
bias = torch.tensor(0., requires_grad=True)

In [35]:
learning_rate = 0.1
n_epochs = 20

## 1. Batch Gradient Descent

In [36]:
for epoch in range(n_epochs):
  # Convert X_train numpy array to a PyTorch tensor for the operation
  X_train_tensor = torch.FloatTensor(X_train)
  y_pred = X_train_tensor @ weights + bias
  loss = ((y_pred - y_train)**2).mean()
  loss.backward()
  with torch.no_grad():
    bias -= learning_rate * bias.grad
    weights -= learning_rate * weights.grad

    bias.grad.zero_()
    weights.grad.zero_()

    print(f"Epoch: {epoch+1}/{n_epochs}, Loss: {loss.item()} ")

Epoch: 1/20, Loss: 0.11696784943342209 
Epoch: 2/20, Loss: 0.11696784943342209 
Epoch: 3/20, Loss: 0.11696784943342209 
Epoch: 4/20, Loss: 0.11696784943342209 
Epoch: 5/20, Loss: 0.11696784943342209 
Epoch: 6/20, Loss: 0.11696784943342209 
Epoch: 7/20, Loss: 0.11696784943342209 
Epoch: 8/20, Loss: 0.11696784943342209 
Epoch: 9/20, Loss: 0.11696784943342209 
Epoch: 10/20, Loss: 0.11696784943342209 
Epoch: 11/20, Loss: 0.11696784943342209 
Epoch: 12/20, Loss: 0.11696784943342209 
Epoch: 13/20, Loss: 0.11696784943342209 
Epoch: 14/20, Loss: 0.11696784943342209 
Epoch: 15/20, Loss: 0.11696784943342209 
Epoch: 16/20, Loss: 0.11696784943342209 
Epoch: 17/20, Loss: 0.11696784943342209 
Epoch: 18/20, Loss: 0.11696784943342209 
Epoch: 19/20, Loss: 0.11696784943342209 
Epoch: 20/20, Loss: 0.11696784943342209 


In [37]:
X_new = X_test[:3]
with torch.no_grad():
  y_pred = X_new @ weights + bias
y_pred

tensor([[0.9657],
        [1.2330],
        [0.0435]])

## 2. Linear Regression by Using Pytorch's High Level API

In [38]:
import torch.nn as nn

torch.manual_seed(42)
model = nn.Linear(in_features=n_features, out_features=1)

In [39]:
model.bias

Parameter containing:
tensor([0.0376], requires_grad=True)

In [40]:
model.weight

Parameter containing:
tensor([[ 0.2120,  0.2302, -0.0650,  0.2548, -0.0608,  0.0560, -0.1350,  0.1629,
          0.2445, -0.2035,  0.2411,  0.0519,  0.2049]], requires_grad=True)

In [44]:
# call the model as regular function
model(X_train[:2])

tensor([[ 0.7794],
        [-0.1022]], grad_fn=<AddmmBackward0>)

In [46]:
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
mse=nn.MSELoss()


In [48]:
def train_model(model, optimizer, criterion, X_train, y_train, n_epochs):
  for epoch in range(n_epochs):
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    print(f"Epoch: {epoch+1}/{n_epochs}, Loss: {loss.item()} ")

In [49]:
train_model(model, optimizer, mse, X_train, y_train, n_epochs)

Epoch: 1/20, Loss: 1.022965431213379 
Epoch: 2/20, Loss: 0.585238516330719 
Epoch: 3/20, Loss: 0.3887144923210144 
Epoch: 4/20, Loss: 0.28379663825035095 
Epoch: 5/20, Loss: 0.22250905632972717 
Epoch: 6/20, Loss: 0.18511903285980225 
Epoch: 7/20, Loss: 0.16175442934036255 
Epoch: 8/20, Loss: 0.14690309762954712 
Epoch: 9/20, Loss: 0.1373208463191986 
Epoch: 10/20, Loss: 0.1310473382472992 
Epoch: 11/20, Loss: 0.12687864899635315 
Epoch: 12/20, Loss: 0.12406615912914276 
Epoch: 13/20, Loss: 0.12213889509439468 
Epoch: 14/20, Loss: 0.12079715728759766 
Epoch: 15/20, Loss: 0.11984807252883911 
Epoch: 16/20, Loss: 0.11916600167751312 
Epoch: 17/20, Loss: 0.11866811662912369 
Epoch: 18/20, Loss: 0.11829913407564163 
Epoch: 19/20, Loss: 0.11802171915769577 
Epoch: 20/20, Loss: 0.11781022697687149 


In [50]:
X_new = X_test[:3]
with torch.no_grad():
  y_pred = model(X_new)
y_pred

tensor([[0.9915],
        [1.1757],
        [0.0670]])

# Regression MLP

In [52]:
torch.manual_seed(42)

model = nn.Sequential(
    nn.Linear(in_features=n_features, out_features=50),
    nn.ReLU(),
    nn.Linear(in_features=50, out_features=40),
    nn.ReLU(),
    nn.Linear(in_features=40, out_features=1)
)


In [53]:
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
mse= nn.MSELoss()
train_model(model, optimizer, mse, X_train, y_train, n_epochs)

Epoch: 1/20, Loss: 0.4106868803501129 
Epoch: 2/20, Loss: 0.2948867678642273 
Epoch: 3/20, Loss: 0.2617495656013489 
Epoch: 4/20, Loss: 0.24497760832309723 
Epoch: 5/20, Loss: 0.2320156842470169 
Epoch: 6/20, Loss: 0.2206326723098755 
Epoch: 7/20, Loss: 0.21040114760398865 
Epoch: 8/20, Loss: 0.20109766721725464 
Epoch: 9/20, Loss: 0.19261257350444794 
Epoch: 10/20, Loss: 0.1849159300327301 
Epoch: 11/20, Loss: 0.17792922258377075 
Epoch: 12/20, Loss: 0.17156437039375305 
Epoch: 13/20, Loss: 0.16575969755649567 
Epoch: 14/20, Loss: 0.1604708731174469 
Epoch: 15/20, Loss: 0.15568207204341888 
Epoch: 16/20, Loss: 0.15137283504009247 
Epoch: 17/20, Loss: 0.1474539190530777 
Epoch: 18/20, Loss: 0.14393696188926697 
Epoch: 19/20, Loss: 0.14080950617790222 
Epoch: 20/20, Loss: 0.13802023231983185 


In [54]:
X_new = X_test[:3]
with torch.no_grad():
  y_pred = model(X_new)
y_pred

tensor([[0.7224],
        [1.1342],
        [0.1511]])

In [55]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [56]:
from torch.utils.data import DataLoader, TensorDataset

train_dataset = TensorDataset(X_train, y_train)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [57]:
torch.manual_seed(42)

model = nn.Sequential(
    nn.Linear(in_features=n_features, out_features=50),
    nn.ReLU(),
    nn.Linear(in_features=50, out_features=40),
    nn.ReLU(),
    nn.Linear(in_features=40, out_features=1)
)

model = model.to(device)

In [65]:
def train_model2(model, optimizer, criterion, train_loader, n_epochs):
  model.train()
  for epoch in range(n_epochs):

    total_loss = 0

    for X_batch, y_batch in train_loader:
      X_batch, y_batch = X_batch.to(device), y_batch.to(device)

      y_pred = model(X_batch)
      loss = criterion(y_pred, y_batch)

      total_loss += loss.item()

      loss.backward()
      optimizer.step()
      optimizer.zero_grad()

      mean_loss = total_loss / len(train_loader)
      print(f"Epoch: {epoch+1}/{n_epochs}, Loss: {mean_loss} ")


In [64]:
train_model2(model, optimizer, mse, train_loader, 5)

Epoch: 1/5, Loss: 0.02627120473805596 
Epoch: 1/5, Loss: 0.05256612686549916 
Epoch: 1/5, Loss: 0.07932507816482992 
Epoch: 1/5, Loss: 0.10632681320695316 
Epoch: 1/5, Loss: 0.12525283939698162 
Epoch: 1/5, Loss: 0.14897404172841242 
Epoch: 1/5, Loss: 0.17175238623338587 
Epoch: 1/5, Loss: 0.19264672608936534 
Epoch: 1/5, Loss: 0.2149865539634929 
Epoch: 1/5, Loss: 0.24139195855926066 
Epoch: 1/5, Loss: 0.2648530321962693 
Epoch: 1/5, Loss: 0.2869996779105243 
Epoch: 1/5, Loss: 0.3073947342003093 
Epoch: 1/5, Loss: 0.3378427151371451 
Epoch: 1/5, Loss: 0.3609980046749115 
Epoch: 1/5, Loss: 0.38806407591875863 
Epoch: 1/5, Loss: 0.410257009898915 
Epoch: 2/5, Loss: 0.018753774025860953 
Epoch: 2/5, Loss: 0.04730726690853343 
Epoch: 2/5, Loss: 0.07600597073050107 
Epoch: 2/5, Loss: 0.09808761933270622 
Epoch: 2/5, Loss: 0.12169830238117892 
Epoch: 2/5, Loss: 0.14191489710527308 
Epoch: 2/5, Loss: 0.15768299733891206 
Epoch: 2/5, Loss: 0.1827797258601469 
Epoch: 2/5, Loss: 0.2095858717665

## Model Evaluation

In [66]:
if IS_COLAB:
  %pip install torchmetrics

Collecting torchmetrics
  Downloading torchmetrics-1.8.2-py3-none-any.whl.metadata (22 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.15.2-py3-none-any.whl.metadata (5.7 kB)
Downloading torchmetrics-1.8.2-py3-none-any.whl (983 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m983.2/983.2 kB[0m [31m21.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning_utilities-0.15.2-py3-none-any.whl (29 kB)
Installing collected packages: lightning-utilities, torchmetrics
Successfully installed lightning-utilities-0.15.2 torchmetrics-1.8.2


In [68]:
import torchmetrics
def evaluate_model(mode, data_loader, metric):
  model.eval()
  metric.reset()

  with torch.no_grad():
    for X_batch, y_batch in data_loader:
      X_batch, y_batch = X_batch.to(device), y_batch.to(device)
      y_pred = model(X_batch)
      metric.update(y_pred, y_batch)

  return metric.compute()


In [71]:
rmse = torchmetrics.MeanSquaredError(squared=False).to(device)
valid_dataset = TensorDataset(X_valid, y_valid)
valid_loader = DataLoader(valid_dataset, batch_size=32)
evaluate_model(model, train_loader, rmse)

tensor(0.6408, device='cuda:0')