In [None]:
import sklearn
from sklearn.datasets import make_circles
import pandas as pd
import matplotlib.pyplot as plt
import torch

In [None]:
# Number of samples to generate
n_samples = 1000

# Generate the circles dataset: X holds the feature columns, y the class labels.
# A fixed random_state keeps the generated points the same on every run.
X, y = make_circles(n_samples=n_samples, noise=0.03, random_state=42)

# Peek at the first five features and labels
print(f'First 5 samples of X: \n {X[:5]}')
print(f'First 5 samples of y: \n {y[:5]}')

In [None]:
# Put the two feature columns and the label side by side in a DataFrame for inspection
circles = pd.DataFrame({ "X1": X[:, 0], "X2": X[:, 1], "label": y })
circles.head(10)

Visualize the data

In [None]:
# Scatter the first feature column against the second, coloured by label
plt.scatter(x=X[:, 0], y=X[:, 1], c=y, cmap=plt.cm.RdYlBu)

In [None]:
# 1.1 Check input and output shapes

# Take the first sample and its label
X_sample = X[0]
y_sample = y[0]

print(f'Values for one sample of X: {X_sample} and the y: {y_sample}')
# Report the shape of the single label (y_sample), not of the whole label array y,
# so that the message matches what it claims to show.
print(f"Shapes for one sample of X: {X_sample.shape} and y: {y_sample.shape}")

In [None]:
# Convert the NumPy arrays into tensors.
# NOTE(review): these are cast to float64, while the train/test split cell re-casts
# the splits to float32 - confirm whether float64 is needed here at all.
X_torch = torch.from_numpy(X).type(torch.float64)
y_torch = torch.from_numpy(y).type(torch.float64)

# Display the tensor type and the dtypes of both tensors
type(X_torch), X_torch.dtype, y_torch.dtype

Split the data into training and test sets

In [None]:
# Seed PyTorch's random number generator
torch.manual_seed(42)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X_torch, y_torch, test_size=0.2, random_state=42)

# Cast every split to float32, the default dtype of nn.Linear parameters.
# torch.as_tensor is used instead of torch.tensor(existing_tensor): the latter
# copy-constructs from a tensor and emits a UserWarning, while as_tensor accepts
# either a tensor or a NumPy array and simply converts the dtype.
X_train = torch.as_tensor(X_train, dtype=torch.float32)
X_test = torch.as_tensor(X_test, dtype=torch.float32)
y_train = torch.as_tensor(y_train, dtype=torch.float32)
y_test = torch.as_tensor(y_test, dtype=torch.float32)


In [None]:
# Show the split sizes and the container type in a single tuple: a notebook cell
# only displays its last expression, so a bare expression earlier in the cell
# would be evaluated and silently discarded.
len(X_train), len(X_test), len(y_train), len(y_test), type(X_test)

# 2.0 Building a model
1. Set up device-agnostic code for GPU/CPU
2. Construct a model by subclassing `nn.Module`
3. Define the loss function and optimizer
4. Create the training and test loops

In [None]:
from torch import nn

# Device-agnostic setup: use the GPU when CUDA is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# Feature and label shapes side by side (a cell only displays its last expression,
# so both are returned in one tuple). The trailing dimension of X_train is the
# model's in_features (2); each label is a single value, hence out_features = 1.
X_train.shape, y_train.shape

In [None]:
# creating model
# 1. subclass nn.module
# 2. 'nn.Linear()' layers that are capable of handling the shapes of our data
# 3. define forward() methods that outlines the forward computation of the model
# 4. instantiate instance of our model class and send it to the target device

class CircleModelV0(nn.Module):
    """Purely linear classifier: 2 input features -> 5 hidden units -> 1 output logit.

    There is no non-linear activation between the two layers, and the output is a
    raw logit (no sigmoid is applied here).
    """

    def __init__(self):
        super().__init__()
        # 2.
        self.layer_1 = nn.Linear(in_features=2, out_features=5) # take 2 features and upscales to 5 features
        # takes in 5 features and outputs 1 single features same shape as y
        self.layer_2 = nn.Linear(in_features=5, out_features=1)

    # 3.
    def forward(self, x):
        """Run the forward computation x -> layer_1 -> layer_2.

        Args:
            x: tensor whose last dimension has size 2.

        Returns:
            Tensor of logits whose last dimension has size 1.
        """
        # Match the input dtype to the layer weights instead of forcing float64:
        # nn.Linear parameters are float32 by default, and a float64 input against
        # float32 weights raises a dtype-mismatch RuntimeError.
        x = x.to(self.layer_1.weight.dtype)
        return self.layer_2(self.layer_1(x)) # x -> layer_1 -> layer_2


# 4. Instantiate the model class and send it to the target device
model_0 = CircleModelV0().to(device)
# Last expression of the cell: displays the module
model_0

In [None]:
# Compare the selected device with the device of the model's first parameter
print(f'Device: {device}')
next(model_0.parameters()).device

In [None]:
# Lets replicate the model above using nn.Sequential()
# Same 2 -> 5 -> 1 stack of linear layers; this rebinds model_0, replacing the
# CircleModelV0 instance created earlier.

model_0 = nn.Sequential(
    nn.Linear(in_features=2, out_features=5),
    nn.Linear(in_features=5, out_features=1)
).to(device)

model_0

In [None]:
# Inspect the model's current parameters (weights and biases of both layers)
model_0.state_dict()

In [None]:
# Forward pass on the test set with the untrained model. inference_mode is used
# because these outputs are only inspected, so no autograd graph needs to be built.
with torch.inference_mode():
    untrained_preds = model_0(X_test.to(device))
print(f'Length of predictions: {len(untrained_preds)}, shape: {untrained_preds.shape}')
print(f'Length of test samples: {len(X_test)}, shape: {X_test.shape}')
# NOTE(review): these are rounded raw logits (no sigmoid has been applied yet),
# so they are not directly comparable to the labels printed below.
print(f'\nFirst 10 predictions: \n {torch.round(untrained_preds[:10])}')
print(f'\nFirst 10 labels: \n {y_test[:10]}')

In [None]:
# First ten test samples alongside their labels
X_test[:10], y_test[:10]

## 2.1 setup loss function and optimizer

Which loss function and optimizer should we use? It is problem-specific:
1. Regression -> MAE or MSE
2. Classification -> cross entropy / binary cross entropy

The loss function measures how wrong the model's predictions are.

Optimizers -> SGD / Adam

In [None]:
# 2.1 setup loss

# Binary cross entropy that takes raw logits (the sigmoid is applied inside the loss)
loss_fn = nn.BCEWithLogitsLoss() # sigmoid activation built in

# SGD over model_0's parameters: it updates the weights so that the loss is reduced
optimizer = torch.optim.SGD(params=model_0.parameters(), lr=0.1)

In [None]:
# calculate accuracy - out of 100 examples, what percentage does our model get

def accuracy_fn(y_true, y_pred):
    """Return the percentage (0-100) of predictions that equal their label.

    Args:
        y_true: tensor of ground-truth labels.
        y_pred: tensor of predicted labels with the same number of elements.

    Returns:
        Accuracy in percent as a float; 0.0 when there are no predictions.
    """
    # Flatten both sides so that an (N,) vs (N, 1) pair is compared element-wise
    # rather than broadcast to an (N, N) grid, which would over-count matches.
    y_true = y_true.reshape(-1)
    y_pred = y_pred.reshape(-1)

    total = y_pred.numel()
    if total == 0:
        # Nothing to score: avoid dividing by zero
        return 0.0

    correct = torch.eq(y_true, y_pred).sum().item()
    return (correct / total) * 100


## train model

build training loop

1. forward pass 
2. calculate the loss
3. optimizer zero grad
4. Loss backward (backpropagation)
5. Optimizer step (gradient descent)

In [None]:
# 3.1 raw logits -> prediction probabilities -> prediction labels
# Logits are the raw outputs of the model.

# i. Convert the logits into prediction probabilities by passing them through
# an activation function

# ii. Convert the prediction probabilities into prediction labels by either
# rounding them or taking argmax()

# nb: view the first 5 outputs of the forward pass on the test data

# Evaluation mode plus inference_mode: this forward pass is for inspection only
model_0.eval()
with torch.inference_mode():
    y_logits = model_0(X_test.to(device))[:5]

y_logits

In [None]:
# Apply the sigmoid to the model logits to obtain prediction probabilities
y_preds_probs = torch.sigmoid(y_logits)

y_preds_probs

For our predictions probability values, we need to perform a range-style rounding on them:

`predictions >= 0.5 , y = 1 (class 1)`
`prediction < 0.5, y = 0 (class 0)`

In [None]:
# Round the probabilities to obtain the predicted labels

torch.round(y_preds_probs)

In [None]:
# Predicted labels, computed from the stored probabilities
y_preds = torch.round(y_preds_probs)

# The same pipeline in one expression (logits -> pred probs -> pred labels)
y_pred_labels = torch.round(torch.sigmoid(model_0(X_test.to(device=device))[:5]))

# Element-wise comparison of the two routes
print(torch.eq(y_preds.squeeze(), y_pred_labels.squeeze()))

# Drop the extra dimension for display
y_preds.squeeze()

In [None]:
# 3.1 building training and test loop

# NOTE(review): model_0 was created in an earlier cell, so these seeds do not
# affect its initial weights.
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Number of passes over the full training set
epochs = 100

# Move all data to the same device as the model
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

# Training and evaluation loop: one full-batch update per epoch
for epoch in range(epochs):
    # Switch to training mode
    model_0.train()

    # 1. forward pass; squeeze() drops the trailing size-1 dimension of the model output
    y_logits = model_0(X_train).squeeze()
    y_preds = torch.round(torch.sigmoid(y_logits)) # turn logits -> pred probs -> pred labels

    # 2. calculate loss/accuracy
    loss = loss_fn(y_logits, # bceWithLogits expects raw logits
                   y_train)
    acc = accuracy_fn(y_true=y_train, y_pred=y_preds)

    # 3. clear the gradients left over from the previous step
    optimizer.zero_grad()

    # 4. loss backward (backpropagation)
    loss.backward()

    # 5. optimizer step (update the parameters)
    optimizer.step()

    # Testing: evaluation mode, no gradient tracking
    model_0.eval()
    with torch.inference_mode():
        # 1. forward pass
        test_logits = model_0(X_test).squeeze()
        test_preds = torch.round(torch.sigmoid(test_logits))

        # 2. calculate test loss / acc
        test_loss = loss_fn(test_logits, y_test)
        test_acc = accuracy_fn(y_true=y_test, y_pred=test_preds)

    # Report progress every 10 epochs
    if epoch % 10 == 0:
        print(f'Epoch: {epoch} | Loss: {loss:.5f} | Accuracy: {acc:.2f}% | Test loss: {test_loss:.5f} | Test accuracy: {test_acc:.2f}%')



### 4. make predictions and evaluate the model

we import a function called `plot_decision_boundary()`


In [None]:
import requests
from pathlib import Path

# Download the helper module only when it is not already present locally.
# The path checked here must be the same file that is written and imported below;
# a misspelled name in the check makes the download run on every execution.
HELPER_PATH = Path('helper_functions.py')
HELPER_URL = 'https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py'

if HELPER_PATH.is_file():
    print('helper_functions.py exists')
else:
    print('Downloading helper_functions.py')
    request = requests.get(HELPER_URL, timeout=30)
    # Fail loudly on an HTTP error instead of saving an error page as a Python module
    request.raise_for_status()
    with open(HELPER_PATH, 'wb') as f:
        f.write(request.content)

from helper_functions import plot_predictions, plot_decision_boundary

In [None]:
# Decision boundary of model_0: training split in the left panel, test split in the right
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.title('Train')
plot_decision_boundary(model_0, X_train, y_train)

plt.subplot(1, 2, 2)
plt.title('Test')
plot_decision_boundary(model_0, X_test, y_test)


#### 5. Improving the model (model perspective)

1. Add more layers - give the model more chances to learn about patterns in the data
2. Add more hidden units - go from 5 to 10
3. Fit for longer (more epochs)
4. Change the activation functions
5. Change the learning rate
6. Change the loss function

In [None]:
class CircleModelV1(nn.Module):
    """Three stacked linear layers (2 -> 10 -> 10 -> 1) with no activation in between."""

    def __init__(self):
        super().__init__()
        self.layer_1 = nn.Linear(2, 10)
        self.layer_2 = nn.Linear(10, 10)
        self.layer_3 = nn.Linear(10, 1)

    def forward(self, x):
        """Feed x through layer_1, layer_2 and layer_3 in turn and return the result."""
        hidden = self.layer_1(x)
        hidden = self.layer_2(hidden)
        return self.layer_3(hidden)
    

# Instantiate the deeper model on the target device and display it
model_1 = CircleModelV1().to(device)
model_1

In [None]:
# Loss on raw logits, as before (this rebinds loss_fn)
loss_fn = nn.BCEWithLogitsLoss()

# New optimizer bound to model_1's parameters (this rebinds optimizer)
optimizer = torch.optim.SGD(params=model_1.parameters(), lr=0.1)

In [None]:
# Training and evaluation loop for model_1
# NOTE(review): model_1 was created in an earlier cell, so these seeds do not
# affect its initial weights.
torch.manual_seed(42)
torch.cuda.manual_seed(42)

epochs = 1000

# Put the data on the target device
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

for epoch in range(epochs):
    # Switch to training mode
    model_1.train()
    # 1. forward pass; squeeze() drops the trailing size-1 dimension of the model output
    y_logits = model_1(X_train).squeeze()
    y_pred = torch.round(torch.sigmoid(y_logits)) # logits -> pred probability -> pred labels

    # 2. calculate loss (on raw logits) and accuracy (on labels)
    loss = loss_fn(y_logits, y_train)
    acc = accuracy_fn(y_true=y_train, y_pred=y_pred)

    # 3. optimizer zero grad
    optimizer.zero_grad()

    # 4. Loss backward (backpropagation)
    loss.backward()

    # 5. optimizer step
    optimizer.step()

    ### Testing: evaluation mode, no gradient tracking
    model_1.eval()
    with torch.inference_mode():
        # 1. forward pass
        test_logits = model_1(X_test).squeeze()
        test_pred = torch.round(torch.sigmoid(test_logits))

        # 2. calculate test loss / accuracy
        test_loss = loss_fn(test_logits, y_test)
        test_acc = accuracy_fn(y_true=y_test, y_pred=test_pred)

    # Report progress every 100 epochs
    if epoch % 100 == 0:
        print(f'Epoch: {epoch} | Loss: {loss:.5f} | Accuracy: {acc:.2f}% | Test Loss: {test_loss:.5f} | Test accuracy: {test_acc:.2f}%')


In [None]:
# Decision boundary of model_1: training split in the left panel, test split in the right
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.title('Train')
plot_decision_boundary(model_1, X_train, y_train)

plt.subplot(1, 2, 2)
plt.title('Test')
plot_decision_boundary(model_1, X_test, y_test)

In [None]:
# 5.1 Build a straight-line dataset (y = weight * x + bias) to check whether the
# model is able to fit a linear relationship

# Line parameters and the sampling grid for x
weight, bias = 0.7, 0.3
start, end, step = 0, 1, 0.01

# Column vector of x values and the matching y values
X_regression = torch.arange(start, end, step).unsqueeze(dim=1)
y_regression = weight * X_regression + bias

# First 80% of the points are used for training, the remainder for testing
train_split = int(0.8 * len(X_regression))
X_train_regression, X_test_regression = X_regression[:train_split], X_regression[train_split:]
y_train_regression, y_test_regression = y_regression[:train_split], y_regression[train_split:]

# Sizes of the four splits
len(X_train_regression), len(X_test_regression), len(y_train_regression), len(y_test_regression)

In [None]:
# Plot the regression training and test splits (no predictions yet)
plot_predictions(train_data=X_train_regression, 
                 train_labels=y_train_regression,
                 test_data=X_test_regression,
                 test_labels=y_test_regression)

In [None]:
# 5.2 adjust model_1 to fit a straight line
# Same layer structure as model_1, but with a single input feature

# Seed BEFORE the model is built so that the random weight initialisation is
# reproducible; seeding after construction has no effect on the initial weights.
torch.manual_seed(42)
torch.cuda.manual_seed(42)

model_2 = nn.Sequential(
    nn.Linear(in_features=1, out_features=10),
    nn.Linear(in_features=10, out_features=10),
    nn.Linear(in_features=10, out_features=1)
).to(device)

# L1 (mean absolute error) loss for regression, and SGD over model_2's parameters
loss_fn = nn.L1Loss()
optimizer = torch.optim.SGD(params=model_2.parameters(),
                            lr=0.01)

# Number of passes over the training data
epochs = 1000

# Put the data on the target device
X_train_regression, y_train_regression = X_train_regression.to(device), y_train_regression.to(device)
X_test_regression, y_test_regression = X_test_regression.to(device), y_test_regression.to(device)

for epoch in range(epochs):
    # Training: re-enter train mode every epoch, because the evaluation step below
    # switches the model to eval mode
    model_2.train()
    y_pred = model_2(X_train_regression)
    loss = loss_fn(y_pred, y_train_regression)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Testing: forward pass and loss only. The optimizer must not be touched here -
    # evaluation never updates the parameters.
    model_2.eval()
    with torch.inference_mode():
        test_pred = model_2(X_test_regression)
        test_loss = loss_fn(test_pred, y_test_regression)

    # Report progress every 100 epochs
    if epoch % 100 == 0:
        print(f'Epoch: {epoch} | Loss: {loss:.5f} | Test Loss: {test_loss:.5f}')

In [None]:
# Switch to evaluation mode

model_2.eval()

# Make predictions on the test split without tracking gradients
with torch.inference_mode():
    y_preds = model_2(X_test_regression)

# Plot the data together with the predictions.
# NOTE(review): these tensors were moved to `device` in the training cell; if that is
# a GPU, confirm plot_predictions can handle non-CPU tensors.
plot_predictions(train_data=X_train_regression,
                 train_labels=y_train_regression,
                 test_data=X_test_regression,
                 test_labels=y_test_regression,
                 predictions=y_preds)

#### The missing piece: non-linearity
What patterns could you draw if you were given an unlimited number of straight and non-straight lines?


In [None]:
# 6.1 recreating the non-linear (red / blue) circles data
# NOTE(review): both imports repeat ones from the first cell of the notebook.
import matplotlib.pyplot as plt
from sklearn.datasets import make_circles

n_samples = 1000

# Same generator settings as at the start of the notebook; this rebinds X and y
X, y = make_circles(n_samples=n_samples, noise=0.03, random_state=42)

# Scatter the two feature columns, coloured by label
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdYlBu)
plt.show()

In [None]:
# convert data to tensors
import torch
from sklearn.model_selection import train_test_split

# float32 matches the default dtype of nn.Linear parameters. float16 keeps only
# about three significant decimal digits, which would needlessly round the
# coordinates. torch.as_tensor accepts both NumPy arrays and tensors, so this
# cell can be re-run after X and y have already been rebound to tensors.
X = torch.as_tensor(X, dtype=torch.float32)
y = torch.as_tensor(y, dtype=torch.float32)

# train test splits: hold out 20% of the samples, with a fixed shuffle
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Ensure dtype and device for every split. as_tensor is used rather than
# torch.tensor(existing_tensor), which copy-constructs and emits a UserWarning.
X_train = torch.as_tensor(X_train, dtype=torch.float32, device=device)
X_test = torch.as_tensor(X_test, dtype=torch.float32, device=device)

y_train = torch.as_tensor(y_train, dtype=torch.float32, device=device)
y_test = torch.as_tensor(y_test, dtype=torch.float32, device=device)

X_train[:5]

In [None]:
# 6.2 - building a model with non linearity

class CircleModelV2(nn.Module):
    """2 -> 10 -> 10 -> 1 linear stack with a ReLU after each of the first two layers."""

    def __init__(self):
        super().__init__()
        self.layer_1 = nn.Linear(2, 10)
        self.layer_2 = nn.Linear(10, 10)
        self.layer_3 = nn.Linear(10, 1)
        # Non-linear activation, shared by both hidden layers
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply layer_1 and layer_2, each followed by ReLU, then layer_3 with no activation."""
        hidden = self.relu(self.layer_1(x))
        hidden = self.relu(self.layer_2(hidden))
        return self.layer_3(hidden)
    
# Instantiate the non-linear model on the target device
model_3 = CircleModelV2().to(device)

In [None]:
# setup loss and optimizer

# Binary cross entropy on raw logits, and SGD over model_3's parameters
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(params=model_3.parameters(), lr=0.1)

# Other binary classification examples:
# spam or not spam
# credit cards = fraud or not 
# insurance claims = at fault or not fault

# 6.3 train model with non linearity
# random seed
# NOTE(review): model_3 was created in an earlier cell, so these seeds do not
# affect its initial weights.
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# put all data on target device (and make sure it is float32, matching the model)
X_train, y_train = X_train.float().to(device), y_train.float().to(device)
X_test, y_test = X_test.float().to(device), y_test.float().to(device)

epochs = 1000

for epoch in range(epochs):
    # training
    model_3.train()

    # 1. forward pass; squeeze() drops the trailing size-1 dimension of the model output
    y_logits = model_3(X_train).squeeze()
    y_pred = torch.round(torch.sigmoid(y_logits)) # logits -> prediction probabilities -> prediction labels

    # 2. calculate the loss (on raw logits) and the accuracy (on labels)
    loss = loss_fn(y_logits, y_train)
    acc = accuracy_fn(y_true=y_train, y_pred=y_pred)

    # 3. optimizer zero grad
    optimizer.zero_grad()

    # 4. loss backward
    loss.backward()

    # 5. step
    optimizer.step()

    # testing: evaluation mode, no gradient tracking
    model_3.eval()
    with torch.inference_mode():
        test_logits = model_3(X_test).squeeze()
        test_pred = torch.round(torch.sigmoid(test_logits)) # logits -> prediction probability -> prediction labels
        
        test_loss = loss_fn(test_logits, y_test)
        test_acc = accuracy_fn(y_true=y_test, y_pred=test_pred)

    # Report progress every 100 epochs. Both accuracies are percentages, so both
    # carry a "%" sign.
    if epoch % 100 == 0:
        print(f"Epoch: {epoch} | Loss {loss:.5f} | Accuracy {acc:.2f}% | Test loss {test_loss:.5f} | Test Accuracy {test_acc:.2f}% |")



In [None]:
# reset models parameters
def reset_models_weights(model):
    """Re-initialise, in place, every module in `model` that defines `reset_parameters()`.

    The whole module tree is walked via `model.modules()`, so layers nested inside
    containers (for example an nn.Sequential held by another module) are reset too,
    not only the direct children. Modules without a `reset_parameters` method are
    left untouched.

    Args:
        model: the nn.Module whose layers should be re-initialised.

    Returns:
        None.
    """
    for module in model.modules():
        reset = getattr(module, 'reset_parameters', None)
        if callable(reset):
            reset()

# model_3.state_dict()
# reset_models_weights(model_3)

Evaluating a model trained with non-linear activation function

In [None]:
# Evaluation mode for prediction
model_3.eval()

# Test-set predictions without gradient tracking: logits -> probabilities -> labels
with torch.inference_mode():
    y_logits = model_3(X_test).squeeze()
    y_preds = torch.round(torch.sigmoid(y_logits))

# Compare the first five predictions with the first five labels
print(y_preds[:5])
print(y_test[:5])

In [None]:
# Decision boundary of the non-linear model. Both panels must show model_3, the
# model being evaluated in this section: training split left, test split right.
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.title('Train')
plot_decision_boundary(model_3, X_train, y_train)

plt.subplot(1, 2, 2)
plt.title('Test')
plot_decision_boundary(model_3, X_test, y_test)

#### 7 - replicating non-linear activation function

With neural networks, rather than us telling the model what to learn, we give it the tools to discover patterns in the data, and it tries to figure out those patterns on its own.

And these tools are linear & non-linear functions


In [None]:
# Input values for the activation-function demos: -10 up to (not including) 10, step 1
A = torch.arange(-10, 10, 1, dtype=torch.float32)

# plt.plot(A)
# plt.plot(torch.relu(A))
def relu(x):
    """Hand-written ReLU: element-wise maximum of 0 and x."""
    zero = torch.tensor(0)
    return torch.maximum(zero, x)

def sigmoid(x):
    """Hand-written logistic function: 1 / (1 + exp(-x)), element-wise."""
    denominator = 1 + torch.exp(-x)
    return 1 / denominator

# Plot both hand-written activation functions (ReLU and sigmoid) applied to A
plt.plot(relu(A))
plt.plot(sigmoid(A))