In [1]:
!pip install pandas




[notice] A new release of pip available: 22.3.1 -> 23.2.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
!pip install scikit-learn




[notice] A new release of pip available: 22.3.1 -> 23.2.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
# import the required libraries
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from sklearn.datasets import make_circles # it would generate two circles with two colors

### Prepare a dataset

In [4]:
# create a dataset
# number of samples to generate
nSamples = 1000

# generate 2-D points arranged in two circles, with a binary label per point
# noise jitters the points; random_state fixes the draw so it is reproducible
X, y = make_circles(nSamples, noise=0.03, random_state=42)

In [5]:
# preview the first five feature rows and the first five entries of y
print(f"First 5 X features: \n {X[:5]}")
print()
print(f"First 5 y features: \n {y[:5]}")

First 5 X features: 
 [[ 0.75424625  0.23148074]
 [-0.75615888  0.15325888]
 [-0.81539193  0.17328203]
 [-0.39373073  0.69288277]
 [ 0.44220765 -0.89672343]]

First 5 y features: 
 [1 1 1 1 0]


In [6]:
# put the two feature columns and the label into a DataFrame for easier inspection
circles = pd.DataFrame({"X1": X[:, 0], "X2": X[:, 1], "label": y})

circles.head()

Unnamed: 0,X1,X2,label
0,0.754246,0.231481,1
1,-0.756159,0.153259,1
2,-0.815392,0.173282,1
3,-0.393731,0.692883,1
4,0.442208,-0.896723,0


#### This is a binary classification task (labels 0 and 1)

In [7]:
# count how many samples fall into each class
circles.label.value_counts()

label
1    500
0    500
Name: count, dtype: int64

##### Each class contains 500 samples, so the dataset is balanced

### Visualize the data

In [None]:
# scatter the first feature against the second, colored by label
plt.scatter(x=X[:,0], y=X[:, 1], c=y, cmap=plt.cm.RdYlBu)

In [None]:
# inspect the shapes of the features and the labels
X.shape, y.shape

In [None]:
# look at a single feature/label pair and at the shape of each
XSample = X[0]
ySample = y[0]

print(f"Values for one sample of X: {XSample} and the same for y: {ySample}")
# this line reports shapes, so it is labelled accordingly
print(f"Shapes for one sample of X: {XSample.shape} and the same for y: {ySample.shape}")

##### Note: Each X sample is a vector (two features), while each y label is a scalar

### Turn the data into tensors

In [None]:
# convert the NumPy arrays into float32 tensors
# torch.as_tensor also accepts a tensor, so re-running this cell does not fail
# the way torch.from_numpy does once X and y have already been converted
X = torch.as_tensor(X, dtype=torch.float)
y = torch.as_tensor(y, dtype=torch.float)

In [None]:
# confirm the Python types of X and y after the conversion
type(X), type(y)

In [None]:
# view the first five entries of each tensor
X[:5], y[:5]

### Split the data for training and testing

In [None]:
# split the data with scikit-learn's train_test_split
from sklearn.model_selection import train_test_split

# test_size=0.2 holds out 20% of the samples; random_state makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# check the sizes of the resulting splits
len(X_train), len(X_test), len(y_train), len(y_test)

### Build a classification model

In [None]:
# device-agnostic setup: use the GPU when CUDA is available, otherwise the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
device

In [None]:
# a minimal model for the classification task
class CircleModel(nn.Module):
    """Two stacked linear layers mapping 2 input features to 1 output value."""

    def __init__(self):
        super().__init__()
        # 2 input features -> 5 hidden units
        self.layer1 = nn.Linear(in_features=2, out_features=5)
        # 5 hidden units -> 1 output
        self.layer2 = nn.Linear(in_features=5, out_features=1)

    def forward(self, x):
        """Pass ``x`` through layer1 and then layer2 and return the result."""
        hidden = self.layer1(x)
        return self.layer2(hidden)


In [None]:
# instantiate the model and move it to the selected device
model = CircleModel().to(device)

model

In [None]:
# build the same two-layer architecture with the nn.Sequential API
model_1 = nn.Sequential(
    nn.Linear(in_features=2, out_features=5),
    nn.Linear(in_features=5, out_features=1)
).to(device)

model_1

In [None]:
# run the test data through the model (no training has happened yet at this point)
preds = model(X_test.to(device))
# compare the length and shape of the predictions with those of the test labels
print(f"Length of predictions: {len(preds)}, Shape: {preds.shape}")
print(f"Length of the test samples: {len(y_test)}, Shape: {y_test.shape}")
print(f"\nFirst 10 predictions: \n {preds[:10]}")
print(f"\n First 10 test labels: \n {y_test[:10]}")

In [None]:
#preds

In [None]:
# set up the loss function and the optimizer
# BCEWithLogitsLoss has the sigmoid built in, so it is given raw logits
lossFn = nn.BCEWithLogitsLoss()

# stochastic gradient descent over the parameters of `model`
optimizer = optim.SGD(params=model.parameters(), lr=0.1)

In [None]:
# evaluation metric
def accuracyFn(y_true, y_pred):
    """Return the percentage of positions at which y_pred equals y_true."""
    # number of element-wise matches between the two tensors
    n_correct = y_true.eq(y_pred).sum().item()
    fraction = n_correct / len(y_pred)
    return fraction * 100

In [None]:
# raw outputs (logits) of the forward pass for the first five test samples
yLogits = model(X_test.to(device))[:5]
yLogits

In [None]:
# apply the sigmoid activation function to the logits
# NOTE: despite its name, yPred_logits holds the sigmoid outputs, not logits
yPred_logits = torch.sigmoid(yLogits)
yPred_logits

In [None]:
# round the sigmoid outputs to obtain predicted labels
y_Preds = torch.round(yPred_logits)

# the same computation in a single expression: logits -> sigmoid -> round
y_pred_labels = torch.round(torch.sigmoid(model(X_test.to(device))[:5]))

# check that both routes give the same labels
print(torch.eq(y_Preds.squeeze(), y_pred_labels.squeeze()))

# remove the extra dimension
y_Preds.squeeze()

### Build a training loop

In [None]:
# seed PyTorch's random number generator
# (`model` was instantiated in an earlier cell, so this seed does not affect its initial weights)
torch.manual_seed(42)

# hyperparameters: number of passes over the full training set
epochs = 100

# move the data to the target device
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

print(f"Shape of the training data: {X_train.shape}, {y_train.shape}")

# training and evaluation loop
for epoch in range(epochs):
    # put the model into training mode
    model.train()
    
    # forward pass; squeeze() removes size-1 dimensions from the model output
    y_logits = model(X_train).squeeze()
    y_pred = torch.round(torch.sigmoid(y_logits)) # turn logits -> prob -> label
    
    # calculate the loss and the accuracy
    # the loss is given raw logits because BCEWithLogitsLoss has the sigmoid built in
    loss = lossFn(y_logits, y_train)
    acc = accuracyFn(y_true=y_train, y_pred=y_pred)
    
    # reset the gradients accumulated by the previous step
    optimizer.zero_grad()
    
    # backpropagate the loss
    loss.backward()
    
    # update the parameters
    optimizer.step()
    
    # evaluate on the test data with the model in evaluation mode
    model.eval()
    with torch.inference_mode():
        # forward pass
        test_logits = model(X_test).squeeze()
        test_pred = torch.round(torch.sigmoid(test_logits))
        
        # calculate the test loss and accuracy
        test_loss = lossFn(test_logits, y_test)
        test_acc = accuracyFn(y_true=y_test, y_pred=test_pred)
        
    # report progress every 10 epochs
    if epoch % 10 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {acc:.2f}% | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}%")

Note: The model is effectively guessing the output at random.

### Visualize the predictions

In [None]:
def plot_decision_boundary(model: torch.nn.Module, X: torch.Tensor, y: torch.Tensor):
    """Draw the decision regions of ``model`` with the points ``X`` on top.

    A 101 x 101 grid covering the range of the first two columns of ``X``
    (padded by 0.1 on every side) is classified by the model and drawn as a
    filled contour plot on the current matplotlib axes; ``X`` is then
    scattered over it, colored by ``y``.

    The model itself is left untouched: the grid is sent to whatever device the
    model's parameters live on (instead of moving the model to the CPU), and
    the model's train/eval mode is restored before returning.

    Args:
        model: Module that maps a float tensor with two feature columns to logits.
        X: Feature tensor; columns 0 and 1 are used as the plot axes.
        y: Labels for ``X``. More than two distinct values selects the
            multi-class path (softmax + argmax); otherwise sigmoid + round.
    """
    # Locate the model's device so the grid can follow the model
    # (a module without parameters is treated as living on the CPU)
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    # NumPy and Matplotlib need the data on the CPU
    X, y = X.to("cpu"), y.to("cpu")

    # Setup prediction boundaries and grid
    x_min, x_max = X[:, 0].min() - 0.1, X[:, 0].max() + 0.1
    y_min, y_max = X[:, 1].min() - 0.1, X[:, 1].max() + 0.1
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 101), np.linspace(y_min, y_max, 101))

    # Make features: one row per grid point
    X_to_pred_on = torch.from_numpy(np.column_stack((xx.ravel(), yy.ravel()))).float()

    # Make predictions in eval mode, then put the model back into its previous mode
    was_training = model.training
    model.eval()
    try:
        with torch.inference_mode():
            y_logits = model(X_to_pred_on.to(model_device)).cpu()
    finally:
        model.train(was_training)

    # Test for multi-class or binary and adjust logits to prediction labels
    if len(torch.unique(y)) > 2:
        y_pred = torch.softmax(y_logits, dim=1).argmax(dim=1)  # multi-class
    else:
        y_pred = torch.round(torch.sigmoid(y_logits))  # binary

    # Reshape preds to the grid and plot
    y_pred = y_pred.reshape(xx.shape).detach().numpy()
    plt.contourf(xx, yy, y_pred, cmap=plt.cm.RdYlBu, alpha=0.7)
    plt.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap=plt.cm.RdYlBu)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())

In [None]:
def visualize(trainData = X_train, trainLabels = y_train, testData = X_test, testLabels = y_test, predictions = None):
    """Scatter training data, testing data and, optionally, predictions on a new figure.

    The defaults are the X_train / y_train / X_test / y_test objects that exist
    at the moment this definition is executed.
    NOTE(review): each series is plotted as data vs. labels, which looks like it
    expects one-dimensional features - confirm before using it with two-feature data.
    """
    plt.figure(figsize = (8, 7))

    # (x values, y values, color, legend label) for each series, in drawing order
    series = [
        (trainData, trainLabels, "b", "Training Data"),  # training data in blue
        (testData, testLabels, "g", "Testing Data"),     # testing data in green
    ]
    if predictions is not None:
        # predictions in red, plotted against the test inputs
        series.append((testData, predictions, "r", "Predictions"))

    for xs, ys, color, name in series:
        plt.scatter(xs, ys, c=color, s=4, label=name)

    # legend
    plt.legend(prop={"size": 14})

In [None]:
# plot the decision boundary of `model` over the training data (left) and the test data (right)
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.title("Train")
plot_decision_boundary(model, X_train, y_train)
plt.subplot(1, 2, 2)
plt.title("Test")
plot_decision_boundary(model, X_test, y_test)

The visualization shows that the model is underfitting: it is not able to learn the predictive pattern in the data.

### Improve the model

In [None]:
# a larger model: one more layer and 10 hidden units instead of 5
class CircleModelV1(nn.Module):
    """Three stacked linear layers (2 -> 10 -> 10 -> 1) with no activation in between."""

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(in_features=2, out_features=10)   # input features -> hidden units
        self.layer2 = nn.Linear(in_features=10, out_features=10)  # hidden units -> hidden units
        self.layer3 = nn.Linear(in_features=10, out_features=1)   # hidden units -> single output

    def forward(self, x):
        """Apply layer1, layer2 and layer3 in sequence and return the result."""
        out = x
        for layer in (self.layer1, self.layer2, self.layer3):
            out = layer(out)
        return out


In [None]:
# instantiate model version 1 and move it to the selected device
model1 = CircleModelV1().to(device)
model1

In [None]:
# create a fresh loss function and an optimizer for model1
# the training loop for model1 calls the loss through the name lossFn, so that name
# is bound here as well; loss_Fn remains available under this cell's original name
loss_Fn = nn.BCEWithLogitsLoss()
lossFn = loss_Fn
optimizer = torch.optim.SGD(model1.parameters(), lr=0.1)

In [None]:
# seed PyTorch's random number generator
# (`model1` was instantiated in an earlier cell, so this seed does not affect its initial weights)
torch.manual_seed(42)

# hyperparameters: number of passes over the full training set
epochs = 1000

# move the data to the target device
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

print(f"Shape of the training data: {X_train.shape}, {y_train.shape}")

# training and evaluation loop
for epoch in range(epochs):
    # put the model into training mode
    model1.train()
    
    # forward pass; squeeze() removes size-1 dimensions from the model output
    y_logits = model1(X_train).squeeze()
    y_pred = torch.round(torch.sigmoid(y_logits)) # turn logits -> prob -> label
    
    # calculate the loss and the accuracy
    # the loss is given raw logits because BCEWithLogitsLoss has the sigmoid built in
    loss = lossFn(y_logits, y_train)
    acc = accuracyFn(y_true=y_train, y_pred=y_pred)
    
    # reset the gradients accumulated by the previous step
    optimizer.zero_grad()
    
    # backpropagate the loss
    loss.backward()
    
    # update the parameters
    optimizer.step()
    
    # evaluate on the test data with the model in evaluation mode
    model1.eval()
    with torch.inference_mode():
        # forward pass
        test_logits = model1(X_test).squeeze()
        test_pred = torch.round(torch.sigmoid(test_logits))
        
        # calculate the test loss and accuracy
        test_loss = lossFn(test_logits, y_test)
        test_acc = accuracyFn(y_true=y_test, y_pred=test_pred)
        
    # report progress every 10 epochs
    if epoch % 10 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {acc:.2f}% | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}%")

Note: The model is still predicting by random guessing. Stacking more linear layers without a non-linear activation still produces a linear model.

In [None]:
# plot the decision boundary of model1 over the training data (left) and the test data (right)
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.title("Train")
plot_decision_boundary(model1, X_train, y_train)
plt.subplot(1, 2, 2)
plt.title("Test")
plot_decision_boundary(model1, X_test, y_test)

### Build a model with a non-linear activation function

In [None]:
# the three-layer model again, now with a ReLU activation between the linear layers
class CircleModelV2(nn.Module):
    """Three linear layers (2 -> 10 -> 10 -> 1) with ReLU applied after the first two."""

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(in_features=2, out_features=10)   # input features -> hidden units
        self.layer2 = nn.Linear(in_features=10, out_features=10)  # hidden units -> hidden units
        self.layer3 = nn.Linear(in_features=10, out_features=1)   # hidden units -> single output
        self.relu = nn.ReLU()  # non-linear activation, reused after layer1 and layer2

    def forward(self, x):
        """Compute layer3(relu(layer2(relu(layer1(x)))))."""
        hidden = self.relu(self.layer1(x))
        hidden = self.relu(self.layer2(hidden))
        return self.layer3(hidden)


In [None]:
# instantiate model version 2 and move it to the selected device
model2 = CircleModelV2().to(device)
print(model2)

In [None]:
# create a new loss function and an optimizer over the parameters of model2
lossFn = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model2.parameters(), lr=0.1) 

In [None]:
# train the model that uses a non-linear activation
# seed PyTorch's random number generator
# (`model2` was instantiated in an earlier cell, so this seed does not affect its initial weights)
torch.manual_seed(42)

# hyperparameters: number of passes over the full training set
epochs = 1000

# move the data to the target device
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

print(f"Shape of the training data: {X_train.shape}, {y_train.shape}")

# training and evaluation loop
for epoch in range(epochs):
    # put the model into training mode
    model2.train()
    
    # forward pass; squeeze() removes size-1 dimensions from the model output
    y_logits = model2(X_train).squeeze()
    y_pred = torch.round(torch.sigmoid(y_logits)) # turn logits -> prob -> label
    
    # calculate the loss and the accuracy
    # the loss is given raw logits because BCEWithLogitsLoss has the sigmoid built in
    loss = lossFn(y_logits, y_train)
    acc = accuracyFn(y_true=y_train, y_pred=y_pred)
    
    # reset the gradients accumulated by the previous step
    optimizer.zero_grad()
    
    # backpropagate the loss
    loss.backward()
    
    # update the parameters
    optimizer.step()
    
    # evaluate on the test data with the model in evaluation mode
    model2.eval()
    with torch.inference_mode():
        # forward pass
        test_logits = model2(X_test).squeeze()
        test_pred = torch.round(torch.sigmoid(test_logits))
        
        # calculate the test loss and accuracy
        test_loss = lossFn(test_logits, y_test)
        test_acc = accuracyFn(y_true=y_test, y_pred=test_pred)
        
    # report progress every 10 epochs
    if epoch % 10 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {acc:.2f}% | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}%")

### Evaluate the model

In [None]:
# evaluate the trained model on the test set: logits -> sigmoid -> rounded labels
model2.eval()
with torch.inference_mode():
    y_preds = torch.round(torch.sigmoid(model2(X_test))).squeeze()

# compare the first ten predictions with the matching test labels
# (y holds the labels of the full, unsplit dataset, whose rows do not line up with X_test)
y_preds[:10], y_test[:10]

In [None]:
# compare decision boundaries: model1 (left) against model2 (right)
# NOTE(review): the two panels differ in both the model and the data split - confirm this is intended
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.title("Train")
plot_decision_boundary(model1, X_train, y_train) # model1 = no non-linearity
plt.subplot(1, 2, 2)
plt.title("Test")
plot_decision_boundary(model2,X_test, y_test) # model2 = uses ReLU between its layers

Note: Keep tuning the model's hyperparameters iteratively to achieve better accuracy.

### Non-linear activation functions

In [None]:
# create a toy tensor: float32 values from -10 up to (but not including) 10 in steps of 1
dataTensor = torch.arange(-10, 10, 1, dtype=torch.float32)
dataTensor  # one-dimensional tensor (a vector)

In [None]:
# plot the raw toy tensor
plt.plot(dataTensor)

In [None]:
# hand-written ReLU activation: negative values become 0, other values stay unchanged
def relu(x):
    """Return the element-wise maximum of 0 and ``x``; ``x`` must be a tensor."""
    zero = torch.tensor(0)
    return torch.maximum(zero, x)

In [None]:
# apply the hand-written ReLU to the toy tensor
relu(dataTensor)

Note: ReLU converts negative values to zero and leaves non-negative values unchanged.

In [None]:
# plot the toy tensor after the ReLU activation function
plt.plot(relu(dataTensor))

In [None]:
# hand-written sigmoid activation
def sigmoid(x):
    """Return 1 / (1 + exp(-x)) computed element-wise on tensor ``x``."""
    denominator = 1 + torch.exp(-x)
    return 1 / denominator

In [None]:
# apply the hand-written sigmoid to the toy tensor
sigmoid(dataTensor)

In [None]:
# plot the toy tensor after the sigmoid function
plt.plot(sigmoid(dataTensor))

### Multi-Class Classification

In [None]:
# import the blob generator and the train/test split helper
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split

# create a dataset for the multi-class classification task
# settings for the data generation
NUM_CLASSES = 4
NUM_FEATURES = 2
RANDOM_SEED = 42

# create the multi-class data: 1000 samples, NUM_FEATURES features, NUM_CLASSES centers
X_blob, y_blob = make_blobs(n_samples=1000, n_features=NUM_FEATURES, centers=NUM_CLASSES, cluster_std=1.5, random_state=RANDOM_SEED)

In [None]:
# check the shapes of the generated features and labels
X_blob.shape, y_blob.shape

In [None]:
#y_blob

In [None]:
# turn the NumPy data into tensors: float32 features and int64 class labels
# torch.as_tensor also accepts a tensor, so re-running this cell does not fail
# the way torch.from_numpy does once the arrays have already been converted
X_blob = torch.as_tensor(X_blob, dtype=torch.float)
y_blob = torch.as_tensor(y_blob, dtype=torch.long)

print(X_blob[:5], y_blob[:5])

In [None]:
# split the data into training and testing sets (20% held out, reproducible via RANDOM_SEED)
Xb_train, Xb_test, yb_train, yb_test = train_test_split(X_blob, y_blob, test_size=0.2, random_state=RANDOM_SEED)

In [None]:
# shape of the training features
Xb_train.shape

In [None]:
# scatter the two blob features against each other, colored by class label
plt.figure(figsize=(10, 7))
plt.scatter(X_blob[:, 0], X_blob[:, 1], c=y_blob, cmap=plt.cm.RdYlBu)

### Build a model

In [None]:
# device-agnostic setup (same expression as used earlier in the notebook)
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# model for the multi-class classification task
class MultiClass(nn.Module):
    """Stack of three linear layers with ReLU after the first two.

    Args:
        input_features: Number of input features.
        output_features: Number of outputs of the final layer.
        hidden_units: Width of the two hidden layers; defaults to 8.
    """

    def __init__(self, input_features, output_features, hidden_units=8):
        super().__init__()
        layers = [
            nn.Linear(in_features=input_features, out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units, out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units, out_features=output_features),
        ]
        self.linear_layer_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return its output."""
        logits = self.linear_layer_stack(x)
        return logits

In [None]:
# instantiate the multi-class model and move it to the selected device
model3 = MultiClass(input_features=NUM_FEATURES, output_features=NUM_CLASSES, hidden_units=8).to(device)
model3

### Create a loss function and optimizer

In [None]:
# cross-entropy loss for the multi-class task and SGD over the parameters of model3
loss_FN = nn.CrossEntropyLoss()
optimizer = optim.SGD(model3.parameters(), lr=0.01)

In [None]:
# perform a single forward pass and show the outputs for the first five training samples
model3(Xb_train.to(device))[:5]

In [None]:
# compare the shape of the model output with the number of classes
model3(Xb_train.to(device)).shape, NUM_CLASSES

In [None]:
# compute the prediction logits for the test data
yb_logits = model3(Xb_test.to(device))

# turn the logits into prediction probabilities with softmax across dim 1
yb_preds_probs = torch.softmax(yb_logits, dim=1)

print(yb_logits[:5])
print(yb_preds_probs[:5])

In [None]:
# sum the softmax probabilities of the first sample
torch.sum(yb_preds_probs[0])

In [None]:
# find the index of the highest probability for the first sample
print(yb_preds_probs[0])
print(torch.argmax(yb_preds_probs[0]))

### Build the training and testing loop

In [None]:
# seed PyTorch's random number generator
# (`model3` was instantiated in an earlier cell, so this seed does not affect its initial weights)
torch.manual_seed(42)

# hyperparameters: number of passes over the full training set
epochs = 1000

# move the data to the target device
Xb_train, yb_train = Xb_train.to(device), yb_train.to(device)
Xb_test, yb_test = Xb_test.to(device), yb_test.to(device)

for epoch in range(epochs):
    # put the model into training mode
    model3.train()

    # forward pass: logits -> softmax probabilities -> predicted class index
    yb_logits = model3(Xb_train)
    yb_pred = torch.softmax(yb_logits, dim=1).argmax(dim=1)

    # calculate the loss and the accuracy
    # the loss must be measured against the true labels (yb_train); measuring it against
    # the model's own predictions only rewards the model for agreeing with itself
    loss = loss_FN(yb_logits, yb_train)
    acc = accuracyFn(y_true=yb_train, y_pred=yb_pred)

    # reset the gradients accumulated by the previous step
    optimizer.zero_grad()

    # backpropagate the loss
    loss.backward()

    # update the parameters
    optimizer.step()

    # evaluate on the test data with the model in evaluation mode
    model3.eval()
    with torch.inference_mode():
        # forward pass
        testLogits = model3(Xb_test)
        testPreds = torch.softmax(testLogits, dim=1).argmax(dim=1)

        # calculate the test loss and accuracy
        testLoss = loss_FN(testLogits, yb_test)
        testAcc = accuracyFn(y_true=yb_test, y_pred=testPreds)

    # report progress every 10 epochs
    if epoch % 10 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Acc: {acc:.2f}% | Test Loss: {testLoss:.5f},Test Acc: {testAcc:.2f}%")

In [None]:
# Make predictions on the test data with the model in evaluation mode
model3.eval()
with torch.inference_mode():
    y_logits = model3(Xb_test)

# View the logits of the first 10 test samples
y_logits[:10]

In [None]:
# Turn predicted logits into prediction probabilities
y_pred_probs = torch.softmax(y_logits, dim=1)

# Turn prediction probabilities into prediction labels (index of the largest probability)
y_preds = y_pred_probs.argmax(dim=1)

# Compare the first 10 model predictions with the test labels and report the test accuracy
print(f"Predictions: {y_preds[:10]}\nLabels: {yb_test[:10]}")
print(f"Test accuracy: {accuracyFn(y_true=yb_test, y_pred=y_preds)}%")

In [None]:
# plot the decision boundaries of model3 over the training data (left) and the test data (right)
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.title("Train")
plot_decision_boundary(model3, Xb_train, yb_train)
plt.subplot(1, 2, 2)
plt.title("Test")
plot_decision_boundary(model3, Xb_test, yb_test)