# **PyTorch Neural Network Classification**
## 1. Architecture of a classification neural network
> note: see `classification_note.md` for a description of the overall architecture of a classification neural network

## 2. Make classification data and get it ready
Here, we use the `make_circles()` function from scikit-learn to generate two circles of differently colored dots.

In [None]:
from sklearn.datasets import make_circles

# Number of samples to generate (3000 points in total)
n = 3000

# Create the circles dataset: X holds the 2-D points, y the label of each point
X, y = make_circles(
    n_samples=n,
    noise=0.3,  # amount of noise added to the dots
    random_state=42,  # fixed seed so the same samples come out on every run
)

In [None]:
# Preview the first 5 feature rows and their matching labels
# (both printed labels now agree with the [:5] slices they describe)
print(f"First 5 X features: \n{X[:5]}")
print(f"First 5 y values: \n{y[:5]}")

In [None]:
# Collect both feature columns and the label column in one DataFrame for inspection
import pandas as pd

circles = pd.DataFrame(data=dict(X1=X[:, 0], X2=X[:, 1], label=y))
circles.head()

In [None]:
# Show the first 10 rows of the circles DataFrame
circles.head(10)

In [None]:
# Count how many samples carry each distinct label value
circles.label.value_counts()

In [None]:
# Scatter-plot the two feature columns, colouring every dot by its label
import matplotlib.pyplot as plt

# (in a notebook, `%matplotlib inline` can be used to render figures inline)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdYlBu)

#### 2.1 Input and output shape

In [None]:
# Check the shapes of the features (X) and the labels (y)
X.shape, y.shape

In [None]:
# Pull out the first sample and show its feature values, its label and the shape of each
X_sample, y_sample = X[0], y[0]

print(f"Values for on sample of X: {X_sample} and the same for y: {y_sample}")
print(f"shape for one sample of X: {X_sample.shape} and the same for y: {y_sample.shape}")

#### 2.2 Turn data into tensors and create train and test splits

In [None]:
# Convert the NumPy data into tensors of dtype torch.float;
# leaving it as-is causes issues with computations later on
import torch

X, y = (torch.from_numpy(array).type(torch.float) for array in (X, y))

# view the first five samples
X[:5], y[:5]

In [None]:
# Split the data into a training set (80%) and a test set (20%)
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,  # 20% test, 80% train
    random_state=42,  # fixed seed makes the random split reproducible
)

In [None]:
# Print the number of samples in each split: X_train, X_test, y_train, y_test
print(*map(len, (X_train, X_test, y_train, y_test)))

## 3. Build Model

In [None]:
import torch
from torch import nn

# Device-agnostic setup: use the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

In [None]:
# 1. Construct a model class that subclasses nn.Module
class CircleModelv0(nn.Module):
    """Two stacked linear layers mapping 2 input features to 1 output value.

    ``layer_1`` expands the 2 input features to 5 hidden features and
    ``layer_2`` reduces those 5 features to a single output.
    """

    def __init__(self):
        super().__init__()
        # 2. Create 2 nn.Linear layers capable of handling X and y input shape
        self.layer_1 = nn.Linear(in_features=2, out_features=5)  # takes 2 features, produces 5 features
        self.layer_2 = nn.Linear(in_features=5, out_features=1)  # takes 5 features, produces 1 feature

    # 3. Define a forward method containing the forward pass computations
    def forward(self, x):
        """Run ``x`` through ``layer_1`` and then ``layer_2``.

        Args:
            x: input tensor whose last dimension holds the 2 input features.

        Returns:
            The output of ``layer_2``: one value per input sample.
        """
        # The result must be returned; without `return`, calling the model yields None
        return self.layer_2(self.layer_1(x))

# 4. Create an instance of the model and send it to the target device
#    (the trailing expression displays the model's layer structure in the notebook)
model_0 = CircleModelv0().to(device)
model_0

In [None]:
# Build the 2 -> 5 -> 1 stack of linear layers with nn.Sequential
# and send it to the target device
model_1 = nn.Sequential(
    nn.Linear(2, 5),  # 2 input features -> 5 hidden features
    nn.Linear(5, 1),  # 5 hidden features -> 1 output
).to(device)
model_1

In [None]:
# Make predictions with the model before any training has happened
untrained_preds = model_1(X_test.to(device))
print(f"Length of predictions: {len(untrained_preds)}, shape: {untrained_preds.shape}")
print(f"Length of test samples: {len(y_test)}, shape: {y_test.shape}")
# This label sits above the first 10 predictions, so it now says so
print(f"\nFirst 10 predictions: \n{untrained_preds[:10]}")
print(f"\nFirst 10 test labels: \n{y_test[:10]}")

#### 3.1 Setup loss function and optimizer

In [None]:
# Loss function: BCEWithLogitsLoss has the sigmoid built in.
# (nn.BCELoss() is the variant without a built-in sigmoid.)
loss_fn = nn.BCEWithLogitsLoss()

# Optimizer: SGD over model_1's parameters with a learning rate of 0.1
optimizer = torch.optim.SGD(model_1.parameters(), lr=0.1)

In [None]:
# Model evaluation metric: accuracy
# calculate accuracy (a classification metric)
def accuracy_fn(y_true, y_preds):
    """Return the percentage of predictions that equal their true label.

    Args:
        y_true: tensor of ground-truth labels.
        y_preds: tensor of predicted labels, compared element-wise with ``y_true``.

    Returns:
        The number of matching elements divided by ``len(y_preds)``, times 100.
    """
    matches = torch.eq(y_true, y_preds)  # element-wise equality mask
    n_correct = matches.sum().item()
    fraction_correct = n_correct / len(y_preds)
    return fraction_correct * 100

## 4. Train model
#### 4.1 Going from raw model outputs to predicted labels (logits -> prediction probabilities -> prediction labels)

In [None]:
# view the first 5 outputs of the forward pass on the test data
y_logits = model_1(X_test.to(device))[:5]
y_logits

In [None]:
# Use sigmoid on the model logits to turn them into prediction probabilities
y_pred_probs = torch.sigmoid(y_logits)
y_pred_probs

In [None]:
# Find the predicted labels (round the prediction probabilities)
y_preds = torch.round(y_pred_probs)

# In full: logits -> prediction probabilities -> prediction labels in one expression
y_pred_labels = torch.round(torch.sigmoid(model_1(X_test.to(device))[:5]))

# Check that the step-by-step and the one-expression results are equal
print(torch.eq(y_preds.squeeze(), y_pred_labels.squeeze()))

# get rid of the extra dimension
y_preds.squeeze()

In [None]:
# Labels of the first 5 test samples
y_test[:5]

#### 4.2 Building a training and testing loop

In [None]:
torch.manual_seed(42)

# set the number of epochs
epochs = 100

# put data on the target device
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

# Build the training and evaluation loop
for epoch in range(epochs):
    ### Training
    model_1.train()

    # 1. Forward pass (model outputs raw logits)
    y_logits = model_1(X_train).squeeze()  # squeeze to remove the extra `1` dimension
    y_pred = torch.round(torch.sigmoid(y_logits))  # logits -> probabilities -> labels

    # 2. Calculate loss/accuracy
    loss = loss_fn(y_logits,  # nn.BCEWithLogitsLoss works with raw logits
                   y_train)
    acc = accuracy_fn(y_true=y_train, y_preds=y_pred)

    # 3. Optimizer zero grad
    optimizer.zero_grad()

    # 4. Loss backwards
    loss.backward()

    # 5. Optimizer step
    optimizer.step()

    ### Testing
    model_1.eval()
    with torch.inference_mode():
        # 1. Forward pass
        test_logits = model_1(X_test).squeeze()
        test_pred = torch.round(torch.sigmoid(test_logits))

        # 2. Calculate loss/accuracy.
        # The loss target must be the true labels (y_test); using the model's own
        # rounded predictions as the target would not measure error against the data.
        test_loss = loss_fn(test_logits, y_test)
        test_acc = accuracy_fn(y_true=y_test, y_preds=test_pred)

    # print out what's happening every 10 epochs
    if epoch % 10 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {acc:.2f}% | Test Loss: {test_loss:.3f}, Test Accuracy: {test_acc:.2f}% |")

In [None]:
import requests
from pathlib import Path

# Download helper functions from the Learn PyTorch repo (skipped when the file already exists)
if Path("helper_functions.py").is_file():
    print("helper_functions.py already exist, skipping download")
else:
    print("Downloading helper_functions.py")
    request = requests.get(
        "https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py",
        timeout=30,  # do not hang forever when the host is unreachable
    )
    # Fail loudly on an HTTP error instead of saving an error page as a Python module
    request.raise_for_status()
    with open('helper_functions.py','wb') as f:
        f.write(request.content)

In [None]:
# import helper functions to plot and visualize the model
from helper_functions import plot_predictions, plot_decision_boundary

In [None]:
# Plot decision boundaries for the train and test sets side by side
plt.figure(figsize=(12, 6))

# Left panel: training data
plt.subplot(1, 2, 1)
plt.title("Train")
plot_decision_boundary(model_1, X_train, y_train)

# Right panel: test data
plt.subplot(1, 2, 2)
plt.title("Test")
plot_decision_boundary(model_1, X_test, y_test)

The model is underfitting, meaning it's not learning predictive patterns from the data.
- So, how could we improve this?

## 5. Improving a model (from a model perspective)
Let's see what happens if we add an extra layer to our model, fit for longer (`epochs=1000` instead of `epochs=100`) and increase the number of hidden units from `5` to `10`.

In [None]:
# Larger model: one extra linear layer and 10 hidden units instead of 5
model_v2 = nn.Sequential(
    nn.Linear(2, 10),   # 2 input features -> 10 hidden features
    nn.Linear(10, 10),  # the added extra layer
    nn.Linear(10, 1),   # 10 hidden features -> 1 output
).to(device)
model_v2