In [44]:
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
from torch.nn import CrossEntropyLoss
import torch.optim as optim

## Discovering Activation Functions

### The sigmoid and softmax functions

In [2]:
# Binary classification: turn a single raw score into a probability with a sigmoid.
input_tensor = torch.tensor([[2.4]])

# Build the sigmoid activation module, then call it on the score like a function.
sigmoid = nn.Sigmoid()
probability = sigmoid(input_tensor)

print(probability)

tensor([[0.9168]])


In [3]:
# Multi-class classification: turn six raw class scores into a probability distribution.
input_tensor = torch.tensor([[1.0, -6.0, 2.5, -0.3, 1.2, 0.8]])

# dim=-1 normalizes across the last axis, i.e. over the six scores of the single row.
softmax = nn.Softmax(dim=-1)
probabilities = softmax(input_tensor)

print(probabilities)

tensor([[1.2828e-01, 1.1698e-04, 5.7492e-01, 3.4961e-02, 1.5669e-01, 1.0503e-01]])


## Running a Forward Pass

### Binary Classifier

In [5]:
# Binary classifier: maps a 1x8 feature row to a single probability.
# torch.tensor with an explicit dtype replaces the legacy torch.Tensor constructor,
# so the integer literals are stored as float32 regardless of global defaults.
input_tensor = torch.tensor([[3, 4, 6, 2, 3, 6, 8, 9]], dtype=torch.float32)

# One linear layer reduces the 8 features to one score; the sigmoid maps that
# score into (0, 1). Weights are randomly initialized, so the value varies per run.
model = nn.Sequential(
    nn.Linear(8, 1),
    nn.Sigmoid(),
)

output = model(input_tensor)
print(output)

tensor([[0.0506]], grad_fn=<SigmoidBackward0>)


### From regression to multi-class classification

In [8]:
# A single sample with 11 input features, shared by both networks below.
# torch.tensor with an explicit dtype replaces the legacy torch.Tensor constructor,
# so the integer literals are stored as float32 regardless of global defaults.
input_tensor = torch.tensor([[3, 4, 6, 7, 10, 12, 2, 3, 6, 8, 9]], dtype=torch.float32)

# Regression: four stacked linear layers narrowing 11 -> 20 -> 12 -> 6 -> 1.
# The final layer has one unit and no activation, so the output is an unbounded value.
model = nn.Sequential(
    nn.Linear(11, 20),
    nn.Linear(20, 12),
    nn.Linear(12, 6),
    nn.Linear(6, 1),
)

output = model(input_tensor)
print(output)

# Multi-class classification: same hidden stack, but the last linear layer emits
# four scores and a softmax over dim=1 (the class axis) turns them into probabilities.
model1 = nn.Sequential(
    nn.Linear(11, 20),
    nn.Linear(20, 12),
    nn.Linear(12, 6),
    nn.Linear(6, 4),
    nn.Softmax(dim=1),
)

output = model1(input_tensor)
print(output)

tensor([[-1.1306]], grad_fn=<AddmmBackward0>)
tensor([[0.0292, 0.1107, 0.4095, 0.4506]], grad_fn=<SoftmaxBackward0>)


## Loss Function

### Creating one-hot encoded labels

In [11]:
# One-hot encode the ground-truth label y, once with NumPy and once with PyTorch.
y = 1
num_classes = 3

# NumPy: row y of the num_classes x num_classes identity matrix is the one-hot
# vector for class y. Deriving it from y and num_classes (instead of hard-coding
# [0, 1, 0]) keeps it in sync with the PyTorch version if either value changes.
one_hot_numpy = np.eye(num_classes, dtype=int)[y]

# PyTorch: F.one_hot builds the same vector from the class index.
one_hot_pytorch = F.one_hot(torch.tensor(y), num_classes=num_classes)

print("One-hot vector using NumPy:", one_hot_numpy)
print("One-hot vector using PyTorch:", one_hot_pytorch)

One-hot vector using NumPy: [0 1 0]
One-hot vector using PyTorch: tensor([0, 1, 0])


### Calculating cross entropy loss

In [15]:
# Cross-entropy loss for one sample: raw scores for 4 classes, true class index 2.
y = [2]
scores = torch.tensor([[0.1, 6.0, -2.0, 3.2]])

# The loss function is stored as `criterion`.
criterion = CrossEntropyLoss()

# One-hot encode the label with one entry per class, giving shape (1, 4) like `scores`.
one_hot_label = F.one_hot(torch.tensor(y), num_classes=4)

# Both tensors are cast to double; the one-hot target is passed in the same
# shape as the scores, so it acts as a per-class probability target.
loss = criterion(scores.double(), one_hot_label.double())
print(loss)

tensor(8.0619, dtype=torch.float64)


## Using Derivatives

### Accessing the model parameters

In [20]:
# Two stacked linear layers (16 -> 8 -> 2); nn.Sequential lets each be reached by index.
model = nn.Sequential(
    nn.Linear(16, 8),
    nn.Linear(8, 2),
)

# First layer (index 0): its weight parameter.
weight_0 = model[0].weight
print("Weight of the first layer:", weight_0)

# Second layer (index 1): its bias parameter.
bias_1 = model[1].bias
print("Bias of the second layer:", bias_1)

Weight of the first layer: Parameter containing:
tensor([[ 0.2454,  0.1513,  0.0589, -0.1880, -0.1489, -0.1974,  0.0612, -0.2299,
          0.1579,  0.1290, -0.2158,  0.1076, -0.2208, -0.0758,  0.0826,  0.1549],
        [-0.1609, -0.1271,  0.1201,  0.0216, -0.1223,  0.2067, -0.0217,  0.0979,
          0.0987,  0.0437, -0.1453,  0.2415, -0.0360,  0.1768, -0.1418, -0.1298],
        [-0.1864, -0.0895, -0.1360, -0.1801, -0.1693, -0.0127, -0.0830,  0.0159,
          0.0544,  0.0818,  0.0633,  0.1599,  0.1866,  0.1243, -0.2415,  0.1278],
        [-0.2293, -0.1717,  0.0998, -0.0601, -0.2053, -0.0783,  0.2459, -0.1927,
         -0.1767,  0.0207, -0.0043, -0.0339,  0.0746,  0.1216, -0.1386, -0.2237],
        [ 0.1422, -0.1353, -0.2238, -0.0410, -0.0755, -0.1696, -0.0475,  0.0203,
         -0.1766, -0.0026, -0.1624,  0.0088, -0.2401,  0.0663, -0.0911, -0.0575],
        [ 0.1247,  0.0772,  0.0193, -0.1908,  0.0413,  0.0069,  0.1467, -0.2308,
         -0.1282,  0.2292,  0.0425,  0.2248, -0.0205, -

### Updating the weights manually

In [60]:
# Learning rate is typically small.
lr = 0.001
y = [1]

# A single sample with 16 features. torch.tensor with an explicit dtype replaces
# the legacy torch.Tensor constructor, so the values are stored as float32.
input_tensor = torch.tensor(
    [[3, 4, 6, 2, 3, 6, 8, 9, -1, 4, 8, -2, 0, -10, 13, 7]],
    dtype=torch.float32,
)

# Create a one-hot encoded vector of the label y (two classes -> shape (1, 2)).
target = F.one_hot(torch.tensor(y), num_classes=2)

# Implement a neural network with three linear layers (16 -> 8 -> 4 -> 2).
model = nn.Sequential(
    nn.Linear(16, 8),
    nn.Linear(8, 4),
    nn.Linear(4, 2),
)

# Calculate the loss and gradients with a forward/backward pass dedicated to
# the manual update. backward() frees this graph, so it is kept separate from `pred`.
criterion = CrossEntropyLoss()
manual_loss = criterion(model(input_tensor).double(), target.double())
manual_loss.backward()

# Update the weight of each linear layer using the learning rate and its gradient.
# The update is done in place under no_grad: writing `w = w - lr * w.grad` would
# only rebind a local name and leave the model's parameters unchanged.
# Only the weights are updated here; the biases are left as they are.
with torch.no_grad():
    for layer in model:
        layer.weight.sub_(lr * layer.weight.grad)

# Clear the gradients so a later backward() does not accumulate on top of them.
model.zero_grad()

# Forward pass with the updated weights; `pred` carries a fresh graph for later use.
pred = model(input_tensor)


### Using the PyTorch optimizer

In [61]:
# Use optim to create an SGD optimizer (learning rate below one) for the model
# defined in the previous cell, then perform a single optimization step.

# Create the loss function here so this cell does not depend on a `criterion`
# left over from the earlier cross-entropy section.
criterion = CrossEntropyLoss()

# Create the optimizer
optimizer = optim.SGD(model.parameters(), lr=0.001)

# Run the forward pass in this cell so it can be re-executed: backward() frees
# the graph behind `pred`, so a second backward() on a stale `pred` would fail.
pred = model(input_tensor)
loss = criterion(pred.double(), target.double())

# Clear any gradients left from earlier backward passes, then backpropagate.
optimizer.zero_grad()
loss.backward()

# Update the model's parameters using the optimizer.
optimizer.step()

