In [1]:
import torch 
import numpy as np

In [2]:
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
import torch.nn as nn

In [8]:
device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu")
print("Device: ", device)

Device:  cuda:0


1. ETL 
   1. Specifying some some random input
   2. PyTorch Dataset and DataLoader
2. EDA - we gonna just skip because we are lazy...
3. Feature Engineering / Cleaning - which we don't need to....
4. Modeling 
   1. `nn.Linear` (luckily, you already understand this!  Yay!)
   2. Define loss function (mse for regression, cross entrophy for classification)
   3. Define the optimizer function (gradient descent ; adam)
   4. Train the model
5. Inference / Testing

In [9]:
#X(temp, rainfall, hum)

X_train = np.array([[73, 67, 43], [91, 88, 64], [87, 134, 58], 
                   [102, 43, 37], [69, 96, 70], [73, 67, 43], 
                   [91, 88, 64], [87, 134, 58], [102, 43, 37], 
                   [69, 96, 70], [73, 67, 43], [91, 88, 64], 
                   [87, 134, 58], [102, 43, 37], [69, 96, 70]], 
                  dtype='float32')

# Targets (apples, oranges)
Y_train = np.array([[56, 70], [81, 101], [119, 133], 
                    [22, 37], [103, 119], [56, 70], 
                    [81, 101], [119, 133], [22, 37], 
                    [103, 119], [56, 70], [81, 101], 
                    [119, 133], [22, 37], [103, 119]], 
                   dtype='float32')

In [11]:
inputs = torch.tensor(X_train)
targets = torch.tensor(Y_train)
ds = TensorDataset(inputs, targets)

In [12]:
batch_size = 3 # Can be any number
# Too small - slow
# Too large - run out of memory
dl = DataLoader(ds, batch_size, shuffle=True)

In [3]:
#class is the perfect and the best practice for creating a neural network of any type...

#format:
'''
class AnyNameCapitalized(nn.Module): #it must inherit nn.Module
    def __init__():
        super().__init__()  #super is basically inheriting nn.Module init
        #we define all the layers here.....
        
    def forward(self, x):   #YOU CANNOT CHANGE THIS NAME, it MUST BE "forward"
        x = layer1()
        x = layer2()
        return x
'''

class NeuralNetwork(nn.Module):
    
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

In [4]:
model = NeuralNetwork(  3  ,  10   ,  2 )

In [5]:
sum(p.numel() for p in model.parameters() if p.requires_grad)

62

In [6]:
#so how do we use our model
model

NeuralNetwork(
  (fc1): Linear(in_features=3, out_features=10, bias=True)
  (fc2): Linear(in_features=10, out_features=2, bias=True)
  (relu): ReLU()
)

In [7]:
#under the nn module, there are many loss function
J_fn = nn.MSELoss()

#later on, you will know how to use this.....

In [13]:
# Normally in skleran we call fit, it will perform gradient descent
# In code from scratch we need to like specify how we want to update the gradients
# Optimizer handles how we update the parameters
# If we use w = w -alpha (gradient) ==> gradient descent
#Stochastic gradient descent ==>  is not one sample - mini-batch

optim = torch.optim.SGD(model.parameters(), lr=0.0001)

In [20]:
num_epochs = 10 
for epoch in range(num_epochs):
    for x, y in dl:  
        x.to(device)  #device is either cpu or cuda
        y.to(device)

        yhat = model(x)
        loss = J_fn(yhat, y)
        optim.zero_grad()
        loss.backward()  
        optim.step()  
        
        print(f"Epoch: {epoch} - Loss: {loss}")

Epoch: 0 - Loss: 8127.064453125
Epoch: 0 - Loss: 12062.1494140625
Epoch: 0 - Loss: 11792.26171875
Epoch: 0 - Loss: 1892.9664306640625
Epoch: 0 - Loss: 7108.32666015625
Epoch: 1 - Loss: 9291.6806640625
Epoch: 1 - Loss: 9601.4580078125
Epoch: 1 - Loss: 9599.8974609375
Epoch: 1 - Loss: 4340.59326171875
Epoch: 1 - Loss: 8113.89599609375
Epoch: 2 - Loss: 9595.7099609375
Epoch: 2 - Loss: 5661.01953125
Epoch: 2 - Loss: 9281.0234375
Epoch: 2 - Loss: 10717.3466796875
Epoch: 2 - Loss: 5657.48388671875
Epoch: 3 - Loss: 10713.8798828125
Epoch: 3 - Loss: 7836.72412109375
Epoch: 3 - Loss: 12032.72265625
Epoch: 3 - Loss: 5652.61376953125
Epoch: 3 - Loss: 4641.80810546875
Epoch: 4 - Loss: 4641.1748046875
Epoch: 4 - Loss: 6774.42431640625
Epoch: 4 - Loss: 10854.0849609375
Epoch: 4 - Loss: 6818.201171875
Epoch: 4 - Loss: 11754.9345703125
Epoch: 5 - Loss: 8403.265625
Epoch: 5 - Loss: 2886.538330078125
Epoch: 5 - Loss: 7079.423828125
Epoch: 5 - Loss: 9256.7431640625
Epoch: 5 - Loss: 13182.015625
Epoch: 6 

In [21]:
ds[0:2]

(tensor([[73., 67., 43.],
         [91., 88., 64.]]),
 tensor([[ 56.,  70.],
         [ 81., 101.]]))

In [22]:
#please create two numpy array of 
# [74, 68, 42], [92, 88, 65]
x = np.array([[74, 68, 42], [92, 88, 65]], dtype='float32')
#float  means 32 bits
#double means 64 bits

#please make it a tensor
x_tensor = torch.tensor(x)

#then use our model to predict the number of oranges and apples
yhat = model(x_tensor)
yhat

#print the loss comparing with ds[0] and ds[1] - look at the y part ok...
ytest = ds[0:2][1]
loss = J_fn(yhat, ytest)
print(loss)

tensor(6018.6230, grad_fn=<MseLossBackward0>)
