## Calculadora -10 a 10

In [118]:
# import libraries
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader,TensorDataset
from sklearn.model_selection import train_test_split
from rich import print, console

import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

In [119]:
# Crear un dataset de 1000 filas por 2 columnas. Cada columna contiene enteros aleatorios entre -10 y 10
data = np.random.randint(-10, 10, (100, 2))

# Crear un dataset de 1000 filas por 1 columna. Cada columna contiene la suma de los valores de las columnas de X
labels = np.sum(data, axis=1)

print(data.shape, labels.shape)

In [120]:
# Step 1: convert to tensor
dataT   = torch.tensor( data ).float()
labelsT = torch.tensor( labels ).long()

# Step 2: use scikitlearn to split the data
train_data, test_data, train_labels, test_labels = train_test_split(dataT, labelsT, test_size=.1)

print(f"train_data: {train_data.shape}, train_labels: {train_labels.shape}")
print(f"test_data: {test_data.shape}, test_labels: {test_labels.shape}")

# show the first rows of the data and the labels
print(test_data[:5])
print(test_labels[:5])

In [121]:
# Step 2: Normalize the data
train_dataN = (train_data - train_data.mean()) / train_data.std()
test_dataN  = (test_data - test_data.mean()) / test_data.std()

# train_dataN = train_data
# test_dataN  = test_data

print(train_dataN.shape, train_labels.shape)

# show the first rows of train_dataN and train_labels
print(np.hstack((train_dataN[:5], train_labels[:5].reshape(-1,1))))

In [122]:
# Step 3: convert into PyTorch Datasets
train_dataTds = TensorDataset(train_dataN, train_labels)
test_dataTds  = TensorDataset(test_dataN, test_labels)

# Step 4: translate into dataloader objects
batchsize    = 32
train_loader = DataLoader(train_dataTds, batch_size=batchsize, shuffle=True, drop_last=True)
test_loader  = DataLoader(test_dataTds, batch_size=test_dataTds.tensors[0].shape[0])

# show the dataloader object and the shape of the data (must be the same as the original data)
print("test loader batch size: ", train_loader.dataset.tensors[0].shape[0])
print(f'There are {len(train_loader)} batches, each with {batchsize} samples.')

# Create the DL model

## Funciones de pérdida en PyTorch
![Funciones de pérdida en PyTorch](../_USEFUL_TABLES/loss_functions_pytorch_nn.png)

## Optimizadores en PyTorch
![optimizadores en PyTorch](../_USEFUL_TABLES/optimizers_pytorch_optim.png)

In [123]:
# create a class for the model
def createTheModel():

  class Net(nn.Module):
    def __init__(self):
      super().__init__()

      ### input layer
      self.input = nn.Linear(2,4)
      
      ### hidden layer
      self.fc1 = nn.Linear(4,4)
      self.fc2 = nn.Linear(4,4)

      ### output layer
      self.output = nn.Linear(4,10)

    # forward pass
    def forward(self,x):
      x = F.relu( self.input(x) )
      x = F.relu( self.fc1(x) )
      x = F.relu( self.fc2(x) )
      return self.output(x)
  
  # create the model instance
  net = Net()
  
  # loss function
  lossfun = nn.CrossEntropyLoss()

  # optimizer
  optimizer = torch.optim.SGD(net.parameters(), lr=.01)  # Simple and efficient, especially for large-scale learning and convex problems. General problems (typically for large datasets)

  return net,lossfun,optimizer

In [124]:
net,lossfun,optimizer = createTheModel()
print(net, lossfun, optimizer, sep="\n----\n")

In [125]:
# Step 6: Verify the model with a sample (the shape of the output must be 1 column)
X = torch.tensor([[1.,2.],[3.,4.],[5.,6.],[7.,8.],[9.,10.]]).float()
yHat = net(X)
print(yHat)
print(X.shape, yHat.shape)

# Create a function that trains the model

In [126]:
# a function that trains the model

def trainTheModel():

  # number of epochs
  numepochs = 50
  
  # create a new model
  net, lossfun, optimizer = createTheModel()

  # initialize losses
  losses    = torch.zeros((numepochs, 2))
  trainAcc  = []
  testAcc   = []
  

  # loop over epochs
  for epochi in range(numepochs):

    # loop over training data batches
    batchAcc  = []
    batchLoss = []
    
    for X,y in train_loader:

      # forward pass and loss
      yHat = net(X)
    #   print(f'X: {X}')
    #   print(f'yHat: {yHat}')
    #   print(f'y: {y}')
      loss = lossfun(yHat, y)  # lossfun returns a Tensor with the loss

      # backprop
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      # loss from this batch
      batchLoss.append(loss.item())  # .item() returns the scalar value held in the loss tensor

      # compute accuracy
    #   print(f'y: {y}')
      matches = torch.argmax(yHat,axis=1) == y     # booleans (false/true)
      matchesNumeric = matches.float()             # convert to numbers (0/1)
      accuracyPct = 100*torch.mean(matchesNumeric) # average and x100
      batchAcc.append( accuracyPct )               # add to list of accuracies
    # end of batch loop...

    # now that we've trained through the batches, get their average training accuracy
    trainAcc.append( np.mean(batchAcc) )

    # and get average losses across the batches
    losses[epochi, 0] = np.mean(batchLoss)

    # test accuracy
    X,y = next(iter(test_loader)) # extract X,y from test dataloader
    with torch.no_grad(): # deactivates autograd
      yHat = net(X)
      
    # compare the following really long line of code to the training accuracy lines
    testAcc.append( 100*torch.mean((torch.argmax(yHat,axis=1)==y).float()) )
    loss = lossfun(yHat,y)
    losses[epochi, 1] = loss.item()

  # end epochs

  # function output
  return trainAcc,testAcc,losses,net


# Run the model and show the results!

In [127]:
trainAcc,testAcc,losses,net = trainTheModel()

print(f"trainAcc: {trainAcc}", f"testAcc: {testAcc}", f"losses: {losses}", sep="\n----\n")

yHat = net(X)
print(yHat)
print(X[:5], yHat[:5])


IndexError: Target 10 is out of bounds.

In [None]:
# Confirm ranges of train and test data

print('Training data range %g to %g' 
      %(torch.min(train_loader.dataset.tensors[0]),torch.max(train_loader.dataset.tensors[0])) )

print('Test data range %g to %g' 
      %(torch.min(test_loader.dataset.tensors[0]),torch.max(test_loader.dataset.tensors[0])) )