# Entrenamiento de los datos para seguimiento de carretera - **Anthony**
---
Para realizar el entrenamiento de la red neuronal de seguimiento, se tomará el set de datos capturado previamente en el notebook data_collection. Para esto se utilizará PyTorch y una red ResNet-18 ya preentrenada.

# Importaciones necesarias

In [1]:
import torch
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms
import glob
import PIL.Image
import os
import numpy as np

# Instancias del dataset

---

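En esta parte del código obtenemos los valores de "x" y "y" de la etiqueta (label) codificada en el nombre de cada imagen. Además, el dataset contempla transformaciones de aumento de datos sobre las imágenes: color jitter y volteos horizontales (horizontal flips) aleatorios, en los que también se invierte el signo de "x". Esto es útil cuando se quiere seguir un camino que no sea recto, ya que la red también ve versiones espejadas de las imágenes.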

In [2]:
def get_x(path):
    """Decode the normalized x target stored in an image filename.

    Characters 3 to 5 of ``path`` are parsed as an integer and rescaled so
    that 50 maps to 0.0 (0 -> -1.0, 100 -> 1.0).
    """
    raw_value = int(path[3:6])
    centered = float(raw_value) - 50.0
    return centered / 50.0


def get_y(path):
    """Decode the normalized y target stored in an image filename.

    Characters 7 to 9 of ``path`` are parsed as an integer and rescaled so
    that 50 maps to 0.0 (0 -> -1.0, 100 -> 1.0).
    """
    raw_value = int(path[7:10])
    centered = float(raw_value) - 50.0
    return centered / 50.0

class XYDataset(torch.utils.data.Dataset):
    """Image dataset whose (x, y) regression target is encoded in each filename.

    Every ``*.jpg`` file found directly inside ``directory`` is one sample.
    The target is decoded from the file's basename with ``get_x`` / ``get_y``.

    Args:
        directory: Folder that contains the ``*.jpg`` images.
        random_hflips: When true, each sample is mirrored horizontally with
            probability 0.5 and the sign of ``x`` is inverted to match.
            When false, no flipping is applied.
    """

    def __init__(self, directory, random_hflips=False):
        self.directory = directory
        self.random_hflips = random_hflips
        self.image_paths = glob.glob(os.path.join(self.directory, '*.jpg'))
        self.color_jitter = transforms.ColorJitter(0.3, 0.3, 0.3, 0.3)

    def __len__(self):
        """Return the number of image files found in the directory."""
        return len(self.image_paths)

    def _should_flip(self):
        """Return True when the current sample must be mirrored horizontally."""
        # The flag gates the augmentation; previously it was stored but never
        # consulted, so flips happened even with random_hflips=False.
        return bool(self.random_hflips) and np.random.rand() > 0.5

    def __getitem__(self, idx):
        """Load, augment and normalize the sample at position ``idx``.

        Returns:
            A tuple ``(image, target)`` where ``image`` is a float tensor
            resized to 224x224 and ``target`` is a float tensor ``[x, y]``.
        """
        image_path = self.image_paths[idx]
        filename = os.path.basename(image_path)

        # Force three channels so grayscale/CMYK JPEGs still match the
        # 3-value mean/std used below; the context manager closes the file.
        with PIL.Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')
        x = float(get_x(filename))
        y = float(get_y(filename))

        if self._should_flip():
            image = transforms.functional.hflip(image)
            # Mirroring the picture mirrors the horizontal target as well.
            x = -x

        image = self.color_jitter(image)
        image = transforms.functional.resize(image, (224, 224))
        image = transforms.functional.to_tensor(image)
        # Reverse the channel axis (RGB -> BGR); .copy() removes the negative
        # stride that torch.from_numpy cannot handle.
        # NOTE(review): presumably matches the channel order used at
        # inference time -- confirm against the deployment code.
        image = image.numpy()[::-1].copy()
        image = torch.from_numpy(image)
        # Per-channel mean/std normalization, applied after the reversal above.
        image = transforms.functional.normalize(image, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

        return image, torch.tensor([x, y]).float()
    
# Build the dataset from every *.jpg file inside the 'dataset' directory.
dataset = XYDataset('dataset', random_hflips=False)

# Dividir el dataset

---

Se divide el dataset en train y test. En este caso 90% destinado para entrenamiento y 10% para prueba.

In [3]:
# Show the sample count twice: through len() and through the dunder call.
print(len(dataset), dataset.__len__(), sep='\n')

711
711


In [4]:
# Fraction of the samples held out for evaluation.
test_percent = 0.1
# Truncate towards zero so the hold-out size is a whole number of samples.
num_test = int(len(dataset) * test_percent)
print(num_test)
# Randomly partition the samples; whatever is not held out is used to train.
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    lengths=[len(dataset) - num_test, num_test],
)

71


In [5]:
# Show the size of each partition, training first, one value per line.
print(len(train_dataset), len(test_dataset), sep='\n')

640
71


# Creación de los data loaders y batch size

---

Usamos la clase ``DataLoader`` para cargar los datos en batches y habilitamos shuffle para que las imágenes se tomen en orden aleatorio.

In [6]:
# Training batches are reshuffled so each epoch visits the samples in a
# different order.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=4
)

# Evaluation does not benefit from shuffling: sample order has no effect on
# learning, and a fixed order keeps the batch composition (including the
# partial last batch) the same from one epoch to the next.
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=16,
    shuffle=False,
    num_workers=4
)

# Definir la red neuronal

---

Utilizando ResNet-18, se crea el modelo ya preentrenado.

In [7]:
# Instantiate ResNet-18 and request its pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
# of `weights=` -- confirm against the installed version before changing.
model = models.resnet18(pretrained=True)

In [8]:
# Swap the final fully connected layer for one with two outputs (x, y).
# The input width is read from the existing layer instead of hard-coding 512.
model.fc = torch.nn.Linear(model.fc.in_features, 2)
# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Entrenamiento

---

Se entrena por 150 épocas, que fue el número de épocas que mostró mejor resultado.

In [9]:
# Number of full passes over the training set.
NUM_EPOCHS = 150
# Where the weights of the best-performing epoch are stored.
BEST_MODEL_PATH = 'best_steering_model_xy.pth'
# Lowest averaged test loss seen so far; starts high so epoch 0 always saves.
best_loss = 1e9

optimizer = optim.Adam(model.parameters())

for epoch in range(NUM_EPOCHS):
    # ---- training pass ----
    model.train()
    train_loss = 0.0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = F.mse_loss(outputs, labels)
        train_loss += float(loss)
        loss.backward()
        optimizer.step()
    # Mean of the per-batch losses.
    train_loss /= len(train_loader)

    # ---- evaluation pass ----
    model.eval()
    test_loss = 0.0
    # No gradients are needed here; disabling autograd avoids building a
    # graph for every batch, which saves memory and time.
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            loss = F.mse_loss(outputs, labels)
            test_loss += float(loss)
    test_loss /= len(test_loader)

    # NOTE: the "Accuracy" figures are simply 1 - MSE, not a classification
    # accuracy.
    print('Época: %f - ( TrainAccuracy: %f, TrainLoss: %f ) - ( TestAccuracy: %f, TestLoss: %f )' % (epoch, 1.0-train_loss, train_loss, 1.0-test_loss,test_loss))
    # Keep only the weights of the epoch with the lowest test loss.
    if test_loss < best_loss:
        torch.save(model.state_dict(), BEST_MODEL_PATH)
        best_loss = test_loss

Época: 0.000000 - ( TrainAccuracy: 0.684638, TrainLoss: 0.315362 ) - ( TestAccuracy: 0.858006, TestLoss: 0.141994 )
Época: 1.000000 - ( TrainAccuracy: 0.961070, TrainLoss: 0.038930 ) - ( TestAccuracy: 0.983156, TestLoss: 0.016844 )
Época: 2.000000 - ( TrainAccuracy: 0.968444, TrainLoss: 0.031556 ) - ( TestAccuracy: 0.977663, TestLoss: 0.022337 )
Época: 3.000000 - ( TrainAccuracy: 0.968376, TrainLoss: 0.031624 ) - ( TestAccuracy: 0.956763, TestLoss: 0.043237 )
Época: 4.000000 - ( TrainAccuracy: 0.977047, TrainLoss: 0.022953 ) - ( TestAccuracy: 0.986648, TestLoss: 0.013352 )
Época: 5.000000 - ( TrainAccuracy: 0.975510, TrainLoss: 0.024490 ) - ( TestAccuracy: 0.986962, TestLoss: 0.013038 )
Época: 6.000000 - ( TrainAccuracy: 0.983246, TrainLoss: 0.016754 ) - ( TestAccuracy: 0.985147, TestLoss: 0.014853 )
Época: 7.000000 - ( TrainAccuracy: 0.976358, TrainLoss: 0.023642 ) - ( TestAccuracy: 0.984133, TestLoss: 0.015867 )
Época: 8.000000 - ( TrainAccuracy: 0.986330, TrainLoss: 0.013670 ) - ( T

In [12]:
# best_loss holds the lowest averaged test MSE recorded by the training loop,
# so this prints 1 - MSE (the same "accuracy" proxy shown in the epoch log).
print(1-best_loss)

0.9953351861797273
