In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
import torchvision
from torchvision import datasets, transforms

import numpy as np
import pandas as pd

device = 'cuda' if torch.cuda.is_available() else 'cpu'

from Agent import Agent
agent = Agent(n_heads=16).to(device)

from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter('tensorboard/loss')

In [6]:
class dataset(Dataset):
    def __init__(self):
        columns = ['center', 'left', 'right', 'steering', 'throttle', 'reverse', 'speed']
        self.metadata = pd.read_csv("./sim-track-data/driving_log.csv", names=columns)
        
        self.transform = torchvision.transforms.Resize((256,256))

    def __len__(self):
        return len(self.metadata)

    def __getitem__(self, idx):
        obs_path = self.metadata.iloc[idx]["center"]
        steer = self.metadata.iloc[idx]["steering"]
        throttle = self.metadata.iloc[idx]["throttle"]
        reverse = self.metadata.iloc[idx]["reverse"]
        
        obs = torchvision.io.read_image("./sim-track-data/IMG/"+os.path.basename(obs_path)).float()
        obs = self.transform(obs).to(device)
        return obs, [steer, throttle, reverse]
    

my_dataset = dataset()
batch_size = 32
dataloader = DataLoader(my_dataset, batch_size=batch_size, shuffle=True)


In [9]:
lr = 1e-2

brain_optimizer = torch.optim.Adam(agent.get_brain_parameters(), lr=lr*1e-2, 
                    betas=(0.92, 0.999))
loss_fn = nn.MSELoss()


def train_loop(epoch):
    size = len(dataloader)
    for batch, (observations, label) in enumerate(dataloader):
        
        label = torch.cat([x.float() for x in label])
        
        agent.attention_model.prev_Q = torch.zeros(16, 256, 16, 16).to(device) 

        task_memory_optimizer = torch.optim.Adam(agent.get_task_memory_parameters(task), lr=lr, 
                            betas=(0.92, 0.999))

        loss = 0
        for i in range(batch_size):
            pred_action_dist = agent(observations[i], task)
            loss = loss_fn(pred_action_dist, label[i])
        

        writer.add_scalar('action loss',
            loss / 6000,
            batch + size*epoch)

        brain_optimizer.zero_grad()
        task_memory_optimizer.zero_grad()

        loss.backward()

        brain_optimizer.step()
        task_memory_optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch
            print(f"loss: {loss:>7f}  [Epoch: {epoch}; {current:>5d}/{size:>5d}]")


In [10]:
epochs = 300
task = "driving"
for epoch in range(epochs):
    train_loop(epoch)
    agent.save_parameters()

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
  return F.mse_loss(input, target, reduction=self.reduction)


loss: 71572.656250  [Epoch: 0;     0/  520]


KeyboardInterrupt: 