In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt

from PolicyNetwork import PolicyNetwork
from ValueNetwork import ValueNetwork

In [None]:
# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def _load_network(network, path):
    """Restore a network from the checkpoint at `path` and place it on `device`.

    Args:
        network: Freshly constructed module; used as the target when the
            checkpoint holds a state dict instead of a whole pickled module.
        path: Location of the `.pth` checkpoint file.

    Returns:
        The restored module, moved to `device`.
    """
    # NOTE: torch.load unpickles the file, so only load checkpoints you trust.
    # NOTE(review): recent PyTorch releases default to weights_only=True, which
    # rejects whole pickled modules -- confirm how these files were saved.
    # map_location lets a checkpoint saved on a GPU be opened on a CPU-only machine.
    checkpoint = torch.load(path, map_location=device)
    if isinstance(checkpoint, nn.Module):
        # The file stores the complete module (saved with torch.save(model, ...)).
        network = checkpoint
    else:
        # The file stores only the parameters (saved with torch.save(model.state_dict(), ...)).
        network.load_state_dict(checkpoint)
    return network.to(device)


accel_policy = _load_network(PolicyNetwork(), 'acceleration_network.pth')
turn_policy = _load_network(PolicyNetwork(), 'turn_network.pth')
value_function = _load_network(ValueNetwork(), 'value_network.pth')


In [None]:

# Load and read trajectory csv file
trajectories_file = 'trajectories.csv'
trajectories = pd.read_csv(trajectories_file)

# Split each row into state, action and reward columns:
# the last column is the reward, the two before it are the action,
# and everything to the left of those is the state.
states = trajectories.iloc[:, :-3].values
actions = trajectories.iloc[:, -3:-1].values
rewards = trajectories.iloc[:, -1:].values

# Calculate reward-to-go: the reward at step i plus every reward after it,
# i.e. a cumulative sum taken from the end of the file backwards.
# The result keeps the (N, 1) column shape of `rewards`.
# NOTE(review): this treats the whole file as one undiscounted trajectory --
# confirm the csv does not concatenate several episodes.
reward_to_go = rewards[::-1].cumsum(axis=0)[::-1].copy()  # copy() drops the negative stride

# Store in dataset. TensorDataset only accepts tensors, and pandas yields
# float64 by default, so convert to float32 tensors here.
states_tensor = torch.as_tensor(states, dtype=torch.float32)
reward_to_go_tensor = torch.as_tensor(reward_to_go, dtype=torch.float32)
dataset = TensorDataset(states_tensor, reward_to_go_tensor)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

Train value function

In [None]:
# Fit the value function to the reward-to-go targets with a mean-squared-error loss.
loss_function = nn.MSELoss()
optimizer = optim.Adam(value_function.parameters(), lr=0.001, weight_decay=0.001)

# The batches are moved to `device` below, so the model has to live there too.
value_function.to(device)
value_function.train()

# Feed the model batches in the same dtype as its parameters.
param_dtype = next(value_function.parameters()).dtype

losses = []  # sample-weighted mean training loss of each epoch
epochs = 1000
for epoch in range(epochs):
    epoch_loss = 0.0
    samples_seen = 0
    for inputs_batch, outputs_batch in dataloader:
        # Move batches to the training device
        inputs_batch = inputs_batch.to(device=device, dtype=param_dtype)
        outputs_batch = outputs_batch.to(device=device, dtype=param_dtype)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        predictions = value_function(inputs_batch)

        # Compute the loss
        loss = loss_function(predictions, outputs_batch)

        # Backward pass
        loss.backward()

        # Update weights
        optimizer.step()

        # Accumulate the loss weighted by batch size so that a smaller final
        # batch does not skew the epoch average.
        batch_size = inputs_batch.size(0)
        epoch_loss += loss.item() * batch_size
        samples_seen += batch_size

    # Record the epoch average rather than only the last batch's loss;
    # skip the entry entirely when the dataloader produced no batches.
    if samples_seen:
        losses.append(epoch_loss / samples_seen)

fig, ax = plt.subplots()
ax.plot(losses)
ax.set(title='Value function training loss', xlabel='Epoch', ylabel='Mean MSE loss')
plt.show()