In [6]:
import torch
import torch.nn as nn

import numpy as np
import matplotlib.pyplot as plt

In [21]:
class Actions:
    def __init__(self, actions):
        self.all_actions = actions
        self.history = [np.random.randint(len(actions))]
    
    def next(self, action):
        self.history.append(action)

    def last(self):
        return self.history[-1]

class Costs:
    def __init__(self):
        self.history = []
        self.accumulated = 0

    def receive(self, cost):
        self.accumulated += cost
        self.history.append(cost)

    def last(self):
        return self.history[-1]

class Brain:
    def __init__(self, input_size=2, hidden_size=4, num_layers=2):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)

        self.optimizer = torch.optim.Adam(self.rnn.parameters(), lr=0.01)

    def train(self, cost):
        self.optimizer.zero_grad()
        cost.backward()
        self.optimizer.step()

brain = Brain()

class Player:
    def __init__(self, actions):
        self.actions = Actions(actions)
        self.costs = Costs()

        self.brain = brain
        self.h = torch.randn(brain.num_layers, 1, brain.hidden_size)

    def take_action(self, state):
        self.out, self.h = self.brain.rnn(torch.as_tensor(state, dtype=torch.float32).view(1, 1, brain.input_size), self.h)
        action = self.out.argmax().item()
        self.actions.next(action)
        return action

    def receive_cost(self, cost):
        self.costs.receive(cost)

    def take_action_train(self, state):
        self.out, self.h = self.brain.rnn(torch.as_tensor(state, dtype=torch.float32).view(1, 1, brain.input_size), self.h)
        self.train_action = torch.softmax(self.out, dim=2)
        return self.train_action

In [66]:
class CongestionGame:
    def __init__(self, n_players=60, cost_setup=(15, 2, 0.2, 22.96)):
        self.cost_f, self.cost_a, self.cost_a_mult, self.cost_best = cost_setup
        self.actions = ['fa', 'fbf', 'af', 'aba']

        self.init_players(n_players)


    def init_players(self, n_players):
        self.players = [Player(self.actions) for _ in range(n_players)]
        first_actions = [player.actions.last() for player in self.players]

        self.n_a1 = first_actions.count(self.actions.index('af')) + first_actions.count(self.actions.index('aba'))
        self.n_a2 = first_actions.count(self.actions.index('fa')) + first_actions.count(self.actions.index('aba')) 

    def train_players(self, n_games=1):
        for _ in range(n_games):
            actions_sum = torch.zeros(1, 1, len(self.actions))
            for player in self.players:
                actions_sum += player.take_action_train((self.n_a1, self.n_a2))
            actions_sum = actions_sum.squeeze()
            self.n_a1 = actions_sum[2] + actions_sum[3]
            self.n_a2 = actions_sum[0] + actions_sum[3]

            self.total_cost = torch.tensor(0.0, requires_grad=True)
            self.total_cost = \
                actions_sum[0] * (self.cost_f + self.cost_a + self.cost_a_mult * self.n_a2) +\
                actions_sum[1] * (2 * self.cost_f ) +\
                actions_sum[2] * (self.cost_f + self.cost_a + self.cost_a_mult * self.n_a1) +\
                actions_sum[3] * (2 * self.cost_a + self.cost_a_mult * (self.n_a1 + self.n_a2))

            brain.train(self.total_cost)

In [67]:
game = CongestionGame()
game.players[0].actions.last()

1

In [58]:
game.n_a2

tensor(28.9331, grad_fn=<AddBackward0>)

In [69]:
game.total_cost

tensor(1382.1665, grad_fn=<AddBackward0>)

In [70]:
game.train_players(100)

RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.

In [10]:
class Brain:
    def __init__(self, input_size=2, hidden_size=4, num_layers=2):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)

        self.optimizer = torch.optim.Adam(self.rnn.parameters(), lr=0.01)

    def train(self, action, cost):
        self.optimizer.zero_grad()
        self.loss_fn(action, cost).backward()

brain = Brain()

In [17]:
x = torch.randn(1, 1, brain.input_size)
h = torch.randn(brain.num_layers, 1, brain.hidden_size)
out, h = brain.rnn(x, h)
action = torch.softmax(out, dim=2)
action.squeeze()[0]

tensor(0.0837, grad_fn=<SelectBackward0>)