In [None]:
from learningAgent import LearningAlgorithm
from environment import Model, AdversaryModes
from Qtable import QTable
from test import Test
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Seed NumPy's global random generator so that repeated runs of the notebook
# draw the same sequence of random numbers.
RANDOM_SEED = 10
np.random.seed(RANDOM_SEED)

In [None]:
# Game set-up: the parameters below are handed to Model(...) to build the game that the
# agent is trained on.
total_demand = 400
agent_cost = 57
adversary_cost = 71 
# Costs are passed as [agent, adversary], in that order.
costs = [agent_cost,adversary_cost]
total_stages = 25
# One entry per member of AdversaryModes, all starting at zero.
adversary_probabilities=[0]*len(AdversaryModes)
# Below is where we decide which adversaries we train against; see environment.py for the numbers.
# Replace * and ** with the two numbers associated with the opponents at the bottom of environment.py.
# NOTE: * and ** are template placeholders, not valid Python. This cell raises a SyntaxError
# until both are replaced with integer indices. The two chosen opponents get weight 0.5 each.
adversary_probabilities[*]= 0.5
adversary_probabilities[**] = 0.5
game = Model(total_demand, costs, total_stages, adversary_probabilities)

In [None]:
# Learning hyper-parameters. `number_actions` and `number_demands` are later passed to QTable
# as its dimensions (presumably demand values 0..total_demand inclusive, hence the +1 -- TODO confirm).
number_actions = 50
number_demands = 1 + total_demand
# A discount factor of 1 means future payoffs are not discounted.
discount_factor = 1
# Total training budget, in episodes, shared by both learning stages.
number_episodes = 100_000_000
# Constant used for both entries of the learning-rate pair given to QTable.
constant = number_episodes // 49
print(constant)

In [None]:
# The learning rate is supplied as a [numerator, denominator] pair, which gives the
# learning-rate function numerator / (n + denominator).
# Both entries are `constant` here, so the function is constant / (n + constant).
learning_rate_pair = [constant, constant]
Qtable = QTable(
    number_demands,
    number_actions,
    total_stages,
    learning_rate=learning_rate_pair,
)

In [None]:
# Build the learner from the game, the Q-table, the total episode budget and the discount
# factor. The two training cells below call its learning methods.
algorithm = LearningAlgorithm(
    game,
    Qtable,
    number_episodes,
    discount_factor,
)

In [None]:
# First stage of learning: actions are chosen randomly.
# Training runs in rounds of `number_episodes_per_round` episodes. After each round the
# current Q-table is evaluated with Test(...).error(1000) and the value is stored in `errors`.
# The loop stops early once the errors of the 10 preceding rounds are all below 0.01.
number_episodes_per_round = 100_000
number_rounds = int(number_episodes / number_episodes_per_round)
errors = np.zeros(number_rounds)
final_round = 0
for round_ in range(number_rounds):
    # The second argument is 1 + the number of episodes run in earlier rounds
    # (presumably the index of the first episode of this round -- TODO confirm).
    algorithm.continue_learning(number_episodes_per_round, number_episodes_per_round * round_ + 1)
    result = Test(game, Qtable, discount_factor, adversary_probabilities)
    errors[round_] = result.error(1000)
    # Track the last completed round on every iteration, not only on convergence. Otherwise
    # a run that never meets the stopping criterion leaves final_round at 0, so the plot
    # shows a single point and the next stage undercounts the episodes already used.
    final_round = round_
    if round_ % 50 == 0:
        print(round_, errors[round_])
    # NOTE(review): the window errors[round_-10:round_] covers the 10 previous rounds and
    # excludes the error just computed for this round -- confirm this is intended.
    if round_ > 10 and np.max(errors[round_-10:round_]) < 0.01:
        print(round_)
        break
# Plot the error of every round that was actually run.
plt.plot(errors[0:final_round+1])
plt.xlabel('Round')
plt.ylabel('Error')
plt.title('Stage one: error per round');

In [None]:
# Second stage of learning: actions are chosen according to an epsilon-greedy strategy.
# Episodes already used by the first stage: (final_round + 1) rounds of
# `number_episodes_per_round` episodes each. The rest of the budget is spent here.
episode_counter = (final_round + 1) * number_episodes_per_round
episodes_left = number_episodes - episode_counter
# Stage two uses its own round-size variable and does not rebind `number_episodes_per_round`.
# Rebinding it would make a re-run of this cell compute `episode_counter` from the stage-two
# round size instead of the stage-one round size.
epsilon_greedy_episodes_per_round = 500_000
number_rounds = int(episodes_left / epsilon_greedy_episodes_per_round)
for round_ in range(number_rounds):
    print('Round ', round_, ' of ', number_rounds)
    algorithm.epsilon_greedy_learning(epsilon_greedy_episodes_per_round, episode_counter)
    # Evaluate the current Q-table after every round to monitor progress.
    result = Test(game, Qtable, discount_factor, adversary_probabilities)
    payoff, _, actions, _, _ = result.total_payoff()
    print('Current payoff: ', payoff)
    print('Current actions:', actions)
    # Advance the running episode count passed to the next round.
    episode_counter += epsilon_greedy_episodes_per_round

In [None]:
# We now have the trained agent. We test it against each opponent individually, so that
# we can compare it with an agent that was trained only against that opponent.

In [None]:
# Evaluate the trained Q-table against the first opponent only.
# NOTE(review): this rebinds `adversary_probabilities`, which the training cells above also
# read. Re-running a training cell after this one would use the single-opponent list.
adversary_probabilities=[0]*len(AdversaryModes)
# Test against one type of opponent by changing the * below to the first number.
# NOTE: * is a template placeholder, not valid Python. Replace it before running this cell.
adversary_probabilities[*]=1
result = Test(game, Qtable, discount_factor, adversary_probabilities)
# total_payoff() returns five values, unpacked here in the order given on the left-hand side.
payoff, adversary_payoff, actions, adversary_actions, demand_potential = result.total_payoff()
print(payoff)
print(adversary_payoff)
print(actions)
print(adversary_actions)
print(demand_potential)

In [None]:
# Evaluate the trained Q-table against the second opponent only.
# NOTE(review): this rebinds `adversary_probabilities`, which the training cells above also
# read. Re-running a training cell after this one would use the single-opponent list.
adversary_probabilities=[0]*len(AdversaryModes)
# Test against one type of opponent by changing the ** below to the second number.
# NOTE: ** is a template placeholder, not valid Python. Replace it before running this cell.
adversary_probabilities[**]=1
result = Test(game, Qtable, discount_factor, adversary_probabilities)
# total_payoff() returns five values, unpacked here in the order given on the left-hand side.
payoff, adversary_payoff, actions, adversary_actions, demand_potential = result.total_payoff()
print(payoff)
print(adversary_payoff)
print(actions)
print(adversary_actions)
print(demand_potential)