In [1]:
from learningAgent import LearningAlgorithm
from environment import Model, AdversaryModes
from Qtable import QTable
from test import Test
import numpy as np

In [2]:
# Seed NumPy's legacy global random generator so that every draw made through
# np.random after this cell is repeatable from run to run.
np.random.seed(seed=10)

In [3]:
# --- Game configuration ---
# Total demand and the cost of each player (agent first, adversary second).
total_demand = 400
agent_cost, adversary_cost = 57, 71
costs = [agent_cost, adversary_cost]

# Number of stages handed to the model.
total_stages = 25

# Total demand split evenly into two halves.
# NOTE(review): initial_state is not passed to Model below and is not read by
# any other visible cell -- confirm whether it is still needed.
initial_state = [total_demand / 2] * 2

# One probability per AdversaryModes entry; all of the mass is placed on index 10.
adversary_probabilities = [0 for _ in range(len(AdversaryModes))]
adversary_probabilities[10] = 1

game = Model(total_demand, costs, total_stages, adversary_probabilities)

In [4]:
# Sizes passed to QTable for its action and demand dimensions.
number_actions = 50
# One more than total_demand.
# NOTE(review): presumably so that every demand value 0..total_demand has its own index -- confirm in Qtable.
number_demands = total_demand + 1

In [5]:
# The learning rate is supplied as [numerator, denominator], which gives a
# learning-rate function of
#     numerator / (n + denominator)
# NOTE(review): n is presumably an update/visit counter kept by QTable -- confirm.
Qtable = QTable(
    number_demands,
    number_actions,
    total_stages,
    learning_rate=[1_000_000, 1_000_000],
)

In [6]:
# Training settings handed to LearningAlgorithm.
number_episodes = 1_000_000
# NOTE(review): a discount factor of 1 presumably means later-stage payoffs are
# weighted the same as immediate ones -- confirm in learningAgent.
discount_factor = 1

In [7]:
# Tie the game model, the Q-table and the training settings together in the learner.
algorithm = LearningAlgorithm(game, Qtable, number_episodes, discount_factor)

In [8]:
# Run the learner's solver on the configured game and Q-table.
# NOTE(review): presumably this fills Qtable in place over number_episodes episodes;
# the recorded run printed a counter every 250,000 -- confirm in learningAgent.
algorithm.solver()

0
250000
500000
750000
124696
124696
124696


In [9]:
# Show every entry along the middle axis for first-axis index 200 and last-axis index 0.
# NOTE(review): given the QTable argument order (demands, actions, stages) this is
# presumably the Q-value of each action at demand 200 in stage 0 -- confirm in Qtable.
Qtable.Q_table[200, :, 0]

array([113911.83, 114009.83, 114142.18, 114228.91, 114705.89, 114795.89,
       114784.28, 114875.64, 115470.88, 115555.6 , 115793.71, 115871.71,
       116099.54, 116173.23, 116515.16, 116585.15, 116680.51, 116746.51,
       117124.79, 117186.79, 117251.72, 117251.93, 117368.36, 117438.03,
       117952.62, 118002.65, 117871.09, 117917.09, 117905.87, 117947.87,
       118067.29, 118086.84, 118379.51, 118413.51, 118214.4 , 118243.77,
       118520.11, 118546.11, 118767.77, 118790.73, 118630.41, 118648.54,
       118650.85, 118664.85, 118631.47, 118641.47, 118550.13, 118556.13,
       118541.42, 118543.41])

In [17]:
# Number of non-zero Q-table entries divided by actions * demands * stages.
# NOTE(review): reads as a coverage measure (share of the table that has been updated),
# assuming the table starts at zero and has exactly that many cells -- confirm in Qtable.
np.count_nonzero(Qtable.Q_table) / (number_actions * number_demands * total_stages)

0.22556608478802992

In [11]:
# Opponent mix used for testing: one probability per AdversaryModes entry, all zero
# except the selected index.
adversary_probabilities=[0]*len(AdversaryModes)
adversary_probabilities[10]=1 # The Q-table can be tested against any strategy by selecting a different index here.

In [12]:
# Build the Test helper from the game, the Q-table, the discount factor and the
# opponent mix chosen for testing.
result = Test(game, Qtable, discount_factor, adversary_probabilities)

In [13]:
import numpy as np
import matplotlib.pyplot as plt

In [14]:
# Play the game according to the Q-table and collect the outcome: the payoff of each
# player, the actions each player took, and the demand potential.
(
    payoff,
    adversary_payoff,
    actions,
    adversary_actions,
    demand_potential,
) = result.total_payoff()

# Emit each quantity on its own line, in the order it was returned.
print(
    payoff,
    adversary_payoff,
    actions,
    adversary_actions,
    demand_potential,
    sep="\n",
)

99478.0
62067.0
[118 132 107  99 110  97  91  94  98  86  91  77 104  80  90  99  83  95
  85  93  84  95  94 112 122]
[132 132 132  83  91  91  91  91  91  91  91  91  91  91  91  91  91  91
  91  91  91  91  91  91 142]
[200. 207. 207. 219. 211. 201. 198. 198. 196. 192. 194. 194. 201. 194.
 199. 199. 195. 199. 197. 200. 199. 202. 200. 198. 187.]


In [15]:
# The percentage error of the Q-table. It has to be measured against the same opponent
# the table was trained against, because it indicates how 'complete' the training is.
# NOTE(review): the recorded run printed "Div by zero error" many times and returned a
# negative value; 10000 is presumably a sample/iteration count -- confirm in test.py.
result.error(10000)

Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error
Div by zero error


-0.13327514527667506

In [16]:
# The following calculates the payoff that the Q-table achieves against each of the different opponents.
# It may reach a state for which the Q-table was not trained. This will cause an error saying
# either 'max demand reached' or 'min demand reached'.
# for i in range(len(AdversaryModes)):
#     print(AdversaryModes(i))
#     adversaryProbs=[0]*len(AdversaryModes)
#     adversaryProbs[i]=1
#     result = Test(game, Qtable, discount_factor, adversaryProbs)
#     payoff, advPayoff, actions, advActions, demandPotential = result.total_payoff()
#     print(payoff)
