In [1]:
from learningAgent import LearningAlgorithm
from environment import Model, AdversaryModes
from Qtable import QTable
from test import Test
import numpy as np

In [2]:
# Seed NumPy's global (legacy) random generator so that code drawing from
# np.random produces the same sequence on every fresh run.
# NOTE(review): assumes the project modules draw their randomness from
# np.random — confirm; other RNGs would need their own seed.
np.random.seed(10)

In [3]:
# Parameters handed to Model: total demand, the two costs (agent first,
# adversary second) and the number of stages.
total_demand = 400
total_stages = 25
agent_cost, adversary_cost = 57, 71
costs = [agent_cost, adversary_cost]

# One probability slot per entry of AdversaryModes; all of the weight is put
# on the first entry.
adversary_probabilities = [0 for _ in range(len(AdversaryModes))]
adversary_probabilities[0] = 1

game = Model(total_demand, costs, total_stages, adversary_probabilities)

In [4]:
# Dimensions later passed to QTable.
number_demands = total_demand + 1
number_actions = 50

In [5]:
# The learning rate is specified as [numerator, denominator], which gives a
# learning-rate function of numerator / (n + denominator).
# NOTE(review): n is presumably an update/visit counter — confirm in QTable.
Qtable = QTable(
    number_demands,
    number_actions,
    total_stages,
    learning_rate=[1_000_000, 1_000_000],
)

In [6]:
# Settings shared by the training and evaluation steps further down.
discount_factor = 1  # presumably means future payoffs are not discounted — confirm
number_episodes = 600_000

In [7]:
# Build the learner from the game, the Q-table and the training settings.
algorithm = LearningAlgorithm(
    game,
    Qtable,
    number_episodes,
    discount_factor,
)

In [8]:
# Run the learning algorithm. The recorded run printed 0, 200000 and 400000
# while executing.
# NOTE(review): these are presumably episode counters emitted every 200,000
# episodes, and the call presumably fills Qtable in place — confirm in
# LearningAlgorithm.solver.
algorithm.solver()

0
200000
400000


In [9]:
# Display one slice of the learned table: index 200 on the first axis, every
# entry along the second axis, and index 0 on the last two axes. The recorded
# output holds 50 values, which matches number_actions.
# NOTE(review): the first axis is presumably the demand potential
# (200 = total_demand / 2) — confirm the axis order in QTable.
Qtable.Q_table[200, :, 0, 0]

array([177352.63, 177449.74, 177542.09, 177417.78, 177510.47, 177497.56,
       177585.47, 177449.01, 177543.74, 177500.  , 177579.92, 177449.59,
       177511.67, 177475.29, 177546.65, 177405.91, 177462.66, 177446.03,
       177502.69, 177367.31, 177427.31, 177377.89, 177435.57, 177266.21,
       177330.67, 177255.09, 177313.57, 177160.47, 177207.57, 177114.32,
       177154.37, 176987.94, 177021.28, 176933.45, 176969.03, 176825.85,
       176829.1 , 176758.99, 176783.01, 176621.93, 176643.03, 176522.33,
       176538.27, 176374.67, 176387.39, 176264.46, 176272.46, 176067.85,
       176100.73, 175973.66])

In [10]:
# Fraction of Q-table entries that are non-zero after training.
# NOTE(review): the denominator assumes the table holds
# number_demands * number_actions * number_actions * total_stages entries;
# Qtable.Q_table.size would remove that assumption — confirm the shape first.
nonzero_entries = np.count_nonzero(Qtable.Q_table)
table_capacity = number_actions * number_demands * number_actions * total_stages
nonzero_entries / table_capacity

0.04459942144638404

In [11]:
# Adversary mix used for testing: one slot per entry of AdversaryModes, with
# all of the weight on the first entry.
# We can test the Q-table against any strategy.
adversary_probabilities = [0 for _ in range(len(AdversaryModes))]
adversary_probabilities[0] = 1

In [12]:
# Build the test object from the game, the trained Q-table, the discount
# factor and the adversary mix chosen for testing.
result = Test(
    game,
    Qtable,
    discount_factor,
    adversary_probabilities,
)

In [13]:
import numpy as np
import matplotlib.pyplot as plt

In [14]:
# total_payoff() returns the optimal payoff and actions according to the
# Q-table, together with the adversary's payoff and actions and the demand
# potential. Each value is printed on its own line, in that order.
payoff, adversary_payoff, actions, adversary_actions, demand_potential = result.total_payoff()
print(
    payoff,
    adversary_payoff,
    actions,
    adversary_actions,
    demand_potential,
    sep="\n",
)

178382.0
48623.0
[ 85  99 113 108 114 116 108 109 117 111 113 113 113 115 111 113 111 110
 116 109 116 113 117 128 146]
[135 123 117 116 114 114 114 113 112 113 113 113 113 113 113 113 113 112
 112 113 112 113 113 114 117]
[200. 225. 237. 239. 243. 243. 242. 245. 247. 244. 245. 245. 245. 245.
 244. 245. 245. 246. 247. 245. 247. 245. 245. 243. 236.]


In [15]:
# The percentage error of the Q-table. This has to be measured against the
# same opponent that it was trained against, as this is a measure of how
# 'complete' the training is.
# NOTE(review): the argument (10000) is presumably the number of evaluation
# samples or episodes — confirm in Test.error.
result.error(10000)

0.45939050716032276