In [1]:
from learningAgent import LearningAlgorithm
from environment import Model, AdversaryModes
from Qtable import QTable
from test import Test
import numpy as np
import matplotlib.pyplot as plt

In [2]:
np.random.seed(10)

In [3]:
total_demand = 400
agent_cost = 57
adversary_cost = 71 
costs = [agent_cost,adversary_cost]
total_stages = 25
adversary_probabilities=[0]*len(AdversaryModes)
adversary_probabilities[10]=1 
game = Model(total_demand, costs, total_stages, adversary_probabilities)

In [4]:
number_actions = 50
number_demands = total_demand + 1
discount_factor = 1
number_episodes = 50_000_000
constant = int(number_episodes/49)
print(constant)

1020408


In [5]:
# Learning rate is given as [numerator,denominator] which gives us a learning rate function of 
# numerator/(n+demoninator)
Qtable = QTable(number_demands, number_actions, total_stages , learning_rate = [constant,constant])

In [6]:
algorithm = LearningAlgorithm(game, Qtable, 0, discount_factor)

In [7]:
algorithm.solver()

In [8]:
number_episodes_per_round = 10_000
number_rounds = int(number_episodes / number_episodes_per_round)
errors = np.zeros(number_rounds)
final_round = 0
for round_ in range(number_rounds):
    algorithm.continue_learning(number_episodes_per_round,number_episodes_per_round * round_ +1)
    result = Test(game, Qtable, discount_factor, adversary_probabilities)
    errors[round_] = result.error(1000)
    if round_ % 50 == 0:
        print(round_, errors[round_])
    if errors[round_] < 0.01 and errors[round_ - 1] < 0.01:
        print(round_)
        final_round = round_
        break
plt.plot(errors[0:final_round+1])

0 0.795472038355887
50 0.25050192282848305
100 0.1314533270309719
150 0.08882459163313133
200 0.06712096299079127
250 0.05085760304495093
300 0.04097417041593418
350 0.03637579194950349
400 0.030898606499155667
450 0.028683529377610595
500 0.027463582940689926
550 0.02634285283995803
600 0.021433646034025044
650 0.01992216408164488
700 0.020794801476325335
750 0.017869043215106652
800 0.01727571257992942
850 0.01644042859699101
900 0.015756602501259643
950 0.014035095672107754
1000 0.013823627186053261
1050 0.01492686627069453
1100 0.01430506965806551
1150 0.012976279575543096
1200 0.011005894323642228
1250 0.014643302949923536
1300 0.01205352789222388


KeyboardInterrupt: 

In [9]:
payoff, adversary_payoff, actions, adversary_actions, demand_potential = result.total_payoff()
print(payoff)
print(adversary_payoff)
print(actions)
print(adversary_actions)
print(demand_potential)

100730.0
70906.0
[118 131 115 104 113 111  95 100  86  93 100  90  99  90  91  97  87  94
 101  83  93  91 100 106 121]
[132 132 132  99  94  94  94  94  94  94  94  94  94  94  94  94  94  94
  94  94  94  94  94  94 142]
[200. 207. 207. 215. 212. 202. 193. 192. 189. 193. 193. 190. 192. 189.
 191. 192. 190. 193. 193. 189. 194. 194. 195. 192. 186.]


In [11]:
episodes_left = number_episodes - ((final_round + 1) * number_episodes_per_round)
number_episodes_per_round = 500_000
number_rounds = int(episodes_left / number_episodes_per_round)
for round_ in range(number_rounds): 
    algorithm.epsilon_greedy_learning(number_episodes_per_round)
    result = Test(game, Qtable, discount_factor, adversary_probabilities)
    payoff, _, actions, _, _ = result.total_payoff()
    print('Round ', round_, ' of ', number_rounds)
    print('Current payoff: ', payoff)
    print('Current actions:', actions)

Round  0  of  73
Best payoff:  119120
Best actions:  [118, 132, 115, 113, 132, 92, 119, 119, 115, 105, 110, 105, 120, 116, 95, 129, 109, 90, 109, 124, 98, 130, 81, 124, 117]
Current payoff:  115534.0
Current actions: [118 121 113 109 133  90 108 115 105  98  98 109 120 102  89 103 118  99
 100  92 108  98 113 115 118]
Round  1  of  73
Best payoff:  119416
Best actions:  [122, 127, 128, 127, 115, 132, 113, 97, 110, 115, 111, 99, 113, 92, 105, 115, 101, 115, 89, 113, 110, 100, 99, 116, 127]
Current payoff:  56584.0
Current actions: [122 127 128 127  94 103  99  72  72  69  72  72  62  67  65  63  65  65
  62  60  89  75  87  87  89]
Round  2  of  73
Best payoff:  121707
Best actions:  [122, 127, 128, 127, 126, 124, 116, 133, 104, 123, 118, 92, 108, 119, 96, 116, 95, 128, 111, 103, 106, 108, 88, 102, 128]
Current payoff:  68001.0
Current actions: [122 127 128 127 126 124  94  99 114  85  68  68  68  64  67  70  67  68
  67  61  74  79  76  92  96]
Round  3  of  73
Best payoff:  123015
Bes

Best payoff:  133453
Best actions:  [118, 132, 128, 125, 122, 122, 128, 113, 117, 123, 119, 127, 114, 122, 116, 124, 117, 120, 123, 116, 126, 112, 111, 113, 131]
Current payoff:  133434.0
Current actions: [118 132 128 125 122 122 128 113 117 123 119 127 114 122 116 124 117 120
 123 116 126 112 110 114 131]
Round  27  of  73
Best payoff:  133441
Best actions:  [118, 132, 128, 125, 122, 122, 128, 113, 117, 123, 119, 128, 114, 122, 116, 124, 117, 120, 123, 116, 126, 112, 110, 114, 131]
Current payoff:  133434.0
Current actions: [118 132 128 125 122 122 128 113 117 123 119 127 114 122 116 124 117 120
 123 116 126 112 110 114 131]
Round  28  of  73
Best payoff:  133649
Best actions:  [117, 132, 128, 125, 122, 122, 127, 113, 117, 123, 119, 122, 119, 122, 116, 124, 117, 120, 123, 116, 126, 112, 110, 114, 131]
Current payoff:  133286.0
Current actions: [118 132 128 125 122 122 127 113 117 123 122 128 105 128 116 124 117 122
 114 131 107 123 110 114 131]
Round  29  of  73
Best payoff:  134014
B

Best payoff:  135720
Best actions:  [119, 129, 132, 126, 123, 120, 120, 119, 116, 118, 118, 123, 114, 116, 117, 118, 118, 117, 121, 118, 116, 115, 118, 112, 132]
Current payoff:  135648.0
Current actions: [119 129 132 126 123 120 122 118 115 118 118 123 114 116 117 118 118 117
 121 118 116 115 118 112 133]
Round  53  of  73
Best payoff:  136017
Best actions:  [118, 131, 132, 126, 123, 120, 120, 118, 117, 117, 117, 120, 118, 116, 117, 118, 118, 120, 120, 117, 114, 118, 118, 112, 133]
Current payoff:  135954.0
Current actions: [118 131 132 126 123 120 120 119 116 117 117 120 118 116 117 118 118 120
 120 117 114 118 118 112 133]
Round  54  of  73
Best payoff:  136178
Best actions:  [118, 131, 132, 131, 125, 122, 120, 119, 118, 118, 118, 120, 117, 115, 117, 118, 118, 120, 120, 117, 114, 118, 118, 112, 133]
Current payoff:  134947.0
Current actions: [122 128 131 125 123 120 132 106 123 113 116 120 118 116 120 118 116 120
 120 117 114 118 118 112 133]
Round  55  of  73
Best payoff:  136252
B

In [12]:
result = Test(game, Qtable, discount_factor, adversary_probabilities)

In [13]:
payoff, adversary_payoff, actions, adversary_actions, demand_potential = result.total_payoff()
print(payoff)
print(adversary_payoff)
print(actions)
print(adversary_actions)
print(demand_potential)

135067.0
88978.0
[118 131 132 132 126 124 121 118 117 118 116 119 111 114 118 113 120 112
 116 118 114 116 114 116 131]
[132 132 132 132 132 120 122 119 116 116 116 116 116 116 116 116 116 116
 116 116 116 116 116 112 132]
[200. 207. 207. 207. 207. 210. 208. 208. 208. 207. 206. 206. 204. 206.
 207. 206. 207. 205. 207. 207. 206. 207. 207. 208. 206.]


In [14]:
np.count_nonzero(Qtable.Q_table) / (number_actions * number_demands * number_actions* total_stages)

0.17069629925187033