In [1]:
from learningActorCritic import ReinforceAlgorithm
from environmentModel import Model, AdversaryModes
from neuralNetwork import NeuralNetwork
import torch
import torch.nn as nn
from torch.distributions import Categorical

In [2]:
# Mixed-adversary distribution: one slot per AdversaryModes member, with equal
# weight on slots 0, 2 and 10 and zero everywhere else.
adversaryProbs = torch.zeros(len(AdversaryModes))
adversaryProbs[[0, 2, 10]] = 1 / 3

# Pricing environment that the learner will be trained against.
game = Model(
    totalDemand=400,
    tupleCosts=(57, 71),
    totalStages=25,
    adversaryProbs=adversaryProbs,
    advHistoryNum=2,
)

# Display the distribution as a sanity check.
adversaryProbs

tensor([0.3333, 0.0000, 0.3333, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.3333, 0.0000])

In [3]:
# Reset the environment and display what it returns. The recorded output is a
# 3-tuple: a 5-element tensor, 0 and False.
# NOTE(review): presumably (initial state, reward, done) — confirm in Model.
game.reset()

(tensor([  0.0000, 200.0000, 128.5000,   0.0000,   0.0000]), 0, False)

In [4]:
# Query the adversary's price for the freshly reset game (recorded output: 135.5).
# NOTE(review): with a mixed adversaryProbs this value presumably depends on
# which adversary mode is active — confirm in Model.
game.adversaryChoosePrice()

135.5

In [5]:
# Network input width is 3 plus the number of remembered adversary moves
# (5 here, which matches the length of the tensor shown by game.reset()).
neuralNet = NeuralNetwork(
    num_input=game.advHistoryNum + 3,
    lr=0.00001,
)

# Training driver: a single iteration of 5,000,000 episodes with discount 0.9.
algorithm = ReinforceAlgorithm(
    game,
    neuralNet,
    numberIterations=1,
    numberEpisodes=5_000_000,
    discountFactor=0.9,
)

In [None]:
# Run training. This is the long-running cell: numberEpisodes is 5,000,000.
# The recorded output shows a progress block (adversary, actions, loss terms,
# return, action probabilities) at episodes 0 and 10000.
# NOTE(review): logging interval looks like every 10000 episodes — confirm in solver().
algorithm.solver()

--------------------------------------------------
0   adversary:  AdversaryModes.constant_95
  actions:  tensor([ 1, 48, 19, 38, 38, 28, 14, 15, 16, 42,  8,  2, 40, 38, 20, 40, 17, 25,
        38, 20, 19, 36, 41, 10, 45])
loss=  tensor(539.3247, grad_fn=<AddBackward0>)   , actor=  tensor(209.6115, grad_fn=<DivBackward0>)   , critic=  tensor(3297.1318, grad_fn=<SumBackward0>)   , return=  83198.79199274778
probs of actions:  tensor([0.0224, 0.0256, 0.0198, 0.0197, 0.0197, 0.0186, 0.0208, 0.0187, 0.0219,
        0.0183, 0.0189, 0.0173, 0.0178, 0.0196, 0.0201, 0.0178, 0.0230, 0.0217,
        0.0196, 0.0201, 0.0194, 0.0203, 0.0168, 0.0195, 0.0236],
       grad_fn=<ExpBackward0>)
shouldBreak: False
--------------------------------------------------
10000   adversary:  AdversaryModes.guess_132
  actions:  tensor([ 2, 38, 13,  4, 40, 20, 44, 23,  3,  1, 47, 49, 25, 24,  2, 22,  5, 27,
         9, 16, 40, 39, 18, 28, 21])
loss=  tensor(27.6274, grad_fn=<AddBackward0>)   , actor=  tensor(-159.

In [None]:
# Describe the adversary mix as "<mode name>-<probability>-" for every mode
# with non-zero probability (probability shown with one decimal place).
advModeNames = "".join(
    f"{AdversaryModes(modeIdx).name}-{modeProb:.1f}-"
    for modeIdx, modeProb in enumerate(adversaryProbs)
    if modeProb != 0
)

# Checkpoint name records the run settings: episodes, adversary mix,
# history length and learning rate.
name = (
    f"ep {algorithm.numberEpisodes}, {advModeNames}, "
    f"{game.advHistoryNum} hist, {neuralNet.lr} lr"
)
neuralNet.save(name=name)

In [None]:
import pandas as pd

# Environment histories, transposed before plotting/inspection.
# NOTE(review): presumably stored as (player, stage) and transposed to one row
# per stage — confirm against Model.
profits = pd.DataFrame(game.profit).transpose()
prices = pd.DataFrame(game.prices).transpose()
demandPotential = pd.DataFrame(game.demandPotential).transpose()

# Training curves taken from slot 0 of the algorithm's records
# (numberIterations is 1, so presumably the only iteration — TODO confirm).
learning = pd.DataFrame(algorithm.returns[0], columns=["entry"])
loss = pd.DataFrame(algorithm.loss[0], columns=["entry"])

# Loss curve over training.
loss.plot()

In [None]:
# Plot the demand-potential history held by the environment, then show the
# underlying table as the cell output.
demandPotential.plot()
demandPotential

In [None]:
# Column-wise totals of the profit table (sum taken down the rows).
profits.sum(axis=0)

In [None]:
# Plot the profit history held by the environment, then show the underlying
# table as the cell output.
profits.plot()
profits


In [None]:
# Plot the price history held by the environment, then show the underlying
# table as the cell output.
prices.plot()
prices

In [None]:
# Raw (unsmoothed) training returns from algorithm.returns[0]; a smoothed
# version is computed from the same data further down.
learning.plot()

In [None]:
# pricelearning = pd.DataFrame(game.prices.mean(axis = 0))
# pricelearning.plot()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Flatten the single-column return history into a plain list for smoothing.
#
# np.asarray accepts both the original DataFrame and the ndarray this cell
# leaves behind, so the cell can be re-run safely; calling .to_numpy() on the
# already-converted array would raise AttributeError on a second run.
learning = np.asarray(learning)

# Column 0 is the only column ('entry'); slicing replaces the per-row loop.
learning_2 = list(learning[:, 0])

In [None]:
# 1000-episode moving average of the returns. mode='valid' keeps only the
# positions where the averaging window lies fully inside the data, so no
# zero-padded edge values are produced.
avg_learning = np.convolve(
    learning_2,
    np.full(1000, 1 / 1000),
    mode="valid",
)

In [None]:
# Smoothed learning curve: the 1000-point moving average of the returns.
plt.plot(avg_learning)
plt.show()