In [1]:
from learningAgent import LearningAlgorithm
from environment import Model
from Qtable import QTable
from policyTable import PolicyTable
from test import Test
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from policyTable import PolicyTable
from play import Tournament
import re
import bimatrix
from fractions import Fraction

In [2]:
# np.random.seed(0)

In [3]:
# Game parameters shared by every cell below.
total_demand = 400
# The total demand is split evenly between the two players at the start.
initial_demands = [total_demand / 2] * 2
discount_factor = 1
# Unit costs of the low-cost and the high-cost player.
low_cost, high_cost = 57, 71
total_stages = 25

In [4]:
# Parameters for the adversary:
# `number_actions` is handed to the PolicyTable and QTable constructors as
# their second argument and is read as a notebook-level global by `training`.
number_actions = 60
# First constructor argument of PolicyTable; `training` passes the same
# `total_demand + 1` value to QTable.
# NOTE(review): presumably one entry per demand level 0..total_demand — confirm.
number_demands = total_demand + 1

In [5]:
# Initial populations: the high-cost side starts with a single policy table
# (the result of `random_reset()`), played with probability 1; the low-cost
# side starts empty and is filled by the training loop.
high_cost_player = PolicyTable(number_demands, number_actions, high_cost, total_stages).random_reset()
high_cost_players = [high_cost_player]
high_cost_probabilities = [1]
low_cost_players = []

# Number of learning episodes used for every call to `training`.
number_episodes = 500

In [6]:
def training(total_demand, discount_factor, costs, total_stages, adversary_probabilities, adversaries, number_episodes):
    """Train one agent against a mixture of adversaries and return its policy table.

    A `Model` is built from the game parameters and the adversary mixture, a
    fresh `QTable` is trained on it with `LearningAlgorithm.epsilon_greedy_learning`
    in 10 consecutive rounds, and the learned table is converted with
    `to_policy_table(costs[0])`.

    Parameters:
        total_demand: forwarded to `Model`; `total_demand + 1` sizes the Q-table.
        discount_factor: forwarded to `LearningAlgorithm`.
        costs: two-element sequence forwarded to `Model`; `costs[0]` is the cost
            attached to the returned policy table.
        total_stages: forwarded to `Model` and `QTable`.
        adversary_probabilities: mixing weights over `adversaries`.
        adversaries: list of opponent policies forwarded to `Model`.
        number_episodes: total number of learning episodes.

    Returns:
        Whatever `QTable.to_policy_table(costs[0])` returns.

    Note:
        The Q-table's action dimension is read from the notebook-level global
        `number_actions`, which must be defined before this function is called.
    """
    game = Model(total_demand, costs, total_stages, adversaries, adversary_probabilities)

    # Both learning-rate entries use the same constant, scaled with the episode
    # budget. NOTE(review): the divisor 49 is an unexplained magic number kept
    # from the original code — confirm its meaning against QTable.
    constant = int(number_episodes / 49)
    q_table = QTable(total_demand + 1, number_actions, total_stages, learning_rate=[constant, constant])
    algorithm = LearningAlgorithm(game, q_table, number_episodes, discount_factor)

    number_rounds = 10
    episodes_per_round = int(number_episodes / number_rounds)
    # Episodes that do not divide evenly into the rounds used to be silently
    # dropped; they are now added to the last round so that exactly
    # `number_episodes` episodes are run.
    leftover_episodes = max(0, int(number_episodes) - episodes_per_round * number_rounds)

    episode_counter = 0
    for round_index in range(number_rounds):
        episodes_this_round = episodes_per_round
        if round_index == number_rounds - 1:
            episodes_this_round += leftover_episodes
        algorithm.epsilon_greedy_learning(episodes_this_round, episode_counter, number_episodes)
        episode_counter += episodes_this_round
    return q_table.to_policy_table(costs[0])

In [7]:
def _write_payoff_matrix(handle, matrix):
    """Write one payoff matrix to `handle`: one line per row, then a blank line."""
    for row in matrix:
        # Every entry is followed by a single space, including the last one.
        # Payoffs are truncated to integers, as in the original code.
        # NOTE(review): presumably the solver's input format expects integers — confirm.
        handle.write("".join(str(int(entry)) + " " for entry in row))
        handle.write('\n')
    handle.write('\n')


def new_equilibrium(low_cost_players, high_cost_players, discount_factor, costs, total_stages, initial_demands,
                    game_file='game.txt', number_traces=100):
    """Play all policies against each other and solve the resulting bimatrix game.

    A `Tournament` produces the two payoff matrices, which are written to
    `game_file` (dimensions line, blank line, first matrix, blank line, second
    matrix, blank line). The file is then loaded with `bimatrix.bimatrix` and
    solved with `tracing(number_traces)`; the solver's textual result is parsed
    by `recover_probs`.

    Parameters:
        low_cost_players, high_cost_players: policy populations forwarded to
            `Tournament`.
        discount_factor, costs, total_stages, initial_demands: game parameters
            forwarded to `Tournament`.
        game_file: path of the intermediate game file (overwritten on each call).
        number_traces: argument passed to `tracing`.

    Returns:
        The tuple returned by `recover_probs`:
        (low_cost_probs, high_cost_probs, low_cost_support, high_cost_support).
    """
    tournament = Tournament(low_cost_players, high_cost_players, discount_factor, costs, total_stages, initial_demands)
    A, B = tournament.run_tournament()
    number_rows = len(A)
    number_columns = len(A[0])

    with open(game_file, 'w') as f:
        f.write(str(number_rows) + " " + str(number_columns) + '\n\n')
        _write_payoff_matrix(f, A)
        _write_payoff_matrix(f, B)

    game = bimatrix.bimatrix(game_file)
    equilibrium = game.tracing(number_traces)
    return recover_probs(equilibrium)
    

In [8]:
def recover_probs(test):
    """Parse an equilibrium description into probabilities and supports.

    `test` is expected to contain two parenthesised, comma-separated lists of
    fractions followed by two bracketed, comma-separated lists of integers,
    e.g. ``"(7/647,0,0,640/647),(203/204,1/204,0)\\n    supports: [0, 3][0, 1]"``.
    The first parenthesised list and the first bracketed list belong to the
    high-cost side, the second ones to the low-cost side.

    Returns:
        (low_cost_probs, high_cost_probs, low_cost_support, high_cost_support)
        where the probabilities are lists of floats and the supports are lists
        of ints.
    """
    def to_floats(text):
        # Entries may be written as integers or as "p/q" fractions.
        return [float(Fraction(item)) for item in text.split(',')]

    def to_indices(text):
        # `text` looks like "0, 3]": exactly one closing bracket is expected.
        inside, _ = text.split(']')
        return [int(item) for item in inside.split(',')]

    # Exactly two ")" are expected, yielding the two strategy chunks and a tail.
    high_chunk, low_chunk, tail = test.split(")")

    _, high_text = high_chunk.split("(")
    high_cost_probs = to_floats(high_text)
    low_cost_probs = to_floats(low_chunk.split("(")[1])

    # The tail holds the two supports: "...[high indices][low indices]".
    _, high_text, low_text = tail.split('[')
    low_cost_support = to_indices(low_text)
    high_cost_support = to_indices(high_text)

    return low_cost_probs, high_cost_probs, low_cost_support, high_cost_support

In [9]:
def return_distribution(number_players, cost_probs, cost_support):
    """Build a length-`number_players` probability list from an equilibrium.

    Every position listed in `cost_support` receives the value found at the
    same position in `cost_probs`; all other positions stay at 0.
    """
    distribution = [0] * number_players
    for strategy in cost_support:
        distribution[strategy] = cost_probs[strategy]
    return distribution

In [None]:
# Main loop: alternately train a new best-response policy for each side
# against the opponent's current mixture, add it to that side's population,
# and re-solve the game over all policies to update the mixtures.
for round_ in range(100):
    print(round_)
    # Train a low-cost policy (its own cost comes first in the cost list)
    # against the current mixture over the high-cost population.
    low_cost_player = training(total_demand, discount_factor, [low_cost, high_cost], total_stages, high_cost_probabilities, high_cost_players, number_episodes)
    low_cost_players.append(low_cost_player)
    # Re-solve the game with the enlarged low-cost population; only the
    # low-cost side of the equilibrium is used here.
    low_cost_probs, _, low_cost_support, _ = new_equilibrium(low_cost_players, high_cost_players, discount_factor, [low_cost, high_cost], total_stages, initial_demands)
    low_cost_probabilities = return_distribution(len(low_cost_players), low_cost_probs, low_cost_support)
    # Blank line between the two equilibrium reports of one round.
    print()
    # Train a high-cost policy (cost list reversed so its own cost comes
    # first) against the updated mixture over the low-cost population.
    new_high_cost_player = training(total_demand, discount_factor, [high_cost, low_cost], total_stages, low_cost_probabilities, low_cost_players, number_episodes)
    high_cost_players.append(new_high_cost_player)
    # Re-solve again; this time only the high-cost side is used, and it
    # becomes the adversary mixture for the next round.
    _, high_cost_probs, _, high_cost_support = new_equilibrium(low_cost_players, high_cost_players, discount_factor, [low_cost, high_cost], total_stages, initial_demands)
    high_cost_probabilities = return_distribution(len(high_cost_players), high_cost_probs, high_cost_support)

0
100 times found  (1),(1)
    supports: [0][0]

100 times found  (1,0),(1)
    supports: [0][0]
1
100 times found  (1,0),(1,0)
    supports: [0][0]

100 times found  (1,0,0),(1,0)
    supports: [0][0]
2
100 times found  (1,0,0),(1,0,0)
    supports: [0][0]

100 times found  (7/647,0,0,640/647),(203/204,1/204,0)
    supports: [0, 3][0, 1]
3
100 times found  (7/647,0,0,640/647),(203/204,1/204,0,0)
    supports: [0, 3][0, 1]

100 times found  (7/647,0,0,640/647,0),(203/204,1/204,0,0)
    supports: [0, 3][0, 1]
4
