In [1]:
''' Policy Iteration Algorithm vs Random Agent
'''

from env import Env
# from human_agent import HumanAgent
from policy_iteration_agent import PolicyIterationAgent
from random_agent import RandomAgent
# from threshold_agent import ThresholdAgent

# Build the environment and the two players for this match-up.
env = Env()
random_agent = RandomAgent(env.np_random, False)
print("Running Policy Iteration algorithm for Random Agent...")
# The policy-iteration agent receives its opponent as the third argument
# (presumably it solves for a policy against that opponent -- confirm in
# policy_iteration_agent).
pi_random_agent = PolicyIterationAgent(env.np_random, False, random_agent)
# Other agents (human, threshold) can be seated here instead of these two.
env.set_agents([pi_random_agent, random_agent])

num_of_games = 10000
print("Running ", num_of_games, " games \"Policy Iteration Agent vs Random Agent\"...")
# env.run yields (trajectories, payoffs); only the first payoff entry is kept
# (assumed to belong to the first agent passed to set_agents -- TODO confirm).
pi_random_agent_payoffs = [
    env.run(is_training=False)[1][0] for _ in range(num_of_games)
]

print("Average payoffs:  ", sum(pi_random_agent_payoffs)/num_of_games)

Running Policy Iteration algorithm for Random Agent...
converged after 3 iterations
Running  10000  games "Policy Iteration Agent vs Random Agent"...
Average payoffs:   0.8595


In [2]:
''' Policy Iteration Algorithm vs Threshold Agent
'''

from env import Env
# from human_agent import HumanAgent
from policy_iteration_agent import PolicyIterationAgent
# from random_agent import RandomAgent
from threshold_agent import ThresholdAgent

# Build the environment and the two players for this match-up.
env = Env()
threshold_agent = ThresholdAgent(False)
print("Running Policy Iteration algorithm for Threshold Agent...")
# The policy-iteration agent receives its opponent as the third argument
# (presumably it solves for a policy against that opponent -- confirm in
# policy_iteration_agent).
pi_threshold_agent = PolicyIterationAgent(env.np_random, False, threshold_agent)
env.set_agents([
    pi_threshold_agent,
    threshold_agent,
])

num_of_games = 10000
# Named after the agent actually being evaluated; this list previously reused
# the Q-learning variable name even though no Q-learning agent is involved.
pi_threshold_agent_payoffs = []
print("Running ", num_of_games, " games \"Policy Iteration Agent vs Threshold Agent\"...")
for game_index in range(num_of_games):
    # env.run yields (trajectories, payoffs); only the first payoff entry is
    # kept (assumed to belong to the first agent in set_agents -- TODO confirm).
    trajectories, payoffs = env.run(is_training=False)
    pi_threshold_agent_payoffs.append(payoffs[0])

print("Average payoffs:  ", sum(pi_threshold_agent_payoffs)/num_of_games)

Running Policy Iteration algorithm for Threshold Agent...
converged after 3 iterations
Running  10000  games "Policy Iteration Agent vs Threshold Agent"...
Average payoffs:   0.2255


In [3]:
''' Q Learning Algorithm vs Threshold Agent
'''

from env import Env
# from human_agent import HumanAgent
from q_learning_agent import QLearningAgent
# from random_agent import RandomAgent
from threshold_agent import ThresholdAgent

# Environment plus the two players for the Q-learning run against the
# threshold opponent.
env = Env()
threshold_agent = ThresholdAgent(False, False)
print("Running Q Learning algorithm for Threshold Agent...")
q_learning_agent = QLearningAgent(
    env.np_random,
    False,
    is_learning=True,
    slow_decay=False,
)
env.set_agents([q_learning_agent, threshold_agent])

num_of_games = 10**7
q_learning_agent_payoffs = []
print("Running ", num_of_games, " games \"Q Learning Agent vs Threshold Agent\"...")
# Explicit loop (rather than a comprehension) so the payoffs gathered so far
# remain in the list if this long run is interrupted.
for game_index in range(num_of_games):
    # env.run yields (trajectories, payoffs); only the first payoff entry is
    # kept (assumed to belong to the first agent in set_agents -- TODO confirm).
    game_trajectories, payoffs = env.run(is_training=False)
    q_learning_agent_payoffs.append(payoffs[0])

print("Average payoffs:  ", sum(q_learning_agent_payoffs)/num_of_games)
print("Storing trained model...")

# num_of_games = 100000
# q_learning_agent_payoffs = []
# print("Running ", num_of_games, " games \"Q Learning Agent vs Threshold Agent\"...")
# for i in range(num_of_games):
#     # print("Game: ", i)

#     trajectories, payoffs = env.run(is_training=False)
#     q_learning_agent_payoffs.append(payoffs[0])

# print("Average payoffs after pretraining:  ", sum(q_learning_agent_payoffs)/num_of_games)
# print("Storing trained model...")

import json
import os

# Results go into the "python" directory next to the notebook's working
# directory. os.path.join picks the right separator for the host OS; the
# previous hard-coded '\\' only produced a valid nested path on Windows.
output_dir = os.path.join(os.path.dirname(os.path.abspath('')), 'python')

# Persist the learned model held on the agent.
with open(os.path.join(output_dir, 'q_threshold_model.json'), 'w') as json_file:
    json.dump(q_learning_agent.model, json_file, indent=4, sort_keys=True)
print("Stored trained model successfully!")

# Persist the per-game payoffs collected during the run.
with open(os.path.join(output_dir, 'q_threshold_payoffs.json'), 'w') as json_file:
    json.dump(q_learning_agent_payoffs, json_file, indent=4, sort_keys=True)

Running Q Learning algorithm for Threshold Agent...
Running  10000000  games "Q Learning Agent vs Threshold Agent"...
Average payoffs:   0.116933
Storing trained model...
Stored trained model successfully!


In [4]:
''' Q Learning Algorithm vs Random Agent
'''

from env import Env
# from human_agent import HumanAgent
from q_learning_agent import QLearningAgent
from random_agent import RandomAgent
# from threshold_agent import ThresholdAgent

# Environment plus the two players for the Q-learning run against the
# random opponent.
env = Env()
random_agent = RandomAgent(env.np_random, False)
print("Running Q Learning algorithm for Random Agent...")
q_learning_agent = QLearningAgent(
    env.np_random,
    False,
    is_learning=True,
    slow_decay=True,
)
env.set_agents([q_learning_agent, random_agent])

num_of_games = 10**7
q_learning_agent_payoffs = []
print("Running ", num_of_games, " games \"Q Learning Agent vs Random Agent\"...")
# Explicit loop (rather than a comprehension) so the payoffs gathered so far
# remain in the list if this long run is interrupted.
for game_index in range(num_of_games):
    # env.run yields (trajectories, payoffs); only the first payoff entry is
    # kept (assumed to belong to the first agent in set_agents -- TODO confirm).
    game_trajectories, payoffs = env.run(is_training=False)
    q_learning_agent_payoffs.append(payoffs[0])

print("Average payoffs:  ", sum(q_learning_agent_payoffs)/num_of_games)
print("Storing trained model...")

# num_of_games = 100000
# q_learning_agent_payoffs = []
# print("Running ", num_of_games, " games \"Q Learning Agent vs Random Agent\"...")
# for i in range(num_of_games):
#     # print("Game: ", i)

#     trajectories, payoffs = env.run(is_training=False)
#     q_learning_agent_payoffs.append(payoffs[0])

# print("Average payoffs after pretraining:  ", sum(q_learning_agent_payoffs)/num_of_games)
# print("Storing trained model...")

import json
import os

# Results go into the "python" directory next to the notebook's working
# directory. os.path.join picks the right separator for the host OS; the
# previous hard-coded '\\' only produced a valid nested path on Windows.
output_dir = os.path.join(os.path.dirname(os.path.abspath('')), 'python')

# Persist the learned model held on the agent.
with open(os.path.join(output_dir, 'q_random_model.json'), 'w') as json_file:
    json.dump(q_learning_agent.model, json_file, indent=4, sort_keys=True)
print("Stored trained model successfully!")

# Persist the per-game payoffs collected during the run.
with open(os.path.join(output_dir, 'q_random_payoffs.json'), 'w') as json_file:
    json.dump(q_learning_agent_payoffs, json_file, indent=4, sort_keys=True)

Running Q Learning algorithm for Random Agent...
Running  10000000  games "Q Learning Agent vs Random Agent"...
Average payoffs:   0.82415625
Storing trained model...
Stored trained model successfully!
