# Experiments

## Libraries/Setup
***

In [None]:
# Ensure that all requisite public libraries are installed
import sys
!{sys.executable} -m pip install pandas
!{sys.executable} -m pip install tqdm
!{sys.executable} -m pip install matplotlib
!{sys.executable} -m pip install seaborn

In [None]:
# Custom libraries
import environment as uno
import q_learning_agent as rlagent
import strategy_agent as sagent

# Public libraries
import pandas as pd
import numpy as np
import ipywidgets as widgets
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
import seaborn as sns

# Apply seaborn's default theme to the matplotlib figures drawn below
sns.set()

## Performance of Q-Learning Agents Trained Against a Random Strategy
***

### Performance When Played Against Random Strategy
***

In [None]:
# Agent parameters ("learn" is False: the stored model is only evaluated here)
agent_info = dict(
    epsilon=0.1,
    gamma=0.2,
    alpha=0,  # decay
    model="../assets/models/q_v_rand/model",
    learn=False,
)

# Load Q-Learning agent
q_v_rand = rlagent.QLearningAgent(agent_info)

# Run simulations: 1000 games against the random strategy (agent2=None)
run = uno.tournament(
    iterations=1000,
    agent1=q_v_rand,
    agent2=None,  # random strategy agent
    comment=False,
)

# Split the tournament output into the winners and turns sequences
winners, turns = run

#### Win Rate

In [None]:
# Fraction of games recorded under the winner name "q-learning"
sum(1 for name in winners if name == "q-learning") / len(winners)

In [None]:
# Combine winners and turns column-wise, naming the columns at creation
result = pd.concat(
    [pd.Series(winners), pd.Series(turns)], axis=1, keys=["Winner", "Turns"]
)

# Cumulative "q-learning" wins divided by the number of games played so far
result["Win_rate"] = (
    result["Winner"].eq("q-learning").astype("int64").cumsum() / (result.index + 1)
)

result.tail()

In [None]:
# Tally of winners over (at most) the last 10000 games
result["Winner"].tail(10000).value_counts()

#### Cumulative Win-Rate per Simulation

In [None]:
fig = plt.figure(1, figsize=(15, 7))

# Running win rate per simulated game with a dashed 50% reference line
plt.plot(result.index, result["Win_rate"])
plt.hlines(y=0.5, xmin=0, xmax=len(winners), colors="grey", linestyles="dashed")

# Formatting: title, axis labels and y-range set together on the current axes
plt.gca().set(
    title="Win-Rate with Starting Advantage",
    xlabel="Simulations",
    ylabel="Win Rate",
    ylim=(0.45, 0.55),
)

plt.show()

### Performance When Played Against Unoptimized Strategic Agent
***

In [None]:
# Load Strategic Agent
strat_unopt = sagent.StrategicAgent(
    {"model": "../assets/models/strat_unopt/model", "parameters": None}
)

# Run simulations: 1000 games, strategic agent as agent1, Q-learning agent as agent2
run = uno.tournament(
    iterations=1000,
    agent1=strat_unopt,
    agent2=q_v_rand,
    comment=False,
)

# Split the tournament output into the winners and turns sequences
winners, turns = run

#### Win Rate

In [None]:
# Share of games won by each agent. Both counts are divided by the number of
# games so that the two printed values are rates on the same scale.
q_winrate = winners.count("q-learning") / len(winners)
s_winrate = winners.count("strategic") / len(winners)

print(f"q-learning win rate: {q_winrate}")
print(f"strategic win rate: {s_winrate}")

In [None]:
# Combine winners and turns column-wise, naming the columns at creation
result = pd.concat(
    [pd.Series(winners), pd.Series(turns)], axis=1, keys=["Winner", "Turns"]
)

# Cumulative "q-learning" wins divided by the number of games played so far
result["Win_rate"] = (
    result["Winner"].eq("q-learning").astype("int64").cumsum() / (result.index + 1)
)

result.tail()

In [None]:
# Combine winners and turns column-wise, naming the columns at creation
result1 = pd.concat(
    [pd.Series(winners), pd.Series(turns)], axis=1, keys=["Winner", "Turns"]
)

# Cumulative "strategic" wins divided by the number of games played so far
result1["Win_rate"] = (
    result1["Winner"].eq("strategic").astype("int64").cumsum() / (result1.index + 1)
)

result1.tail()

In [None]:
# A notebook cell renders only its last expression, and result/result1 are
# built from the same winners list, so a single tally covers both frames.
result1["Winner"][-10000:].value_counts()

#### Cumulative Win-Rate per Simulation

In [None]:
fig = plt.figure(1, figsize=(15, 7))

# Running win rate per simulated game with a dashed 50% reference line
plt.plot(result.index, result["Win_rate"])
plt.hlines(y=0.5, xmin=0, xmax=len(winners), colors="grey", linestyles="dashed")

# Formatting: title, axis labels and y-range set together on the current axes
plt.gca().set(
    title="Win-Rate with Starting Advantage",
    xlabel="Simulations",
    ylabel="Win Rate",
    ylim=(0.45, 0.55),
)

plt.show()

In [None]:
fig = plt.figure(1, figsize=(15, 7))

# Running win rate per simulated game with a dashed 50% reference line
plt.plot(result1.index, result1["Win_rate"])
plt.hlines(y=0.5, xmin=0, xmax=len(winners), colors="grey", linestyles="dashed")

# Formatting: title, axis labels and y-range set together on the current axes
plt.gca().set(
    title="Win-Rate with Starting Advantage",
    xlabel="Simulations",
    ylabel="Win Rate",
    ylim=(0.45, 0.55),
)

plt.show()

## Performance of Q-Learning Agents Trained Against an Unoptimized Strategy Agent
***

### Performance When Played Against Random Strategy
***

In [None]:
# Agent parameters ("learn" is False: the stored model is only evaluated here)
agent_info = dict(
    epsilon=0.1,
    gamma=0.2,
    alpha=0,  # decay
    model="../assets/models/q_v_strat/model",
    learn=False,
)

# Load Q-Learning agent
q_v_strat = rlagent.QLearningAgent(agent_info)

# Run simulations: 1000 games against the random strategy (agent2=None)
run = uno.tournament(
    iterations=1000,
    agent1=q_v_strat,
    agent2=None,  # random strategy agent
    comment=False,
)

# Split the tournament output into the winners and turns sequences
winners, turns = run

#### Win Rate

In [None]:
# Fraction of games recorded under the winner name "q-learning"
sum(1 for name in winners if name == "q-learning") / len(winners)

In [None]:
# Combine winners and turns column-wise, naming the columns at creation
result = pd.concat(
    [pd.Series(winners), pd.Series(turns)], axis=1, keys=["Winner", "Turns"]
)

# Cumulative "q-learning" wins divided by the number of games played so far
result["Win_rate"] = (
    result["Winner"].eq("q-learning").astype("int64").cumsum() / (result.index + 1)
)

result.tail()

In [None]:
# Tally of winners over (at most) the last 10000 games
result["Winner"].tail(10000).value_counts()

#### Cumulative Win-Rate per Simulation

In [None]:
fig = plt.figure(1, figsize=(15, 7))

# Running win rate per simulated game with a dashed 50% reference line
plt.plot(result.index, result["Win_rate"])
plt.hlines(y=0.5, xmin=0, xmax=len(winners), colors="grey", linestyles="dashed")

# Formatting: title, axis labels and y-range set together on the current axes
plt.gca().set(
    title="Win-Rate with Starting Advantage",
    xlabel="Simulations",
    ylabel="Win Rate",
    ylim=(0.45, 0.55),
)

plt.show()

### Performance When Played Against Unoptimized Strategic Agent
***

In [None]:
# Run simulations: 1000 games, strategic agent as agent1, Q-learning agent as agent2
run = uno.tournament(
    iterations=1000,
    agent1=strat_unopt,
    agent2=q_v_strat,
    comment=False,
)

# Split the tournament output into the winners and turns sequences
winners, turns = run

#### Win Rate

In [None]:
# Share of games won by each agent. Both counts are divided by the number of
# games so that the two printed values are rates on the same scale.
q_winrate = winners.count("q-learning") / len(winners)
s_winrate = winners.count("strategic") / len(winners)

print(f"q-learning win rate: {q_winrate}")
print(f"strategic win rate: {s_winrate}")

In [None]:
# Combine winners and turns column-wise, naming the columns at creation
result = pd.concat(
    [pd.Series(winners), pd.Series(turns)], axis=1, keys=["Winner", "Turns"]
)

# Cumulative "q-learning" wins divided by the number of games played so far
result["Win_rate"] = (
    result["Winner"].eq("q-learning").astype("int64").cumsum() / (result.index + 1)
)

result.tail()

In [None]:
# Combine winners and turns column-wise, naming the columns at creation
result1 = pd.concat(
    [pd.Series(winners), pd.Series(turns)], axis=1, keys=["Winner", "Turns"]
)

# Cumulative "strategic" wins divided by the number of games played so far
result1["Win_rate"] = (
    result1["Winner"].eq("strategic").astype("int64").cumsum() / (result1.index + 1)
)

result1.tail()

In [None]:
# A notebook cell renders only its last expression, and result/result1 are
# built from the same winners list, so a single tally covers both frames.
result1["Winner"][-10000:].value_counts()

#### Cumulative Win-Rate per Simulation

In [None]:
fig = plt.figure(1, figsize=(15, 7))

# Running win rate per simulated game with a dashed 50% reference line
plt.plot(result.index, result["Win_rate"])
plt.hlines(y=0.5, xmin=0, xmax=len(winners), colors="grey", linestyles="dashed")

# Formatting: title, axis labels and y-range set together on the current axes
plt.gca().set(
    title="Win-Rate with Starting Advantage",
    xlabel="Simulations",
    ylabel="Win Rate",
    ylim=(0.45, 0.55),
)

plt.show()

In [None]:
fig = plt.figure(1, figsize=(15, 7))

# Running win rate per simulated game with a dashed 50% reference line
plt.plot(result1.index, result1["Win_rate"])
plt.hlines(y=0.5, xmin=0, xmax=len(winners), colors="grey", linestyles="dashed")

# Formatting: title, axis labels and y-range set together on the current axes
plt.gca().set(
    title="Win-Rate with Starting Advantage",
    xlabel="Simulations",
    ylabel="Win Rate",
    ylim=(0.45, 0.55),
)

plt.show()

### Performance When Played Against a Q-Learning Agent Trained Against a Random Strategy
***

In [None]:
# Give the two Q-learning agents distinct names so their wins can be told apart
q_v_rand.name = "q_v_rand"
q_v_strat.name = "q_v_strat"

# Run simulations
run = uno.tournament(iterations = 1000,
                     agent1 = q_v_rand,
                     agent2 = q_v_strat, 
                     comment = False)

# Unpack turns as well as winners: the cells below build a "Turns" column from
# `turns`, which must come from this tournament rather than an earlier one.
winners, turns = run

#### Win Rate

In [None]:
# Share of games won by each Q-learning agent. Both counts are divided by the
# number of games, and the labels name the agents that actually played.
q_winrate = winners.count("q_v_rand") / len(winners)
s_winrate = winners.count("q_v_strat") / len(winners)

print(f"q_v_rand win rate: {q_winrate}")
print(f"q_v_strat win rate: {s_winrate}")

In [None]:
# Combine winners and turns column-wise, naming the columns at creation
result = pd.concat(
    [pd.Series(winners), pd.Series(turns)], axis=1, keys=["Winner", "Turns"]
)

# Cumulative "q_v_rand" wins divided by the number of games played so far
result["Win_rate"] = (
    result["Winner"].eq("q_v_rand").astype("int64").cumsum() / (result.index + 1)
)

result.tail()

In [None]:
# Combine winners and turns column-wise, naming the columns at creation
result1 = pd.concat(
    [pd.Series(winners), pd.Series(turns)], axis=1, keys=["Winner", "Turns"]
)

# Cumulative "q_v_strat" wins divided by the number of games played so far
result1["Win_rate"] = (
    result1["Winner"].eq("q_v_strat").astype("int64").cumsum() / (result1.index + 1)
)

result1.tail()

In [None]:
# A notebook cell renders only its last expression, and result/result1 are
# built from the same winners list, so a single tally covers both frames.
result1["Winner"][-10000:].value_counts()

#### Cumulative Win-Rate per Simulation

In [None]:
fig = plt.figure(1, figsize=(15, 7))

# Running win rate per simulated game with a dashed 50% reference line
plt.plot(result.index, result["Win_rate"])
plt.hlines(y=0.5, xmin=0, xmax=len(winners), colors="grey", linestyles="dashed")

# Formatting: title, axis labels and y-range set together on the current axes
plt.gca().set(
    title="Win-Rate with Starting Advantage",
    xlabel="Simulations",
    ylabel="Win Rate",
    ylim=(0.45, 0.55),
)

plt.show()

In [None]:
fig = plt.figure(1, figsize=(15, 7))

# Running win rate per simulated game with a dashed 50% reference line
plt.plot(result1.index, result1["Win_rate"])
plt.hlines(y=0.5, xmin=0, xmax=len(winners), colors="grey", linestyles="dashed")

# Formatting: title, axis labels and y-range set together on the current axes
plt.gca().set(
    title="Win-Rate with Starting Advantage",
    xlabel="Simulations",
    ylabel="Win Rate",
    ylim=(0.45, 0.55),
)

plt.show()

## Performance of an Unoptimized Strategy Agent Against a Random Strategy
***

In [None]:
# Run simulations
run = uno.tournament(iterations = 1000,
                     agent1 = strat_unopt,
                     agent2 = None, #random strategy agent
                     comment = False)

# Unpack turns as well as winners: the cells below build a "Turns" column from
# `turns`, which must come from this tournament rather than an earlier one.
winners, turns = run

#### Win Rate

In [None]:
# Fraction of games recorded under the winner name "strategic"
sum(1 for name in winners if name == "strategic") / len(winners)

In [None]:
# Combine winners and turns column-wise, naming the columns at creation
result = pd.concat(
    [pd.Series(winners), pd.Series(turns)], axis=1, keys=["Winner", "Turns"]
)

# Cumulative "strategic" wins divided by the number of games played so far
result["Win_rate"] = (
    result["Winner"].eq("strategic").astype("int64").cumsum() / (result.index + 1)
)

result.tail()

In [None]:
# Tally of winners over (at most) the last 10000 games
result["Winner"].tail(10000).value_counts()

#### Cumulative Win-Rate per Simulation

In [None]:
fig = plt.figure(1, figsize=(15, 7))

# Running win rate per simulated game with a dashed 50% reference line
plt.plot(result.index, result["Win_rate"])
plt.hlines(y=0.5, xmin=0, xmax=len(winners), colors="grey", linestyles="dashed")

# Formatting: title, axis labels and y-range set together on the current axes
plt.gca().set(
    title="Win-Rate with Starting Advantage",
    xlabel="Simulations",
    ylabel="Win Rate",
    ylim=(0.45, 0.55),
)

plt.show()

In [None]:
# Run simulations
run = uno.tournament(iterations = 1000,
                     agent1 = strat_unopt,
                     agent2 = q_v_strat, 
                     comment = False)

# Unpack turns as well as winners: the cells below build a "Turns" column from
# `turns`, which must come from this tournament rather than an earlier one.
winners, turns = run

#### Win Rate

In [None]:
# Share of games won by each agent. Both counts are divided by the number of
# games so that the two printed values are rates on the same scale.
q_winrate = winners.count("q_v_strat") / len(winners)
s_winrate = winners.count("strategic") / len(winners)

print(f"q-learning win rate: {q_winrate}")
print(f"strategic win rate: {s_winrate}")

In [None]:
# Combine winners and turns column-wise, naming the columns at creation
result = pd.concat(
    [pd.Series(winners), pd.Series(turns)], axis=1, keys=["Winner", "Turns"]
)

# Cumulative "q_v_strat" wins divided by the number of games played so far
result["Win_rate"] = (
    result["Winner"].eq("q_v_strat").astype("int64").cumsum() / (result.index + 1)
)

result.tail()

In [None]:
# Tally of winners over (at most) the last 10000 games
result["Winner"].tail(10000).value_counts()

#### Cumulative Win-Rate per Simulation

In [None]:
fig = plt.figure(1, figsize=(15, 7))

# Running win rate per simulated game with a dashed 50% reference line
plt.plot(result.index, result["Win_rate"])
plt.hlines(y=0.5, xmin=0, xmax=len(winners), colors="grey", linestyles="dashed")

# Formatting: title, axis labels and y-range set together on the current axes
plt.gca().set(
    title="Win-Rate with Starting Advantage",
    xlabel="Simulations",
    ylabel="Win Rate",
    ylim=(0.45, 0.55),
)

plt.show()

In [None]:
# Give the two Q-learning agents distinct names so their wins can be told apart
q_v_rand.name = "q_v_rand"
q_v_strat.name = "q_v_strat"

# Run simulations
run = uno.tournament(iterations = 1000,
                     agent1 = q_v_rand,
                     agent2 = q_v_strat, 
                     comment = False)

# Unpack turns as well as winners: the cells below build a "Turns" column from
# `turns`, which must come from this tournament rather than an earlier one.
winners, turns = run

#### Win Rate

In [None]:
# Fraction of games recorded under the winner name "q_v_strat"
sum(1 for name in winners if name == "q_v_strat") / len(winners)

In [None]:
# Combine winners and turns column-wise, naming the columns at creation
result = pd.concat(
    [pd.Series(winners), pd.Series(turns)], axis=1, keys=["Winner", "Turns"]
)

# Cumulative "q_v_strat" wins divided by the number of games played so far
result["Win_rate"] = (
    result["Winner"].eq("q_v_strat").astype("int64").cumsum() / (result.index + 1)
)

result.tail()

In [None]:
# Tally of winners over (at most) the last 10000 games
result["Winner"].tail(10000).value_counts()

#### Cumulative Win-Rate per Simulation

In [None]:
fig = plt.figure(1, figsize=(15, 7))

# Running win rate per simulated game with a dashed 50% reference line
plt.plot(result.index, result["Win_rate"])
plt.hlines(y=0.5, xmin=0, xmax=len(winners), colors="grey", linestyles="dashed")

# Formatting: title, axis labels and y-range set together on the current axes
plt.gca().set(
    title="Win-Rate with Starting Advantage",
    xlabel="Simulations",
    ylabel="Win Rate",
    ylim=(0.45, 0.55),
)

plt.show()

## Performance of the Optimized Strategy Agent Trained Against a Random Strategy
***

### Performance When Played Against Random Strategy
***

In [None]:
# Load optimized strategic agent
agent_info = dict(
    model="../assets/models/strat_opt_rand/model",
    parameters=None,
)
strat_opt_rand = sagent.StrategicAgent(agent_info)

# Run simulations: 1000 games against the random strategy (agent2=None)
run = uno.tournament(
    iterations=1000,
    agent1=strat_opt_rand,
    agent2=None,  # random strategy agent
    comment=False,
)

# Split the tournament output into the winners and turns sequences
winners, turns = run

#### Win Rate

In [None]:
# Fraction of games recorded under the winner name "strategic"
sum(1 for name in winners if name == "strategic") / len(winners)

In [None]:
# Combine winners and turns column-wise, naming the columns at creation
result = pd.concat(
    [pd.Series(winners), pd.Series(turns)], axis=1, keys=["Winner", "Turns"]
)

# Cumulative wins of the strategic agent divided by the games played so far.
# The indicator matches "strategic", the same label the win-rate cell above
# counts; no Q-learning agent plays in this tournament.
result["Win_rate"] = (
    result["Winner"].eq("strategic").astype("int64").cumsum() / (result.index + 1)
)

result.tail()

In [None]:
# Tally of winners over (at most) the last 10000 games
result["Winner"].tail(10000).value_counts()

#### Cumulative Win-Rate per Simulation

In [None]:
fig = plt.figure(1, figsize=(15, 7))

# Running win rate per simulated game with a dashed 50% reference line
plt.plot(result.index, result["Win_rate"])
plt.hlines(y=0.5, xmin=0, xmax=len(winners), colors="grey", linestyles="dashed")

# Formatting: title, axis labels and y-range set together on the current axes
plt.gca().set(
    title="Win-Rate with Starting Advantage",
    xlabel="Simulations",
    ylabel="Win Rate",
    ylim=(0.45, 0.55),
)

plt.show()

### Performance When Played Against Unoptimized Strategic Agent
***

In [None]:
# Rename the optimized agent so its wins are distinguishable from "strategic"
strat_opt_rand.name = "strategic-opt"

# Run simulations: 1000 games, unoptimized agent as agent1, optimized as agent2
run = uno.tournament(
    iterations=1000,
    agent1=strat_unopt,
    agent2=strat_opt_rand,
    comment=False,
)

# Split the tournament output into the winners and turns sequences
winners, turns = run

#### Win Rate

In [None]:
# Share of games won by each agent. Both counts are divided by the number of
# games so that the two printed values are rates on the same scale.
q_winrate = winners.count("strategic") / len(winners)
s_winrate = winners.count("strategic-opt") / len(winners)

print(f"strategic win rate: {q_winrate}")
print(f"strategic-opt win rate: {s_winrate}")

In [None]:
# Combine winners and turns column-wise, naming the columns at creation
result = pd.concat(
    [pd.Series(winners), pd.Series(turns)], axis=1, keys=["Winner", "Turns"]
)

# Cumulative "strategic" wins divided by the number of games played so far
result["Win_rate"] = (
    result["Winner"].eq("strategic").astype("int64").cumsum() / (result.index + 1)
)

result.tail()

In [None]:
# Combine winners and turns column-wise, naming the columns at creation
result1 = pd.concat(
    [pd.Series(winners), pd.Series(turns)], axis=1, keys=["Winner", "Turns"]
)

# Cumulative "strategic-opt" wins divided by the number of games played so far
result1["Win_rate"] = (
    result1["Winner"].eq("strategic-opt").astype("int64").cumsum() / (result1.index + 1)
)

result1.tail()

In [None]:
# A notebook cell renders only its last expression, and result/result1 are
# built from the same winners list, so a single tally covers both frames.
result1["Winner"][-10000:].value_counts()

#### Cumulative Win-Rate per Simulation

In [None]:
fig = plt.figure(1, figsize=(15, 7))

# Running win rate per simulated game with a dashed 50% reference line
plt.plot(result.index, result["Win_rate"])
plt.hlines(y=0.5, xmin=0, xmax=len(winners), colors="grey", linestyles="dashed")

# Formatting: title, axis labels and y-range set together on the current axes
plt.gca().set(
    title="Win-Rate with Starting Advantage",
    xlabel="Simulations",
    ylabel="Win Rate",
    ylim=(0.45, 0.55),
)

plt.show()

In [None]:
fig = plt.figure(1, figsize=(15, 7))

# Running win rate per simulated game with a dashed 50% reference line
plt.plot(result1.index, result1["Win_rate"])
plt.hlines(y=0.5, xmin=0, xmax=len(winners), colors="grey", linestyles="dashed")

# Formatting: title, axis labels and y-range set together on the current axes
plt.gca().set(
    title="Win-Rate with Starting Advantage",
    xlabel="Simulations",
    ylabel="Win Rate",
    ylim=(0.45, 0.55),
)

plt.show()

### Performance When Played Against a Q-Learning Agent
***

In [None]:
# Run simulations
run = uno.tournament(iterations = 1000,
                     agent1 = strat_opt_rand,
                     agent2 = q_v_strat, 
                     comment = False)

winners = run[0]

#### Win Rate

In [None]:
# Share of games won by each agent. Both counts are divided by the number of
# games so that the two printed values are rates on the same scale.
q_winrate = winners.count("q_v_strat") / len(winners)
s_winrate = winners.count("strategic-opt") / len(winners)

print(f"q-learning win rate: {q_winrate}")
print(f"strategic win rate: {s_winrate}")

In [None]:
# Combine winners and turns column-wise, naming the columns at creation
result = pd.concat(
    [pd.Series(winners), pd.Series(turns)], axis=1, keys=["Winner", "Turns"]
)

# Cumulative "q_v_strat" wins divided by the number of games played so far
result["Win_rate"] = (
    result["Winner"].eq("q_v_strat").astype("int64").cumsum() / (result.index + 1)
)

result.tail()

In [None]:
# Combine winners and turns column-wise, naming the columns at creation
result1 = pd.concat(
    [pd.Series(winners), pd.Series(turns)], axis=1, keys=["Winner", "Turns"]
)

# Cumulative "strategic-opt" wins divided by the number of games played so far
result1["Win_rate"] = (
    result1["Winner"].eq("strategic-opt").astype("int64").cumsum() / (result1.index + 1)
)

result1.tail()

In [None]:
# A notebook cell renders only its last expression, and result/result1 are
# built from the same winners list, so a single tally covers both frames.
result1["Winner"][-10000:].value_counts()

#### Cumulative Win-Rate per Simulation

In [None]:
fig = plt.figure(1, figsize=(15, 7))

# Running win rate per simulated game with a dashed 50% reference line
plt.plot(result.index, result["Win_rate"])
plt.hlines(y=0.5, xmin=0, xmax=len(winners), colors="grey", linestyles="dashed")

# Formatting: title, axis labels and y-range set together on the current axes
plt.gca().set(
    title="Win-Rate with Starting Advantage",
    xlabel="Simulations",
    ylabel="Win Rate",
    ylim=(0.45, 0.55),
)

plt.show()

In [None]:
fig = plt.figure(1, figsize=(15, 7))

# Running win rate per simulated game with a dashed 50% reference line
plt.plot(result1.index, result1["Win_rate"])
plt.hlines(y=0.5, xmin=0, xmax=len(winners), colors="grey", linestyles="dashed")

# Formatting: title, axis labels and y-range set together on the current axes
plt.gca().set(
    title="Win-Rate with Starting Advantage",
    xlabel="Simulations",
    ylabel="Win Rate",
    ylim=(0.45, 0.55),
)

plt.show()

## Performance of the Optimized Strategy Agent Trained Against a Q-Learning Agent
***

### Performance When Played Against Random Strategy
***

In [None]:
# Load optimized strategic agent
agent_info = dict(
    model="../assets/models/strat_opt_q/model",
    parameters=None,
)
strat_opt_q = sagent.StrategicAgent(agent_info)

# Run simulations: 1000 games against the random strategy (agent2=None)
run = uno.tournament(
    iterations=1000,
    agent1=strat_opt_q,
    agent2=None,  # random strategy agent
    comment=False,
)

# Split the tournament output into the winners and turns sequences
winners, turns = run

#### Win Rate

In [None]:
# Fraction of games recorded under the winner name "strategic"
sum(1 for name in winners if name == "strategic") / len(winners)

In [None]:
# Combine winners and turns column-wise, naming the columns at creation
result = pd.concat(
    [pd.Series(winners), pd.Series(turns)], axis=1, keys=["Winner", "Turns"]
)

# Cumulative "strategic" wins divided by the number of games played so far
result["Win_rate"] = (
    result["Winner"].eq("strategic").astype("int64").cumsum() / (result.index + 1)
)

result.tail()

In [None]:
# Tally of winners over (at most) the last 10000 games
result["Winner"].tail(10000).value_counts()

#### Cumulative Win-Rate per Simulation

In [None]:
fig = plt.figure(1, figsize=(15, 7))

# Running win rate per simulated game with a dashed 50% reference line
plt.plot(result.index, result["Win_rate"])
plt.hlines(y=0.5, xmin=0, xmax=len(winners), colors="grey", linestyles="dashed")

# Formatting: title, axis labels and y-range set together on the current axes
plt.gca().set(
    title="Win-Rate with Starting Advantage",
    xlabel="Simulations",
    ylabel="Win Rate",
    ylim=(0.45, 0.55),
)

plt.show()

### Performance When Played Against Unoptimized Strategic Agent
***

In [None]:
# Rename the optimized agent so its wins are distinguishable from "strategic"
strat_opt_q.name = "strategic-opt-q"

# Run simulations: 1000 games, unoptimized agent as agent1, optimized as agent2
run = uno.tournament(
    iterations=1000,
    agent1=strat_unopt,
    agent2=strat_opt_q,
    comment=False,
)

# Split the tournament output into the winners and turns sequences
winners, turns = run

#### Win Rate

In [None]:
# Share of games won by each agent. Both counts are divided by the number of
# games so that the two printed values are rates on the same scale.
q_winrate = winners.count("strategic") / len(winners)
s_winrate = winners.count("strategic-opt-q") / len(winners)

print(f"unoptimized strategic win rate: {q_winrate}")
print(f"optimized strategic win rate: {s_winrate}")

In [None]:
# Combine winners and turns column-wise, naming the columns at creation
result = pd.concat(
    [pd.Series(winners), pd.Series(turns)], axis=1, keys=["Winner", "Turns"]
)

# Cumulative "strategic" wins divided by the number of games played so far
result["Win_rate"] = (
    result["Winner"].eq("strategic").astype("int64").cumsum() / (result.index + 1)
)

result.tail()

In [None]:
# Combine winners and turns column-wise, naming the columns at creation
result1 = pd.concat(
    [pd.Series(winners), pd.Series(turns)], axis=1, keys=["Winner", "Turns"]
)

# Cumulative "strategic-opt-q" wins divided by the number of games played so far
result1["Win_rate"] = (
    result1["Winner"].eq("strategic-opt-q").astype("int64").cumsum() / (result1.index + 1)
)

result1.tail()

In [None]:
# A notebook cell renders only its last expression, and result/result1 are
# built from the same winners list, so a single tally covers both frames.
result1["Winner"][-10000:].value_counts()

#### Cumulative Win-Rate per Simulation

In [None]:
fig = plt.figure(1, figsize=(15, 7))

# Running win rate per simulated game with a dashed 50% reference line
plt.plot(result.index, result["Win_rate"])
plt.hlines(y=0.5, xmin=0, xmax=len(winners), colors="grey", linestyles="dashed")

# Formatting: title, axis labels and y-range set together on the current axes
plt.gca().set(
    title="Win-Rate with Starting Advantage",
    xlabel="Simulations",
    ylabel="Win Rate",
    ylim=(0.45, 0.55),
)

plt.show()

In [None]:
fig = plt.figure(1, figsize=(15, 7))

# Running win rate per simulated game with a dashed 50% reference line
plt.plot(result1.index, result1["Win_rate"])
plt.hlines(y=0.5, xmin=0, xmax=len(winners), colors="grey", linestyles="dashed")

# Formatting: title, axis labels and y-range set together on the current axes
plt.gca().set(
    title="Win-Rate with Starting Advantage",
    xlabel="Simulations",
    ylabel="Win Rate",
    ylim=(0.45, 0.55),
)

plt.show()

### Performance When Played Against Strategic Agent Optimized Against Random Strategy
***

In [None]:
# Run simulations
run = uno.tournament(iterations = 1000,
                     agent1 = strat_opt_rand,
                     agent2 = strat_opt_q, 
                     comment = False)

winners, turns = run

#### Win Rate

In [None]:
# Share of games won by each agent. Both counts are divided by the number of
# games so that the two printed values are rates on the same scale.
q_winrate = winners.count("strategic-opt") / len(winners)
s_winrate = winners.count("strategic-opt-q") / len(winners)

print(f"optimized strategic win rate: {q_winrate}")
print(f"optimized-q strategic win rate: {s_winrate}")

In [None]:
# Combine winners and turns column-wise, naming the columns at creation
result = pd.concat(
    [pd.Series(winners), pd.Series(turns)], axis=1, keys=["Winner", "Turns"]
)

# Cumulative "strategic-opt" wins divided by the number of games played so far
result["Win_rate"] = (
    result["Winner"].eq("strategic-opt").astype("int64").cumsum() / (result.index + 1)
)

result.tail()

In [None]:
# Combine winners and turns column-wise, naming the columns at creation
result1 = pd.concat(
    [pd.Series(winners), pd.Series(turns)], axis=1, keys=["Winner", "Turns"]
)

# Cumulative "strategic-opt-q" wins divided by the number of games played so far
result1["Win_rate"] = (
    result1["Winner"].eq("strategic-opt-q").astype("int64").cumsum() / (result1.index + 1)
)

result1.tail()

In [None]:
# A notebook cell renders only its last expression, and result/result1 are
# built from the same winners list, so a single tally covers both frames.
result1["Winner"][-10000:].value_counts()

#### Cumulative Win-Rate per Simulation

In [None]:
fig = plt.figure(1, figsize=(15, 7))

# Running win rate per simulated game with a dashed 50% reference line
plt.plot(result.index, result["Win_rate"])
plt.hlines(y=0.5, xmin=0, xmax=len(winners), colors="grey", linestyles="dashed")

# Formatting: title, axis labels and y-range set together on the current axes
plt.gca().set(
    title="Win-Rate with Starting Advantage",
    xlabel="Simulations",
    ylabel="Win Rate",
    ylim=(0.45, 0.55),
)

plt.show()

In [None]:
fig = plt.figure(1, figsize=(15, 7))

# Running win rate per simulated game with a dashed 50% reference line
plt.plot(result1.index, result1["Win_rate"])
plt.hlines(y=0.5, xmin=0, xmax=len(winners), colors="grey", linestyles="dashed")

# Formatting: title, axis labels and y-range set together on the current axes
plt.gca().set(
    title="Win-Rate with Starting Advantage",
    xlabel="Simulations",
    ylabel="Win Rate",
    ylim=(0.45, 0.55),
)

plt.show()

### Performance When Played Against a Q-Learning Agent
***

In [None]:
# Run simulations
run = uno.tournament(iterations = 1000,
                     agent1 = strat_opt_q,
                     agent2 = q_v_strat, 
                     comment = False)

winners = run[0]

#### Win Rate

In [None]:
# Share of games won by each agent. Both counts are divided by the number of
# games so that the two printed values are rates on the same scale.
q_winrate = winners.count("q_v_strat") / len(winners)
s_winrate = winners.count("strategic-opt-q") / len(winners)

print(f"q-learning win rate: {q_winrate}")
print(f"strategic win rate: {s_winrate}")

In [None]:
# Combine winners and turns column-wise, naming the columns at creation
result = pd.concat(
    [pd.Series(winners), pd.Series(turns)], axis=1, keys=["Winner", "Turns"]
)

# Cumulative "q_v_strat" wins divided by the number of games played so far
result["Win_rate"] = (
    result["Winner"].eq("q_v_strat").astype("int64").cumsum() / (result.index + 1)
)

result.tail()

In [None]:
# Combine winners and turns column-wise, naming the columns at creation
result1 = pd.concat(
    [pd.Series(winners), pd.Series(turns)], axis=1, keys=["Winner", "Turns"]
)

# Cumulative "strategic-opt-q" wins divided by the number of games played so far
result1["Win_rate"] = (
    result1["Winner"].eq("strategic-opt-q").astype("int64").cumsum() / (result1.index + 1)
)

result1.tail()

In [None]:
# A notebook cell renders only its last expression, and result/result1 are
# built from the same winners list, so a single tally covers both frames.
result1["Winner"][-10000:].value_counts()

#### Cumulative Win-Rate per Simulation

In [None]:
fig = plt.figure(1, figsize=(15, 7))

# Running win rate per simulated game with a dashed 50% reference line
plt.plot(result.index, result["Win_rate"])
plt.hlines(y=0.5, xmin=0, xmax=len(winners), colors="grey", linestyles="dashed")

# Formatting: title, axis labels and y-range set together on the current axes
plt.gca().set(
    title="Win-Rate with Starting Advantage",
    xlabel="Simulations",
    ylabel="Win Rate",
    ylim=(0.45, 0.55),
)

plt.show()

In [None]:
fig = plt.figure(1, figsize=(15, 7))

# Running win rate per simulated game with a dashed 50% reference line
plt.plot(result1.index, result1["Win_rate"])
plt.hlines(y=0.5, xmin=0, xmax=len(winners), colors="grey", linestyles="dashed")

# Formatting: title, axis labels and y-range set together on the current axes
plt.gca().set(
    title="Win-Rate with Starting Advantage",
    xlabel="Simulations",
    ylabel="Win Rate",
    ylim=(0.45, 0.55),
)

plt.show()