In [7]:
# test theoretical value for different 
# sigmaA sigmaB levels

import csv
import os
import random
from collections import deque
from copy import deepcopy

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from noisy_zsc.game import NoisyBailLeverGame
from noisy_zsc.heuristic_learner import ArgmaxAgent, Argmaxof2Agent, StubbornAgent, argmax_of_2

In [5]:
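# function: given sigma, sigma1, sigma2,
# output the value of the argmax policy, estimated by Monte Carlo simulation
# (the default below is runs = 10000; pass runs = 1_000_000 for 1 million simulations)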

# Shared simulation settings read by the value-estimation helpers in this notebook.
# mean_payoffs is forwarded as the first argument of NoisyBailLeverGame.
mean_payoffs = [5.0] * 3
# Default bail payoff; the sweeps override it per call (this can change).
bail_payoff = 5

def argmax_score(payoffs1, payoffs2, true_payoff):
    """Score one round in which both agents act greedily on their own payoffs.

    Each agent picks the index of the largest entry in its own payoff
    sequence (via ``np.argmax``, so ties resolve to the lowest index).

    Args:
        payoffs1: iterable of payoffs as seen by the first agent.
        payoffs2: iterable of payoffs as seen by the second agent.
        true_payoff: indexable collection of the actual payoffs.

    Returns:
        ``true_payoff`` at the chosen index when both agents pick the same
        index, otherwise ``0``.
    """
    pick1 = np.argmax(list(payoffs1))
    pick2 = np.argmax(list(payoffs2))
    # Only a matching choice pays out.
    return true_payoff[pick1] if pick1 == pick2 else 0


def argmax_val(sigma, sigma1, sigma2, bail_payoff = bail_payoff, runs = 10000):
    """Monte Carlo estimate of the mean ``argmax_score`` over fresh games.

    Every run builds a new ``NoisyBailLeverGame`` (with ``episode_length=1``)
    from the module-level ``mean_payoffs`` and the given arguments, then scores
    its ``payoffs1`` / ``payoffs2`` / ``true_payoffs`` with ``argmax_score``.

    Args:
        sigma, sigma1, sigma2: forwarded positionally to ``NoisyBailLeverGame``
            (presumably noise scales -- confirm against the game class).
        bail_payoff: forwarded to ``NoisyBailLeverGame``; defaults to the
            module-level ``bail_payoff`` captured when this function is defined.
        runs: number of independent games to sample.

    Returns:
        ``np.mean`` of the per-run scores.
    """
    def _single_run_score():
        # A new game instance per run, so each sample is drawn independently.
        game = NoisyBailLeverGame(mean_payoffs, bail_payoff, sigma, sigma1, sigma2, episode_length=1)
        return argmax_score(game.payoffs1, game.payoffs2, game.true_payoffs)

    scores = [_single_run_score() for _ in range(runs)]
    return np.mean(scores)



In [6]:
# For every (sigma, bail) pair: estimate the argmax learner's value on the
# sigma1 x sigma2 grid, collect it in a dataframe with columns
# sigma1, sigma2, value, and save one annotated heatmap per pair.
sigma_list = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
bail_list = [3, 4, 5, 6, 7, 8]
sigma1_list = np.linspace(0, 10, 11)
sigma2_list = np.linspace(0, 10, 11)

# savefig does not create missing directories; make sure the target exists up
# front so a long sweep cannot fail at the very end of its first iteration.
os.makedirs('heatmap', exist_ok=True)

for sigma in sigma_list:
    for bail in bail_list:
        # Gather plain records and build the frame once: appending rows with
        # pd.concat in a loop is quadratic and depends on deprecated dtype
        # handling for empty frames.
        records = []
        for sigma1 in sigma1_list:
            for sigma2 in sigma2_list:
                value = argmax_val(sigma, sigma1, sigma2, bail_payoff=bail)
                records.append({'sigma1': sigma1, 'sigma2': sigma2, 'value': value})
        df = pd.DataFrame(records, columns=['sigma1', 'sigma2', 'value'])

        fig, ax = plt.subplots()
        pivot = df.pivot(index='sigma1', columns='sigma2', values='value')
        sns.heatmap(pivot, annot=True, fmt='.1f', cmap='viridis', ax=ax)
        ax.invert_yaxis()
        # Raw strings keep the LaTeX backslash without an invalid-escape warning.
        ax.set_title("argmax learner value; bail=" + str(bail) + r" ; $\sigma$=" + str(sigma))
        # NOTE(review): the pivot puts sigma1 on the rows (y-axis) and sigma2 on
        # the columns (x-axis), so these labels assume sigma2 is sigma_A and
        # sigma1 is sigma_B -- confirm against NoisyBailLeverGame's arguments.
        ax.set_xlabel(r"$\sigma_A$")
        ax.set_ylabel(r"$\sigma_B$")
        fig.savefig('heatmap/argmax_' + str(sigma) + '_' + str(bail) + '.png')
        # Render the figure now, then release it so the sweep does not pile up
        # dozens of open figures.
        plt.show()
        plt.close(fig)

KeyboardInterrupt: 

In [None]:
# Sweep the symmetric case sigma1 == sigma2 on a fine grid for every bail value.
# NOTE(review): `sigma` and `bail_list` are not defined in this cell; they are
# whatever an earlier-run cell left in the kernel (for `sigma`, the last value
# its loop reached). Set `sigma` explicitly before relying on these numbers.
sigma12_list = np.linspace(0, 10, 100)

# Keep every estimate: previously each result overwrote `val` and was lost.
diag_records = []
for sigma12 in sigma12_list:
    for bail in bail_list:
        val = argmax_val(sigma, sigma12, sigma12, bail_payoff=bail)
        diag_records.append({'sigma12': sigma12, 'bail': bail, 'value': val})

diag_df = pd.DataFrame(diag_records, columns=['sigma12', 'bail', 'value'])

In [3]:
# Simulation settings for the argmax-of-2 experiment in this cell.
# mean_payoffs is forwarded as the first argument of NoisyBailLeverGame.
mean_payoffs = [5.0] * 3
# Default bail payoff; the sweep below overrides it per call (this can change).
bail_payoff = 5

def argmax_of_2_score(payoffs1, payoffs2, true_payoff):
    """Score one round where both agents act greedily, ignoring index 0.

    Each agent drops the first entry of its own payoff sequence and picks the
    largest of the remaining entries (``np.argmax``; ties resolve to the lowest
    index).

    Args:
        payoffs1: iterable of payoffs as seen by the first agent.
        payoffs2: iterable of payoffs as seen by the second agent.
        true_payoff: indexable collection of the actual payoffs, indexed like
            the full (un-sliced) payoff sequences.

    Returns:
        ``true_payoff`` at the chosen position when both agents pick the same
        one, otherwise ``0``.
    """
    tail1 = list(payoffs1)[1:]
    tail2 = list(payoffs2)[1:]
    best1, best2 = np.argmax(tail1), np.argmax(tail2)
    if best1 != best2:
        return 0
    # Shift by one to map the sliced index back onto the full payoff vector.
    return true_payoff[best1 + 1]



def argmax_of_2_val(sigma, sigma1, sigma2, bail_payoff = bail_payoff, runs = 10000):
    """Monte Carlo estimate of the mean ``argmax_of_2_score`` over fresh games.

    Every run builds a new ``NoisyBailLeverGame`` (with ``episode_length=1``)
    from the module-level ``mean_payoffs`` and the given arguments, then scores
    its ``payoffs1`` / ``payoffs2`` / ``true_payoffs`` with
    ``argmax_of_2_score``.

    Args:
        sigma, sigma1, sigma2: forwarded positionally to ``NoisyBailLeverGame``
            (presumably noise scales -- confirm against the game class).
        bail_payoff: forwarded to ``NoisyBailLeverGame``; defaults to the
            module-level ``bail_payoff`` captured when this function is defined.
        runs: number of independent games to sample.

    Returns:
        ``np.mean`` of the per-run scores.
    """
    def _single_run_score():
        # A new game instance per run, so each sample is drawn independently.
        game = NoisyBailLeverGame(mean_payoffs, bail_payoff, sigma, sigma1, sigma2, episode_length=1)
        return argmax_of_2_score(game.payoffs1, game.payoffs2, game.true_payoffs)

    scores = [_single_run_score() for _ in range(runs)]
    return np.mean(scores)

# For every (sigma, bail) pair: estimate the argmax-of-2 learner's value on the
# sigma1 x sigma2 grid, collect it in a dataframe with columns
# sigma1, sigma2, value, and save one annotated heatmap per pair.
sigma_list = [3, 4, 5]
bail_list = [5]
sigma1_list = np.linspace(0, 10, 11)
sigma2_list = np.linspace(0, 10, 11)

# savefig does not create missing directories; make sure the target exists up
# front so a long sweep cannot fail at the very end of its first iteration.
os.makedirs('heatmap2', exist_ok=True)

for sigma in sigma_list:
    for bail in bail_list:
        # Gather plain records and build the frame once: appending rows with
        # pd.concat in a loop is quadratic and depends on deprecated dtype
        # handling for empty frames.
        records = []
        for sigma1 in sigma1_list:
            for sigma2 in sigma2_list:
                value = argmax_of_2_val(sigma, sigma1, sigma2, bail_payoff=bail)
                records.append({'sigma1': sigma1, 'sigma2': sigma2, 'value': value})
        df = pd.DataFrame(records, columns=['sigma1', 'sigma2', 'value'])

        fig, ax = plt.subplots()
        pivot = df.pivot(index='sigma1', columns='sigma2', values='value')
        sns.heatmap(pivot, annot=True, fmt='.1f', cmap='viridis', ax=ax)
        ax.invert_yaxis()
        # Raw strings keep the LaTeX backslash without an invalid-escape warning.
        ax.set_title("argmax of 2 learner value; bail=" + str(bail) + r" ; $\sigma$=" + str(sigma))
        # NOTE(review): the pivot puts sigma1 on the rows (y-axis) and sigma2 on
        # the columns (x-axis), so these labels assume sigma2 is sigma_A and
        # sigma1 is sigma_B -- confirm against NoisyBailLeverGame's arguments.
        ax.set_xlabel(r"$\sigma_A$")
        ax.set_ylabel(r"$\sigma_B$")
        fig.savefig('heatmap2/argmax_' + str(sigma) + '_' + str(bail) + '.png')
        # Render the figure now, then release it so the sweep does not pile up
        # open figures.
        plt.show()
        plt.close(fig)

NameError: name 'argmax_val' is not defined