In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Task dimensions used by the model-fitting loop further down.
Narms, Nblocks, Ntrials = 2, 4, 100

# Behavioural data: the chosen arm and the obtained reward, as flat numpy arrays.
# NOTE(review): presumably one row per trial, blocks stored one after another -- confirm against the CSV.
data = pd.read_csv('./multiArmedSimulationData.csv')
choices, rewards = (data[column].to_numpy() for column in ('choice', 'reward'))

# Parameter grid: alpha is the Q-value update step size (0.1 .. 0.9),
# beta is the multiplier applied to the Q-values inside the softmax (1 .. 10).
alphas = [step / 10 for step in range(1, 10)]
betas = list(range(1, 11))

In [None]:
# Grid search: for every (alpha, beta) combination, replay the recorded choices
# through a Q-learning model with softmax choice probabilities and sum the log
# probability the model assigns to each choice that was actually made.
#
# Result: `all_likelihoods` holds one summed log likelihood per combination, in
# alpha-major order (all betas for alphas[0], then all betas for alphas[1], ...).
#
# NOTE(review): assumes the rows of `choices` / `rewards` are ordered block by
# block with Ntrials rows per block -- confirm against the data file.
n_expected = Nblocks * Ntrials
if len(choices) < n_expected or len(rewards) < n_expected:
    raise ValueError(
        f"expected at least {n_expected} trials (Nblocks * Ntrials), "
        f"got {len(choices)} choices and {len(rewards)} rewards"
    )

all_likelihoods = []

for alpha in alphas:
    for beta in betas:
        log_likelihood = []

        for block in range(Nblocks):

            # Q-values start from 0.5 for every arm at the beginning of each block
            Qval = np.repeat(0.5, Narms)

            # position of this block's first trial in the flat data arrays;
            # without this offset every block would re-read trials 0..Ntrials-1
            block_start = block * Ntrials

            for trial in range(Ntrials):
                idx = block_start + trial

                # log of the softmax choice probabilities; shifting by the max
                # leaves the probabilities unchanged but avoids overflow in exp
                # and log(0) for large beta * Q
                scaled = beta * Qval
                scaled = scaled - np.max(scaled)
                log_p = scaled - np.log(np.sum(np.exp(scaled)))

                # choices are shifted down by one to index the Q-value array
                # NOTE(review): presumably arms are coded 1..Narms in the data -- confirm
                choice = choices[idx] - 1
                log_likelihood.append(log_p[choice])

                # move the chosen arm's Q-value towards the obtained reward
                reward = rewards[idx]
                Qval[choice] = Qval[choice] + alpha*(reward - Qval[choice])

        all_likelihoods.append(np.sum(log_likelihood))

In [None]:
# Heat map of the summed log likelihood over the (alpha, beta) grid.
# all_likelihoods is alpha-major, so it is first shaped (alpha, beta) and then
# transposed: rows (y axis) are betas, columns (x axis) are alphas.
grid_shape = (len(alphas), len(betas))
prob_matrix = np.reshape(all_likelihoods, grid_shape).T

fig, ax = plt.subplots()
c = ax.pcolormesh(
    prob_matrix,
    cmap='viridis',
    edgecolors='k',
    linewidths=4,
)

# ticks sit at the centre of each cell (cell i spans i .. i+1)
ax.set_xticks(0.5 + np.arange(len(alphas)))
ax.set_xticklabels(alphas)
ax.set_yticks(0.5 + np.arange(len(betas)))
ax.set_yticklabels(betas)

ax.set_xlabel('alpha')
ax.set_ylabel('beta')
ax.set_title('Log likelihood of alpha and beta parameter combinations')
fig.colorbar(c, ax=ax)

# render the figure
plt.show()