In [1]:
import random
import warnings

# Star import kept first among third-party imports so the explicit names
# imported below always take precedence over anything it pulls in.
from plotnine import *
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotnine as gg
from numpy.random import shuffle
from scipy import special
from scipy.stats import spearmanr

In [2]:
def HRLtask_BayesStick(parameters,numtrials,pval,pswitch,numbandits,agentid):
  """Simulate one sticky Bayesian agent on a cue-switching bandit task.

  On every trial a binary stimulus row (one value per cue) is shown. The agent
  keeps a belief over which cue is currently the correct one, picks a cue via
  a softmax over its log-prior (plus a stickiness bonus on the previously
  chosen cue), responds with that cue's stimulus value (flipped with
  probability ``epsilon``), receives a probabilistic reward and updates its
  belief with Bayes' rule.

  Parameters
  ----------
  parameters : sequence of 3 numbers
      ``[softmaxbeta, epsilon, stick]``.
  numtrials : int
      Number of trials to simulate. Does not need to be a multiple of the
      number of stimulus patterns (5).
  pval : sequence of 2 floats
      Reward probability, indexed by whether the response matched the correct
      cue: ``pval[0]`` for a mismatch, ``pval[1]`` for a match.
  pswitch : float
      Per-trial probability that the correct cue switches (only once more than
      10 trials have elapsed since the last switch); also used as the
      volatility in the agent's belief update.
  numbandits : int
      Number of cues the agent chooses between. Must be between 2 and 3,
      because the built-in stimulus table has 3 columns.
  agentid : any
      Identifier copied into every row of the output.

  Returns
  -------
  pandas.DataFrame
      One row per trial with columns ``agentid, correctcue, rewards, isswitch,
      iscorrectcue, trials, chosenside, chosencue, correctruleiteration, allp,
      beta, epsilon, stickiness, allstims0, allstims1, allstims2``.

  Raises
  ------
  ValueError
      If ``numbandits`` is not supported by the stimulus table.
  """
  softmaxbeta = parameters[0] # softmax inverse temperature
  epsilon = parameters[1]     # probability of flipping the chosen response
  stick = parameters[2]       # bonus added to the log-prior of the last chosen cue

  # Stimulus patterns: one row per pattern, one column per cue.
  stim_table = np.array([[0,1,0],[1,0,1],[0,0,1],[0,1,1],[1,0,0]])
  numcues = stim_table.shape[1]
  if not 2 <= numbandits <= numcues:
    raise ValueError(
        "numbandits must be between 2 and %d, got %r" % (numcues, numbandits))

  # Ceiling division with an integer repeat count: guarantees at least
  # `numtrials` rows even when numtrials is not a multiple of the table length.
  # For exact multiples the array (and hence the shuffle) is unchanged.
  reps = -(-numtrials // len(stim_table))
  stims = np.repeat(stim_table, reps, axis=0)
  shuffle(stims)

  min_trials_before_switch = 10
  trials_since_switch = 0
  cb = 1 # index of the currently correct cue
  prior = np.full(numbandits, 1/numbandits)

  allrewards = []
  allcorrectcues = []
  alltrials = []
  alliscorrectcue = []
  alliters = []
  allindexofselcolor = []
  allchosenside = []
  allp = []
  isswitch = [0] * numtrials

  for i in range(numtrials):
    stim = stims[i]
    W = np.log(prior)

    if i > 0:
      # stickiness: favour the cue chosen on the previous trial
      W[b] = W[b] + stick

    sftmx_p = special.softmax(softmaxbeta * W) # action probabilities
    b = np.random.choice(numbandits, p = sftmx_p) # sampled cue
    s = stim[b]

    # epsilon noise: respond with the opposite side
    if np.random.uniform(0,1,1)[0] < epsilon:
      s = 1-s

    cor = int(s == stim[cb])
    r = int(np.random.uniform(0,1,1)[0] < pval[cor])

    # Likelihood of a reward under "cue n is correct": pval[1] where that
    # cue's stimulus matches the response, pval[0] otherwise.
    likelihood = np.where(stim[:numbandits] == s, pval[1], pval[0])
    if r == 0:
      likelihood = 1-likelihood

    posterior = likelihood*prior
    p = posterior/np.sum(posterior)
    # Next-trial prior mixes in the chance that the correct cue switched.
    prior = (1-pswitch)*p + pswitch*(1-p)/np.sum(1-p)

    # The switch draw is only made once enough trials have elapsed, so the
    # random stream is consumed exactly as before.
    if (trials_since_switch > min_trials_before_switch) and (np.random.uniform(0,1,1)[0] < pswitch):
      trials_since_switch = 1
      other_cues = [x for x in range(numbandits) if x != cb]
      cb = random.choice(other_cues)
      if i < numtrials-1:
        isswitch[i+1] = 1 # the new rule takes effect on the next trial
    else:
      trials_since_switch += 1

    alltrials.append(i)
    # Recorded after the possible switch above, so on a switch trial this is
    # already the cue that will be correct on the next trial.
    allcorrectcues.append(cb)
    alliters.append(trials_since_switch)
    allindexofselcolor.append(b)
    allchosenside.append(s)
    alliscorrectcue.append(cor)
    allrewards.append(r)
    allp.append(p)

  ntr = len(alltrials)
  columns = {"agentid" : [agentid] * ntr,
             'correctcue' : allcorrectcues,
             'rewards': allrewards,
             'isswitch': isswitch,
             'iscorrectcue': alliscorrectcue,
             'trials': alltrials,
             'chosenside': allchosenside,
             'chosencue': allindexofselcolor,
             'correctruleiteration': alliters,
             'allp': allp,
             'beta': [softmaxbeta] * ntr,
             'epsilon': [epsilon] * ntr,
             'stickiness': [stick] * ntr}
  # One stimulus column per cue in the table (allstims0..allstims2).
  for k in range(numcues):
    columns['allstims%d' % k] = stims[:ntr, k].tolist()

  data = pd.DataFrame(columns)

  return data





# Quick smoke test of the simulator with one hand-picked agent.
parameters = [10, 0, 2]   # [softmax beta, epsilon, stickiness]
numtrials = 100
pval = [0.1, 0.9]         # reward probability for [mismatch, match]
pswitch = 0.05
numbandits = 3
agentid = 1
modelname = 'StickyBayes'

data = HRLtask_BayesStick(
    parameters=parameters,
    numtrials=numtrials,
    pval=pval,
    pswitch=pswitch,
    numbandits=numbandits,
    agentid=agentid,
)



In [4]:
warnings.filterwarnings("ignore")

import tqdm

# Task settings shared by every simulated training agent.
N_AGENTS = 10000
numtrials = 2000
pval = [0.1, 0.9]
pswitch = 0.05
numbandits = 3

train_data_list = []
for a in tqdm.tqdm(range(N_AGENTS)):
    # Each agent gets its own uniformly sampled softmax beta, epsilon and
    # stickiness; the loop index doubles as the agent id.
    rand_beta, rand_epsilon, rand_stick = (
        random.uniform(1, 10),
        random.uniform(0, 0.3),
        random.uniform(0, 2),
    )
    parameters = [rand_beta, rand_epsilon, rand_stick]
    data = HRLtask_BayesStick(parameters, numtrials, pval, pswitch, numbandits, a)
    train_data_list.append(data)

# Stack the per-agent frames; each agent keeps its own 0..numtrials-1 index.
train_df = pd.concat(train_data_list)


100%|██████████| 10000/10000 [11:30<00:00, 14.48it/s]


In [5]:
# Save the full training frame (all columns, including the row index) as CSV
# in the current working directory.
train_df.to_csv('10000agent_2000t_StickyBayes_ABCtest_p3.csv')

In [6]:
# Strip the bookkeeping columns, leaving agent id, reward, chosen side, the
# three generating parameters and the three stimulus columns.
columns_to_drop = [
    'allp',
    'correctcue',
    'isswitch',
    'iscorrectcue',
    'trials',
    'chosencue',
    'correctruleiteration',
]
dropped = train_df.drop(columns=columns_to_drop)
dropped.head()

Unnamed: 0,agentid,rewards,chosenside,beta,epsilon,stickiness,allstims0,allstims1,allstims2
0,0,1,0,9.945477,0.289884,1.693297,1,0,1
1,0,1,0,9.945477,0.289884,1.693297,0,0,1
2,0,0,0,9.945477,0.289884,1.693297,0,1,0
3,0,1,1,9.945477,0.289884,1.693297,0,1,1
4,0,0,1,9.945477,0.289884,1.693297,1,0,1


In [7]:
# Write the reduced frame to an HDF5 file under key 'df'; mode='w' overwrites
# any existing file with that name.
dropped.to_hdf('10000agent_2000t_StickyBayes_ABCtest_p3.h5', key='df', mode='w')  

In [None]:
def analyzeSwitch(subdata,numPreSwitchTrials,numPostSwitchTrials):
  """Average accuracy in a window aligned to every rule switch.

  Parameters
  ----------
  subdata : pandas.DataFrame
      Trials of a single agent, in order, with an ``isswitch`` column (truthy
      on switch trials) and an ``iscorrectcue`` column (1/0 accuracy).
  numPreSwitchTrials : int
      Number of trials before each switch to include.
  numPostSwitchTrials : int
      Number of trials from the switch trial onwards to include.

  Returns
  -------
  numpy.ndarray
      1-D array of length ``numPreSwitchTrials + numPostSwitchTrials`` holding
      the mean accuracy at each position relative to the switch. All NaN when
      no switch has a complete window.
  """
  windowLength = numPreSwitchTrials + numPostSwitchTrials
  corr = np.asarray(subdata['iscorrectcue']).astype(int)
  numTrials = corr.shape[0]

  # Keep only switches whose full window lies inside the session. Without the
  # lower bound an early switch would produce negative indices, which wrap
  # around and silently pull in trials from the end of the session.
  switches = [k for k, flag in enumerate(subdata['isswitch'])
              if flag and numPreSwitchTrials <= k <= numTrials - numPostSwitchTrials]

  if not switches:
    return np.full(windowLength, np.nan)

  allswitchaccuracy = np.array(
      [corr[sw - numPreSwitchTrials: sw + numPostSwitchTrials] for sw in switches],
      dtype=float)

  LC = np.nanmean(allswitchaccuracy, 0)

  return LC




# train_df = pd.read_csv("/content/gdrive/My Drive/dl4rl/PRL/behall.csv")
numPreSwitchTrials =4 # number of trials before the switch
numPostSwitchTrials = 10 # number of trials after the switch

allsims=train_df['agentid'].unique()

# Only a subset of agents is analysed; cap it at the number available so the
# loop cannot index past the end of `allsims`.
numAnalyzed = min(50, len(allsims))

# One row per analysed agent. Sizing this by the analysed subset (rather than
# by every agent) keeps the plot and the SEM below consistent with the data
# that was actually computed.
LCall = np.nan*np.ones([numAnalyzed,numPreSwitchTrials+numPostSwitchTrials])
avgLCs = []
for i in range(numAnalyzed):
  minidataa = train_df.loc[train_df['agentid']==allsims[i],:]

  LCall[i]=analyzeSwitch(minidataa,numPreSwitchTrials,numPostSwitchTrials)

  # Positional .iloc[0] reads the agent's first row regardless of how the
  # frame is indexed. The summary statistic is the mean of the aligned curve
  # from position 5 onwards.
  # NOTE(review): with numPreSwitchTrials = 4, position 5 is the trial after
  # the switch trial - confirm that this offset is intended.
  avgLCs.append([minidataa.beta.iloc[0],
                 minidataa.epsilon.iloc[0],
                 minidataa.stickiness.iloc[0],
                 np.mean(LCall[i][5:])])


prepostswitchaverage = np.nanmean(LCall,axis =0) # average across the analysed agents
# SEM uses the number of agents that actually contribute a value at each
# position, not the total number of simulated agents.
numValid = np.sum(~np.isnan(LCall),axis=0)
prepostswitchaverageSEM =np.nanstd(LCall,axis=0)/np.sqrt(np.maximum(numValid,1))
trialsSinceSwitch = np.arange(-numPreSwitchTrials,numPostSwitchTrials) #for x axis, trials before and after the switch


from matplotlib.pyplot import figure

figure(figsize=(6, 7), dpi=80)
# individual agents in grey, group mean with SEM error bars on top
for agentCurve in LCall:
    plt.plot(trialsSinceSwitch,agentCurve,color='grey')


plt.plot(trialsSinceSwitch,prepostswitchaverage)
plt.errorbar(trialsSinceSwitch,prepostswitchaverage,prepostswitchaverageSEM,color='k')
plt.xlabel('trials since switch')
plt.xticks(trialsSinceSwitch)
plt.ylabel('p(Corr)')
plt.title('Aligned learning curve')
plt.yticks([0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1])
# Optional reference lines:
# plt.ylim([0.4,1])
# plt.hlines(0.5, -numPreSwitchTrials, numPostSwitchTrials, colors='r', linestyles='dashed', label='chance')
# plt.hlines(0.8, -numPreSwitchTrials, numPostSwitchTrials, colors='k', linestyles='dashed', label='p(reward)')
# leg = plt.legend(loc='upper right')
# plt.show()


In [None]:


# Relate one generating parameter to the post-switch accuracy summary that was
# collected per agent in `avgLCs` (rows of [beta, epsilon, stickiness, sumstats]).
df = pd.DataFrame(avgLCs, columns=['beta','epsilon','stickiness','sumstats'])

xx = 'beta'
yy = 'sumstats'

# Rank correlation between the parameter and the summary statistic
# (scipy.stats.spearmanr returns the coefficient and its p-value).
r_value, p_value = spearmanr(df[xx], df[yy])

plott = (
    gg.ggplot(df, gg.aes(x = xx, y = yy))
    + gg.geom_point(color = 'blue')
    + gg.stat_smooth(method = 'lm')
    + gg.xlab('True ' + xx)
    + gg.ylab('DL ' + yy )
    + gg.ggtitle('r='+str(np.round(r_value,2))+',p='+str(np.round(p_value,2)))
)


plott

In [None]:

# Distribution of the sampled softmax beta values: a bar at the mean with a
# +/- 1 SD error bar, overlaid with the individual values (horizontally
# jittered so they do not overlap).
# Distinct values of the column stand in for one value per agent.
beta_values = train_df['beta'].unique()

# The size is passed to subplots so that it applies to the axes drawn on; a
# separate figure(...) call would open a second, empty figure instead.
fig, ax = plt.subplots(figsize=(6, 7), dpi=80)

ax.bar(1,
       height=[np.mean(beta_values)],
       yerr=np.std(beta_values),    # error bars
       capsize=12, # error bar cap width in points
       width=0.5,    # bar width
       tick_label=["beta"],
       color=(0,0,0,0),  # face color transparent
       edgecolor=(0, 0, 1, 1),
       )

w =0.9 # total width of the horizontal jitter
ax.scatter(1 + np.random.random(len(beta_values)) * w - w / 2, beta_values, color=(0, 0, 1, 1),alpha=0.5)
# ax.set_ylim([0,12])
ax.set_xlim([0,2])

plt.show()

In [None]:
# Distribution of the sampled epsilon values: a bar at the mean with a
# +/- 1 SD error bar, overlaid with the individual values (horizontally
# jittered so they do not overlap).
# Distinct values of the column stand in for one value per agent.
epsilon_values = train_df['epsilon'].unique()

# The size is passed to subplots so that it applies to the axes drawn on; a
# separate figure(...) call would open a second, empty figure instead.
fig, ax = plt.subplots(figsize=(6, 7), dpi=80)

ax.bar(1,
       height=[np.mean(epsilon_values)],
       yerr=np.std(epsilon_values),    # error bars
       capsize=12, # error bar cap width in points
       width=0.5,    # bar width
       tick_label=["epsilon"],
       color=(0,0,0,0),  # face color transparent
       edgecolor=(0, 0, 1, 1),
       )

w =0.9 # total width of the horizontal jitter
ax.scatter(1 + np.random.random(len(epsilon_values)) * w - w / 2, epsilon_values, color=(0, 0, 1, 1),alpha=0.5)
# ax.set_ylim([0,12])
ax.set_xlim([0,2])

plt.show()

In [None]:
# Distribution of the sampled stickiness values: a bar at the mean with a
# +/- 1 SD error bar, overlaid with the individual values (horizontally
# jittered so they do not overlap).
# Distinct values of the column stand in for one value per agent.
stickiness_values = train_df['stickiness'].unique()

# The size is passed to subplots so that it applies to the axes drawn on; a
# separate figure(...) call would open a second, empty figure instead.
fig, ax = plt.subplots(figsize=(6, 7), dpi=80)

ax.bar(1,
       height=[np.mean(stickiness_values)],
       yerr=np.std(stickiness_values),    # error bars
       capsize=12, # error bar cap width in points
       width=0.5,    # bar width
       tick_label=["stickiness"],
       color=(0,0,0,0),  # face color transparent
       edgecolor=(0, 0, 1, 1),
       )

w =0.9 # total width of the horizontal jitter
ax.scatter(1 + np.random.random(len(stickiness_values)) * w - w / 2, stickiness_values, color=(0, 0, 1, 1),alpha=0.5)
# ax.set_ylim([0,12])
ax.set_xlim([0,2])

plt.show()

In [None]:
import tqdm

# Task settings shared by every simulated validation agent.
N_AGENTS = 3000
numtrials = 2000
pval = [0.1, 0.9]
pswitch = 0.05
numbandits = 3

val_data_list = []
for a in tqdm.tqdm(range(N_AGENTS)):
    # Each agent gets its own uniformly sampled softmax beta, epsilon and
    # stickiness; the loop index doubles as the agent id.
    rand_beta, rand_epsilon, rand_stick = (
        random.uniform(1, 10),
        random.uniform(0, 0.3),
        random.uniform(0, 2),
    )
    parameters = [rand_beta, rand_epsilon, rand_stick]
    data = HRLtask_BayesStick(parameters, numtrials, pval, pswitch, numbandits, a)
    val_data_list.append(data)

# val_df = pd.concat(val_data_list)


In [None]:
# Stack the per-agent validation frames into one frame.
val_df = pd.concat(val_data_list)

# Save the validation set as CSV; the path is built from `modelname`,
# `N_AGENTS` and `numtrials` as currently defined in the notebook.
# NOTE(review): this file name has "{N_AGENTS}_" whereas the test-set file
# uses "{N_AGENTS}agent_" - confirm which one downstream loaders expect.
val_df.to_csv(f'/content/gdrive/MyDrive/dl4rl/HRL/data/{modelname}/{N_AGENTS}_{numtrials}t_{modelname}_validation_ABCtest.csv')

In [None]:
import tqdm

# Task settings shared by every simulated test agent.
N_AGENTS = 3000
numtrials = 2000
pval = [0.1, 0.9]
pswitch = 0.05
numbandits = 3

test_data_list = []
for a in tqdm.tqdm(range(N_AGENTS)):
    # Each agent gets its own uniformly sampled softmax beta, epsilon and
    # stickiness; the loop index doubles as the agent id.
    rand_beta, rand_epsilon, rand_stick = (
        random.uniform(1, 10),
        random.uniform(0, 0.3),
        random.uniform(0, 2),
    )
    parameters = [rand_beta, rand_epsilon, rand_stick]
    data = HRLtask_BayesStick(parameters, numtrials, pval, pswitch, numbandits, a)
    test_data_list.append(data)

# Stack the per-agent frames and write the test set to CSV.
test_df = pd.concat(test_data_list)
test_df.to_csv(f'/content/gdrive/MyDrive/dl4rl/HRL/data/{modelname}/{N_AGENTS}agent_{numtrials}t_{modelname}_test_ABCtest.csv')