In [1]:
# load all fxns and data
%matplotlib qt

from opconNosepokeFunctions import *
from supplementaryFunctions import *
# from scipy.optimize import minimize
# from scipy.stats import entropy
# from scipy.stats import ttest_rel
# from hmmlearn import hmm
# import statsmodels.api as sm
# import statsmodels.formula.api as smf

import ssm
sessdf = pd.read_csv('L:/4portProb_processed/sessdf.csv')

In [285]:
mask = (~sessdf.duplicated(subset = ['trialstart',
                                     'eptime'])) & (sessdf.task.isin(['dms', 'dls'])) & (sessdf.animal == 'Chikorita')

In [289]:
sequences = (sessdf[mask].groupby('session#')
             .filter(lambda x: x.reward.size >= 150)
             .groupby('session#').head(150))

g = sequences.groupby('session#').cumcount()
X = np.array(sequences.set_index(['session#',g])
       .unstack(fill_value=0)
       .stack().groupby(level=0)
       .apply(lambda x: x.port.values.astype(int).tolist())
       .tolist())

In [290]:
model = hmm.CategoricalHMM(n_components=4, n_iter=100, random_state = 42)
model.fit(X)

In [291]:
# Generate samples
X_hat, Z_hat = model.sample(150)

# Plot the sampled data
plt.figure()
plt.plot(X_hat, 'o', color = 'white', label = 'sampled', markeredgecolor = 'k', markeredgewidth = 0.5)
plt.fill_between(np.arange(len(X_hat)), y1 = 1, y2 = 4, alpha = 0.5, where = (Z_hat==0),
                 color = 'xkcd:light blue', label = 'state 0')
plt.fill_between(np.arange(len(X_hat)), y1 = 1, y2 = 4, alpha = 0.5, where = (Z_hat==1),
                 color = 'pink', label = 'state 1')
plt.fill_between(np.arange(len(X_hat)), y1 = 1, y2 = 4, alpha = 0.5, where = (Z_hat==2),
                 color = 'xkcd:light green', label = 'state 2')
plt.fill_between(np.arange(len(X_hat)), y1 = 1, y2 = 4, alpha = 0.5, where = (Z_hat==3),
                 color = 'xkcd:pale yellow', label = 'state 3')
plt.legend()
sns.despine()
plt.title('sampling states and generating actions using HMM')
plt.xlabel('trials')
plt.ylabel('actions')
plt.yticks(np.arange(1,5))

([<matplotlib.axis.YTick at 0x1f1abe4d0f0>,
  <matplotlib.axis.YTick at 0x1f1ef024490>,
  <matplotlib.axis.YTick at 0x1f1abe4fe20>,
  <matplotlib.axis.YTick at 0x1f1abe2ebc0>],
 [Text(0, 1, '1'), Text(0, 2, '2'), Text(0, 3, '3'), Text(0, 4, '4')])

In [195]:
ll, Z_, = model.decode(X)

# plot original state assigned to data
plt.figure()
plt.plot(X.flatten(), 'o')
plt.plot(Z_+1.1, 'og')
for i in range(0, len(X.flatten()), 150):
    plt.axvline(i, color = 'grey', linewidth = 0.2)

In [271]:
plt.figure()
sns.heatmap(model.emissionprob_[:, 1:], cmap = 'Blues', annot = True, fmt = '.2f', xticklabels = np.arange(1,5))
plt.xlabel('Actions')
plt.ylabel('Likelihood of gen from state')

Text(47.25, 0.5, 'Likelihood of gen from state')

In [272]:
plt.figure()
sns.heatmap(model.transmat_, cmap = 'Blues', annot = True, fmt = '.2f')
plt.xlabel('to state')
plt.ylabel('from state')

Text(47.25, 0.5, 'from state')

In [273]:
pd.crosstab(Z_, X.flatten(), normalize = 'index')

col_0,1,2,3,4
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0.020018,0.218562,0.042169,0.719252
1,0.72468,0.023834,0.243587,0.007898


In [274]:
pd.crosstab(Z_hat, X_hat.flatten(), normalize = 'index')

col_0,1,2,3,4
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0.0,0.294118,0.647059,0.058824
1,0.676692,0.015038,0.015038,0.293233


In [276]:
fig = plt.figure(figsize = (8,7))
ax = plt.subplot(111)
filtered = sessdf[mask].groupby('session#').filter(lambda x: x.reward.size >= 150)
for i in range(1,5):
    fewih = filtered[filtered['port']==i].groupby(['session#', 'rewprob']).count()['port']/filtered.groupby(['session#']).size()

    sns.scatterplot(data=filtered[filtered['port']==i].groupby(['session#', 'rewprob']).count(), 
            x = 'rewprob',
            y=filtered[filtered['port']==i]
                    .groupby(['session#', 'rewprob'])
                    .count()['port']/filtered.groupby(['session#']).size(), 
                    alpha=0.1, ax=ax)
    sns.lineplot(data=fewih.groupby('rewprob').mean(), ax=ax, legend = 'auto', linewidth = 2)
#     ax.set_title(box)
    ax.set_ylabel('choice probability')
    ax.set_xlabel('reward percent')
    sns.despine()
    ax.legend(['_','1','_', '_','2', '_', '_','3', '_','_','4', '_'])
plt.suptitle(f'Choice Probability vs. Reward Percent, per arm, animal = {animal}')

plt.tight_layout()
plt.show()

  ax.legend(['_','1','_', '_','2', '_', '_','3', '_','_','4', '_'])
  ax.legend(['_','1','_', '_','2', '_', '_','3', '_','_','4', '_'])
  ax.legend(['_','1','_', '_','2', '_', '_','3', '_','_','4', '_'])
  ax.legend(['_','1','_', '_','2', '_', '_','3', '_','_','4', '_'])
  ax.legend(['_','1','_', '_','2', '_', '_','3', '_','_','4', '_'])
  ax.legend(['_','1','_', '_','2', '_', '_','3', '_','_','4', '_'])
  ax.legend(['_','1','_', '_','2', '_', '_','3', '_','_','4', '_'])
  ax.legend(['_','1','_', '_','2', '_', '_','3', '_','_','4', '_'])
  ax.legend(['_','1','_', '_','2', '_', '_','3', '_','_','4', '_'])
  ax.legend(['_','1','_', '_','2', '_', '_','3', '_','_','4', '_'])
  ax.legend(['_','1','_', '_','2', '_', '_','3', '_','_','4', '_'])
  ax.legend(['_','1','_', '_','2', '_', '_','3', '_','_','4', '_'])
  ax.legend(['_','1','_', '_','2', '_', '_','3', '_','_','4', '_'])
  ax.legend(['_','1','_', '_','2', '_', '_','3', '_','_','4', '_'])
  ax.legend(['_','1','_', '_','2', '_', '_','3',

NameError: name 'animal' is not defined

In [134]:
# functions and classes go here
def fb_alg(A_mat, O_mat, observ):
    # set up
    k = observ.size
    (n,m) = O_mat.shape
    prob_mat = np.zeros( (n,k) )
    fw = np.zeros( (n,k+1) )
    bw = np.zeros( (n,k+1) )
    # forward part
    fw[:, 0] = 1.0/n
    for obs_ind in range(k):
        f_row_vec = np.matrix(fw[:,obs_ind])
        fw[:, obs_ind+1] = f_row_vec * \
                           np.matrix(A_mat) * \
                           np.matrix(np.diag(O_mat[:,int(observ[obs_ind])]))
        fw[:,obs_ind+1] = fw[:,obs_ind+1]/np.sum(fw[:,obs_ind+1])
    # backward part
    bw[:,-1] = 1.0
    for obs_ind in range(k, 0, -1):
        b_col_vec = np.matrix(bw[:,obs_ind]).transpose()
        bw[:, obs_ind-1] = (np.matrix(A_mat) * \
                            np.matrix(np.diag(O_mat[:,int(observ[obs_ind-1])])) * \
                            b_col_vec).transpose()
        bw[:,obs_ind-1] = bw[:,obs_ind-1]/np.sum(bw[:,obs_ind-1])
    # combine it
    prob_mat = np.array(fw)*np.array(bw)
    prob_mat = prob_mat/np.sum(prob_mat, 0)
    # get out
    return prob_mat, fw, bw
 
def baum_welch( num_states, num_obs, observ ):
    # allocate
    A_mat = np.ones( (num_states, num_states) )
    A_mat = A_mat / np.sum(A_mat,1)
    O_mat = np.ones( (num_states, num_obs) )
    O_mat = O_mat / np.sum(O_mat,1)
    theta = np.zeros( (num_states, num_states, observ.size) )
    while True:
        old_A = A_mat
        old_O = O_mat
        A_mat = np.ones( (num_states, num_states) )
        O_mat = np.ones( (num_states, num_obs) )
        # expectation step, forward and backward probs
        P,F,B = fb_alg( old_A, old_O, observ)
        # need to get transitional probabilities at each time step too
        for a_ind in range(num_states):
            for b_ind in range(num_states):
                for t_ind in range(observ.size):
                    theta[a_ind,b_ind,t_ind] = \
                    F[a_ind,t_ind] * \
                    B[b_ind,t_ind+1] * \
                    old_A[a_ind,b_ind] * \
                    old_O[b_ind, int(observ[t_ind])]
        # form A_mat and O_mat
        for a_ind in range(num_states):
            for b_ind in range(num_states):
                A_mat[a_ind, b_ind] = np.sum( theta[a_ind, b_ind, :] )/ \
                                      np.sum(P[a_ind,:])
        A_mat = A_mat / np.sum(A_mat,1)
        for a_ind in range(num_states):
            for o_ind in range(num_obs):
                right_obs_ind = np.array(np.where(observ == o_ind))+1
                O_mat[a_ind, o_ind] = np.sum(P[a_ind,right_obs_ind])/ \
                                      np.sum( P[a_ind,1:])
        O_mat = O_mat / np.sum(O_mat,1)
        # compare
        if np.linalg.norm(old_A-A_mat) < .00001 and np.linalg.norm(old_O-O_mat) < .00001:
            break
    # get out
    return A_mat, O_mat
 
num_obs = 25
observations1 = np.random.randn( num_obs )
observations1[observations1>0] = 1
observations1[observations1<=0] = 0
A_mat, O_mat = baum_welch(2,2,observations1)
print (observations1)
print (A_mat)
print (O_mat)
observations2 = np.random.random(num_obs)
observations2[observations2>.15] = 1
observations2[observations2<=.85] = 0
A_mat, O_mat = baum_welch(2,2,observations2)
print (observations2)
print (A_mat)
print (O_mat)
A_mat, O_mat = baum_welch(2,2,np.hstack( (observations1, observations2) ) )
print (A_mat)
print (O_mat)

[1. 0. 1. 1. 0. 1. 0. 1. 0. 0. 1. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1.
 1.]
[[0.5 0.5]
 [0.5 0.5]]
[[0.56 0.44]
 [0.56 0.44]]
[1. 1. 1. 1. 0. 1. 0. 1. 0. 1. 0. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1.
 1.]
[[0.5 0.5]
 [0.5 0.5]]
[[0.24 0.76]
 [0.24 0.76]]
[[0.5 0.5]
 [0.5 0.5]]
[[0.4 0.6]
 [0.4 0.6]]


In [None]:
################################## simulated etc
# info
arms = 4
params = [(0.1, 0.1)]
m = 7
sessions = 10
trials = 150
tau = 0.1
alpha = 0.1
gamma = 0.5
animal = 'sim'
task = 'sarsa'
sim_qldf_list = []

# reward prob generator
def fxn_str(mean, arms):
    x = np.linspace(1, arms, arms)
    sig = 1.75/2
    amp = 0.7
    vo = 0.1
    gx = (amp*np.exp(-0.5*((x-mean)**2)/(sig**2)))+vo
    return gx

def fxn_unstr(mean, arms):
    x = np.linspace(1, arms, arms)
    sig = 1.75/2
    amp = 0.7
    vo = 0.1
    gx = (amp*np.exp(-0.5*((x-mean)**2)/(sig**2)))+vo
    gx = np.random.permutation(gx)
    return gx

def rewarding(prob, reward_val):
    temp = reward_val
    rand = np.random.uniform(0, 1)
    return temp if rand <= prob else 0


# for paramset in params:  
#     alpha, tau = paramset
    
#     for (animal, task), dataset in sessdf[mask].groupby(['animal','task']):
#         q = np.zeros(arms)
#         if 'unstr' in task:
            
# for sessnum in range(sessions):#dataset.groupby('session#'):

rp = fxn_unstr(np.random.choice(np.arange(1,5)), arms)

# initialize q function
states = [11, 12, 13, 14, 21, 22, 23, 24, 31, 32, 33, 34, 41, 42, 43, 44]
actions = [1, 2, 3, 4]
q = np.ones(shape = (len(states), len(actions)))*(1/len(actions))

# initialize state, use it to pick action 1 and 2
s = np.where(states == np.random.choice(states))[0][0]
a = int(str(states[s[0][0]])[0])
a_ = int(str(states[s[0][0]])[-1])

for trial in range(trials):#group.iterrows():

    # softmax prob of choosing actions
    invtemp=1/tau
    P = np.exp(invtemp*(q)) 
    P = P/ np.sum(P)

    # probability of each action on this trial
    p = P[s, a]

    # rewarded?
    r = rewarding(rp[a], 1)
    
    # get s'
    s_ =  
    
    # choose an action from available states
    a_ind = np.random.multinomial(1, P[s_])
    a = range(arms)[a_ind.nonzero()[0][0]]

    
    # compute q value - sarsa algorithm
    q[s, a] = (1 - alpha)*q[s, a] + alpha*(r + gamma*q[s, a])
    
    # store everything
    sim_qldf_list.append([animal, task, alpha, tau, sessnum, rp[a], a + 1, r, q[s, a]])
    
    # update s_, a_
    s_ = s
    a_ = a

#         else:
            
#             for sessnum, group in dataset.groupby('session#'):
# #                 q = np.zeros(arms)
#                 p = np.zeros(group.shape[0])
#                 rp = fxn_str(np.random.choice(np.arange(1,5)), arms)

#                 for ind, trial in group.iterrows():
#                     # softmax prob of choosing actions
#                     invtemp=1/tau
#                     P = np.exp(invtemp*(q))
#                     P = P/ np.sum(P)

#                     # which action on this trial
#                     a_ind = np.random.multinomial(1, P)
#                     a = range(arms)[a_ind.nonzero()[0][0]]

#                     # probability of each action on this trial
#                     p = P[a]

#                     # rewarded?
#                     r = rewarding(rp[a], 1)

#                     # compute q value
#                     q[a] = q[a] + alpha*(r - q[a])
                    
#                     # store everything
#                     sim_qldf_list.append([animal, task, alpha, tau, sessnum, rp[a], ind, a + 1, r, q[a]])
    
#         print('iter done,', paramset, task, animal)
sim_qldf = pd.DataFrame(sim_qldf_list, columns=['animal', 'task', 'alpha',
                                                'tau', 'session#', 'rewprob',
                                                'port', 'reward', 'qvalue'])