In [None]:
import numpy as np
import random

class MABModel(object):
    """Multi-armed bandit whose payoff depends on the arm pulled and the arm pulled before it.

    Every pull runs two Bernoulli trials: one with the arm's own probability
    q[arm] and one with the pairwise probability p[previous arm, arm].  The
    payoff is the fraction of the two trials that succeeded (0, 0.5 or 1).
    """
    q = np.zeros(1) # per-arm bernoulli probabilities (placeholder, replaced in __init__)
    p = np.zeros(1) # bernoulli probabilities indexed [previous arm, arm] (placeholder, replaced in __init__)
    curState = 0 # previous arm pulled

    def __init__(self, K):
        """Create a bandit with K arms whose probabilities are drawn uniformly from [0, 1].

        K -- number of arms; must be at least 1.
        Raises ValueError if K < 1.
        """
        if K < 1:
            raise ValueError("MABModel needs at least one arm, got K=%r" % (K,))
        self.q = np.zeros(K)
        self.p = np.zeros([K, K])
        for i in range(K):
            self.q[i] = random.uniform(0, 1)
            for j in range(K):
                self.p[i, j] = random.uniform(0, 1)
        # The initial "previous arm" has to be a valid index for any K.  The
        # upper bound used to be hard-coded to 2, which indexed outside p for
        # K < 3 and never selected arms 3.. for larger K.  randint is inclusive
        # on both ends, hence K - 1.
        self.curState = random.randint(0, K - 1)

    def getArm(self, state):
        """Pull arm `state`, remember it as the previous arm, and return the payoff.

        state -- index of the arm to pull (0 .. K-1).
        Returns 0.0, 0.5 or 1.0: the fraction of the two Bernoulli trials
        (individual and pairwise) that succeeded.
        """
        temp = 0
        if random.uniform(0, 1) < self.q[state]:
            temp += 1
        if random.uniform(0, 1) < self.p[self.curState, state]:
            temp += 1
        self.curState = state
        # Divide by a float so the 0.5 payoff is not truncated under integer division.
        return temp / 2.0

    def getq(self):
        """Return the array of per-arm probabilities (the live array, not a copy)."""
        return self.q

    def getp(self):
        """Return the K x K array of pairwise probabilities (the live array, not a copy)."""
        return self.p
x = MABModel(3)

# Smoke test: pull each of the three arms ten times and print every payoff.
# (The loop headers had been commented out while the indented print was left
# live, which made the cell fail with an IndentationError.)
for i in range(10):
    for j in range(3):
        print(x.getArm(j))
#print(x.getp())
#print(x.getq())

In [15]:
import numpy as np
import copy


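# p is the n x n array of pairwise payoffs: p[i,j] is added when arm j is pulled right after arm i
# q is the array of individual arm payoffs, indexed by arm
# n is the number of arms (the dimension of q)
# start is the arm pulled just before the sequence begins; it is not part of the result
# length is the number of arms to pull
# returns a sequence of `length` arms that maximizes the total payoff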
def bestSequence(n,p,q,start,length):
    """Return the `length` arms to pull after `start` that maximize the total payoff.

    The payoff of a pull sequence is q[arm] summed over every arm in it plus
    p[prev, arm] summed over every consecutive pair, where the first `prev`
    is `start`.

    Parameters:
        n: number of arms; arms are the integers 0 .. n-1.
        p: 2-D array of pairwise payoffs, read as p[prev, arm] (tuple
           indexing, so a NumPy array is expected).
        q: per-arm payoffs, indexed by arm.
        start: the arm pulled immediately before the returned sequence.
        length: how many arms to return.

    Returns:
        A list of `length` arm indices (empty when length <= 0).  When several
        next arms give the same total, the lowest-numbered one is chosen.

    Raises:
        KeyError: if `start` is not one of 0 .. n-1.
    """
    # best[a] = (pulls, total): the highest-payoff list of the current length
    # that begins with arm a, and its total.  Only one list per leading arm
    # has to be kept, because what follows an arm depends only on that arm.
    best = {}
    for arm in range(n):
        best[arm] = ([arm], q[arm])

    # Each pass prepends one arm.  The lists start at one element, so `length`
    # passes give length + 1 elements: the start arm plus `length` pulls.
    # (Looping length + 1 times returned one arm too many.)
    for _ in range(length):
        longer = {}
        for arm in range(n):
            chosen = None
            chosen_total = None
            for nxt in range(n):
                total = best[nxt][1] + p[arm, nxt] + q[arm]
                # Strict '>' keeps the first (lowest-numbered) arm on ties.
                if chosen is None or total > chosen_total:
                    chosen = nxt
                    chosen_total = total
            # Build the extended list once per arm, only for the winner.
            longer[arm] = ([arm] + best[chosen][0], chosen_total)
        best = longer

    # Drop the leading start arm: the caller has already pulled it.
    pulls, _ = best[start]
    return pulls[1:]


#### Testing
# Draw a random 10-arm problem: pairwise payoffs first, then per-arm payoffs.
n = 10
p = np.random.rand(n, n)
q = np.random.rand(n)

# Show the inputs so the chosen sequence can be checked against them.
print(q)
print(p)

# Best pulls to make after arm 3.
print(bestSequence(n=n, p=p, q=q, start=3, length=1000))

[ 0.59694669  0.19688806  0.33564817  0.06901231  0.47142318  0.54615751
  0.36381843  0.27802346  0.45894593  0.72230667]
[[ 0.05438892  0.04095409  0.83446801  0.34440415  0.80086718  0.27595901
   0.26026603  0.28081894  0.24565316  0.24658011]
 [ 0.36339877  0.59259681  0.2666603   0.13321195  0.27118509  0.39615297
   0.98165093  0.57069038  0.90233633  0.04621909]
 [ 0.58321826  0.42049111  0.2236279   0.74032985  0.15209047  0.9185485
   0.26955491  0.02404623  0.56848995  0.27920103]
 [ 0.25379231  0.68365861  0.82190837  0.76797396  0.38474495  0.65678693
   0.22653555  0.15236813  0.76007924  0.54211259]
 [ 0.08334527  0.1536195   0.73075261  0.44027347  0.06442505  0.03151978
   0.14163496  0.50304269  0.16032351  0.54603802]
 [ 0.72443171  0.81033532  0.85802054  0.7293987   0.44850712  0.1428518
   0.70886892  0.06835604  0.01301718  0.03135295]
 [ 0.65576025  0.09432585  0.15983805  0.86137306  0.68158055  0.46036226
   0.08089415  0.38277727  0.64102905  0.85623637]
 [ 0