In [40]:
import gym
import numpy as np
#from gym import spaces
#from gym.utils import seeding


class NChainEnv(gym.Env):
    """Single-player, push-your-luck dice game exposed with a gym-like API.

    Board layout (``self.state``, a 7x3 float array):

    * Row 0 mirrors the three dice currently on offer (``self.dice``).
    * Rows 1-6 are the columns addressed by a die showing that face value.
      Column 0 holds the locked-in height, column 2 holds the height of the
      temporary "black dot" marker placed during the current turn (0 means
      no marker). Column 1 is never written for these rows.

    Actions are integers 0-5: ``action // 2`` selects which of the three dice
    to use, and an even action means "stay" (lock in the markers and end the
    turn) while an odd action means "go" (keep rolling).

    The game is over once at least three columns have a locked-in height at
    or above their entry in ``self.top``.
    """

    def __init__(self):
        # All attributes are created by reset() so the two can never drift apart.
        self.reset()

    def _roll_dice(self):
        """Roll three six-sided dice and mirror them into row 0 of the board."""
        # Three separate scalar draws keep the dice as plain Python ints.
        self.dice = tuple(np.random.randint(1, 7) for _ in range(3))
        self.state[0] = self.dice

    def step(self, action):
        """Apply one action and return the resulting observation.

        Args:
            action: integer in 0..5 (see the class docstring for the encoding).

        Returns:
            Tuple ``(state, dice, blackDots, turn, reward, done)``. The reward
            is 0 while a turn is in progress, ``+round`` when the player
            stays, ``-round`` on a bust, and ``100 / turn`` on the step that
            finishes the game. Once the game is done, every further call
            returns the frozen state with reward 0.

        Raises:
            AssertionError: if ``action`` is not one of 0..5.
        """
        if self.done:
            return self.state, self.dice, self.blackDots, self.turn, 0, self.done

        assert action in [0, 1, 2, 3, 4, 5], "action must be an integer in 0..5"
        column = self.dice[action // 2]  # face value of the chosen die == board row
        stay = action % 2 == 0

        # Illegal move 1: the column already reached its top, either locked in
        # (column 0) or with this turn's marker (column 2).
        column_full = any(cell >= self.top[column] for cell in self.state[column])
        # Illegal move 2: a new marker would be needed but none are left.
        out_of_markers = self.state[column][2] == 0 and self.blackDots <= 0
        self.bust = bool(column_full or out_of_markers)

        if self.bust:
            # Lose all unlocked progress; penalty equals the rounds played this turn.
            self.state[:, 2] = 0
            self.reward = -self.round
            self.softreset()
            self._roll_dice()
        else:
            # A fresh marker starts from the locked-in height of its column.
            if self.state[column][2] == 0:
                self.state[column][2] = self.state[column][0]
                self.blackDots -= 1
            self.state[column][2] += 1
            self.round += 1
            self.reward = 0
            self._roll_dice()

            if stay:
                # Lock in every active marker, then clear the markers.
                board = self.state[1:]  # view: writes go through to self.state
                active = board[:, 2] > 0
                board[active, 0] = board[active, 2]
                board[active, 2] = 0

                self.reward = self.round
                self.softreset()

                finished = np.count_nonzero(board[:, 0] >= np.asarray(self.top[1:]))
                if finished >= 3:
                    self.done = True
                    self.reward = 100 / self.turn  # arbitrary bonus, larger for faster wins

        return self.state, self.dice, self.blackDots, self.turn, self.reward, self.done

    def softreset(self):
        """End the current turn: count it and restore the per-turn bookkeeping."""
        self.turn += 1
        self.blackDots = 2
        self.bust = False
        self.round = 0

    def reset(self):
        """Start a new game and return the initial observation.

        Returns:
            Tuple ``(state, dice, blackDots, turn, reward, done)``.
        """
        self.state = np.zeros((7, 3))
        # Row 0 holds the dice, so its "top" is set unreachably high to keep it
        # out of the finished-column count and the column-full check.
        self.top = [1000, 6, 6, 6, 6, 6, 6]
        self.blackDots = 2
        self.turn = 0  # completed turns (stays and busts)
        self.round = 0  # successful rolls within the current turn, drives the reward
        self.reward = 0
        self.bust = False
        self.done = False
        self._roll_dice()  # also writes the dice into state[0], matching step()
        return self.state, self.dice, self.blackDots, self.turn, self.reward, self.done

In [41]:
# Instantiate the environment for interactive exploration in the cells below.
env=NChainEnv()

In [42]:
# Inspect the fresh game: board, dice on offer, remaining black dots, turn counter.
env.state, env.dice, env.blackDots, env.turn

(array([[5., 6., 1.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]]),
 (5, 6, 1),
 2,
 0)

In [None]:
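# Action encoding: action // 2 picks the die (0-based), action % 2 == 0 means "stay".
# 0 = die 1, then stay (lock in this turn's progress and end the turn)
# 1 = die 1, then go   (keep rolling)
# 2 = die 2, then stay
# 3 = die 2, then go
# 4 = die 3, then stay
# 5 = die 3, then go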

In [44]:
# 9 is outside the valid action set 0-5, so step() is expected to raise AssertionError.
env.step(9)

AssertionError: 

range(1, 7)

In [39]:
# Play uniformly random actions on a fresh game until it finishes or the
# 5000-step safety cap is hit, then show how many turns it took and the board.
env = NChainEnv()
done, steps = False, 0
while steps < 5000 and not done:
    action = np.random.randint(0, 6)
    done = env.step(action)[-1]  # last element of the returned tuple is the done flag
    steps += 1

print(env.turn, env.state)

28 [[6. 6. 6.]
 [5. 0. 0.]
 [5. 0. 0.]
 [6. 0. 0.]
 [6. 0. 0.]
 [1. 0. 0.]
 [6. 0. 0.]]


In [211]:
# Action 1: use the first die and "go" (continue the turn).
env.step(1)

(array([[0., 0., 0.],
        [4., 0., 0.],
        [6., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [6., 0., 0.],
        [3., 0., 0.]]),
 (2, 2, 2),
 2,
 5000,
 0,
 False)

In [171]:
# Check the board together with the game-over flag.
env.state, env.done

(array([[0., 0., 0.],
        [6., 0., 0.],
        [5., 0., 0.],
        [6., 0., 0.],
        [1., 0., 0.],
        [5., 0., 0.],
        [6., 0., 0.]]),
 True)

In [56]:
]

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 0.]])

In [37]:
# Column 2 of the board: the black-dot marker heights (row 0 is the third die).
env.state[:,2]

array([0., 3., 0., 0., 1., 0., 0.])