In [64]:
#simulating with the blocks paradigm

#pamop @nyu.edu
#nov 2018

# imports
from random import randint
import numpy as np
import random
import datetime # for limiting calculation to wall clock time
import math
import copy
import matplotlib.pyplot as plt
import pdb

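# State index -> block arrangement. A comma separates stacks; within a stack,
# each block sits on the block written to its right (e.g. 'gb' = green on blue).
#0 r,g,b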
#1 r,gb
#2 r,bg
#3 g,rb
#4 g,br
#5 b,rg
#6 b,gr
#7 rgb
#8 rbg
#9 grb
#10 gbr
#11 brg
#12 bgr

In [65]:
#transitions
# in form of a transition function: 

class BlockGame():
    """Three-block stacking task with put / take / ask / final-test actions.

    States are integer indices 0-12 into ``StateNames``. In a state name a
    comma separates stacks, and within a stack each block sits on the block
    written to its right (so 'r,gb' is red alone, with green on blue).

    An action is either the integer 3 (final test) or a list ``[kind, x, y]``
    where kind is 0 (put x on y), 1 (take x off y) or 2 (ask "is x on y?")
    and x, y are colour letters from ``ColorsShort``.
    """

    def __init__(self,target):
        """Store labels, reward constants and the goal.

        target: goal state index, or a collection of acceptable goal state indices.
        """
        self.Actions = ["Put x on y.", "Take x off y.", "Is x on y?", "Final test"]
        self.ActBrief = ["put","take","ask","test"]
        self.Observations = ['no','yes']
        self.Colors = ['red','green','blue']
        self.ColorsShort = ['r','g','b']
        self.StateNames = ['r,g,b','r,gb','r,bg','g,rb','g,br','b,rg','b,gr','rgb','rbg','grb','gbr','brg','bgr']
        # reward dynamics:
        self.ActionCost = -1
        self.CorrectRwd = 100
        self.IncorrectRwd = -80
        self.IllegalActionPenalty = 5 # multiplier applied to ActionCost for illegal actions

        self.target = target

    def _action_lists(self):
        """Return (put_acts, take_acts, ask_acts, check) over every ordered pair of distinct colours."""
        pairs = [(x, y) for x in self.ColorsShort for y in self.ColorsShort if x != y]
        put_acts = [[0, x, y] for x, y in pairs]
        take_acts = [[1, x, y] for x, y in pairs]
        ask_acts = [[2, x, y] for x, y in pairs]
        check = [3] # submit for checking if correct
        return put_acts, take_acts, ask_acts, check

    def _is_target(self, state):
        """True when state matches the target (a single index or a collection of indices)."""
        try:
            return state in self.target
        except TypeError:
            # target is a single, non-iterable value (e.g. an int): compare directly
            return state == self.target

    # transition function
    # state is 0 to 12
    def next_state(self, state, action):
        """Return the state index reached by taking ``action`` in ``state``.

        Final-test, ask and illegal actions leave the state unchanged
        (illegal actions are only penalised through the reward function).
        """
        # actions that never move blocks: final test or ask
        if action == 3 or action[0]==2:
            return state

        # if you chose an "illegal" action, the state doesn't change
        if action not in self.legal_actions_state(state):
            return state

        # from here on the action is a legal put/take of the form [kind, x, y]
        kind, x, y = action
        s = self.StateNames[state]

        if kind==1: # take x off y
            if len(s)==4: # e.g., 'a,bc' -> every block on its own
                return 0 # index of 'r,g,b'
            elif len(s)==3: # e.g., 'xyz' -> 'x,yz' (only the top block can be taken off)
                return self.StateNames.index(s[0] + ',' + s[1:])
            else:
                raise Exception('Can only legally take a block off another block if length of state string is 3 or 4')
        elif kind==0: # put x on y
            if len(s)==5:
                # all blocks are separate: the colour not named in the action stays alone
                (solocolor,) = set(self.ColorsShort) - {x, y}
                return self.StateNames.index(solocolor + ',' + x + y)
            elif len(s)==4:
                # the lone block goes on top of the pair: drop the comma
                return self.StateNames.index(s[0] + s[2:])
            else:
                raise Exception('Can only put a block on another block if length of state string is 4 or 5')
        else:
            raise Exception('Err in next_state fxn: First value in action should be 0, 1, or 2.')


    # observation function
    def observation(self, state, action):
        """Return the student's response to ``action`` in ``state``.

        Ask actions return 1 (yes) when x is written immediately left of y in
        the state name, else 0 (no). Put, take and final-test actions return []
        (no response). Any other action kind returns None.
        """
        s = self.StateNames[state]
        if action == 3:
            return []
        kind = action[0]
        if kind == 2: # ask is x on y
            # x is on y exactly when x is written immediately left of y
            return 1 if s.index(action[2]) - s.index(action[1]) == 1 else 0
        if kind == 1 or kind == 0:
            return []
        return None

    def reward(self, state, action):
        """Return ``(reward, done)`` for taking ``action`` in ``state``.

        Illegal actions cost IllegalActionPenalty * ActionCost. The final test
        ends the episode with CorrectRwd or IncorrectRwd. Every other legal
        action costs ActionCost.
        """
        # punishment for choosing an illegal action
        if action not in self.legal_actions_state(state):
            return self.IllegalActionPenalty*self.ActionCost, False

        if action == 3: # check
            # target may be an int or a collection of ints; _is_target handles both
            if self._is_target(state):
                return self.CorrectRwd, True
            return self.IncorrectRwd, True

        if action[0] in (0, 1, 2): # put, take or ask
            return self.ActionCost, False

    def legal_actions_state(self,state):
        """Return the list of actions that are legal in ``state``.

        Raises Exception when ``state`` is not one of the 13 state indices.
        """
        put_acts, take_acts, ask_acts, check = self._action_lists()

        # look the state up by equality so only the 13 known indices are accepted
        name = None
        for idx, candidate in enumerate(self.StateNames):
            if state == idx:
                name = candidate
                break
        if name is None:
            raise Exception("in legal_actions_state method, can't have a state besides the 13 allowed. State name is ",state)

        if len(name) == 5: # 'a,b,c': any block can be put on any other
            moves = put_acts
        elif len(name) == 4: # 'a,bc': put a on b, or take b off c
            moves = [[0, name[0], name[2]], [1, name[2], name[3]]]
        else: # 'abc': only the top block can be taken off
            moves = [[1, name[0], name[1]]]
        return moves + ask_acts + check

    # this is wrong but fine
    def legal_actions(self,history):
        """Return every action regardless of ``history`` (which is ignored)."""
        put_acts, take_acts, ask_acts, check = self._action_lists()
        return put_acts + take_acts + ask_acts + check

    def G_model(self,state,action):
        """Generative model: return ``(next_state, observation, reward, done)`` for one step."""
        s = self.next_state(state,action)
        obs = self.observation(state,action)
        rwd, done = self.reward(state,action) # note that this should be more like immediate reward of state, not long-term?
        return s, obs, rwd, done

    # prior might be different on a given trial! come up with ways to manipulate this
    def sample_prior(self):
        """Sample a state index uniformly at random from all states."""
        return np.random.randint(len(self.StateNames))

    def print_state(self,state):
        """Print the name of ``state``."""
        print(self.StateNames[state])
        return
        
    

In [90]:
# Make an interactive game (so I can play it here in input/output)
# Make an interactive game (so I can play it here in input/output)
def play_blocks_game(debug=0):
    """Play the blocks game interactively through input() and print().

    debug: when truthy, the hidden state is revealed, per-round state/reward
        information is printed, and an illegal action is rejected with a
        re-prompt. When falsy, an illegal action silently leaves the state
        unchanged and is only penalised in the score.
    """
    def read_int(prompt):
        # Re-prompt instead of crashing with ValueError on non-numeric input.
        while True:
            reply = input(prompt)
            try:
                return int(reply)
            except ValueError:
                print("Please enter a number.")

    def read_action():
        # Returns 3 for the final test, otherwise the full action [type, x, y].
        act_type = read_int(prompta)
        if act_type == 3:
            return 3
        # blocks to replace "x" and "y" in action: red, blue, green
        x = input(promptx).strip().lower()
        y = input(prompty).strip().lower()
        return [act_type, x, y]

    play = read_int("Play game? 0 = no, 1 = yes\n")

    if not play:
        return

    # create some instructions: "To finish the game, choose the test action."
    print("Welcome to the blocks game! Your goal is to teach a correct stack of blocks to a student.")
    print("However, there are some rules about how you can communicate with the student.")
    print("Let's play!")

    # Starting structure is any non-complete arrangement (states 0-6);
    # target structure is any full stack (states 7-12). The upper bounds are
    # exclusive, so they must be 7 and 13 to include 'b,gr' and 'bgr'.
    state = random.choice(range(0,7))
    target = random.choice(range(7,13))

    game = BlockGame(target)
    r = 0 # initial reward at zero.

    # Do you want it to be an omnipotent game or no? (E.g., you know the true state of the student blocks)
    if debug:
        print("Spoiler alert: The current state is", game.StateNames[state])

    print("The target state is",game.StateNames[target])

    prompta = "Choose action:"+"\n[0] Put x on y"+"\n[1] Take x off y"+"\n[2] Is x on y?"+"\n[3] Final test (ends game)\n"

    promptx = "Choose x:"+"\n[r] Red"+"\n[g] Green"+"\n[b] Blue\n"

    prompty = "Choose y:"+"\n[r] Red"+"\n[g] Green"+"\n[b] Blue\n"

    # LOOP: repeat until the test action is taken
    while True:
        a = read_action()
        if a == 3:
            break

        illegal = a not in game.legal_actions_state(state)

        # option 1: tell user that action is illegal and let them choose again
        # (to help debug legal_actions)
        if debug:
            while illegal:
                print("You chose an illegal action, please try again.")
                a = read_action()
                if a == 3:
                    break
                illegal = a not in game.legal_actions_state(state)
            if a == 3:
                # final test chosen while retrying: end the game rather than
                # charging a penalty for the earlier illegal action
                break

        # option 2: give no information and just don't change the state (as in real exp)
        if illegal: # (This condition can only be met in the non-debug case)
            next_state = state
            obs = []
        else: # action has effect on state OR returns observation
            next_state = game.next_state(state, a)
            obs = game.observation(state, a)

        if obs == []:
            print("The student remained silent.")
        elif obs == 0:
            print("The student said 'No.'")
        elif obs == 1:
            print("The student said 'Yes.'")

        # reward accumulates
        rwd, done = game.reward(state, a)
        r = r+rwd

        if debug:
            print("Moving to next state:",game.StateNames[next_state])
            print("You gained",rwd,"on this round, for a total score of",r,"so far.")

        state = next_state

    # outside of loop: after test action, check if final orientation is correct
    if state == target:
        print("Congratulations! You achieved the goal target state of",game.StateNames[target])
        print("Your total reward was:",r)
    else:
        print("Game over! You lose. You did not achieve the goal target state of",game.StateNames[target])
        print("Instead, your final state was",game.StateNames[state])

In [91]:
# Next step: add the reward function (e.g., a time cost). But first, let's see if the game works!
# The argument 1 turns on debug mode.
play_blocks_game(1)

Play game? 0 = no, 1 = yes
1
Welcome to the blocks game! Your goal is to teach a correct stack of blocks to a student.
However, there are some rules about how you can communicate with the student.
Let's play!
Spoiler alert: The current state is g,br
The target state is brg
Choose action:
[0] Put x on y
[1] Take x off y
[2] Is x on y?
[3] Final test (ends game)
1
Choose x:
[r] Red
[g] Green
[b] Blue
b
Choose y:
[r] Red
[g] Green
[b] Blue
r
The student remained silent.
Moving to next state: r,g,b
You gained -1  on this round, for a total score of -1  so far.
Choose action:
[0] Put x on y
[1] Take x off y
[2] Is x on y?
[3] Final test (ends game)
0
Choose x:
[r] Red
[g] Green
[b] Blue
r
Choose y:
[r] Red
[g] Green
[b] Blue
g
The student remained silent.
Moving to next state: b,rg
You gained -1  on this round, for a total score of -2  so far.
Choose action:
[0] Put x on y
[1] Take x off y
[2] Is x on y?
[3] Final test (ends game)
0
Choose x:
[r] Red
[g] Green
[b] Blue
g
Choose y:
[r] Red
[