### Tamagotchi MDP
pam o.p. 2018
Not a real tamagotchi, but a simple version of one

In [74]:
# imports
from random import randint
import numpy as np
import random
import datetime # for limiting calculation to wall clock time
from math import log, sqrt

In [75]:
# tamagotchi class adapted from https://github.com/bitterfly/tamagotchi/blob/master/tamagotchi/core/tamagotchi.py

class Tamagotchi:
    def __init__(self):
        self.stats = {"food": 100, "happiness": 100, "hygiene": 100,
                 "health": 100, "energy": 100}
        self.is_sleeping = False
        self.is_dead = False
        self.is_playing = False
        self.is_sick = False
        self.number_of_poo = 0
        self.money = 0

    #Makes sure the statistic isn't below 0 or above 100
    def constrain(self, value):
        value = min(100, value)
        value = max(0, value)
        return value

    #Constrains all the stats
    def constrain_stats(self):
        for statistic, value in self.stats.items():
            self.stats[statistic] = self.constrain(value)

    #Takes a dictionary with statistics and adds each value
    #to the tamagotchi statistics
    def apply(self, item):
#         for statistic, value in self.stats.items():
#             self.stats[statistic] += item[statistic]
        self.stats[item['stats']] += item['effect']
        self.constrain_stats()

    #Takes a statistic and decreases it to zero in "full hours" time
    def decrease_to_minimum(self, statistic, full_hours, time_given):
        self.stats[statistic] -= (time_given * 10) / (full_hours * 36)

    #Takes a statistic and increases it to max in "full hours" time
    def increase_to_maximum(self, statistic, full_hours, time_given):
        self.stats[statistic] += (time_given * 10) / (full_hours * 36)

    #Generates random sickness and poo
    def random_event(self):
        if (not self.is_playing and not self.is_sleeping):
            random_number = randint(0, 600)
            if random_number == 0:
                self.is_sick = True
            if random_number == 1:
                self.number_of_poo = min (self.number_of_poo + 1, 4)

    #Used in mainwindow - removes sickness
    def cure(self):
        self.is_sick = False

    #The function witch decreases all the stats every second
    #or is called when tamagotchi is sleeping
    def second_pass(self, seconds=1):
        "Докато спиш всички статове падат за 8 часа, освен сънят, който се възстановява"
        "As long as you sleep, all the stats fall for 8 hours, except for the sleep that is recovering"
        if self.is_sleeping:
            self.increase_to_maximum("energy", 8, seconds)
            self.decrease_to_minimum("happiness", 12, seconds)
            self.decrease_to_minimum("hygiene", (12 - self.number_of_poo), seconds)
            self.decrease_to_minimum("food", 12, seconds)
            
            if self.stats["energy"] > 70:
                self.is_sleeping = False
        else:
            "Докато играеш, повечето статистики падат по-бързо."
            "As you play, most stats fall faster"
            if self.is_playing:
                self.decrease_to_minimum("energy", 3, seconds)
                self.decrease_to_minimum("hygiene", 3, seconds)
                self.decrease_to_minimum("food", 4, seconds)
                self.increase_to_maximum("happiness", 1, seconds)
                self.money += seconds # pam added
            else:
                self.decrease_to_minimum("energy", 4, seconds)
                self.decrease_to_minimum("hygiene", 4 / (self.number_of_poo + 1), seconds)
                self.decrease_to_minimum("food", 4, seconds)
                self.decrease_to_minimum("happiness", 4, seconds)

        if (self.stats["happiness"] <= 50 or
               self.stats["hygiene"] <= 50):
                self.is_sick = True
        # add that poo around acts like sickness
        if self.is_sick or (self.number_of_poo >=1):
            self.decrease_to_minimum("health", 3, seconds)

        self.constrain_stats()

        self.random_event()

        if (self.stats["food"] == 0 or self.stats["health"] == 0):
            self.is_dead = True
        
        #add (?) that if energy is <=20 then goes to sleep
        if self.stats["energy"] <=20:
            self.is_sleeping = True
#         elif self.stats["energy"]

In [87]:
# game class
# gaining money to buy items in store can be simple time cost/money reward exchange
# agent decides when to gain money, and then which items to buy/apply to tamagotchi
class Tamagotchi_Game():
    def __init__(self, tamagotchi):
        self.tamagotchi = tamagotchi
        self.time = 0 #seconds
        self.time_passing = False
        self.in_store = False
    
    # i need to wrap all the tamagotchi fxns into the structure of the game.
    # e.g. the items that can be bought at the store, and how they cause .apply() or .cure() to the tama
    # depending on the item
    
    # start game 
    def start(self):
        self.time_passing = True
        return self.tamagotchi
        
    # currently, this is sorta useless; don't need the pomdp to be able to pause the game
    # but would be useful for a human playing the game to take breaks
    def pause(self):
        self.time_passing = False
        return self.tamagotchi
        
    #The function is called when an item is chosen and
    #applies its statistics
    def buy_item(self, item):
        self.tamagotchi.apply(item)
        self.tamagotchi.money -= item["price"]
        return self.tamagotchi
        
    # given current tamagotchi "state" and action "action", what would the next tama state be?
    def next_state(self, state, action):
        
        # if time is passing
        if self.time_passing:
            # time passes - currently same amount for each action but could change this 
#             self.tamagotchi.second_pass() # does this need to be state.second_pass()
            self.time +=1
            
            if self.in_store:
                self.tamagotchi.is_playing = False
                self.tamagotchi.second_pass() # does this need to be state.second_pass()
                
                if action == 'nothing':
                    self.in_store = False
                    return state
                elif action == 'coffee':
                    return self.buy_item({"stats":"energy","price":30,"effect":5})
                elif action == 'snack':
                    return self.buy_item({"stats":"food","price":30,"effect":5})
                elif action == 'clean':
                    self.tamagotchi.number_of_poo -= 1
                    return self.buy_item({"stats":"hygiene","price":50,"effect":5})
                elif action == 'medicine':
                    self.tamagotchi.cure()
                    return self.buy_item({"stats":"health","price":80,"effect":10})
                else:
                    raise Exception('Invalid action while in store')
            
            if action == 'play':
                self.tamagotchi.is_playing = True
                self.tamagotchi.second_pass() # does this need to be state.second_pass()
                # if the tama is sleeping, playing won't earn any money - tama needs to be awake
                return self.tamagotchi
            
            # "walk" to the store (takes one turn)
            if action == 'store':
                self.tamagotchi.is_playing = False
                self.in_store = True
                self.tamagotchi.second_pass()
                return self.tamagotchi
            
            if action == 'nothing':
                self.tamagotchi.is_playing = False
                self.tamagotchi.second_pass()
                return self.tamagotchi
                
        else: # if paused
            return state
            
                
        
    # Take a sequence of game states representing the full game history, and return the full list
    # of actions that are legal actions
    def legal_actions(self, state_history): 
        # this method is important for MDPs/games where the actions might change based on the game state
        # for instance, in checkers, if player put a piece on square A4, no other piece can move to A4
        
        # Should the MCTS store as state history the tamagotchi at each time point? so saving n tamagotchi
        # objects for n timepoints... so then legal_actions could just take recent state and might depend
        # on whether the tamagotchi is alive or dead, asleep or not, sick or not, etc... 
        # but i don't think this is a problem that depends on prev states (just need to call self.tamagotchi)'
        tama = state_history[-1]
        
        if self.time_passing and not tama.is_dead:
            if self.in_store:
                if tama.money >= 3:
                    return['coffee','snack','nothing']
                elif tama.money >= 5:
                    return['coffee','snack','clean','nothing']
                elif tama.money >= 8:
                    return['coffee','snack','clean','medicine','nothing']
                else: # too poor to shop :(
                    return['nothing']
            else: 
                return ['play','store','nothing']
        else:
            return ['nothing']
    
    # Should this be the length of state_history (how long tama alive for)? or is it trial by trial rwd?
    def reward(self, state_history):
        done = False
        if self.tamagotchi.is_dead:
            reward = 0 # you killed the tama :'(
            done = True
        else:
            # reward is how long you kept the tama alive
            reward = len(state_history)
        return reward, done
    
    def print_tama(self):
        print("Time:",self.time," seconds elapsed")
        if self.tamagotchi.is_dead:
            print("Tama is dead!")
    
        print("TAMA STATS: ", self.tamagotchi.stats)
        if self.tamagotchi.is_sleeping:
            print("Tama is asleep.")
        else:
            print("Tama is awake.")
            if self.tamagotchi.is_playing:
                print("Tama is playing.")
            if self.tamagotchi.is_sick:
                print("Tama is sick.")
        print("Number of poo:",self.tamagotchi.number_of_poo)
        print("Money:",self.tamagotchi.money)
        if self.in_store:
            print("You're in the store, so you can buy an item if you want.")
        else:
            print("You're not in the store, so you can play, do nothing, or travel to the store.")
                
        print("\n")
        
    

In [88]:
tama = Tamagotchi()
game = Tamagotchi_Game(tama)
s = game.start()
game.print_tama()

s1 = game.next_state(s,'play')
s2 = game.next_state(s1,'play')
s3 = game.next_state(s2,'play')
s4 = game.next_state(s3,'play')
s5 = game.next_state(s4,'store')
game.print_tama()

game.reward([s1,s2,s3,s4,s5])

Time: 0  seconds elapsed
TAMA STATS:  {'health': 100, 'energy': 100, 'happiness': 100, 'hygiene': 100, 'food': 100}
Tama is awake.
Number of poo: 0
Money: 0
You're not in the store, so you can play, do nothing, or travel to the store.


Time: 5  seconds elapsed
TAMA STATS:  {'health': 100, 'energy': 99.56018518518518, 'happiness': 99.93055555555556, 'hygiene': 99.56018518518518, 'food': 99.65277777777779}
Tama is awake.
Number of poo: 0
Money: 4
You're in the store, so you can buy an item if you want.




(5, False)

In [89]:
class MCTS(object):
    
    def __init__(self, tamagotchi_game, **kwargs):
        self.game = tamagotchi_game
        self.states = []
        seconds = kwargs.get('time',30)
        self.calculation_time = datetime.timedelta(seconds=seconds)
        self.max_moves = kwargs.get('max_moves')
        self.C = kwargs.get('C', 1.4)
        self.rewards = {}
        self.plays = {}
        
    # Take a game state and append it to the history
    def update(self,state):
        self.states.append(state)
        
    # AI calculates best move and returns it
    def get_play(self):
        self.max_depth = 0
        state = self.states[-1] # last state
        legal = self.game.legal_actions(self.states[:])
        
        if not legal:
            return
        if len(legal)==1:
            return legal[0]
        
        games = 0 # counter for number of games simulated (e.g., number of times "run_sim" is called)
        
        # begin the tree! keep track of time so do not exceed computation time limit
        begin = datetime.datetime.utcnow()
        while (datetime.datetime.utcnow() - begin) < self.calculation_time:
            self.run_simulation()
            games+=1
        # may need for debugging:
        # self.print_tree()
        
        # makes [(state, action1),...,(state, action_n)] for n legal actions
        moves_states = [(state, a) for a in legal]
        
        # display number of calls of 'run_simulation' and the time elapsed
        print('Num sims run:',games, '  Time elapsed:', datetime.datetime.utcnow() - begin)
        
        # pick the move with the highest average reward
        percent_wins, move = max((self.rewards.get((s,a), 0) / self.plays.get((s,a), 1), a)
                                for s, a in moves_states)
        
        # display the stats for each possible play
        # or maybe not if there are lotta actions
        # **** also this might be wrong, reconsider this code for the tamagotchi
        for x in sorted(
            ((100 * self.rewards.get((s,a), 0) / self.plays.get((s,a), 1),
             self.rewards.get((s,a), 0), self.plays.get((s,a), 0), 
             a)
             for s,a in moves_states),
            reverse=True
        ):
            print("{3}: {0:.2f}% ({1} / {2})".format(*x))
        
        # print the max depth tree search
        print("Maximum depth search:", self.max_depth)
        
        # return the move chosen
        return move
        
        
    # this don't do anything yet
    def print_tree(self):
        board = self.game.tamagotchi_game
        pass
    
    # play out a "random" game from the current position, then update stats with result
    def run_simulation(self):
        plays, rewards = self.plays, self.rewards
        
        visited_qs = set()
        states_copy = self.states[:]
        state = states_copy[-1]
        
        expand = True # you only expand once #YOEO
        for t in range(self.max_moves):
            legal = self.game.legal_actions(states_copy)  # get valid actions
            
            moves = [(state, a) for a in legal]
            
            if all(plays.get((s,a)) for s,a in moves):
                # if we have statistics on all legal moves, use them.
                # upper confidence bound (UCB) algorithm
                
                ## *** might need to change the structure of plays, not sure if this is
                ## doing the right thing re: mean payouts
                log_total = log(sum(plays[(s,a)] for s,a in moves))
                
                value, action = max(((rewards[(s,a)] / plays[(s,a)]) + 
                                 self.C * sqrt(log_total / plays[(s,a)]), a)
                                for s,a in moves)
            else:
                # if we don't have stats on all legal moves, randomly pick one
                action = random.choice(legal) 
                
            # if we are in the expand phase and this is a new state-action pair
            if expand and (state, action) not in self.plays: 
                expand = False # you only expand once so this is it
                self.plays[(state, action)] = 0 # initialize
                self.rewards[(state, action)] = 0
                if t > self.max_depth:
                    self.max_depth = t
                    
            visited_qs.add((state,action)) # add this state as visited
            
            state = self.game.next_state(state,action) # get next state
            states_copy.append(state) # record
            
            reward, done = self.game.reward(states_copy) # compute reward if any
            #print(states_copy, reward, done)
            if done: 
                break
        
        # print(visited_states, reward)
        for q in visited_qs: # for each visited state
            if q not in self.plays: # if we don't have stats on this state yet
                continue
            self.plays[q]+=1 # increase plays
            self.rewards[q]+=reward # add up the reward you got
        
        

In [90]:
# Initialize the tamagotchi
tama = Tamagotchi()
game = Tamagotchi_Game(tama)
state = game.start()
print("INITIAL STATE: ")
game.print_tama() # initial tamagotchi state

tree = MCTS(game, time=1., C=1.4, max_moves = 100)

action_seq = []

while game.time < 100: # play for a certain amount of time (better rule?)
    tree.update(state)
    action = tree.get_play()
    action_seq.append(action)
    state = game.next_state(state,action)
    print("Taking action %s. Tama update:"% (action))
    game.print_tama()
    if game.time % 10 == 0:
        game.print_tama()


INITIAL STATE: 
Time: 0  seconds elapsed
TAMA STATS:  {'health': 100, 'energy': 100, 'happiness': 100, 'hygiene': 100, 'food': 100}
Tama is awake.
Number of poo: 0
Money: 0
You're not in the store, so you can play, do nothing, or travel to the store.


Num sims run: 31431   Time elapsed: 0:00:01.000020
play: 10100.00% (909 / 9)
store: 9090.00% (909 / 10)
nothing: 4.18% (1313 / 31427)
Maximum depth search: 3
Taking action play. Tama update:
Time: 33167  seconds elapsed
Tama is dead!
TAMA STATS:  {'health': 0, 'energy': 68.66898148149909, 'happiness': 0, 'hygiene': 0, 'food': 0}
Tama is asleep.
Number of poo: 4
Money: -11
You're not in the store, so you can play, do nothing, or travel to the store.


