### Tamagotchi MDP
pam o.p. 2018
Not a real tamagotchi, but a simple version of one

In [1]:
# imports
from random import randint
import numpy as np
import random
import datetime # for limiting calculation to wall clock time
from math import log, sqrt
import copy

In [2]:
# tamagotchi class adapted from https://github.com/bitterfly/tamagotchi/blob/master/tamagotchi/core/tamagotchi.py

class Tamagotchi:
    def __init__(self):
        self.stats = {"food": 100, "happiness": 100, "hygiene": 100,
                 "health": 100, "energy": 100}
        self.is_sleeping = False
        self.is_dead = False
        self.is_playing = False
        self.is_sick = False
        self.number_of_poo = 0
        self.money = 0
        self.time = 0
        self.in_store = False

    #Makes sure the statistic isn't below 0 or above 100
    def constrain(self, value):
        value = min(100, value)
        value = max(0, value)
        return value

    #Constrains all the stats
    def constrain_stats(self):
        for statistic, value in self.stats.items():
            self.stats[statistic] = self.constrain(value)

    #Takes a dictionary with statistics and adds each value
    #to the tamagotchi statistics
    def apply(self, item):
#         for statistic, value in self.stats.items():
#             self.stats[statistic] += item[statistic]
        self.stats[item['stats']] += item['effect']
        self.constrain_stats()

#     #Takes a statistic and decreases it to zero in "full hours" time
    def decrease_to_minimum(self, statistic, full_hours, time_given):
        self.stats[statistic] -= np.ceil( (time_given * 10) / (full_hours * 36) ) 
#     def decrease_to_minimum(self, statistic, full_hours, time_given):
#         self.stats[statistic] -= int(full_hours/2)

#     #Takes a statistic and increases it to max in "full hours" time
    def increase_to_maximum(self, statistic, full_hours, time_given):
        self.stats[statistic] += np.ceil( (time_given * 10) / (full_hours * 36) ) 
#     def increase_to_maximum(self, statistic, full_hours, time_given):
#         self.stats[statistic] += int(full_hours)

    #Generates random sickness and poo
    def random_event(self):
        if (not self.is_playing and not self.is_sleeping):
            random_number = randint(0, 600)
            if random_number == 0:
                self.is_sick = True
            if random_number == 1:
                self.number_of_poo = min (self.number_of_poo + 1, 4)

    #Used in mainwindow - removes sickness
    def cure(self):
        self.is_sick = False

    #The function witch decreases all the stats every second
    #or is called when tamagotchi is sleeping
    def second_pass(self, seconds=1):
        "Докато спиш всички статове падат за 8 часа, освен сънят, който се възстановява"
        "As long as you sleep, all the stats fall for 8 hours, except for the sleep that is recovering"
        self.time += 1 
        
        if self.is_sleeping:
            self.increase_to_maximum("energy", 8, seconds)
            self.decrease_to_minimum("happiness", 12, seconds)
            self.decrease_to_minimum("hygiene", (12 + 2*self.number_of_poo), seconds)
            self.decrease_to_minimum("food", 12, seconds)
            
            if self.stats["energy"] > 70:
                self.is_sleeping = False
        else:
            "Докато играеш, повечето статистики падат по-бързо."
            "As you play, most stats fall faster"
            if self.is_playing:
                self.decrease_to_minimum("energy", 3, seconds)
                self.decrease_to_minimum("hygiene", 3, seconds)
                self.decrease_to_minimum("food", 4, seconds)
                self.increase_to_maximum("happiness", 1, seconds)
                self.money += seconds # pam added
            else:
                self.decrease_to_minimum("energy", 4, seconds)
                self.decrease_to_minimum("hygiene", 4 / (self.number_of_poo + 1), seconds)
                self.decrease_to_minimum("food", 4, seconds)
                self.decrease_to_minimum("happiness", 4, seconds)

        if (self.stats["happiness"] <= 50 or
               self.stats["hygiene"] <= 50):
                self.is_sick = True
                
        if self.is_sick:
            self.decrease_to_minimum("health", 3, seconds)

        self.constrain_stats()

        self.random_event()

        if (self.stats["food"] == 0 or self.stats["health"] == 0):
            self.is_dead = True
        
        #add (?) that if energy is <=20 then goes to sleep
        if self.stats["energy"] <=20:
            self.is_sleeping = True

    #The function is called when an item is chosen and
    #applies its statistics
    def buy_item(self, item):
        self.apply(item)
        self.money -= item["price"]
        return self
    
    # so that tuple(tamagotchi) can be called, make tamagotchi iterable
    def __iter__(self):
        traits = [tuple(self.stats.items()),
                 self.is_sleeping,
                 self.is_dead,
                 self.is_playing,
                 self.is_sick,
                 self.number_of_poo,
                 self.money,
                 self.time,
                 self.in_store]
        for i in range(len(traits)):
            yield traits[i]
                
    
    def print_tama(self):
        print("Time:",self.time," seconds elapsed")
        if self.is_dead:
            print("Tama is dead!")
    
        print("TAMA STATS: ", self.stats)
        if self.is_sleeping:
            print("Tama is asleep.")
        else:
            print("Tama is awake.")
            if self.is_playing:
                print("Tama is playing.")
            if self.is_sick:
                print("Tama is sick.")
        print("Number of poo:",self.number_of_poo)
        print("Money:",self.money)
        if self.in_store:
            print("You're in the store, so you can buy an item if you want.")
        else:
            print("You're not in the store, so you can play, do nothing, or travel to the store.")
                
        print("\n")
        
    

In [3]:
# # game class
# # gaining money to buy items in store can be simple time cost/money reward exchange
# # agent decides when to gain money, and then which items to buy/apply to tamagotchi
class Tamagotchi_Game():
    def __init__(self):
#         self.tamagotchi = tamagotchi
        self.time_passing = True
    
    # start game 
#     def start(self,state):
#         self.time_passing = True
#         return state
        
    # not used, but would be useful for a human playing the game to take breaks
#     def pause(self):
#         self.time_passing = False
#         return self.tamagotchi
        
    # given a tuple tamagotchi "state" and action "action", what would the next tama state be?
    def next_state(self, tupstate, action):
        state = self.unpack(tupstate)
        
        if state.in_store:
            state.is_playing = False
            state.second_pass() # does this need to be state.second_pass()

            if action == 'nothing':
                state.in_store = False
                return tuple(state)
            elif action == 'coffee':
                return tuple(state.buy_item({"stats":"energy","price":3,"effect":5}))
            elif action == 'snack':
                return tuple(state.buy_item({"stats":"food","price":3,"effect":5}))
            elif action == 'clean':
                state.number_of_poo =0
                return tuple(state.buy_item({"stats":"hygiene","price":5,"effect":5}))
            elif action == 'medicine':
                state.cure()
                return tuple(state.buy_item({"stats":"health","price":8,"effect":10}))
            else:
                raise Exception('Invalid action while in store')

        if action == 'play':
            state.is_playing = True
            state.second_pass()
            # if the tama is sleeping, playing won't earn any money - tama needs to be awake
            return tuple(state)

        # "walk" to the store (takes one turn)
        if action == 'store':
            state.is_playing = False
            state.in_store = True
            state.second_pass()
            return tuple(state)

        if action == 'nothing':
            state.is_playing = False
            state.second_pass()
            return tuple(state)
       

        
    # Take a sequence of game states representing the full game history, and return the full list
    # of actions that are legal actions
    def legal_actions(self, state_history): 
        # this method is important for MDPs/games where the actions might change based on the game state
        # for instance, in checkers, if player put a piece on square A4, no other piece can move to A4
        
        # get object version of most recent tamagotchi state
        tama = self.unpack(state_history[-1])

        if not tama.is_dead:
            if tama.in_store:
                if tama.money >= 8:
                    return ['coffee','snack','clean','medicine','nothing']
                elif tama.money >= 5:
                    return ['coffee','snack','clean','nothing']
                elif tama.money >= 3:
                    return ['coffee','snack','nothing']
                else: # too poor to shop :(
                    return ['nothing']
            else: 
                return ['play','store','nothing']
        else:
            return []
    
    # Should this be the length of state_history (how long tama alive for)? or is it trial by trial rwd?
    def reward(self, state_history):
        tupletama = state_history[-1]
        
        done = False
        if tupletama[2]: # tama dead
            reward = tupletama[7] # you killed the tama :'( enjoy your MONEY
            done = True
        else:
            # reward is how long you kept the tama alive
            reward = tupletama[7] 

        return reward, done
    
    def unpack(self,tupletama):
        tama = Tamagotchi()
        tama.stats = dict(tupletama[0])
        tama.is_sleeping = tupletama[1]
        tama.is_dead = tupletama[2]
        tama.is_playing = tupletama[3]
        tama.is_sick = tupletama[4]
        tama.number_of_poo = tupletama[5]
        tama.money = tupletama[6]
        tama.time = tupletama[7]
        tama.in_store = tupletama[8]
        return tama

    

In [4]:
# Double check that the tama works - if we manually take actions, do we end up where expected? 
# Then we will create the MCTS solver so that a robot can choose actions instead of us.

tama = Tamagotchi()
tama.print_tama()
game = Tamagotchi_Game()

s = game.next_state(tuple(tama),'play')
s = game.next_state(s,'play')
s = game.next_state(s,'play')
s = game.next_state(s,'play')
s = game.next_state(s,'store')
tama = game.unpack(s)
tama.print_tama()

game.reward([s])

Time: 0  seconds elapsed
TAMA STATS:  {'happiness': 100, 'health': 100, 'food': 100, 'energy': 100, 'hygiene': 100}
Tama is awake.
Number of poo: 0
Money: 0
You're not in the store, so you can play, do nothing, or travel to the store.


Time: 5  seconds elapsed
TAMA STATS:  {'happiness': 99.0, 'health': 100, 'food': 95.0, 'energy': 95.0, 'hygiene': 95.0}
Tama is awake.
Number of poo: 0
Money: 4
You're in the store, so you can buy an item if you want.




(5, False)

In [5]:
class MCTS(object):
    
    def __init__(self, tamagotchi, tamagotchi_game, **kwargs):
        self.tamagotchi = tamagotchi
        self.game = tamagotchi_game
        self.states = []
        seconds = kwargs.get('time',30)
        self.calculation_time = datetime.timedelta(seconds=seconds)
        self.max_moves = kwargs.get('max_moves')
        self.C = kwargs.get('C', 1.4)
        self.rewards = {}
        self.plays = {}
        
    # Take a game state and append it to the history
    def update(self,state):
        self.states.append(state)
        
    # AI calculates best move and returns it
    def get_play(self):
        self.max_depth = 0
        state = self.states[-1]
        legal = self.game.legal_actions(self.states[:])
        
        if legal == []:
            return
        if len(legal)==1:
            return legal[0]
        
        games = 0 # counter for number of games simulated (e.g., number of times "run_sim" is called)
        
        # begin the tree! keep track of time so do not exceed computation time limit
        begin = datetime.datetime.utcnow()
        while (datetime.datetime.utcnow() - begin) < self.calculation_time:
            self.run_simulation()
            games+=1
        # may need for debugging:
        # self.print_tree()
        
        # makes [(state, action1),...,(state, action_n)] for n legal actions
        moves_states = [(p, self.game.next_state(state,p)) for p in legal]
        
        # display number of calls of 'run_simulation' and the time elapsed
        print('Num sims run:',games, '  Time elapsed:', datetime.datetime.utcnow() - begin)
        
        # pick the move with the highest average reward
        percent_wins, move = max((self.rewards.get((S), 0) / self.plays.get((S), 1), p)
                                for p, S in moves_states)
        
        
        # display the stats for each possible play
        for x in sorted(
            ((100 * self.rewards.get((S), 0) /
              self.plays.get((S), 1),
              self.rewards.get((S), 0),
              self.plays.get((S), 0), p)
             for p, S in moves_states),
            reverse=True
        ):
            print("{3}: {0:.2f}% ({1} / {2})".format(*x))
        
        # print the max depth tree search
        print("Maximum depth search:", self.max_depth)
        
        # return the move chosen
        return move
        
        
#     # this don't do anything yet
#     def print_tree(self):
#         board = self.game.tamagotchi_game
#         pass
    
    # play out a "random" game from the current position, then update stats with result
    def run_simulation(self):
        plays, rewards = self.plays, self.rewards
        
        visited_qs = set()
        states_copy = self.states[:] 
        state = states_copy[-1]
        
        expand = True # you only expand once #YOEO
        for t in range(self.max_moves):
            legal = self.game.legal_actions(states_copy)  # get valid actions
            if legal == []:
                # THE TAMA IS DEAD
                reward, done = self.game.reward(states_copy)
                break

            moves_states = [(p, self.game.next_state(state, p)) for p in legal]
    
            if all(plays.get((S)) for p, S in moves_states):
                # if we have statistics on all legal moves, use them.
                # upper confidence bound (UCB) algorithm
#                 print("UCB choice")
                log_total = log(
                    sum(plays[(S)] for p, S in moves_states)
                )
                # value of best
                value, action, state = max(
                    ((rewards[(S)] / plays[(S)]) +
                     self.C * sqrt(log_total / plays[(S)]), p, S)
                    for p, S in moves_states
                )
            else:
                # if we don't have stats on all legal moves, randomly pick one
#                 print("Random choice")
                move, state = random.choice(moves_states)
    
    
            # if we are in the expand phase and this is a new state-action pair
            if expand and (state) not in plays: 
                expand = False # you only expand once so this is it
                plays[(state)] = 0 # initialize
                rewards[(state)] = 0
                if t > self.max_depth:
                    self.max_depth = t
                    
            visited_qs.add((state)) # add this state as visited
            
            states_copy.append(state) # record
            
            reward, done = self.game.reward(states_copy) # compute reward if any
            
            if done: 
                break
        
        # print(visited_states, reward)
        for q in visited_qs: # for each visited state
            if q not in plays: # if we don't have stats on this state yet
                continue
            self.plays[q]+=1 # increase plays
            self.rewards[q]+=reward # add up the reward you got
        
        

In [6]:
# Create a tamagotchi game and a search tree
realtama = Tamagotchi()
game = Tamagotchi_Game()

tree = MCTS(realtama, game, time = 1., C=10, max_moves = 200)


In [7]:
# Check that the tree run simulations to choose the next action using the get_play() method

tree.update(tuple(realtama))
action = tree.get_play()
print("Taking action:", action)
realtama = game.next_state(tuple(realtama),action)


Num sims run: 72   Time elapsed: 0:00:01.003310
nothing: 11500.00% (3335 / 29)
play: 11439.13% (2631 / 23)
store: 11400.00% (2280 / 20)
Maximum depth search: 8
Taking action: nothing


In [8]:
# Now, let the MCTS do its thing for several actions in a row

# Initialize the tamagotchi
realtama = Tamagotchi()
game = Tamagotchi_Game()

print("INITIAL STATE: ")
realtama.print_tama() # initial tamagotchi state

tree = MCTS(realtama, game, time=3., C=1.4, max_moves = 100)

action_seq = []
state = tuple(realtama)

while len(action_seq) <= 20: #and not game.unpack(state).is_dead: # play for a certain amount of time (better rule?)
    tree.update(state)
    action = tree.get_play()
    action_seq.append(action)
    print("\nTaking action %s. Next tama state:"% (action))
    
    state = game.next_state(state,action)
    game.unpack(state).print_tama()
    
    print("\nNumber of actions taken: ",len(action_seq))
    print("Reward so far: ",game.reward(tree.states))

INITIAL STATE: 
Time: 0  seconds elapsed
TAMA STATS:  {'happiness': 100, 'health': 100, 'food': 100, 'energy': 100, 'hygiene': 100}
Tama is awake.
Number of poo: 0
Money: 0
You're not in the store, so you can play, do nothing, or travel to the store.


Num sims run: 245   Time elapsed: 0:00:03.003454
store: 10000.00% (8200 / 82)
play: 10000.00% (8100 / 81)
nothing: 10000.00% (8100 / 81)
Maximum depth search: 15

Taking action store. Next tama state:
Time: 1  seconds elapsed
TAMA STATS:  {'food': 99.0, 'happiness': 99.0, 'health': 100, 'energy': 99.0, 'hygiene': 99.0}
Tama is awake.
Number of poo: 0
Money: 0
You're in the store, so you can buy an item if you want.



Number of actions taken:  1
Reward so far:  (0, False)

Taking action nothing. Next tama state:
Time: 2  seconds elapsed
TAMA STATS:  {'happiness': 98.0, 'health': 100, 'food': 98.0, 'energy': 98.0, 'hygiene': 98.0}
Tama is awake.
Number of poo: 0
Money: 0
You're not in the store, so you can play, do nothing, or travel to t

Num sims run: 226   Time elapsed: 0:00:03.009721
play: 11021.35% (274652 / 2492)
nothing: 10433.33% (626 / 6)
store: 10333.33% (620 / 6)
Maximum depth search: 70

Taking action play. Next tama state:
Time: 19  seconds elapsed
TAMA STATS:  {'food': 86.0, 'happiness': 93.0, 'health': 100, 'energy': 81.0, 'hygiene': 81.0}
Tama is awake.
Tama is playing.
Number of poo: 0
Money: 3
You're not in the store, so you can play, do nothing, or travel to the store.



Number of actions taken:  19
Reward so far:  (18, False)
Num sims run: 228   Time elapsed: 0:00:03.008683
nothing: 11120.35% (289018 / 2599)
store: 10487.50% (839 / 8)
play: 10418.10% (10939 / 105)
Maximum depth search: 85

Taking action nothing. Next tama state:
Time: 20  seconds elapsed
TAMA STATS:  {'happiness': 92.0, 'health': 100, 'food': 85.0, 'energy': 80.0, 'hygiene': 80.0}
Tama is awake.
Number of poo: 0
Money: 3
You're not in the store, so you can play, do nothing, or travel to the store.



Number of actions taken:  20
Rewa

In [9]:
# let's observe what actions the tree chose!
print(action_seq)

['store', 'nothing', 'store', 'nothing', 'play', 'play', 'store', 'nothing', 'play', 'play', 'store', 'snack', 'nothing', 'play', 'store', 'nothing', 'store', 'nothing', 'play', 'nothing', 'play']


In [10]:
# might need to make the game a little harder to put some pressure on the solver to actually earn money
# and buy good stuff (it takes a long time for it to die right now, maybe just multiply all
# decrements by 2, e.g.)

In [11]:
# now let's compare the performance of different computation times