In [2]:
from type_chart import type_chart
from pokemon_class import Pokemon
from attacks import attacks
import numpy as np

def attack_power(attack_name: str, 
                 attacking_pokemon: Pokemon, 
                 defending_pokemon: Pokemon) -> int:
    """Roll the damage dealt by one use of `attack_name`.

    The move's base power is scaled by its type effectiveness against the
    defender and then zeroed out if the move misses.

    Args:
        attack_name: Key into the `attacks` table; the entry must provide
            'type', 'power' and 'accuracy'.
        attacking_pokemon: The user of the move; only its `accuracy` is read.
        defending_pokemon: The target; only its `type` sequence is read.

    Returns:
        The damage as an int (truncated), or 0 when the move misses.
    """
    move = attacks[attack_name]
    target_types = defending_pokemon.type

    def multiplier_against(type_name):
        # Row = the move's type in the 'Attacking' column, column = defending
        # type (type_chart is presumably a pandas DataFrame -- it is indexed
        # with .loc and read through .values).
        return type_chart.loc[type_chart['Attacking'] == move['type'], type_name].values[0]

    if len(target_types) != 2:
        # Anything other than exactly two types only considers the first one.
        multiplier = multiplier_against(target_types[0])
    else:
        # Dual-typed defender: the two multipliers stack.
        multiplier = multiplier_against(target_types[0]) * multiplier_against(target_types[1])

    raw_damage = move['power'] * multiplier

    # Two independent rolls, always both drawn: the move's own accuracy and
    # the user's accuracy. The move lands only when both succeed.
    move_connects = np.random.random() < move['accuracy']
    user_connects = np.random.random() < attacking_pokemon.accuracy
    return int(raw_damage * (move_connects * user_connects))

def attack_effect(attack_name: str, 
                  attacking_pokemon: Pokemon, 
                  defending_pokemon: Pokemon):
    """Apply one use of `attack_name` to both Pokemon, mutating them in place.

    Three effects are applied, in this order:

    1. Damage: the defender loses `fixed_power` HP if the move defines it,
       otherwise the amount rolled by `attack_power`. HP never drops below 0.
    2. Status: for moves flagged 'status_move', the move's 'status' is rolled
       with probability 'status_affliction_probability' and assigned only if
       the defender currently has no status. A defender whose status is
       'Paralysis' has its accuracy set to 0.75.
    3. Heal: for moves flagged 'heal_move', the attacker regains
       'heal_amount' (a fraction of its max HP), capped at max HP.

    Args:
        attack_name: Key into the `attacks` table.
        attacking_pokemon: The user of the move.
        defending_pokemon: The target of the move.

    Returns:
        A tuple `(attacking_pokemon.__dict__, defending_pokemon.__dict__)`,
        i.e. the live attribute dictionaries of the two updated objects.
    """
    attack = attacks[attack_name]

    # --- damage ---
    # A fixed-power move overrides the rolled damage entirely, so the roll is
    # only performed when it is actually going to be used.
    if 'fixed_power' in attack:
        power = attack['fixed_power']
    else:
        power = attack_power(attack_name, attacking_pokemon, defending_pokemon)
    defending_pokemon.current_hp = max(defending_pokemon.current_hp - power, 0)

    # --- status move ---
    if attack.get('status_move'):
        # The affliction roll is made even when the defender already has a
        # status; an existing status is never overwritten.
        afflicted = np.random.random() < attack['status_affliction_probability']
        rolled_status = attack['status'] if afflicted else None
        if defending_pokemon.status is None:
            defending_pokemon.status = rolled_status
        if defending_pokemon.status == 'Paralysis':
            defending_pokemon.accuracy = 0.75

    # --- heal move ---
    if attack.get('heal_move'):
        heal_amount = int(attacking_pokemon.max_hp * attack['heal_amount'])
        attacking_pokemon.current_hp = min(attacking_pokemon.current_hp + heal_amount,
                                           attacking_pokemon.max_hp)

    # The objects were updated through normal attribute assignment; the
    # attribute dictionaries are returned for callers that expect them.
    return attacking_pokemon.__dict__, defending_pokemon.__dict__
    

In [3]:
import numpy as np
from pokemon_class import Pokemon
from type_chart import type_chart
from attacks import attacks

class PokemonBattleMDP:
    """One-on-one Pokemon battle exposed as an episodic MDP.

    State is the pair `(ai current HP, opponent current HP)`, actions are
    indices into the AI Pokemon's moveset, and the opponent picks its move
    uniformly at random. The reward is +1 when the opponent is knocked out,
    -1 when the AI is knocked out and 0 otherwise.
    """

    def __init__(self, ai_pokemon_name: str, opponent_pokemon_name: str):
        """Create both combatants by name via the project's `Pokemon` class."""
        self.discount_factor = 0.9  # Discount factor for future rewards

        self.ai_pokemon = Pokemon(ai_pokemon_name)
        self.opponent_pokemon = Pokemon(opponent_pokemon_name)

    def reset(self):
        """Restore both Pokemon to full health and return the initial state.

        HP, status and accuracy are reset for BOTH sides. Accuracy can be
        lowered to 0.75 by paralysis during a battle, so it must be restored
        for the opponent as well or the penalty leaks into later episodes.
        """
        for pokemon in (self.ai_pokemon, self.opponent_pokemon):
            pokemon.current_hp = pokemon.max_hp
            pokemon.status = None
            pokemon.accuracy = 1
        return self.get_state()

    def get_state(self):
        """Return the state tuple `(ai current HP, opponent current HP)`."""
        return (self.ai_pokemon.current_hp, self.opponent_pokemon.current_hp)
    
    def get_actions(self):
        """Return the AI Pokemon's moveset (the available actions)."""
        return (self.ai_pokemon.moveset)
    
    def attack_power(self, attack_name, attacking_pokemon, defending_pokemon):
        """Roll the damage of `attack_name` against `defending_pokemon`.

        Base power is scaled by type effectiveness and zeroed if the move's
        own accuracy roll fails. `attacking_pokemon` is accepted for signature
        symmetry but not read. The defender's `type` may be a single type name
        or a sequence of names; multipliers of several types are multiplied.

        Returns:
            The damage as an int (truncated), 0 on a miss.
        """
        attack = attacks[attack_name]
        defending_type = defending_pokemon.type
        # A sequence of types selects several columns, so the lookup yields
        # one multiplier per type; np.prod collapses them (and passes a lone
        # scalar through unchanged).
        columns = defending_type if isinstance(defending_type, str) else list(defending_type)
        multipliers = type_chart.loc[type_chart['Attacking'] == attack['type'], columns].values[0]
        effectiveness = np.prod(multipliers)
        damage = attack['power'] * effectiveness
        hit = (np.random.random() < attack['accuracy'])
        return int(damage * hit)

    def choose_ai_action(self, action):
        """Translate an action index into the AI's move name."""
        ai_action_name = self.ai_pokemon.moveset[action]
        return ai_action_name

    def ai_action_effect(self, action_name):
        """Apply plain damage from the AI's move to the opponent (HP floor 0)."""
        damage = self.attack_power(attack_name = action_name, attacking_pokemon = self.ai_pokemon, defending_pokemon = self.opponent_pokemon)
        # Clamp at 0: HP doubles as a Q-table index, and a negative value
        # would silently wrap around to the end of the table.
        self.opponent_pokemon.current_hp = max(self.opponent_pokemon.current_hp - damage, 0)

    def choose_opponent_action(self):
        """Pick one of the opponent's moves uniformly at random."""
        action = np.random.choice(len(self.opponent_pokemon.moveset))
        opponent_action = self.opponent_pokemon.moveset[action]
        return opponent_action
    
    def opponent_action_effect(self, action_name):
        """Apply plain damage from the opponent's move to the AI (HP floor 0)."""
        damage = self.attack_power(attack_name = action_name, attacking_pokemon = self.opponent_pokemon, defending_pokemon = self.ai_pokemon)
        self.ai_pokemon.current_hp = max(self.ai_pokemon.current_hp - damage, 0)

    def is_terminal(self):
        """Return True once either Pokemon has no HP left."""
        return self.ai_pokemon.current_hp <= 0 or self.opponent_pokemon.current_hp <= 0

    def get_reward(self):
        """Return +1 if the opponent is down, -1 if the AI is down, else 0."""
        if self.opponent_pokemon.current_hp <= 0:  # ai wins
            return 1
        elif self.ai_pokemon.current_hp <= 0:  # ai loses
            return -1
        else:
            return 0

    def step(self, ai_action):
        """Play one full turn: the AI moves first, then the opponent.

        Args:
            ai_action: Index into the AI Pokemon's moveset.

        Returns:
            `(state, reward, done)` after the turn.
        """
        ai_action_name = self.choose_ai_action(ai_action)
        opponent_action_name = self.choose_opponent_action()

        attack_effect(ai_action_name, self.ai_pokemon, self.opponent_pokemon)
        # A knocked-out opponent must not get to act: otherwise it could
        # still deal damage, or heal itself back above 0 HP and undo the
        # knock-out.
        if self.opponent_pokemon.current_hp > 0:
            attack_effect(opponent_action_name, self.opponent_pokemon, self.ai_pokemon)

        return self.get_state(), self.get_reward(), self.is_terminal()

In [4]:
mdp = PokemonBattleMDP('Dragonite', 'Venusaur')

# --- Tabular Q-learning ---
# The Q-table has one axis per state component (AI HP, opponent HP), each
# sized max_hp + 1 so every HP value from 0 to max_hp is a valid index, plus
# one axis for the action.
dim_ai_hp = mdp.ai_pokemon.max_hp + 1
dim_opponent_hp = mdp.opponent_pokemon.max_hp + 1
dim_actions = len(mdp.ai_pokemon.moveset)
Q = np.zeros((dim_ai_hp, dim_opponent_hp, dim_actions))

# Hyper-parameters
learning_rate = 0.1
epsilon = 0.01   # probability of taking a random action
gamma = 0.9      # discount factor (same value as mdp.discount_factor)

num_episodes = 100

for episode in range(num_episodes):
    state = mdp.reset()
    done = False
    first_move = True
    while not done:
        ai_current_hp, opponent_current_hp = state

        # The opening move of every episode is always random; afterwards the
        # policy is epsilon-greedy. The epsilon roll is skipped on the
        # opening move.
        explore = first_move or np.random.uniform(0, 1) < epsilon
        first_move = False
        if explore:
            action = np.random.randint(dim_actions)
        else:
            action = np.argmax(Q[ai_current_hp, opponent_current_hp])

        next_state, reward, done = mdp.step(action)
        next_ai_current_hp, next_opponent_current_hp = next_state

        # Temporal-difference update towards reward + discounted best value
        # of the next state.
        best_next_value = np.max(Q[next_ai_current_hp, next_opponent_current_hp])
        td_error = reward + gamma * best_next_value - Q[ai_current_hp, opponent_current_hp, action]
        Q[ai_current_hp, opponent_current_hp, action] += learning_rate * td_error

        state = next_state

In [5]:
# Per-episode outcome flags: wins[k] / losses[k] is 1 or 0 for episode k
wins = []
losses = []

# episode_data[k] is the list of (ai_hp, opponent_hp, action) tuples
# recorded at the start of every turn of episode k
episode_data = []

# Number of evaluation battles to play
num_episodes = 1000

for episode in range(num_episodes):
    state = mdp.reset()
    trajectory = []
    episode_data.append(trajectory)
    done = False
    while not done:
        ai_hp, opponent_current_hp = state

        # Same epsilon-greedy policy as during training
        if np.random.uniform(0, 1) >= epsilon:
            action = np.argmax(Q[ai_hp, opponent_current_hp])
        else:
            action = np.random.randint(dim_actions)  # Explore: choose random action

        # Record the situation the action was chosen in
        trajectory.append((ai_hp, opponent_current_hp, action))

        next_state, reward, done = mdp.step(action)
        state = next_state

    # The reward of the last step decides the outcome of the battle
    if reward in (1, -1):
        wins.append(int(reward == 1))
        losses.append(int(reward == -1))

# Summarise the outcomes over all evaluation battles
print("Number of wins:", sum(wins))
print("Number of losses:", sum(losses))

Number of wins: 1000
Number of losses: 0


In [6]:
# Replay cursor for the battle viewer cell:
# i selects the episode in episode_data, j the turn within that episode.
i, j = 0, 0

In [67]:
from IPython.display import display, HTML
from display_battle import *

# Battle viewer: every execution of this cell renders one screen of the
# recorded battles and then advances the replay cursor (i = episode index,
# j = turn index). After the last turn of an episode one extra screen shows
# the result, and the cursor then moves on to the next episode.
ai_name = mdp.ai_pokemon.name
ai_pokemon_gif = ai_name.lower() + '-f.gif'

opponent_name = mdp.opponent_pokemon.name
opponent_pokemon_gif = opponent_name.lower() + '-f.gif'

gif_html_template = '<img src="images/{gif}" style="width:10%;">'
html_ai_gif = gif_html_template.format(gif=ai_pokemon_gif)
html_opponent_gif = gif_html_template.format(gif=opponent_pokemon_gif)

if i >= len(episode_data):
    raise IndexError('No more recorded episodes to replay; set i and j back to 0 to start over.')

episode = episode_data[i]
battle_over = j >= len(episode)

# On the result screen the HP values of the last recorded turn are shown.
# They are looked up explicitly instead of relying on a `turn` variable left
# over from the previous execution of this cell.
turn = episode[min(j, len(episode) - 1)]
ai_hp, opponent_current_hp, ai_attack = turn

print('Episode:', i+1)
print('Turn:', len(episode) if battle_over else j + 1)

# HP values to draw: the loser of a finished battle is shown with 0 HP.
ai_won = battle_over and wins[i] == 1
ai_lost = battle_over and not ai_won
shown_ai_hp = 0 if ai_lost else ai_hp
shown_opponent_hp = 0 if ai_won else opponent_current_hp

display(HTML(html_ai_gif))
print_ai_turn(mdp.ai_pokemon, ai_current_hp=shown_ai_hp)
display(HTML(html_opponent_gif))
print_opponent_turn(mdp.opponent_pokemon, opponent_current_hp=shown_opponent_hp)
print()

if battle_over:
    if ai_won:
        print(mdp.ai_pokemon.name + " \033[92m" + 'WINS!' + "\033[0m")
    else:
        print(mdp.ai_pokemon.name + " \033[91m" + 'LOSES!' + "\033[0m")
    # Move on to the first turn of the next episode
    i += 1
    j = 0
else:
    print_ai_attack(ai_pokemon=mdp.ai_pokemon, ai_attack=ai_attack)
    print()
    print_opponent_attack(opponent_pokemon=mdp.opponent_pokemon)
    j += 1

Episode: 15
Turn: 1


Dragonite:
[92m[##################################################][0m
100 / 100


Venusaur:
[92m[##################################################][0m
100 / 100

Dragonite uses Wing Attack!

Venusaur uses Bodyslam!
