In [1]:
import numpy as np
from mdp_pokemon_battle import PokemonBattleMDP

In [2]:
mdp = PokemonBattleMDP('Dragonite', 'Venusaur')

# --- Tabular Q-learning ---------------------------------------------------
# The Q-table is indexed as Q[ai_hp, opponent_hp, action]. The "+ 1" gives one
# slot per HP value from 0 up to max_hp inclusive.
# NOTE(review): this assumes the states returned by the MDP are integer HP
# pairs within [0, max_hp] -- confirm against PokemonBattleMDP.
dim_ai_hp = mdp.ai_pokemon.max_hp + 1
dim_opponent_hp = mdp.opponent_pokemon.max_hp + 1
dim_actions = len(mdp.ai_pokemon.moveset)
Q = np.zeros((dim_ai_hp, dim_opponent_hp, dim_actions))

learning_rate = 0.1  # step size of each Q update
epsilon = 0.01       # probability of taking a random (exploratory) action
gamma = 0.9          # discount factor (hard-coded; mdp.discount_factor was left unused)

num_episodes = 100   # number of training battles

for episode in range(num_episodes):
    state = mdp.reset()
    done = False
    first_move = True
    while not done:
        ai_current_hp, opponent_current_hp = state

        if first_move:
            # The opening move of every episode is always random.
            action = np.random.randint(dim_actions)
            first_move = False
        elif np.random.uniform(0, 1) < epsilon:
            action = np.random.randint(dim_actions)  # explore
        else:
            action = np.argmax(Q[ai_current_hp, opponent_current_hp])  # exploit

        next_state, reward, done = mdp.step(action)
        next_ai_current_hp, next_opponent_current_hp = next_state

        # A terminal transition has no future return, so do not bootstrap from
        # the next state's Q-values once the episode is over. This also avoids
        # indexing the table with a terminal HP pair, which could silently wrap
        # around to an unrelated row if an HP value were ever negative.
        if done:
            best_next_value = 0.0
        else:
            best_next_value = np.max(Q[next_ai_current_hp, next_opponent_current_hp])

        td_target = reward + gamma * best_next_value
        td_error = td_target - Q[ai_current_hp, opponent_current_hp, action]
        Q[ai_current_hp, opponent_current_hp, action] += learning_rate * td_error

        state = next_state

In [3]:
# --- Evaluate the learned Q-table ------------------------------------------
# wins[k] / losses[k] are 0/1 flags for evaluation episode k. Both lists get
# exactly one entry per episode so they stay index-aligned with episode_data.
wins = []
losses = []

# episode_data[k] is the list of (ai_hp, opponent_hp, action) tuples seen in
# evaluation episode k, recorded before each step is taken.
episode_data = []

# Number of evaluation episodes to run
num_episodes = 1000

for episode in range(num_episodes):
    state = mdp.reset()
    episode_data.append([])  # container for this episode's steps
    done = False
    while not done:
        ai_hp, opponent_current_hp = state
        # Same epsilon-greedy rule as in training (exploration stays enabled).
        if np.random.uniform(0, 1) < epsilon:
            action = np.random.randint(dim_actions)  # Explore: choose random action
        else:
            action = np.argmax(Q[ai_hp, opponent_current_hp])

        # Store data for this step in the episode
        episode_data[-1].append((ai_hp, opponent_current_hp, action))

        next_state, reward, done = mdp.step(action)
        state = next_state

    # Classify the episode by the reward of its final step.
    if reward == 1:
        wins.append(1)
        losses.append(0)
    elif reward == -1:
        wins.append(0)
        losses.append(1)
    else:
        # Neither a win nor a loss: still record the episode so that
        # wins[k] / losses[k] keep referring to episode k.
        wins.append(0)
        losses.append(0)

# Print the number of wins and losses
print("Number of wins:", sum(wins))
print("Number of losses:", sum(losses))

Number of wins: 1000
Number of losses: 0


In [4]:
# Replay cursor used by the battle-display cell below:
# i indexes the episode, j indexes the step within that episode.
i, j = 0, 0

In [65]:
from IPython.display import display, HTML
from display_battle import *

# --- Battle replay (one frame per execution) ---------------------------------
# Every run of this cell renders one frame of episode i and then advances the
# (i, j) cursor: j walks through the recorded steps, and once they are
# exhausted a final win/lose frame is shown and the cursor moves to the next
# episode. Re-run the cell to step through the battle.
#
# NOTE(review): `wins` is filled by the evaluation loop, whereas
# `mdp.episode_history` is maintained inside the MDP. If the history also
# contains the training episodes, index i refers to different battles in the
# two containers -- confirm against PokemonBattleMDP.

ai_name = mdp.ai_pokemon.name
ai_pokemon_gif = ai_name.lower() + '-f.gif'

opponent_name = mdp.opponent_pokemon.name
opponent_pokemon_gif = opponent_name.lower() + '-f.gif'

html_ai_gif = '<img src="images/{agent_pokemon_gif}" style="width:10%;">'.format(agent_pokemon_gif=ai_pokemon_gif)
html_opponent_gif = '<img src="images/{opponent_pokemon_gif}" style="width:10%;">'.format(opponent_pokemon_gif=opponent_pokemon_gif)

episode = mdp.episode_history[i]
episode_over = j >= len(episode)

# For the closing frame, reuse the last recorded step explicitly instead of
# relying on a `step` variable left over from a previous execution of the cell.
step = episode[-1] if episode_over else episode[j]

ai_action = step['ai_pokemon']['action']
ai_hit = step['ai_pokemon']['hit']
ai_effectiveness = step['ai_pokemon']['effectiveness']
ai_hp = step['ai_pokemon']['current_hp']
ai_status = step['ai_pokemon']['status']

opponent_action = step['opponent_pokemon']['action']
opponent_hit = step['opponent_pokemon']['hit']
opponent_effectiveness = step['opponent_pokemon']['effectiveness']
opponent_hp = step['opponent_pokemon']['current_hp']
opponent_status = step['opponent_pokemon']['status']

print('Episode:', i+1)
# The closing frame keeps the number of the last turn rather than counting on.
print('Turn:', len(episode) if episode_over else j+1)

# Decide which HP values to draw. On the closing frame the loser is shown at 0.
if episode_over:
    ai_won = wins[i] == 1
    shown_ai_hp = ai_hp if ai_won else 0
    shown_opponent_hp = 0 if ai_won else opponent_hp
else:
    shown_ai_hp = ai_hp
    shown_opponent_hp = opponent_hp

display(HTML(html_ai_gif))
print_ai_turn(mdp.ai_pokemon, ai_current_hp=shown_ai_hp, ai_status=ai_status)
display(HTML(html_opponent_gif))
print_opponent_turn(mdp.opponent_pokemon, opponent_current_hp=shown_opponent_hp, opponent_status=opponent_status)
print()

if episode_over:
    if ai_won:
        print(mdp.ai_pokemon.name + " \033[92m" + 'WINS!' + "\033[0m")
    else:
        print(mdp.ai_pokemon.name + " \033[91m" + 'LOSES!' + "\033[0m")
    # Move the cursor to the first step of the next episode.
    i += 1
    j = 0
else:
    print_attack(pokemon=mdp.ai_pokemon, attack=ai_action, hit=ai_hit, effectiveness=ai_effectiveness)
    print()
    print_attack(pokemon=mdp.opponent_pokemon, attack=opponent_action, hit=opponent_hit, effectiveness=opponent_effectiveness)
    j += 1

Episode: 13
Turn: 1


Dragonite:
[92m[##################################################][0m
100 / 100
Status:


Venusaur:
[92m[##################################################][0m
100 / 100
Status:

Dragonite uses Wing Attack! It's super effective!

Venusaur uses Stun Spore! 


In [6]:
# Show only the first recorded episode: the full history holds every episode
# played so far and would flood the notebook output.
mdp.episode_history[:1]

[[{'ai_pokemon': {'action': 'Wing Attack',
    'max_hp': 100,
    'current_hp': 100,
    'status': None,
    'accuracy': 1,
    'power': 60,
    'hit': 1,
    'effectiveness': 2.0},
   'opponent_pokemon': {'action': 'Bodyslam',
    'max_hp': 100,
    'current_hp': 100,
    'status': None,
    'accuracy': 1,
    'power': 30,
    'hit': 1,
    'effectiveness': 1.0},
   'is_terminal': False,
   'reward': 0},
  {'ai_pokemon': {'action': 'Dragon Rage',
    'max_hp': 100,
    'current_hp': 70,
    'status': None,
    'accuracy': 1,
    'power': 40,
    'hit': 1,
    'effectiveness': 1.0},
   'opponent_pokemon': {'action': 'Synthesis',
    'max_hp': 100,
    'current_hp': 40,
    'status': None,
    'accuracy': 1,
    'power': 0,
    'hit': 1,
    'effectiveness': 0.25},
   'is_terminal': False,
   'reward': 0},
  {'ai_pokemon': {'action': 'Dragon Rage',
    'max_hp': 100,
    'current_hp': 70,
    'status': None,
    'accuracy': 1,
    'power': 40,
    'hit': 1,
    'effectiveness': 1.0},
  