In [1]:
import numpy as np

In [None]:
from pokemon import pokedex # importing a dictionary of all info for every pokemon
from pokemon_class import Pokemon
from mdp_pokemon_battle import PokemonBattleMDP

In [1589]:
# Opponent types the agent trains against: one environment and one Q-table per type.
types = ['Fire', 'Water', 'Grass', 'Psychic']

mdp = {opponent_type: PokemonBattleMDP(opponent_type=opponent_type) for opponent_type in types}

# Q-table axes: (AI HP, opponent HP, opponent status, action).
# NOTE(review): the sizes are hard-coded; presumably 101 and 151 cover HP values
# 0..100 and 0..150 -- confirm against the state space of PokemonBattleMDP.
Q_SHAPE = (101, 151, 2, 4)
NUM_ACTIONS = Q_SHAPE[-1]

# Build the tables from `types` so the two can never drift apart.
Q = {opponent_type: np.zeros(Q_SHAPE) for opponent_type in types}

# Q-learning hyperparameters.
learning_rate = 0.1
epsilon = 0.01  # probability of taking a random action after the first move
gamma = 0.9     # discount factor, fixed here rather than read from the environment

num_episodes = 1000

# Train one independent table per opponent type. The loop variable is deliberately
# not called `type`, so the builtin is not shadowed by this cell.
for opponent_type in types:
    env = mdp[opponent_type]
    q_table = Q[opponent_type]
    for episode in range(num_episodes):
        state = env.reset()
        done = False
        first_move = True
        while not done:
            ai_current_hp, opponent_current_hp, opponent_status = state

            # The first move of every episode is always random; afterwards the
            # policy is epsilon-greedy with respect to the current table.
            explore = first_move or np.random.uniform(0, 1) < epsilon
            first_move = False
            if explore:
                action = np.random.randint(NUM_ACTIONS)
            else:
                action = np.argmax(q_table[ai_current_hp, opponent_current_hp, opponent_status])

            next_state, reward, done = env.step(action)
            next_ai_current_hp, next_opponent_current_hp, next_opponent_status = next_state

            # One-step Q-learning update.
            # NOTE(review): the target bootstraps from next_state even when `done` is
            # True; that is only harmless if the table entries of terminal states are
            # never updated away from zero -- confirm against the environment.
            td_target = reward + gamma * np.max(
                q_table[next_ai_current_hp, next_opponent_current_hp, next_opponent_status])
            td_error = td_target - q_table[ai_current_hp, opponent_current_hp, opponent_status, action]
            q_table[ai_current_hp, opponent_current_hp, opponent_status, action] += learning_rate * td_error

            state = next_state

In [1590]:
# Opponent type to evaluate against. The name `type` shadows the Python builtin,
# but it is kept because the battle-replay cell reads this variable.
type = 'Psychic'

# Per-episode outcome flags (1 or 0). Entry k of each list belongs to episode_data[k].
wins = []
losses = []

# episode_data[k] is the list of (ai_hp, opponent_hp, opponent_status, action)
# tuples recorded for episode k, one tuple per turn.
episode_data = []

# Number of evaluation episodes to run
num_episodes = 1000

# Roll out the learned table without updating it. Actions are still chosen
# epsilon-greedily, using the same `epsilon` as training.
for episode in range(num_episodes):
    state = mdp[type].reset()
    episode_data.append([])  # turns of this episode are appended below
    done = False
    while not done:
        ai_hp, opponent_current_hp, opponent_status = state
        if np.random.uniform(0, 1) < epsilon:
            action = np.random.randint(4)  # Explore: choose random action
        else:
            action = np.argmax(Q[type][ai_hp, opponent_current_hp, opponent_status])

        # Record the state the agent saw and the action it chose
        episode_data[-1].append((ai_hp, opponent_current_hp, opponent_status, action))

        next_state, reward, done = mdp[type].step(action)
        state = next_state

    # Classify the episode by its final reward. Exactly one flag is appended to each
    # list per episode, so both lists stay index-aligned with episode_data even if
    # the final reward is neither 1 nor -1.
    wins.append(int(reward == 1))
    losses.append(int(reward == -1))

# Print out the number of wins and losses
print("Number of wins:", sum(wins))
print("Number of losses:", sum(losses))

# Print out the data for each episode
# for i, episode in enumerate(episode_data):
#     print("Episode", i+1, ":")
#     for turn, step_data in enumerate(episode):
#         ai_hp, opponent_current_hp, opponent_status, action = step_data
#         print("  Turn {}: ai HP: {}, Opponent HP: {}, Opponent Status: {}, Action: {}".format(
#             turn + 1, ai_hp, opponent_current_hp, opponent_status, action))


Number of wins: 919
Number of losses: 81


In [1481]:
# Replay cursor: i selects the episode, j the turn inside that episode.
# Re-run this cell to rewind to the first turn of the first episode.
i = j = 0

In [1758]:
from IPython.display import display, HTML
from display_battle import print_turn, print_ai_turn, print_opponent_turn
from display_battle import print_opponent_attack, print_ai_attack

# Replays one recorded turn per execution of this cell. The cursor (i, j) is advanced
# at the end, so running the cell repeatedly steps through the recorded battles.
ai_name = 'Venusaur'

# Opponent type -> (display name, front-facing sprite file, attack label).
OPPONENTS = {
    'Fire': ('Charizard', 'front_charizard.gif', 'Flame Thrower'),
    'Water': ('Blastoise', 'front_blastoise.gif', 'Hydro Pump'),
    'Grass': ('Venusaur', 'front_venusaur.gif', 'Razor Leaf'),
    'Psychic': ('Alakazam', 'front_alakazam.gif', 'Psychic'),
}

# Fail loudly on an unsupported type; otherwise opponent_* values left over from an
# earlier run of this cell could be shown for the wrong opponent.
if type not in OPPONENTS:
    raise ValueError('Unsupported opponent type {!r}; expected one of {}'.format(type, sorted(OPPONENTS)))
opponent_name, opponent_pokemon_gif, opponent_attack = OPPONENTS[type]

# Side-by-side layout of both sprites. It is built but not currently displayed.
html_code_template = '''
<div>
    <img src="{agent_pokemon_gif}" style="float:left; width:25%;">
    <img src="{opponent_pokemon_gif}" style="float:right; width:25%;">
</div>
'''

html_code = html_code_template.format(agent_pokemon_gif='back_venusaur.gif', opponent_pokemon_gif=opponent_pokemon_gif)

html_ai_gif = '<img src="{agent_pokemon_gif}" style="width:10%;">'.format(agent_pokemon_gif = 'front_venusaur.gif')
html_opponent_gif = '<img src="{opponent_pokemon_gif}" style="width:10%;">'.format(opponent_pokemon_gif = opponent_pokemon_gif)


def _show_battle_state(ai_hp_shown, opponent_hp_shown, **extra):
    """Render both sprites and HP read-outs for the turn currently being replayed.

    ai_hp_shown / opponent_hp_shown are the HP values to display. Any extra keyword
    arguments (e.g. result=True on the final turn) are forwarded unchanged to
    print_ai_turn and print_opponent_turn. The remaining values (attack, status,
    names, max HP) are read from this cell's variables at call time.
    """
    display(HTML(html_ai_gif))
    print_ai_turn(ai_max_hp=mdp[type].ai_max_hp, ai_current_hp=ai_hp_shown, ai_attack=ai_attack, **extra)
    display(HTML(html_opponent_gif))
    print_opponent_turn(opponent_name=opponent_name, opponent_max_hp=mdp[type].opponent_max_hp, opponent_current_hp=opponent_hp_shown, opponent_status=opponent_status, opponent_attack=opponent_attack, **extra)
    print()


# Give a clear message instead of a bare "list index out of range" once every
# recorded episode has been replayed.
if i >= len(episode_data):
    raise IndexError('No more recorded episodes to replay; set i = 0 and j = 0 to start over.')

episode = episode_data[i]
turn = episode[j]

ai_hp, opponent_current_hp, opponent_status, ai_attack = turn
print('Episode:', i+1)
print('Turn:', j+1)
print()

j += 1
if j == len(episode):
    # Last recorded turn of the episode: show the outcome, with the loser's HP forced
    # to 0, then move the cursor to the start of the next episode.
    if wins[i] == 1:
        _show_battle_state(ai_hp, 0, result=True)
        print(ai_name + ' ' + "\033[92m" + 'WINS!' + "\033[0m")
    else:
        _show_battle_state(0, opponent_current_hp, result=True)
        print(ai_name + ' ' + "\033[91m" + 'LOSES!' + "\033[0m")
    i += 1
    j = 0
else:
    # Mid-battle turn: show the current state followed by both attack announcements.
    _show_battle_state(ai_hp, opponent_current_hp)
    print_ai_attack(ai_attack=ai_attack)
    print()
    print_opponent_attack(opponent_attack=opponent_attack, opponent_name=opponent_name)

Episode: 26
Turn: 4



Venusaur:
[93m[####################------------------------------][0m
40 / 100


Alakazam:
[92m[##############################--------------------][0m
90 / 150

Venusaur uses Bodyslam!

Alakazam uses Psychic!
