<a href="https://colab.research.google.com/github/rzil-1/PixelMon/blob/main/Pixelmon1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import numpy as np
import random
import json
from gym import Env
from gym.spaces import Discrete, Box

class PokemonBattleEnv(Env):
    def __init__(self, pokemon_data_path="/content/pokemon_moves.json"):
        super(PokemonBattleEnv, self).__init__()

        # Load Pokémon data from JSON
        self.pokemon_data = self.load_pokemon_data(pokemon_data_path)

        # Define action and observation space
        self.action_space = Discrete(4)  # 4 moves per battle
        self.observation_space = Box(low=0, high=500, shape=(2,), dtype=np.float32)

        # Initialize variables
        self.agent_hp = 100
        self.opponent_hp = 100
        self.agent_moves = []
        self.opponent_moves = []
        self.steps = 0

    def load_pokemon_data(self, path):
        with open(path, 'r') as f:
            return json.load(f)

    def reset(self):
        self.agent_hp = 500
        self.opponent_hp = 500
        self.steps = 0

        # Randomly select Pokémon and moves
        agent_pokemon = random.choice(list(self.pokemon_data.keys()))
        opponent_pokemon = random.choice(list(self.pokemon_data.keys()))

        self.agent_moves = random.sample(list(self.pokemon_data[agent_pokemon].values()), 4)
        self.opponent_moves = random.sample(list(self.pokemon_data[opponent_pokemon].values()), 4)

        return np.array([self.agent_hp, self.opponent_hp], dtype=np.float32)

    def step(self, action):
        self.steps += 1

        agent_damage = self.agent_moves[action]
        self.opponent_hp -= agent_damage

        opponent_action = random.randint(0, 3)
        opponent_damage = self.opponent_moves[opponent_action]
        self.agent_hp -= opponent_damage

        self.agent_hp = max(self.agent_hp, 0)
        self.opponent_hp = max(self.opponent_hp, 0)

        reward = 0
        damage_difference = agent_damage - opponent_damage

        if self.opponent_hp == 0:
            reward = 10  # Victory reward
        elif self.agent_hp == 0:
            reward = -10  # Loss penalty

        done = self.agent_hp == 0 or self.opponent_hp == 0
        return np.array([self.agent_hp, self.opponent_hp], dtype=np.float32), reward, done, {}

    def render(self, mode='human'):
        print(f"Agent HP: {self.agent_hp} | Opponent HP: {self.opponent_hp}")


# Q-learning with State Space Reduction
def discretize_hp(hp, bin_size=50):
    """Discretize HP into bins."""
    return int(hp // bin_size)

num_actions = 4
epsilon = 1.0  # Exploration rate
epsilon_min = 0.01
epsilon_decay = 0.995
alpha = 0.9  # Learning rate
gamma = 0.99  # Discount factor

bin_size = 50  # HP discretization size
num_bins = 500 // bin_size + 1  # Number of bins for HP
env = PokemonBattleEnv("/content/pokemon_moves.json")
q_table = np.zeros((num_bins, num_bins, num_actions))  # Q-table for discretized states

for episode in range(20000):
    state = env.reset()
    done = False

    while not done:
        agent_hp, opponent_hp = state
        state_idx = (discretize_hp(agent_hp, bin_size), discretize_hp(opponent_hp, bin_size))

        if random.uniform(0, 1) < epsilon:
            action = env.action_space.sample()  # Explore
        else:
            action = np.argmax(q_table[state_idx])  # Exploit

        next_state, reward, done, _ = env.step(action)
        next_state_idx = (discretize_hp(next_state[0], bin_size), discretize_hp(next_state[1], bin_size))

        # Ensure the indices are within bounds before using them
        if next_state_idx[0] >= num_bins or next_state_idx[1] >= num_bins:
            next_state_idx = (min(next_state_idx[0], num_bins - 1), min(next_state_idx[1], num_bins - 1))

        best_next_action = np.argmax(q_table[next_state_idx])
        q_table[state_idx + (action,)] += alpha * (reward + gamma * q_table[next_state_idx + (best_next_action,)] - q_table[state_idx + (action,)])

        state = next_state

    epsilon = max(epsilon_min, epsilon * epsilon_decay)

# Test the trained agent
test_episodes = 10
for episode in range(test_episodes):
    state = env.reset()
    done = False
    while not done:
        agent_hp, opponent_hp = state
        state_idx = (discretize_hp(agent_hp, bin_size), discretize_hp(opponent_hp, bin_size))
        action = np.argmax(q_table[state_idx])
        state, reward, done, _ = env.step(action)
        env.render()
    print(f"Episode {episode + 1} finished with reward: {reward}")


Agent HP: 450 | Opponent HP: 410
Agent HP: 400 | Opponent HP: 260
Agent HP: 400 | Opponent HP: 260
Agent HP: 250 | Opponent HP: 260
Agent HP: 100 | Opponent HP: 260
Agent HP: 0 | Opponent HP: 260
Episode 1 finished with reward: -10
Agent HP: 500 | Opponent HP: 390
Agent HP: 500 | Opponent HP: 350
Agent HP: 500 | Opponent HP: 310
Agent HP: 445 | Opponent HP: 200
Agent HP: 445 | Opponent HP: 90
Agent HP: 390 | Opponent HP: 40
Agent HP: 300 | Opponent HP: 0
Episode 2 finished with reward: 10
Agent HP: 460 | Opponent HP: 350
Agent HP: 370 | Opponent HP: 200
Agent HP: 280 | Opponent HP: 80
Agent HP: 240 | Opponent HP: 40
Agent HP: 200 | Opponent HP: 0
Episode 3 finished with reward: 10
Agent HP: 500 | Opponent HP: 460
Agent HP: 350 | Opponent HP: 420
Agent HP: 350 | Opponent HP: 270
Agent HP: 200 | Opponent HP: 120
Agent HP: 200 | Opponent HP: 10
Agent HP: 200 | Opponent HP: 0
Episode 4 finished with reward: 10
Agent HP: 410 | Opponent HP: 380
Agent HP: 365 | Opponent HP: 340
Agent HP: 320 