<a href="https://colab.research.google.com/github/saktiworkstation/reinforcement-learning-as-a-character/blob/main/Survival_Quest_V_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install gym pygame



In [3]:
import gym
from gym import spaces
import numpy as np
import pygame
import random
import sys

# Grid dimensions (in cells), pixel size of one cell, and the resulting window size.
GRID_WIDTH = 10
GRID_HEIGHT = 10
CELL_SIZE = 50
SCREEN_WIDTH = GRID_WIDTH * CELL_SIZE
SCREEN_HEIGHT = GRID_HEIGHT * CELL_SIZE

# RGB colours used when rendering.
COLOR_BG = (30, 30, 30)
COLOR_GRID = (50, 50, 50)
COLOR_AGENT = (0, 255, 0)
COLOR_GOAL = (255, 215, 0)
COLOR_TRAP = (255, 0, 0)
COLOR_ENEMY = (255, 0, 255)

# Available actions mapped to their (dx, dy) grid offsets.
ACTION_MAPPING = {
    0: (0, -1),   # up
    1: (0, 1),    # down
    2: (-1, 0),   # left
    3: (1, 0),    # right
}


class AISurvivalEnv(gym.Env):
    """
    Grid-based custom environment for AI Survival Quest.

    The agent is a robot that starts at cell (0, 0) and has to reach the goal cell
    while avoiding traps and enemies. Observations are the agent's ``(x, y)`` grid
    coordinates as an int32 array; actions are the four moves in ``ACTION_MAPPING``.

    Rewards per step: +10 for reaching the goal; -5 for stepping on a trap or an
    enemy, or for trying to leave the grid (each of these ends the episode); -1
    otherwise. An episode also ends once ``max_steps`` steps have been taken.
    """
    metadata = {"render.modes": ["human"]}

    def __init__(self):
        """Set up the spaces, the fixed layout, and the pygame window."""
        super(AISurvivalEnv, self).__init__()
        # Action and observation spaces.
        self.action_space = spaces.Discrete(4)  # four directions
        # The observation is only the robot's (x, y) position on the grid.
        self.observation_space = spaces.Box(low=0, high=max(GRID_WIDTH, GRID_HEIGHT)-1,
                                            shape=(2,), dtype=np.int32)

        # Initial positions of the robot, goal, traps and enemies.
        self.agent_pos = np.array([0, 0])
        self.goal_pos = np.array([GRID_WIDTH - 1, GRID_HEIGHT - 1])
        self.trap_positions = [np.array([3, 3]), np.array([6, 2]), np.array([2, 7])]
        self.enemy_positions = [np.array([5, 5])]  # could later be given a simple movement pattern

        self.max_steps = 100  # step limit per episode
        self.current_step = 0

        # Initialise pygame for rendering (done once, at construction time).
        pygame.init()
        self.screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT))
        pygame.display.set_caption("AI Survival Quest")
        self.clock = pygame.time.Clock()

    def reset(self):
        """Put the agent back at (0, 0), zero the step counter, and return the observation."""
        self.agent_pos = np.array([0, 0])
        self.current_step = 0
        return self._get_obs()

    def step(self, action):
        """
        Move the agent one cell and compute the reward.

        Args:
            action: Key into ``ACTION_MAPPING``; an unknown key results in no movement.

        Returns:
            Tuple ``(observation, reward, done, info)`` where ``info`` is always ``{}``.
        """
        self.current_step += 1

        # Compute the candidate position.
        move = ACTION_MAPPING.get(action, (0, 0))
        new_pos = self.agent_pos + np.array(move)

        # Check the grid bounds.
        if 0 <= new_pos[0] < GRID_WIDTH and 0 <= new_pos[1] < GRID_HEIGHT:
            self.agent_pos = new_pos
        else:
            # Leaving the grid is penalised and ends the episode; the agent stays put.
            reward = -5
            done = True
            return self._get_obs(), reward, done, {}

        reward = -1  # per-step cost, to encourage efficient strategies

        # Check terminal conditions in priority order: goal, trap, enemy, step limit.
        if np.array_equal(self.agent_pos, self.goal_pos):
            reward = 10  # reached the goal
            done = True
        elif any(np.array_equal(self.agent_pos, trap) for trap in self.trap_positions):
            reward = -5  # stepped on a trap
            done = True
        elif any(np.array_equal(self.agent_pos, enemy) for enemy in self.enemy_positions):
            reward = -5  # ran into an enemy
            done = True
        elif self.current_step >= self.max_steps:
            done = True
        else:
            done = False

        return self._get_obs(), reward, done, {}

    def _get_obs(self):
        """Return the agent position as a new int32 array."""
        # astype() returns a copy, so callers cannot mutate the internal position.
        # The cast matters because ``observation_space`` is declared as int32, while
        # NumPy's default integer type is int64 on most platforms; an int64 observation
        # would not be considered a member of the declared Box.
        return self.agent_pos.astype(np.int32)

    def render(self, mode="human"):
        """
        Draw the grid, traps, enemies, goal and agent in the pygame window.

        If the window's close button was pressed, pygame is shut down and the
        process exits via ``sys.exit()``.
        """
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()

        self.screen.fill(COLOR_BG)
        # Grid lines.
        for x in range(0, SCREEN_WIDTH, CELL_SIZE):
            pygame.draw.line(self.screen, COLOR_GRID, (x, 0), (x, SCREEN_HEIGHT))
        for y in range(0, SCREEN_HEIGHT, CELL_SIZE):
            pygame.draw.line(self.screen, COLOR_GRID, (0, y), (SCREEN_WIDTH, y))

        # Traps.
        for trap in self.trap_positions:
            rect = pygame.Rect(trap[0]*CELL_SIZE, trap[1]*CELL_SIZE, CELL_SIZE, CELL_SIZE)
            pygame.draw.rect(self.screen, COLOR_TRAP, rect)

        # Enemies.
        for enemy in self.enemy_positions:
            rect = pygame.Rect(enemy[0]*CELL_SIZE, enemy[1]*CELL_SIZE, CELL_SIZE, CELL_SIZE)
            pygame.draw.rect(self.screen, COLOR_ENEMY, rect)

        # Goal.
        rect = pygame.Rect(self.goal_pos[0]*CELL_SIZE, self.goal_pos[1]*CELL_SIZE, CELL_SIZE, CELL_SIZE)
        pygame.draw.rect(self.screen, COLOR_GOAL, rect)

        # Agent (drawn last so it is painted on top of whatever shares its cell).
        rect = pygame.Rect(self.agent_pos[0]*CELL_SIZE, self.agent_pos[1]*CELL_SIZE, CELL_SIZE, CELL_SIZE)
        pygame.draw.rect(self.screen, COLOR_AGENT, rect)

        pygame.display.flip()
        self.clock.tick(10)  # cap rendering at 10 frames per second

    def close(self):
        """Shut down pygame."""
        pygame.quit()


def choose_action(state, q_table, epsilon):
    """
    Pick an action with an epsilon-greedy policy.

    With probability ``epsilon`` a uniformly random action in ``0..3`` is returned.
    Otherwise the action with the highest Q-value for ``state`` is returned; a state
    not yet in ``q_table`` is first given a row of four zeros (mutating ``q_table``).
    """
    explore = random.uniform(0, 1) < epsilon
    if explore:
        # Exploration: any of the four actions.
        return random.choice(range(4))

    # Exploitation: the greedy action for this state.
    key = tuple(state)
    if key not in q_table:
        q_table[key] = np.zeros(4)
    q_values = q_table[key]
    return int(np.argmax(q_values))


def update_q_table(q_table, state, action, reward, next_state, alpha, gamma):
    """
    Apply one tabular Q-learning update to ``q_table`` in place.

    ``Q[s][a] += alpha * (reward + gamma * max(Q[s']) - Q[s][a])``

    States missing from the table are added with a row of four zeros.

    NOTE(review): the target always bootstraps from ``next_state``; there is no
    terminal flag, so transitions that end an episode are treated like any other.
    """
    current_key, successor_key = tuple(state), tuple(next_state)

    # Create missing rows, current state first.
    for key in (current_key, successor_key):
        if key not in q_table:
            q_table[key] = np.zeros(4)

    row = q_table[current_key]
    bootstrap = np.max(q_table[successor_key])
    row[action] += alpha * (reward + gamma * bootstrap - row[action])


def main():
    """
    Train a tabular Q-learning agent on ``AISurvivalEnv`` and replay one greedy episode.

    Training runs 500 episodes with an epsilon-greedy policy whose epsilon decays
    from 1.0 towards 0.01; the last five training episodes are rendered. Afterwards
    one episode is played with ``epsilon=0`` (pure exploitation) and rendered.

    The environment is always closed on exit: after a normal run, after Ctrl+C, and
    when any other exception propagates.
    """
    env = AISurvivalEnv()
    try:
        episodes = 500
        max_steps = env.max_steps

        # Q-learning hyper-parameters.
        alpha = 0.1      # learning rate
        gamma = 0.99     # discount factor
        epsilon = 1.0    # initial exploration rate
        epsilon_min = 0.01
        epsilon_decay = 0.995

        # The Q-table is a dict mapping a state tuple to an array of action values.
        q_table = {}

        for ep in range(episodes):
            state = env.reset()
            total_reward = 0

            for _ in range(max_steps):
                action = choose_action(state, q_table, epsilon)
                next_state, reward, done, _ = env.step(action)
                total_reward += reward

                update_q_table(q_table, state, action, reward, next_state, alpha, gamma)
                state = next_state

                # Render every step of the last few episodes for visualisation.
                if ep >= episodes - 5:
                    env.render()

                if done:
                    break

            # Decay epsilon so exploration is reduced gradually.
            epsilon = max(epsilon * epsilon_decay, epsilon_min)
            print(f"Episode: {ep+1}, Total Reward: {total_reward}, Epsilon: {epsilon:.3f}")

        # After training, show the result.
        print("Training selesai. Tekan CTRL+C pada window render untuk keluar.")
        try:
            # Demo episode with the trained agent.
            state = env.reset()
            done = False
            while not done:
                env.render()
                action = choose_action(state, q_table, epsilon=0)  # epsilon=0 means exploitation only
                state, _, done, _ = env.step(action)
        except KeyboardInterrupt:
            # Ctrl+C is the expected way to stop the demo; fall through to cleanup.
            pass
    finally:
        # Release pygame on every exit path, not only on Ctrl+C during the demo.
        env.close()


# Entry point: run training and the demo only when this file is executed directly.
if __name__ == "__main__":
    main()

Episode: 1, Total Reward: -7, Epsilon: 0.995
Episode: 2, Total Reward: -8, Epsilon: 0.990
Episode: 3, Total Reward: -5, Epsilon: 0.985
Episode: 4, Total Reward: -5, Epsilon: 0.980
Episode: 5, Total Reward: -15, Epsilon: 0.975
Episode: 6, Total Reward: -5, Epsilon: 0.970
Episode: 7, Total Reward: -8, Epsilon: 0.966
Episode: 8, Total Reward: -18, Epsilon: 0.961
Episode: 9, Total Reward: -7, Epsilon: 0.956
Episode: 10, Total Reward: -6, Epsilon: 0.951
Episode: 11, Total Reward: -5, Epsilon: 0.946
Episode: 12, Total Reward: -11, Epsilon: 0.942
Episode: 13, Total Reward: -5, Epsilon: 0.937
Episode: 14, Total Reward: -12, Epsilon: 0.932
Episode: 15, Total Reward: -9, Epsilon: 0.928
Episode: 16, Total Reward: -5, Epsilon: 0.923
Episode: 17, Total Reward: -5, Epsilon: 0.918
Episode: 18, Total Reward: -7, Epsilon: 0.914
Episode: 19, Total Reward: -5, Epsilon: 0.909
Episode: 20, Total Reward: -6, Epsilon: 0.905
Episode: 21, Total Reward: -6, Epsilon: 0.900
Episode: 22, Total Reward: -5, Epsilon: