### Run in colab
<a href="https://colab.research.google.com/github/racousin/data_science_practice/blob/master/website/public/modules/data-science-practice/module9/exercise/module9_exercise2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture
!pip install swig==4.2.1
!pip install gymnasium==1.2.0

In [2]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import gymnasium as gym
import matplotlib.pyplot as plt
import random, json

In [None]:
# Build the 8x8 FrozenLake map just long enough to display its spaces.
env = gym.make("FrozenLake-v1", map_name="8x8")

for label, space in (
    ("Observation space nb :", env.observation_space),
    ("Action space nb :", env.action_space),
):
    print(label, space)

# The environment is only needed for the printout above.
env.close()


# module9_exercise2 : ML - Arena <a href="https://ml-arena.com/viewcompetition/5" target="_blank"> FrozenLake Competition</a>

### Objective
Get at least one agent running on ML-Arena <a href="https://ml-arena.com/viewcompetition/5" target="_blank"> FrozenLake Competition</a> with a mean reward higher than 0.35 (i.e. 35%).


You should submit an agent file named `agent.py` with a class `Agent` that includes at least the following attributes:

In [None]:
class Agent:
    """Minimal agent template: answers every observation with a sampled action."""

    def __init__(self, env):
        """Store the environment whose action space is sampled from."""
        self.env = env

    def choose_action(self, observation, reward=0.0, terminated=False, truncated=False, info=None):
        """Return an action drawn from the environment's action space.

        The observation, reward, terminated, truncated and info arguments are
        accepted to satisfy the expected interface but are not used here.
        """
        return self.env.action_space.sample()

### Description

The game starts with the player at location [0,0] of the frozen lake grid world, with the goal located at the far extent of the world, [7,7].

Holes in the ice are distributed in set locations.

The player makes moves until they reach the goal or fall in a hole.

Each run will consist of 10 attempts to cross the ice. The reward will be the total amount accumulated during those trips. For example, if your agent reaches the goal 3 times out of 10, its reward will be 3.

The environment is based on :

In [5]:
# Reference environment for this exercise: FrozenLake-v1 on the 8x8 map.
env = gym.make('FrozenLake-v1', map_name="8x8")

In [None]:


def agenttrainer(nb_episodes=50000, alpha=0.1, gamma=0.99, eps_begginner=1.0, eps_ending=0.01, reductor_step=200000):
    """Train a tabular Q-learning agent on the 8x8 FrozenLake environment.

    Actions are chosen epsilon-greedily, with ties between equally valued
    actions broken at random. Epsilon is annealed linearly from
    ``eps_begginner`` to ``eps_ending`` over ``reductor_step`` episodes.

    Args:
        nb_episodes: Number of training episodes to run.
        alpha: Learning rate of the Q-value update.
        gamma: Discount factor applied to the bootstrapped next-state value.
        eps_begginner: Exploration rate used for the first episode.
        eps_ending: Exploration rate once the linear decay has finished.
        reductor_step: Number of episodes over which epsilon is annealed.
            NOTE: when this exceeds ``nb_episodes`` (as with the defaults),
            training stops before epsilon reaches ``eps_ending``.

    Returns:
        A NumPy array of shape ``(n_states, n_actions)`` holding the learned
        Q-values.
    """
    env = gym.make("FrozenLake-v1", map_name="8x8")
    try:
        n_states = env.observation_space.n
        n_actions = env.action_space.n

        Q = np.zeros((n_states, n_actions))
        epsilon = eps_begginner

        for episode in range(nb_episodes):
            # Each episode gets its own environment seed (the episode index).
            state, _ = env.reset(seed=episode)
            terminated = truncated = False

            while not (terminated or truncated):
                if random.random() < epsilon:
                    # Explore.
                    action = env.action_space.sample()
                else:
                    # Exploit, breaking ties at random so that an all-zero row
                    # does not always yield action 0.
                    q_row = Q[state]
                    best_actions = np.flatnonzero(q_row == q_row.max())
                    action = int(random.choice(best_actions))

                next_state, reward, terminated, truncated, _ = env.step(action)

                # Only a true terminal state (goal or hole) has zero future
                # value. A truncated episode was merely cut off by the step
                # limit, so the next state's value must still be bootstrapped.
                future_value = 0.0 if terminated else gamma * Q[next_state].max()
                td_target = reward + future_value

                Q[state, action] += alpha * (td_target - Q[state, action])
                state = next_state

            # Linear epsilon decay, clamped at eps_ending afterwards.
            if episode < reductor_step:
                epsilon = eps_begginner - (eps_begginner - eps_ending) * (episode / reductor_step)
            else:
                epsilon = eps_ending
    finally:
        # Release the environment even if training is interrupted.
        env.close()

    return Q


def agent_tester(Q, nb_runs=200, trialsbyrun=10):
    """Evaluate the greedy policy derived from ``Q`` on 8x8 FrozenLake.

    A run is a batch of ``trialsbyrun`` episodes whose rewards are summed.
    ``nb_runs`` such runs are played, always taking a highest-valued action
    for the current state (ties broken at random).

    Args:
        Q: Q-table indexed as ``Q[state]``, each row exposing ``.max()``.
        nb_runs: Number of runs to average over.
        trialsbyrun: Number of episodes per run.

    Returns:
        A tuple ``(mean total reward per run, that mean divided by
        trialsbyrun)``.
    """
    env = gym.make("FrozenLake-v1", map_name="8x8")
    run_scores = []

    for _run in range(nb_runs):
        score = 0
        for _attempt in range(trialsbyrun):
            state, _info = env.reset()
            done = False
            while not done:
                q_row = Q[state]
                greedy_actions = np.flatnonzero(q_row == q_row.max())
                chosen = int(random.choice(greedy_actions))
                state, reward, terminated, truncated, _info = env.step(chosen)
                score += reward
                done = terminated or truncated
        run_scores.append(score)

    env.close()

    mean_score = np.mean(run_scores)
    return mean_score, mean_score / trialsbyrun


# Train the Q-table, then estimate how well its greedy policy performs.
Q = agenttrainer()
simpleaverage, success_status = agent_tester(Q)
print("Reward moyen :", round(simpleaverage, 2))
print("Succès moyen  :", round(success_status, 3), "≈", round(100*success_status,1), "%")

# Round and convert to nested lists so the table can be embedded as a plain
# literal in the generated file; the submitted agent then needs no NumPy.
q_list = np.asarray(Q, dtype=float).round(6).tolist()

# Source of the standalone agent.py; the Q-table is inlined via json.dumps.
agent_code = f"""
import random

class Agent:
    #init of agent
    def __init__(self, env):
        self.env = env
        self.Q = {json.dumps(q_list)}
        self.len_s = len(self.Q)
        self.len_a = len(self.Q[0]) if self.len_s>0 else env.action_space.n

    def choose_action(self, observation, reward=0.0, terminated=False, truncated=False, info=None):
        s = int(observation)
        if s < 0 or s >= self.len_s:
            return self.env.action_space.sample()
        row = self.Q[s]
        m = max(row)
        bestactions = [a for a, q in enumerate(row) if q == m]
        return random.choice(bestactions)

"""
with open("agent.py", "w", encoding="utf-8") as f:
    f.write(agent_code)

# google.colab only exists inside Colab. Guard the import so that running this
# cell elsewhere does not fail right after the long training run; agent.py is
# already written to the working directory at this point.
try:
    from google.colab import files
except ImportError:
    print("google.colab not available: agent.py was written to the current directory.")
else:
    files.download("agent.py")








### Before submitting
Test that your agent has the right attributes

In [None]:

# Import first: if agent.py is missing or broken, no environment is left open.
from agent import Agent

env = gym.make('FrozenLake-v1', map_name="8x8")
try:
    agent = Agent(env)

    observation, _ = env.reset()
    # No reward or info exists before the first step.
    reward, endingval, isItFalse, info = None, False, False, None
    rewards = []

    # Play a single episode, feeding each step's results back to the agent.
    while not (endingval or isItFalse):
        action = agent.choose_action(
            observation,
            reward=reward,
            terminated=endingval,
            truncated=isItFalse,
            info=info
        )
        observation, reward, endingval, isItFalse, info = env.step(action)
        rewards.append(reward)
finally:
    # Release the environment even if the agent raises during the episode.
    env.close()

print(f"Reward  on one try: {sum(rewards)}")