# Cap X - Gráficos de Média e Desvio Padrão com Stable-baselines

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pablo-sampaio/rl_facil/blob/main/capExtra/capX-plot-mean.ipynb)

Vamos comparar os algoritmos **A2C** e **DQN** em várias execuções neste Google Colab.

In [None]:
assert 'google.colab' in sys.modules, 'Notebook criado para o Colab'
import sys

from IPython.display import clear_output
!apt-get install ffmpeg freeglut3-dev xvfb  # For visualization
!pip install "stable-baselines3[extra]==2.0.0"

# clone repository
!git clone https://github.com/pablo-sampaio/rl_facil
sys.path.append("/content/rl_facil")

#clear_output()

In [None]:
!mkdir log_dir

## Imports

In [None]:
import gymnasium as gym
import numpy as np
import tensorboard

# Load the TensorBoard notebook extension (provides the %tensorboard magic).
%load_ext tensorboard

import stable_baselines3
# Last expression of the cell: displays the installed Stable-Baselines3 version.
stable_baselines3.__version__

In [None]:
from stable_baselines3 import A2C, DQN

## 1 - Visualização no Tensorboard

In [None]:
# Open TensorBoard inline, pointed at the log_dir folder.
%tensorboard --logdir log_dir

## 2 - Executar Vários Treinamentos com cada Algoritmo


In [None]:
# Number of independent training runs executed for each algorithm.
NUM_RUNS = 3
# Value passed to .learn() for every run.
TOTAL_TRAINING_STEPS = 30_000
# Environment id handed to both the A2C and the DQN constructors.
ENVIRONMENT_ID = "CartPole-v1"

In [None]:
# Train NUM_RUNS separate A2C agents; each run writes its TensorBoard logs
# under "log_dir". Only the result of the last run stays bound to `model3`.
for x in range(NUM_RUNS):
    model3 = A2C(
        policy='MlpPolicy',
        env=ENVIRONMENT_ID,
        n_steps=16,
        ent_coef=0.01,
        verbose=0,
        tensorboard_log="log_dir",
    ).learn(total_timesteps=TOTAL_TRAINING_STEPS)

In [None]:
# Train NUM_RUNS separate DQN agents; each run writes its TensorBoard logs
# under "log_dir". Only the result of the last run stays bound to `model2`.
for x in range(NUM_RUNS):
    model2 = DQN(
        policy='MlpPolicy',
        env=ENVIRONMENT_ID,
        buffer_size=10_000,
        learning_starts=2_000,
        target_update_interval=2_500,
        tensorboard_log="log_dir",
        verbose=1,
    ).learn(total_timesteps=TOTAL_TRAINING_STEPS)

## 3 - Mostrar Gráficos

### 3.1 Definições necessárias

Funções auxiliares.

In [None]:
import os
import tensorflow as tf
from tensorboard.backend.event_processing import event_accumulator

def load_data_from_logs(log_dir, data_tag):
    """Read one scalar series from TensorBoard event data.

    Args:
        log_dir: Path given to ``EventAccumulator``.
        data_tag: Name of the scalar tag to extract, e.g.
            ``'rollout/ep_len_mean'``.

    Returns:
        A list of ``(step, value)`` tuples, in the order the accumulator
        returns the scalar events.

    Note:
        No check is made that ``data_tag`` exists; whatever
        ``EventAccumulator.Scalars`` raises for an unknown tag propagates
        to the caller (presumably ``KeyError`` -- confirm against the
        TensorBoard documentation).
    """
    event_acc = event_accumulator.EventAccumulator(log_dir)
    # Events are only available after an explicit Reload().
    event_acc.Reload()

    return [(event.step, event.value) for event in event_acc.Scalars(data_tag)]

def process_grandchild_folders(root_dir, subfolder_prefix, data_tag):
    """Load ``data_tag`` from every entry inside the matching run folders.

    Walks ``root_dir`` recursively. For each visited directory whose base
    name starts with ``subfolder_prefix``, every file directly inside it is
    passed to ``load_data_from_logs`` (the third element yielded by
    ``os.walk`` holds file names, not sub-directories).

    Args:
        root_dir: Directory where the walk starts.
        subfolder_prefix: Prefix a directory's base name must have to be
            processed (e.g. 'DQN').
        data_tag: Scalar tag forwarded to ``load_data_from_logs``.

    Returns:
        A list with one ``load_data_from_logs`` result per file found.
    """
    loaded_series = []
    for dirpath, _subdirs, filenames in os.walk(root_dir):
        # Guard clause: skip directories that do not belong to the algorithm.
        if not os.path.basename(dirpath).startswith(subfolder_prefix):
            continue
        for filename in filenames:
            entry_path = os.path.join(dirpath, filename)
            print(f"Loading data from {entry_path}")
            loaded_series.append(load_data_from_logs(entry_path, data_tag))
    return loaded_series

In [None]:
# Just a test
#root_folder = 'log_dir/'
#all_data = process_grandchild_folders(root_folder, 'DQN', 'rollout/ep_len_mean')
#all_data[0]

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def _mean_std_by_timestep(all_runs):
    """Aggregate several runs into per-timestep mean and standard deviation.

    Args:
        all_runs: List of runs, each a list of ``(timestep, value)`` pairs.

    Returns:
        A ``(timesteps, means, stds)`` tuple of equally long lists, where
        ``timesteps`` is sorted ascending and ``means[i]`` / ``stds[i]``
        summarize every value recorded at ``timesteps[i]`` across the runs.
    """
    values_by_timestep = {}
    for run_values in all_runs:
        for timestep, value in run_values:
            values_by_timestep.setdefault(timestep, []).append(value)

    # Runs may log at different timesteps, so dict insertion order is not
    # guaranteed to be ascending. Build the statistics in sorted order so
    # they stay aligned with the x-axis values.
    timesteps = sorted(values_by_timestep)
    means = [np.mean(values_by_timestep[t]) for t in timesteps]
    stds = [np.std(values_by_timestep[t]) for t in timesteps]
    return timesteps, means, stds


def plot_mean_std_curve(alg_prefix):
    """Plot the mean of 'rollout/ep_len_mean' with a +/- one std. dev. band.

    Data is collected from the folders under 'log_dir/' whose names start
    with ``alg_prefix``; values recorded at the same timestep in different
    runs are combined.

    Args:
        alg_prefix: Prefix of the run folders to include (e.g. 'A2C', 'DQN').
    """
    root_folder = 'log_dir/'
    # all_data has the shape:
    #   [[(timestep1, value1), (timestep2, value2), ...],   # run 1
    #    [(timestep1, value1), (timestep2, value2), ...]]   # run 2
    all_data = process_grandchild_folders(root_folder, alg_prefix, 'rollout/ep_len_mean')

    timesteps, mean_values, std_dev_values = _mean_std_by_timestep(all_data)

    # Create the plot
    plt.figure(figsize=(10, 6))
    plt.plot(timesteps, mean_values, label='Mean')
    plt.fill_between(
        timesteps,
        np.subtract(mean_values, std_dev_values),
        np.add(mean_values, std_dev_values),
        alpha=0.2,
        label='Std. Dev.',
    )

    plt.xlabel('Timestep')
    plt.ylabel('Value')
    plt.title('Mean and Standard Deviation Plot')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()

    plt.show()

### 3.2 Desenha os Gráficos



In [None]:
plot_mean_std_curve('A2C')

In [None]:
plot_mean_std_curve('DQN')