In [154]:
import plotly.graph_objects as go
import numpy as np
import os

# Directory holding the training logs; only entries ending in ".log" are used.
logs_path = "/Users/danielemateria/Desktop/q-learning-maze-robot/logs"
# Build the full path of every log file, ordered lexicographically by path.
log_file_paths = sorted(
    os.path.join(logs_path, entry)
    for entry in os.listdir(logs_path)
    if entry.endswith('.log')
)

def extract_rewards_from_log(log_file_paths):
    """Collect the reward values recorded in a sequence of log files.

    Only lines containing a ``|`` separator are considered. For each such
    line, the second ``|``-delimited field is taken and the text after its
    last ``:`` is parsed as a float.

    Args:
        log_file_paths: Iterable of paths to the log files; they are read in
            the order given.

    Returns:
        A 1-D ``numpy.ndarray`` of floats, one entry per matching line, in
        file order then line order. Empty if no line matches.

    Raises:
        ValueError: If the extracted text of a matching line is not a valid
            float literal.
    """
    # Accumulate in a list and convert once: np.append copies the whole array
    # on every call, which makes a per-line append quadratic in the number of
    # rewards.
    rewards = []
    for log_file_path in log_file_paths:
        with open(log_file_path, 'r') as file:
            for line in file:
                if "|" not in line:
                    continue
                reward_field = line.split("|")[1].strip()
                rewards.append(float(reward_field.split(":")[-1]))
    # An explicit dtype keeps the empty result a float array, matching the
    # non-empty case.
    return np.array(rewards, dtype=float)

def plot_rewards_plotly(rewards, window=50, marker_every=20):
    """Show an interactive Plotly chart of the reward obtained in each episode.

    The figure contains the raw reward curve, a marker on every
    ``marker_every``-th episode, a moving average over ``window`` episodes
    (only when at least ``window`` rewards are available) and a dashed red
    horizontal line at reward 0. The figure is displayed with ``fig.show()``.

    Args:
        rewards: Array of per-episode rewards; it is indexed with an integer
            index array, so a NumPy array is expected.
        window: Number of episodes averaged for the moving-average trace.
        marker_every: Spacing, in episodes, between highlighted markers.
    """
    n_episodes = len(rewards)
    # Episodes are numbered from 1 on the x axis.
    episode_axis = np.arange(1, n_episodes + 1)
    # Zero-based positions of the episodes that receive a marker.
    sampled = np.arange(0, n_episodes, marker_every)

    fig = go.Figure()

    # Main line: raw reward of every episode.
    raw_trace = go.Scatter(
        x=episode_axis,
        y=rewards,
        mode='lines',
        name='Reward per Episode',
        line={'color': 'royalblue', 'width': 2},
        opacity=0.7,
    )
    fig.add_trace(raw_trace)

    # One marker every `marker_every` episodes.
    marker_trace = go.Scatter(
        x=episode_axis[sampled],
        y=rewards[sampled],
        mode='markers',
        name=f'Marker every {marker_every} episodes',
        marker={'color': 'royalblue', 'size': 7, 'symbol': 'circle'},
    )
    fig.add_trace(marker_trace)

    # Moving average; skipped when there are fewer rewards than the window.
    if n_episodes >= window:
        kernel = np.ones(window) / window
        smoothed = np.convolve(rewards, kernel, mode='valid')
        # 'valid' mode yields one value per full window, so the first point
        # lines up with episode number `window`.
        average_trace = go.Scatter(
            x=episode_axis[window - 1:],
            y=smoothed,
            mode='lines',
            name=f'Moving Avg ({window})',
            line={'color': 'orange', 'width': 3},
        )
        fig.add_trace(average_trace)

    layout_options = {
        'title': 'Training Rewards per Episode',
        'xaxis_title': 'Episode',
        'yaxis_title': 'Reward',
        'template': 'plotly_white',
        'legend': {'font': {'size': 12}},
        'width': 1000,
        'height': 500,
    }
    fig.update_layout(**layout_options)

    # Reference line at zero reward.
    fig.add_hline(y=0, line_dash="dash", line_color="red")
    fig.show()

# Parse every collected log file, print summary statistics, then plot.
rewards = extract_rewards_from_log(log_file_paths)
print("Total episodes: ", len(rewards))
if len(rewards) > 0:
    print("Last episode reward: ", rewards[-1])
    # Count matches directly instead of building a filtered copy of the array.
    print("Rewards over -500: ", int(np.count_nonzero(rewards > -500)))
    plot_rewards_plotly(rewards)
else:
    # Indexing the last element of an empty array would raise IndexError, so
    # report the empty result instead of crashing.
    print("No rewards found; nothing to plot.")

Total episodes:  3115
Last episode reward:  -4627.5
Rewards over -500:  1
