# TP2 - Monte-Carlo

## Algorithme de Monte-Carlo Every-Visit

Nom: Ryan NAIDJI

In [21]:
# Episodes and rewards used as input data.
# Each episode is the ordered list of state labels visited during one run.
# Labels have the form "S(i,j)"; the reward table below covers i in 0..4 and
# j in 0..3 (what each index means on the grid is not stated here).
episodes = [
    ["S(3,1)", "S(3,0)", "S(2,0)", "S(1,0)", "S(1,1)", "S(1,2)", "S(2,2)", "S(3,2)", "S(3,1)", "S(4,1)", "S(4,2)", "S(3,2)", "S(3,1)", "S(3,0)", "S(2,0)"],
    ["S(3,1)", "S(3,2)", "S(2,2)", "S(1,2)", "S(1,1)", "S(0,1)"],
    ["S(3,1)", "S(3,0)", "S(2,0)", "S(1,0)", "S(1,1)", "S(0,1)"]
]

# Reward attached to each state label, one row of the table per value of i.
rewards = {
    "S(0,0)": 0, "S(0,1)": 5, "S(0,2)": 0, "S(0,3)": 0,
    "S(1,0)": 2, "S(1,1)": 1, "S(1,2)": 3, "S(1,3)": 0,
    "S(2,0)": 2, "S(2,1)": 0, "S(2,2)": 1, "S(2,3)": 4,
    "S(3,0)": 1, "S(3,1)": 2, "S(3,2)": 3, "S(3,3)": 0,
    "S(4,0)": 0, "S(4,1)": 1, "S(4,2)": 1, "S(4,3)": 3
}

In [22]:
def calculate_episode_returns(episodes, rewards):
    """Compute, per episode, the undiscounted return of every visited state.

    The return of a visit at position ``i`` is the sum of ``rewards[s]`` for
    every state ``s`` from position ``i`` (inclusive) to the end of the
    episode. When a state is visited several times within one episode, its
    returns are averaged within that episode.

    Args:
        episodes: Iterable of episodes; each episode is a sequence of state
            labels (it must support ``len()`` and indexing).
        rewards: Mapping from state label to its numeric reward.

    Returns:
        A list with one dict per episode. Each dict maps a visited state (in
        first-visit order) to its return: the raw sum when the state was
        visited once, or the float mean of its returns when it was visited
        several times.

    Raises:
        KeyError: If an episode contains a state that is missing from
            ``rewards``.

    NOTE(review): textbook every-visit Monte Carlo gives every visit the same
    weight across all episodes. Averaging inside each episode first yields a
    different weighting if these per-episode means are averaged again later.
    Confirm which estimator is intended.
    """
    episode_returns = []

    for episode in episodes:
        # One backward pass builds the return from every position (suffix
        # sums), instead of re-summing the tail of the episode at each step.
        returns_from = [0] * len(episode)
        running_total = 0
        for i in range(len(episode) - 1, -1, -1):
            running_total += rewards[episode[i]]
            returns_from[i] = running_total

        # Group the returns by state; the forward pass keeps first-visit order.
        visits = {}
        for state, total_reward in zip(episode, returns_from):
            visits.setdefault(state, []).append(total_reward)

        # A single visit keeps its raw value (no division), so integer rewards
        # still produce integer returns; repeated visits are averaged.
        state_returns = {
            state: returns[0] if len(returns) == 1 else sum(returns) / len(returns)
            for state, returns in visits.items()
        }
        episode_returns.append(state_returns)

    return episode_returns

# Apply the function to the notebook's episodes and rewards, then display the
# per-episode returns as the cell output.
episode_returns = calculate_episode_returns(episodes, rewards)
episode_returns

[{'S(3,1)': 14.666666666666666,
  'S(3,0)': 14.0,
  'S(2,0)': 13.0,
  'S(1,0)': 22,
  'S(1,1)': 20,
  'S(1,2)': 19,
  'S(2,2)': 16,
  'S(3,2)': 11.5,
  'S(4,1)': 10,
  'S(4,2)': 9},
 {'S(3,1)': 15,
  'S(3,2)': 13,
  'S(2,2)': 10,
  'S(1,2)': 9,
  'S(1,1)': 6,
  'S(0,1)': 5},
 {'S(3,1)': 13,
  'S(3,0)': 11,
  'S(2,0)': 10,
  'S(1,0)': 8,
  'S(1,1)': 6,
  'S(0,1)': 5}]

In [23]:
def aggregate_episode_returns(episode_returns):
    """Group the per-episode return of each state into one list per state.

    Args:
        episode_returns: Iterable of dicts, one per episode, each mapping a
            state to its return for that episode.

    Returns:
        A dict mapping each state to the list of its returns, one entry per
        episode in which the state appears. Both the states and the values
        inside each list follow the order in which they are encountered.
    """
    grouped = {}

    for per_episode in episode_returns:
        for state in per_episode:
            # setdefault creates the list the first time a state is seen.
            grouped.setdefault(state, []).append(per_episode[state])

    return grouped

# Gather each state's per-episode returns across all episodes, then display
# the resulting dict as the cell output.
aggregated_returns = aggregate_episode_returns(episode_returns)
aggregated_returns


{'S(3,1)': [14.666666666666666, 15, 13],
 'S(3,0)': [14.0, 11],
 'S(2,0)': [13.0, 10],
 'S(1,0)': [22, 8],
 'S(1,1)': [20, 6, 6],
 'S(1,2)': [19, 9],
 'S(2,2)': [16, 10],
 'S(3,2)': [11.5, 13],
 'S(4,1)': [10],
 'S(4,2)': [9],
 'S(0,1)': [5, 5]}

In [24]:
def calculate_average_returns(aggregated_returns):
    """Average the list of returns collected for each state.

    Args:
        aggregated_returns: Dict mapping each state to a list of numeric
            returns.

    Returns:
        A dict with the same keys, in the same order, mapping each state to
        the arithmetic mean of its returns. A state with an empty list maps
        to ``0``.
    """
    def _mean(values):
        # An empty list has no mean; 0 is used instead of dividing by zero.
        if not values:
            return 0
        return sum(values) / len(values)

    return {state: _mean(values) for state, values in aggregated_returns.items()}

# Average the aggregated returns of each state, then display the resulting
# dict as the cell output.
average_returns = calculate_average_returns(aggregated_returns)
average_returns


{'S(3,1)': 14.222222222222221,
 'S(3,0)': 12.5,
 'S(2,0)': 11.5,
 'S(1,0)': 15.0,
 'S(1,1)': 10.666666666666666,
 'S(1,2)': 14.0,
 'S(2,2)': 13.0,
 'S(3,2)': 12.25,
 'S(4,1)': 10.0,
 'S(4,2)': 9.0,
 'S(0,1)': 5.0}