# Training Reinforcement Learning Agent
This notebook contains all the necessary code to train different policies and compare them to analyze the performance of the agent.

## Imports and global settings
The following section imports all necessary modules to train the agent and sets global settings.

In [None]:
import os

import pandas as pd

from monte_carlo_eval import eval

Next, we delete and recreate the directory that stores the metrics of a training run.

In [None]:
# Start every run from an empty log directory.
# -f: do not complain when the directory does not exist yet (fresh checkout).
# -p: do not complain when the directory already exists.
!rm -rf ./performance-logs
!mkdir -p ./performance-logs

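The following lines of code set the global parameters for the Monte Carlo simulation. The parameters are:

- `num_episodes`: the number of episodes passed to the evaluation.
- `checkpoint`: the interval, in episodes, between two effectiveness checkpoints.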

In [None]:
# Number of episodes handed to `eval` for every evaluation run
# (read as a global inside addMetrics).
num_episodes = 10_000
# Spacing, in episodes, between effectiveness checkpoints: addMetrics labels
# the i-th checkpoint value with episode number i * checkpoint.
checkpoint = 200

Lastly, the following function runs the evaluation and stores the resulting metrics in dataframes for further analysis.

In [None]:
def _prepend_rows(df, rows):
    """Return ``df`` with ``rows`` placed in front of its existing rows.

    ``rows`` is a list of row-value lists in the order they were produced.
    They are inserted in reverse order (last produced row first), which is
    the layout that prepending one row at a time used to create, so the
    exported files keep their row order.
    """
    if not rows:
        return df
    new_rows = pd.DataFrame(rows[::-1], columns=df.columns)
    if df.empty:
        # Nothing to merge with; this also avoids the pandas FutureWarning
        # raised when concatenating with an empty frame.
        return new_rows
    return pd.concat([new_rows, df], ignore_index=True)


def addMetrics(map, epsilon, checkpoint, df_total_times, df_epoch_times, df_overall_performance, df_checkpoint_effectiveness):
    """Run one evaluation and add its metrics to the four metric dataframes.

    Calls ``eval(map, epsilon, num_episodes, checkpoint)`` (the function
    imported from ``monte_carlo_eval``, not the builtin) and reads the
    notebook-level global ``num_episodes``.

    Parameters
    ----------
    map : identifier of the map, stored in the ``map`` column and forwarded
        to ``eval``. The name shadows the builtin ``map`` but is kept for
        backward compatibility with existing callers.
    epsilon : epsilon value, stored in the ``epsilon`` column and forwarded
        to ``eval``.
    checkpoint : checkpoint interval; the i-th checkpoint value (1-based) is
        labelled with episode ``i * checkpoint``.
    df_total_times, df_epoch_times, df_overall_performance,
    df_checkpoint_effectiveness : dataframes to extend. Their existing
        columns define the layout of the new rows; the inputs are not
        modified in place.

    Returns
    -------
    tuple of the four extended dataframes, in the same order as the
    parameters. New rows are placed before the existing ones, and within one
    call the per-episode / per-checkpoint rows are ordered last-first.
    """
    # Evaluate the performance of the agent
    total_time, epoch_times, overall_effectiveness, checkpoint_effectiveness = eval(map, epsilon, num_episodes, checkpoint)

    # Collect all rows first and build each frame once instead of growing the
    # dataframes row by row (repeated pd.concat in a loop is quadratic).
    epoch_rows = [
        [map, epsilon, episode, epoch_time]
        for episode, epoch_time in enumerate(epoch_times, start=1)
    ]
    checkpoint_rows = [
        [map, epsilon, index * checkpoint, effectiveness]
        for index, effectiveness in enumerate(checkpoint_effectiveness, start=1)
    ]

    df_total_times = _prepend_rows(df_total_times, [[map, epsilon, total_time]])
    df_overall_performance = _prepend_rows(df_overall_performance, [[map, epsilon, overall_effectiveness]])
    df_epoch_times = _prepend_rows(df_epoch_times, epoch_rows)
    df_checkpoint_effectiveness = _prepend_rows(df_checkpoint_effectiveness, checkpoint_rows)

    return df_total_times, df_epoch_times, df_overall_performance, df_checkpoint_effectiveness

## Map 1
This section evaluates the reinforcement learning agent on `maps/map1.txt` for the epsilon values `0.9`, `0.7` and `0.5`, in that order.

In [None]:
# Fresh, empty metric tables for this map; each tuple lists one table's columns.
df_total_times, df_epoch_times, df_overall_performance, df_checkpoint_effectiveness = (
    pd.DataFrame(columns=list(column_names))
    for column_names in (
        ('map', 'epsilon', 'total_time'),
        ('map', 'epsilon', 'episode', 'epoch_time'),
        ('map', 'epsilon', 'overall_performance'),
        ('map', 'epsilon', 'episode', 'checkpoint_effectiveness'),
    )
)

In [None]:
# Identifier of the map under evaluation; it is passed to addMetrics and
# embedded in the names of the CSV files written for this section.
map_name = "map1"

### Epsilon 0.9

In [None]:
# Evaluate this map with epsilon = 0.9 and fold the results into the metric tables.
(
    df_total_times,
    df_epoch_times,
    df_overall_performance,
    df_checkpoint_effectiveness,
) = addMetrics(
    map=map_name,
    epsilon=0.9,
    checkpoint=checkpoint,
    df_total_times=df_total_times,
    df_epoch_times=df_epoch_times,
    df_overall_performance=df_overall_performance,
    df_checkpoint_effectiveness=df_checkpoint_effectiveness,
)

### Epsilon 0.7

In [None]:
# Evaluate this map with epsilon = 0.7 and fold the results into the metric tables.
(
    df_total_times,
    df_epoch_times,
    df_overall_performance,
    df_checkpoint_effectiveness,
) = addMetrics(
    map=map_name,
    epsilon=0.7,
    checkpoint=checkpoint,
    df_total_times=df_total_times,
    df_epoch_times=df_epoch_times,
    df_overall_performance=df_overall_performance,
    df_checkpoint_effectiveness=df_checkpoint_effectiveness,
)

### Epsilon 0.5

In [None]:
# Evaluate this map with epsilon = 0.5 and fold the results into the metric tables.
(
    df_total_times,
    df_epoch_times,
    df_overall_performance,
    df_checkpoint_effectiveness,
) = addMetrics(
    map=map_name,
    epsilon=0.5,
    checkpoint=checkpoint,
    df_total_times=df_total_times,
    df_epoch_times=df_epoch_times,
    df_overall_performance=df_overall_performance,
    df_checkpoint_effectiveness=df_checkpoint_effectiveness,
)

### Caching of results

In [None]:
# Make sure the output directory exists so the export also works when the
# setup cell was skipped or its shell commands are unavailable.
os.makedirs('./performance-logs', exist_ok=True)

# Persist the four metric tables of the current map, one CSV file per table.
df_total_times.to_csv(f'./performance-logs/df_total_times_{map_name}.csv', index=False)
df_epoch_times.to_csv(f'./performance-logs/df_epoch_times_{map_name}.csv', index=False)
df_overall_performance.to_csv(f'./performance-logs/df_overall_performance_{map_name}.csv', index=False)
df_checkpoint_effectiveness.to_csv(f'./performance-logs/df_checkpoint_effectiveness_{map_name}.csv', index=False)

## Map 2
This section evaluates the reinforcement learning agent on `maps/map2.txt` for the epsilon values `0.9`, `0.7` and `0.5`, in that order.

In [None]:
# Fresh, empty metric tables for this map; each tuple lists one table's columns.
df_total_times, df_epoch_times, df_overall_performance, df_checkpoint_effectiveness = (
    pd.DataFrame(columns=list(column_names))
    for column_names in (
        ('map', 'epsilon', 'total_time'),
        ('map', 'epsilon', 'episode', 'epoch_time'),
        ('map', 'epsilon', 'overall_performance'),
        ('map', 'epsilon', 'episode', 'checkpoint_effectiveness'),
    )
)

In [None]:
# Identifier of the map under evaluation; it is passed to addMetrics and
# embedded in the names of the CSV files written for this section.
map_name = "map2"

### Epsilon 0.9

In [None]:
# Evaluate this map with epsilon = 0.9 and fold the results into the metric tables.
(
    df_total_times,
    df_epoch_times,
    df_overall_performance,
    df_checkpoint_effectiveness,
) = addMetrics(
    map=map_name,
    epsilon=0.9,
    checkpoint=checkpoint,
    df_total_times=df_total_times,
    df_epoch_times=df_epoch_times,
    df_overall_performance=df_overall_performance,
    df_checkpoint_effectiveness=df_checkpoint_effectiveness,
)

### Epsilon 0.7

In [None]:
# Evaluate this map with epsilon = 0.7 and fold the results into the metric tables.
(
    df_total_times,
    df_epoch_times,
    df_overall_performance,
    df_checkpoint_effectiveness,
) = addMetrics(
    map=map_name,
    epsilon=0.7,
    checkpoint=checkpoint,
    df_total_times=df_total_times,
    df_epoch_times=df_epoch_times,
    df_overall_performance=df_overall_performance,
    df_checkpoint_effectiveness=df_checkpoint_effectiveness,
)

### Epsilon 0.5

In [None]:
# Evaluate this map with epsilon = 0.5 and fold the results into the metric tables.
(
    df_total_times,
    df_epoch_times,
    df_overall_performance,
    df_checkpoint_effectiveness,
) = addMetrics(
    map=map_name,
    epsilon=0.5,
    checkpoint=checkpoint,
    df_total_times=df_total_times,
    df_epoch_times=df_epoch_times,
    df_overall_performance=df_overall_performance,
    df_checkpoint_effectiveness=df_checkpoint_effectiveness,
)

### Caching of results

In [None]:
# Make sure the output directory exists so the export also works when the
# setup cell was skipped or its shell commands are unavailable.
os.makedirs('./performance-logs', exist_ok=True)

# Persist the four metric tables of the current map, one CSV file per table.
df_total_times.to_csv(f'./performance-logs/df_total_times_{map_name}.csv', index=False)
df_epoch_times.to_csv(f'./performance-logs/df_epoch_times_{map_name}.csv', index=False)
df_overall_performance.to_csv(f'./performance-logs/df_overall_performance_{map_name}.csv', index=False)
df_checkpoint_effectiveness.to_csv(f'./performance-logs/df_checkpoint_effectiveness_{map_name}.csv', index=False)

## Map 3
This section evaluates the reinforcement learning agent on `maps/map3.txt` for the epsilon values `0.9`, `0.7` and `0.5`, in that order.

In [None]:
# Fresh, empty metric tables for this map; each tuple lists one table's columns.
df_total_times, df_epoch_times, df_overall_performance, df_checkpoint_effectiveness = (
    pd.DataFrame(columns=list(column_names))
    for column_names in (
        ('map', 'epsilon', 'total_time'),
        ('map', 'epsilon', 'episode', 'epoch_time'),
        ('map', 'epsilon', 'overall_performance'),
        ('map', 'epsilon', 'episode', 'checkpoint_effectiveness'),
    )
)

In [None]:
# Identifier of the map under evaluation; it is passed to addMetrics and
# embedded in the names of the CSV files written for this section.
map_name = "map3"

### Epsilon 0.9

In [None]:
# Evaluate this map with epsilon = 0.9 and fold the results into the metric tables.
(
    df_total_times,
    df_epoch_times,
    df_overall_performance,
    df_checkpoint_effectiveness,
) = addMetrics(
    map=map_name,
    epsilon=0.9,
    checkpoint=checkpoint,
    df_total_times=df_total_times,
    df_epoch_times=df_epoch_times,
    df_overall_performance=df_overall_performance,
    df_checkpoint_effectiveness=df_checkpoint_effectiveness,
)

### Epsilon 0.7

In [None]:
# Evaluate this map with epsilon = 0.7 and fold the results into the metric tables.
(
    df_total_times,
    df_epoch_times,
    df_overall_performance,
    df_checkpoint_effectiveness,
) = addMetrics(
    map=map_name,
    epsilon=0.7,
    checkpoint=checkpoint,
    df_total_times=df_total_times,
    df_epoch_times=df_epoch_times,
    df_overall_performance=df_overall_performance,
    df_checkpoint_effectiveness=df_checkpoint_effectiveness,
)

### Epsilon 0.5

In [None]:
# Evaluate this map with epsilon = 0.5 and fold the results into the metric tables.
(
    df_total_times,
    df_epoch_times,
    df_overall_performance,
    df_checkpoint_effectiveness,
) = addMetrics(
    map=map_name,
    epsilon=0.5,
    checkpoint=checkpoint,
    df_total_times=df_total_times,
    df_epoch_times=df_epoch_times,
    df_overall_performance=df_overall_performance,
    df_checkpoint_effectiveness=df_checkpoint_effectiveness,
)

### Caching of results

In [None]:
# Make sure the output directory exists so the export also works when the
# setup cell was skipped or its shell commands are unavailable.
os.makedirs('./performance-logs', exist_ok=True)

# Persist the four metric tables of the current map, one CSV file per table.
df_total_times.to_csv(f'./performance-logs/df_total_times_{map_name}.csv', index=False)
df_epoch_times.to_csv(f'./performance-logs/df_epoch_times_{map_name}.csv', index=False)
df_overall_performance.to_csv(f'./performance-logs/df_overall_performance_{map_name}.csv', index=False)
df_checkpoint_effectiveness.to_csv(f'./performance-logs/df_checkpoint_effectiveness_{map_name}.csv', index=False)

## Performance evaluation
This section plots and compares all the different performance metrics to get a better understanding of our implementation.

### Data Preparation
This section combines all the previously generated dataframes to be able to work with all collected datapoints.

In [None]:
# work in progress