# Plot Comparison Between Algorithms

Expects the input data to contain CSV files containing rewards per timestep

In [1]:
import os
import numpy as np

%reload_ext autoreload
%autoreload 2

We need to read the CSV files (from a function in another file) to get the reward at each timestep for each run of each algorithm. Only the `dataPath` directories will be loaded.

`load_data` loads the CSV files containing rewards as a python list of Pandas DataFrames.

`dataPath` maps a label to the exact path of the directory containing the CSV files. Each path is relative to the `data` directory. Every entry is assumed to be the path for a different algorithm (or setting). Entries that share the same key overwrite one another, so give different parameter settings of the same algorithm distinct keys.

Expects there to be more than 1 input CSV file.

In [2]:
# Maps a plot label to the directory that holds the reward CSV files for that
# label. The label is the key later used for `Data`, `plottingData` and the
# legend entries; the directory is given relative to `basePath`.
# Keys must be unique: a repeated key silently replaces the earlier entry, so
# every parameter setting of the same algorithm needs its own label.
# Commented-out entries are not loaded.
dataPath = {'esarsa': 'esarsa/esarsa',
#             'dqn-adam-delay0': 'dqn/alpha=0.001_buffer-size=500_buffer-type=random_decreasing-epsilon=None_delays=0_dqn-adamBeta1=0.9_dqn-adamBeta2=0.999_dqn-adamEps=1e-08_dqn-batch=16_dqn-hidden=128,128_dqn-sync=32_enable-debug=0_epsilon=0.1_gamma=0.9_min-epsilon=0.1_state-len=4/',
#             'dqn-sgd-delay0': 'dqn/sgd-momentum0.9/alpha-0.001_buffer-size-500_buffer-type-random_decreasing-epsilon-None_dqn-adamBeta1-0.9_dqn-adamBeta2-0.999_dqn-adamEps-1e-08_dqn-batch-16_dqn-hidden-128,128_dqn-momentum-0.9_dqn-sync-32_enable-debug-0_epsilon-0.1_gamma-0.9_min-epsilon-0.1_state-len-4/',
            'dqn-adam-sweep0': 'dqn/adam_sweep/alpha-0.01_buffer-size-500_buffer-type-random_decreasing-epsilon-None_dqn-adamBeta1-0.9_dqn-adamBeta2-0.999_dqn-adamEps-1e-08_dqn-batch-16_dqn-hidden-128,128_dqn-momentum-0.9_dqn-sync-32_enable-debug-0_epsilon-0.1_gamma-0.9_min-epsilon-0.1_state-len-4',
            'dqn-adam-sweep1': 'dqn/adam_sweep/alpha-0.003_buffer-size-500_buffer-type-random_decreasing-epsilon-None_dqn-adamBeta1-0.9_dqn-adamBeta2-0.999_dqn-adamEps-1e-08_dqn-batch-16_dqn-hidden-128,128_dqn-momentum-0.9_dqn-sync-32_enable-debug-0_epsilon-0.1_gamma-0.9_min-epsilon-0.1_state-len-4',
            'dqn-adam-sweep2': 'dqn/adam_sweep/alpha-0.001_buffer-size-500_buffer-type-random_decreasing-epsilon-None_dqn-adamBeta1-0.9_dqn-adamBeta2-0.999_dqn-adamEps-1e-08_dqn-batch-16_dqn-hidden-128,128_dqn-momentum-0.9_dqn-sync-32_enable-debug-0_epsilon-0.1_gamma-0.9_min-epsilon-0.1_state-len-4',
            'dqn-adam-sweep3': 'dqn/adam_sweep/alpha-0.0003_buffer-size-500_buffer-type-random_decreasing-epsilon-None_dqn-adamBeta1-0.9_dqn-adamBeta2-0.999_dqn-adamEps-1e-08_dqn-batch-16_dqn-hidden-128,128_dqn-momentum-0.9_dqn-sync-32_enable-debug-0_epsilon-0.1_gamma-0.9_min-epsilon-0.1_state-len-4',
            'dqn-adam-sweep4': 'dqn/adam_sweep/alpha-0.0001_buffer-size-500_buffer-type-random_decreasing-epsilon-None_dqn-adamBeta1-0.9_dqn-adamBeta2-0.999_dqn-adamEps-1e-08_dqn-batch-16_dqn-hidden-128,128_dqn-momentum-0.9_dqn-sync-32_enable-debug-0_epsilon-0.1_gamma-0.9_min-epsilon-0.1_state-len-4',
            'dqn-adam-sweep5': 'dqn/adam_sweep/alpha-3e-05_buffer-size-500_buffer-type-random_decreasing-epsilon-None_dqn-adamBeta1-0.9_dqn-adamBeta2-0.999_dqn-adamEps-1e-08_dqn-batch-16_dqn-hidden-128,128_dqn-momentum-0.9_dqn-sync-32_enable-debug-0_epsilon-0.1_gamma-0.9_min-epsilon-0.1_state-len-4',
           }

from loadFromRewards import load_data

# Root directory that every entry of `dataPath` is resolved against.
basePath = '../data/'

# Label -> loaded runs. Rebuilt from scratch on every execution so that
# re-running the cell never keeps entries from a previous `dataPath`.
Data = {}

for alg, relPath in dataPath.items():
    print(alg)
    runDir = os.path.join(basePath, relPath)
    if os.path.isdir(runDir):
        Data[alg] = load_data(runDir)
    else:
        # Report the missing directory instead of skipping it silently, so a
        # typo in `dataPath` shows up here rather than as a missing curve later.
        print('  WARNING: directory not found, skipping:', runDir)

print('Data will be stored for', ', '.join(Data))
print('Loaded the rewards data from the csv files')

esarsa
dqn-adam-sweep0
dqn-adam-sweep1
dqn-adam-sweep2
dqn-adam-sweep3
dqn-adam-sweep4
dqn-adam-sweep5
Data will be stored for esarsa, dqn-adam-sweep0, dqn-adam-sweep1, dqn-adam-sweep2, dqn-adam-sweep3, dqn-adam-sweep4, dqn-adam-sweep5
Loaded the rewards data from the csv files


The rewards can be transformed by setting `transformation` to one of the following values:
1. 'Returns'
2. 'Failures'
3. 'Average-Rewards'
4. 'Rewards' (no change)

----------------------------------------------------------------------------------------------

There is an additional parameter of window which can be any non-negative integer. It is used for the 'Average-Rewards' transformation to maintain a moving average over a sliding window. By default window is 0.

- If window is 500 and timesteps are 10000, then the first element is the average of the performances of timesteps from 1 - 500. The second element is the average of the performances of timesteps from 2 - 501. The last element is the average of the performances of timesteps from 9501 - 10000.

----------------------------------------------------------------------------------------------

`transform_data` transforms the loaded rewards (python list of Pandas DataFrames) into the respective `transformation` (a numpy array of numpy arrays) for plotting.

In [3]:
from loadFromRewards import transform_data

# Quantity to plot and the sliding-window length handed to `transform_data`.
transformation = 'Returns'
window = 2500

# Label -> transformed runs, rebuilt on every execution of this cell.
plottingData = {}
for alg in Data:
    data = Data[alg]
    plottingData[alg] = transform_data(alg, data, transformation, window)

print('Data will be plotted for', ', '.join(plottingData))
print('The stored rewards are transformed to: ', transformation)

0 esarsa
10 esarsa
20 esarsa
30 esarsa
40 esarsa
50 esarsa
0 dqn-adam-sweep0
0 dqn-adam-sweep1
0 dqn-adam-sweep2
0 dqn-adam-sweep3
0 dqn-adam-sweep4
0 dqn-adam-sweep5
Data will be plotted for esarsa, dqn-adam-sweep0, dqn-adam-sweep1, dqn-adam-sweep2, dqn-adam-sweep3, dqn-adam-sweep4, dqn-adam-sweep5
The stored rewards are transformed to:  Returns


Here, we can plot the following statistics:

1. Mean of all the runs

2. Median run

3. Run with the best performance (highest return, or equivalently least failures)

4. Run with the worst performance (lowest return, or equivalently most failures)

5. Mean along with the confidence interval (Currently, plots the mean along with 95% confidence interval, but should be changed to make it adaptive to any confidence interval)

6. Mean along with percentile regions (Plots the mean and shades the region between the run with the lower percentile and the run with the upper percentile)

----------------------------------------------------------------------------------------------

Details:

plotBest, plotWorst, plotMeanAndPercentileRegions sort the performances based on their final performance

                                   ----------------------------------------------------

Mean, Median, MeanAndConfidenceInterval are all symmetric plots so 'Failures' does not affect their plots
    
Best, Worst, MeanAndPercentileRegions are all asymmetric plots so 'Failures' affects their plots, and has to be treated in the following way:   

                                   ----------------------------------------------------

1. plotBest for Returns will plot the run with the highest return (least failures)
   plotBest for Failures will plot the run with the least failures and not the highest failures

2. plotWorst for Returns will plot the run with the lowest return (most failures)
   plotWorst for Failures will plot the run with the most failures and not the least failures

3. plotMeanAndPercentileRegions for Returns uses the lower variable to select the run with the 'lower' percentile and uses the upper variable to select the run with the 'upper' percentile
   plotMeanAndPercentileRegions for Failures uses the lower variable along with some calculations to select the run with 'upper' percentile and uses the upper variable along with some calculations to select the run with the 'lower' percentile 
    
----------------------------------------------------------------------------------------------

Caution:
- Jupyter notebooks (mostly) or matplotlib gives an error when displaying very dense plots. For example: plotting best and worst case for transformation of 'Rewards' for 'example' algorithm, or when trying to zoom into dense plots. Most of the plots for 'Rewards' and 'example' fail.

In [4]:
from stats import getMean, getMedian, getBest, getWorst, getConfidenceIntervalOfMean, getRegion

# Add color, linestyles as needed

def _seriesLabel(suffix, algName=None):
    """Build the legend label ``'<algorithm>-<suffix>'`` for one plotted series.

    Args:
        suffix: Name of the statistic, e.g. ``'mean'``.
        algName: Algorithm label. When omitted, the notebook-global ``alg``
            (the loop variable of the plotting cell) is used, which keeps
            existing calls working.

    Raises:
        NameError: if ``algName`` is omitted and no global ``alg`` exists.
    """
    if algName is None:
        try:
            algName = alg
        except NameError:
            raise NameError(
                "no algorithm name available: pass algName=... or call from a "
                "loop that defines the global `alg`"
            ) from None
    return algName + '-' + suffix


def plotMean(xAxis, data, color, algName=None):
    """Plot ``getMean(data)`` against ``xAxis`` on the current axes.

    Args:
        xAxis: x coordinates shared by all runs.
        data: Runs of one algorithm, passed unchanged to ``getMean``.
        color: Matplotlib colour of the line.
        algName: Optional algorithm label for the legend; defaults to the
            notebook-global ``alg``.
    """
    plt.plot(xAxis, getMean(data), label=_seriesLabel('mean', algName), color=color)


def plotMedian(xAxis, data, color, algName=None):
    """Plot ``getMedian(data)`` against ``xAxis`` on the current axes.

    Args:
        xAxis: x coordinates shared by all runs.
        data: Runs of one algorithm, passed unchanged to ``getMedian``.
        color: Matplotlib colour of the line.
        algName: Optional algorithm label for the legend; defaults to the
            notebook-global ``alg``.
    """
    plt.plot(xAxis, getMedian(data), label=_seriesLabel('median', algName), color=color)


def plotBest(xAxis, data, transformation, color, algName=None):
    """Plot ``getBest(data, transformation)`` against ``xAxis``.

    Args:
        xAxis: x coordinates shared by all runs.
        data: Runs of one algorithm, passed unchanged to ``getBest``.
        transformation: Name of the applied transformation; forwarded to
            ``getBest``, which decides what "best" means for it.
        color: Matplotlib colour of the line.
        algName: Optional algorithm label for the legend; defaults to the
            notebook-global ``alg``.
    """
    plt.plot(xAxis, getBest(data, transformation),
             label=_seriesLabel('best', algName), color=color)


def plotWorst(xAxis, data, transformation, color, algName=None):
    """Plot ``getWorst(data, transformation)`` against ``xAxis``.

    Args:
        xAxis: x coordinates shared by all runs.
        data: Runs of one algorithm, passed unchanged to ``getWorst``.
        transformation: Name of the applied transformation; forwarded to
            ``getWorst``, which decides what "worst" means for it.
        color: Matplotlib colour of the line.
        algName: Optional algorithm label for the legend; defaults to the
            notebook-global ``alg``.
    """
    plt.plot(xAxis, getWorst(data, transformation),
             label=_seriesLabel('worst', algName), color=color)


def plotMeanAndConfidenceInterval(xAxis, data, confidence, color, algName=None):
    """Plot the mean and shade the band from ``getConfidenceIntervalOfMean``.

    Args:
        xAxis: x coordinates shared by all runs.
        data: Runs of one algorithm.
        confidence: Confidence level forwarded to
            ``getConfidenceIntervalOfMean`` (the notebook uses 0.95).
        color: Matplotlib colour of both the line and the shaded band.
        algName: Optional algorithm label for the legend; defaults to the
            notebook-global ``alg``.
    """
    plotMean(xAxis, data, color=color, algName=algName)
    lowerBound, upperBound = getConfidenceIntervalOfMean(data, confidence)
    plt.fill_between(xAxis, lowerBound, upperBound, alpha=0.25, color=color)


def plotMeanAndPercentileRegions(xAxis, data, lower, upper, transformation, color, algName=None):
    """Plot the mean and shade the region between the two runs from ``getRegion``.

    Args:
        xAxis: x coordinates shared by all runs.
        data: Runs of one algorithm.
        lower: Lower percentile forwarded to ``getRegion``.
        upper: Upper percentile forwarded to ``getRegion``.
        transformation: Name of the applied transformation; forwarded to
            ``getRegion``.
        color: Matplotlib colour of both the line and the shaded region.
        algName: Optional algorithm label for the legend; defaults to the
            notebook-global ``alg``.
    """
    plotMean(xAxis, data, color, algName=algName)
    lowerRun, upperRun = getRegion(data, lower, upper, transformation)
    plt.fill_between(xAxis, lowerRun, upperRun, alpha=0.25, color=color)

Details:

- X axis for 'Average-Rewards' will start from 'window' timesteps and end with the final timesteps

- Need to add color (shades), linestyle as per requirements

- Currently, plot one statistic at a time by commenting out the others; otherwise, it displays different colors for all.


In [5]:
# For saving figures
#%matplotlib inline

# For plotting in the jupyter notebook
%matplotlib notebook         

import matplotlib.pyplot as plt

colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

for alg, data in plottingData.items():
    lenRun = len(data[0])
    xAxis = np.array([i for i in range(1,lenRun+1)])
    
    if transformation == 'Average-Rewards':
        xAxis += (window-1)
    
    if alg == 'esarsa':
        color = colors[0]
    elif alg == 'hand':
        color = colors[1]
    elif alg.split("-")[0] == "dqn":
        color = colors[2+int(alg.split('sweep')[1])]
    
    plotMean(xAxis, data, color=color)

    #plotMedian(xAxis, data, color=color)
    
    #plotBest(xAxis, data, transformation=transformation, color=color)
    
    #plotWorst(xAxis, data, transformation=transformation, color=color)
    
    #plotMeanAndConfidenceInterval(xAxis, data, confidence=0.95, color=color)
    
    #plotMeanAndPercentileRegions(xAxis, data, lower=0.025, upper=0.975, transformation=transformation, color=color)

    
#plt.title('Rewards averaged with sliding window of 1000 timesteps across 100 runs', pad=25, fontsize=10)
plt.xlabel('Timesteps', labelpad=35)
plt.ylabel(transformation, rotation=0, labelpad=45)
plt.rcParams['figure.figsize'] = [8, 5.33]
plt.legend(loc=0)
plt.yticks()
plt.xticks()
plt.tight_layout()

#plt.savefig('../img/'+transformation+'.png',dpi=500, bbox_inches='tight')

<IPython.core.display.Javascript object>