# Analyse the results

In [8]:
import os
import numpy as np
from collections import defaultdict

In [9]:
def list_dir(path):
    return list(filter(lambda f: not f.startswith('.'), os.listdir(path)))

def read_results(path):
    results = []
    with open(path, 'r') as f:
        for l in f.readlines():
            r = float(l)
            results.append(r)
    return results

In [13]:
return_types = set()
step_sizes_types = set()

results_folder = "results"
seeds = list_dir(results_folder)

results_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

# Structure is: seed/return_function/num_steps/learning_rate.txt
for s in seeds:
    seed_folder = os.path.join(results_folder, str(s))
    returns = list_dir(seed_folder)
    # various return types
    for r in returns:
        return_types.add(r)
        
        returns_folder = os.path.join(seed_folder, str(r))
        step_sizes = list_dir(returns_folder)
        # various step sizes
        for ns in step_sizes:
            step_sizes_types.add(ns)
            
            step_sizes_folder = os.path.join(returns_folder, str(ns))
            files = list_dir(step_sizes_folder)
            # each file has a different learning rate
            for file in files:
                # get the number after 0.
                learning_rate = file.split(".")[1]
                filepath = os.path.join(step_sizes_folder, file)
                results = read_results(filepath)
                
                # we append all the seeds to the same list
                for result in results:
                    results_dict[r][ns][learning_rate].append(result)

In [14]:
print(results_dict["GAE"])

defaultdict(<function <lambda>.<locals>.<lambda> at 0x1078f4c80>, {})


In [15]:
print(results_dict["Q"]["1"])

defaultdict(<class 'list'>, {'0009': [51.2, 24.7, 77.2, 107.8, 99.9, 95.7, 85.8, 50.8, 39.1, 36.3, 37.2, 37.1, 38.4, 33.1, 51.0, 39.3, 32.0, 102.6, 55.4, 19.8, 61.7, 58.8, 50.6, 44.6, 26.4, 20.2, 19.5, 44.8, 53.2, 30.7, 26.8, 21.0, 26.0, 26.1, 24.1, 21.7, 23.4, 20.7, 46.3, 36.3, 27.1, 43.5, 96.8, 39.2, 30.2, 20.8, 19.6, 21.8, 26.3, 22.8], '01': [21.6, 30.4, 18.9, 24.9, 18.8, 24.7, 20.2, 54.0, 39.5, 51.4, 55.6, 22.3, 44.8, 69.8, 27.2, 47.6, 26.2, 33.2, 23.7, 19.8, 24.8, 22.0, 27.0, 38.9, 60.6, 21.3, 17.4, 47.4, 19.1, 72.0, 17.2, 37.2, 28.1, 22.8, 28.9, 22.1, 16.5, 11.0, 13.7, 15.8, 98.3, 38.8, 30.0, 64.2, 101.1, 31.5, 26.6, 19.9, 135.4, 155.5], '009': [35.0, 28.7, 24.9, 20.6, 20.9, 31.0, 43.7, 63.7, 96.8, 168.0, 92.9, 139.7, 96.1, 85.7, 55.3, 58.8, 21.9, 194.0, 20.9, 19.2, 56.1, 26.7, 139.4, 14.5, 24.7, 28.5, 23.4, 40.2, 19.7, 44.4, 24.6, 29.2, 21.6, 21.5, 21.1, 72.5, 42.6, 26.0, 32.4, 34.6, 90.0, 137.6, 23.0, 41.7, 35.2, 27.3, 28.8, 20.4, 42.7, 46.2], '07': [25.8, 28.7, 36.2, 32.1, 29.

## Find the best learning rate for every step size and return combination

In [16]:
class color:
   PURPLE = '\033[95m'
   CYAN = '\033[96m'
   DARKCYAN = '\033[36m'
   BLUE = '\033[94m'
   GREEN = '\033[92m'
   YELLOW = '\033[93m'
   RED = '\033[91m'
   BOLD = '\033[1m'
   UNDERLINE = '\033[4m'
   END = '\033[0m'

# print(color.BOLD + 'Hello World !' + color.END)

In [27]:
for r in return_types:
    best_num_steps = 0
    best_global_mean = 0
    best_global_lr = 0
    
    for ns in step_sizes_types:
        best_lr = 0
        best_mean = 0
        
        for lr in results_dict[r][ns].keys():
            results = np.array(results_dict[r][ns][lr])
            
            mean = np.mean(results)
            median = np.median(results)
            std = np.std(results)
            print(f"Return: {r}\tnum_steps: {ns}\tlr: {lr}\tmean: {mean:.2f}\tmedian: {median}\tstd: {std:.2f}")
            
            # track which learning rate is best for this num_steps
            if mean > best_mean:
                best_mean = mean
                best_lr = lr

        # track which num_steps is best
        if best_mean > best_global_mean:
            best_num_steps = ns
            best_global_mean = best_mean
            best_global_lr = best_lr
        print(f"Finished for return: {r}, num_steps: {ns}")    
        print(f"Final result for num_steps: {ns}\treturn: {r}\tnum_steps: {ns}\t\tBest lr: {best_lr}\tBest mean: {best_mean:.2f}")
    
    print("\n"+"#"*30)    
    print(f"Finished for return: {r}")
    print(color.BOLD+f"Final result for return: {r}\tBest num_steps: {best_num_steps}\tBest lr: {best_global_lr}\tBest mean: {best_global_mean:.2f}"+color.END)
    print("#"*30+"\n")

Return: Q	num_steps: 10	lr: 0009	mean: 35.89	median: 32.9	std: 13.05
Return: Q	num_steps: 10	lr: 01	mean: 35.95	median: 26.2	std: 19.66
Return: Q	num_steps: 10	lr: 009	mean: 60.93	median: 54.55	std: 24.76
Return: Q	num_steps: 10	lr: 007	mean: 52.87	median: 45.45	std: 23.34
Return: Q	num_steps: 10	lr: 005	mean: 47.05	median: 41.15	std: 22.19
Return: Q	num_steps: 10	lr: 001	mean: 32.75	median: 28.5	std: 12.04
Return: Q	num_steps: 10	lr: 003	mean: 43.35	median: 41.95	std: 16.37
Return: Q	num_steps: 10	lr: 0003	mean: 52.84	median: 53.3	std: 13.67
Return: Q	num_steps: 10	lr: 0001	mean: 42.96	median: 46.25	std: 13.45
Return: Q	num_steps: 10	lr: 0005	mean: 46.03	median: 44.2	std: 11.64
Finished for return: Q, num_steps: 10
Final result for num_steps: 10	return: Q	num_steps: 10		Best lr: 009	Best mean: 60.93
Return: Q	num_steps: 1	lr: 0009	mean: 42.91	median: 36.7	std: 24.17
Return: Q	num_steps: 1	lr: 01	mean: 38.79	median: 27.1	std: 29.51
Return: Q	num_steps: 1	lr: 009	mean: 51.09	median: 33.