In [28]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [112]:
alg_values = ["ppo", "a2c", "dqn"]
step_values = ["10000", "50000", "100000", "500000", "1000000"]
rew_values = ["break-and-follow", "break", "follow"]

base_path = "/home/ullmann/Documents/ml-work/comp6321-project/testing/round1/scores/"
ds_b = pd.read_csv(base_path + "b20.csv", sep=",")
ds_f = pd.read_csv(base_path + "f20.csv", sep=",")
ds_bf = pd.read_csv(base_path + "bf20.csv", sep=",")

def getDs(reward):
    if reward == "break-and-follow":
        return ds_bf
    elif reward == "break":
        return ds_b
    return ds_f
    
def printMaxMin(score_avgs, lives_avgs, combination):
    max_score = max(score_avgs)
    max_lives = max(lives_avgs)

    min_score = min(score_avgs)
    min_lives = min(lives_avgs)

    print("=====")
    print("Max score: %1.2f, from %s" % (max_score, combination[score_avgs.index(max_score)]))
    print("Max lives: %1.2f, from %s" % (max_lives, combination[lives_avgs.index(max_lives)]))
    print("=====")
    print("Min score: %1.2f, from %s" % (min_score, combination[score_avgs.index(min_score)]))
    print("Min lives: %1.2f, from %s" % (min_lives, combination[lives_avgs.index(min_lives)]))
    print(getTopX(np.array(score_avgs), 5))
    print(getTopX(np.array(lives_avgs), 5))
    
def getTopX(arr, x):
    return arr[np.argpartition(arr, -x)[-x:]]

In [113]:
# average by algorithm
score_avgs = []
lives_avgs = []
combination = []
for a in alg_values:
    for r in rew_values:
        ds = getDs(r)
        combination.append(a + " | " + r)
        print(combination[-1] + ":")
        filtered = ds[(ds.algorithm.str.contains(a))]
        
        s = filtered["score"].mean()
        l = filtered["lives"].mean()
        score_avgs.append(s)
        lives_avgs.append(l)
        
        
        print("Avg score: %1.2f" % s)
        print("Avg lives: %1.2f" % l)
        print("")
    print("=====")
    
printMaxMin(score_avgs, lives_avgs, combination)

ppo | break-and-follow:
Avg score: 102.60
Avg lives: 5.00

ppo | break:
Avg score: 43.70
Avg lives: 0.69

ppo | follow:
Avg score: 112.50
Avg lives: 4.50

=====
a2c | break-and-follow:
Avg score: 76.60
Avg lives: 3.14

a2c | break:
Avg score: 22.40
Avg lives: 0.17

a2c | follow:
Avg score: 71.30
Avg lives: 2.89

=====
dqn | break-and-follow:
Avg score: 44.60
Avg lives: 1.14

dqn | break:
Avg score: 10.90
Avg lives: 0.06

dqn | follow:
Avg score: 46.10
Avg lives: 2.21

=====
=====
Max score: 112.50, from ppo | follow
Max lives: 5.00, from ppo | break-and-follow
=====
Min score: 10.90, from dqn | break
Min lives: 0.06, from dqn | break
[ 46.1  71.3  76.6 112.5 102.6]
[2.21 2.89 3.14 4.5  5.  ]


In [114]:
# average by steps
score_avgs = []
lives_avgs = []
combination = []
for step in step_values:
    for r in rew_values:
        ds = getDs(r)
        combination.append(str(step) + " | " + r)
        print(combination[-1] + ":")
        filtered = ds[(ds.algorithm.str.contains("_" + str(step)))]

        s = filtered["score"].mean()
        l = filtered["lives"].mean()
        score_avgs.append(s)
        lives_avgs.append(l)
        
        
        print("Avg score: %1.2f" % s)
        print("Avg lives: %1.2f" % l)
        print("")
    print("=====")
    
printMaxMin(score_avgs, lives_avgs, combination)

10000 | break-and-follow:
Avg score: 75.94
Avg lives: 3.24

10000 | break:
Avg score: 24.06
Avg lives: 0.41

10000 | follow:
Avg score: 68.72
Avg lives: 3.04

=====
50000 | break-and-follow:
Avg score: 72.58
Avg lives: 2.87

50000 | break:
Avg score: 28.08
Avg lives: 0.15

50000 | follow:
Avg score: 88.50
Avg lives: 3.44

=====
100000 | break-and-follow:
Avg score: 79.42
Avg lives: 3.22

100000 | break:
Avg score: 13.08
Avg lives: 0.30

100000 | follow:
Avg score: 66.42
Avg lives: 2.61

=====
500000 | break-and-follow:
Avg score: 84.83
Avg lives: 3.43

500000 | break:
Avg score: 43.67
Avg lives: 0.27

500000 | follow:
Avg score: 90.50
Avg lives: 4.08

=====
1000000 | break-and-follow:
Avg score: 90.33
Avg lives: 3.10

1000000 | break:
Avg score: 25.33
Avg lives: 0.60

1000000 | follow:
Avg score: 33.83
Avg lives: 1.25

=====
=====
Max score: 90.50, from 500000 | follow
Max lives: 4.08, from 500000 | follow
=====
Min score: 13.08, from 100000 | break
Min lives: 0.15, from 50000 | break


In [111]:
# average by alg and steps
score_avgs = []
lives_avgs = []
combination = []
for a in alg_values:
    for step in step_values:
        for r in rew_values:
            ds = getDs(r)
            combination.append(str(step) + " | " + a + " | " + r)
            #print(combination[-1] + ":")
            filtered = ds[(ds.algorithm.str.contains("_" + str(step)) & ds.algorithm.str.contains(a))]

            s = filtered["score"].mean()
            l = filtered["lives"].mean()
            score_avgs.append(s)
            lives_avgs.append(l)

            #print("Avg score: %1.2f" % s)
            #print("Avg lives: %1.2f" % l)
            #print("")
        #print("=====")
    
printMaxMin(score_avgs, lives_avgs, combination)

=====
Max score: 130.25, from 50000 | ppo | follow
Max lives: 5.00, from 10000 | ppo | break-and-follow
=====
Min score: 0.00, from 1000000 | a2c | follow
Min lives: 0.00, from 1000000 | a2c | follow
[104.25 107.   115.   122.5  130.25]
[4.25 4.5  5.   5.   5.   5.   4.95 5.   4.5  5.  ]


In [107]:
130.25 / 7

18.607142857142858