In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
alg_values = ["ppo", "a2c", "dqn"]
step_values = ["10000", "50000", "100000", "500000", "1000000"]
rew_values = ["break-and-follow", "break", "follow"]

base_path = "/home/ullmann/Documents/ml-work/comp6321-project/testing/round2/scores/"
ds_b = pd.read_csv(base_path + "b20.csv", sep=",")
ds_f = pd.read_csv(base_path + "f20.csv", sep=",")
ds_bf = pd.read_csv(base_path + "bf20.csv", sep=",")

def getDs(reward):
    if reward == "break-and-follow":
        return ds_bf
    elif reward == "break":
        return ds_b
    return ds_f
    
def printMaxMin(score_avgs, lives_avgs, combination):
    max_score = max(score_avgs)
    max_lives = max(lives_avgs)

    min_score = min(score_avgs)
    min_lives = min(lives_avgs)

    print("=====")
    print("Max score: %1.2f, from %s" % (max_score, combination[score_avgs.index(max_score)]))
    print("Max lives: %1.2f, from %s" % (max_lives, combination[lives_avgs.index(max_lives)]))
    print("=====")
    print("Min score: %1.2f, from %s" % (min_score, combination[score_avgs.index(min_score)]))
    print("Min lives: %1.2f, from %s" % (min_lives, combination[lives_avgs.index(min_lives)]))
    print(getTopX(np.array(score_avgs), 5))
    print(getTopX(np.array(lives_avgs), 5))
    
def getTopX(arr, x):
    return arr[np.argpartition(arr, -x)[-x:]]

In [3]:
# average by algorithm
score_avgs = []
lives_avgs = []
combination = []
for a in alg_values:
    for r in rew_values:
        ds = getDs(r)
        combination.append(a + " | " + r)
        print(combination[-1] + ":")
        filtered = ds[(ds.algorithm.str.contains(a))]
        
        s = filtered["score"].mean()
        l = filtered["lives"].mean()
        score_avgs.append(s)
        lives_avgs.append(l)
        
        
        print("Avg score: %1.2f" % s)
        print("Avg lives: %1.2f" % l)
        print("")
    print("=====")
    
printMaxMin(score_avgs, lives_avgs, combination)

ppo | break-and-follow:
Avg score: 110.60
Avg lives: 4.38

ppo | break:
Avg score: 28.40
Avg lives: 0.60

ppo | follow:
Avg score: 106.60
Avg lives: 4.66

=====
a2c | break-and-follow:
Avg score: 96.89
Avg lives: 4.49

a2c | break:
Avg score: 8.80
Avg lives: 0.06

a2c | follow:
Avg score: 58.00
Avg lives: 1.96

=====
dqn | break-and-follow:
Avg score: nan
Avg lives: nan

dqn | break:
Avg score: 24.80
Avg lives: 0.16

dqn | follow:
Avg score: 47.80
Avg lives: 2.38

=====
=====
Max score: 110.60, from ppo | break-and-follow
Max lives: 4.66, from ppo | follow
=====
Min score: 8.80, from a2c | break
Min lives: 0.06, from a2c | break
[ 58.          96.88888889 106.6        110.6                 nan]
[2.38       4.48888889        nan 4.66       4.38      ]


In [4]:
# average by steps
score_avgs = []
lives_avgs = []
combination = []
for step in step_values:
    for r in rew_values:
        ds = getDs(r)
        combination.append(str(step) + " | " + r)
        print(combination[-1] + ":")
        filtered = ds[(ds.algorithm.str.contains("_" + str(step)))]

        s = filtered["score"].mean()
        l = filtered["lives"].mean()
        score_avgs.append(s)
        lives_avgs.append(l)
        
        
        print("Avg score: %1.2f" % s)
        print("Avg lives: %1.2f" % l)
        print("")
    print("=====")
    
printMaxMin(score_avgs, lives_avgs, combination)

10000 | break-and-follow:
Avg score: 112.00
Avg lives: 4.02

10000 | break:
Avg score: 20.44
Avg lives: 0.32

10000 | follow:
Avg score: 75.78
Avg lives: 3.48

=====
50000 | break-and-follow:
Avg score: 93.25
Avg lives: 5.00

50000 | break:
Avg score: 21.00
Avg lives: 0.20

50000 | follow:
Avg score: 63.33
Avg lives: 2.28

=====
100000 | break-and-follow:
Avg score: 112.00
Avg lives: 3.46

100000 | break:
Avg score: 30.67
Avg lives: 0.48

100000 | follow:
Avg score: 65.33
Avg lives: 3.32

=====
500000 | break-and-follow:
Avg score: 87.50
Avg lives: 5.00

500000 | break:
Avg score: 13.00
Avg lives: 0.13

500000 | follow:
Avg score: 74.00
Avg lives: 2.80

=====
1000000 | break-and-follow:
Avg score: 120.67
Avg lives: 2.53

1000000 | break:
Avg score: 31.33
Avg lives: 0.83

1000000 | follow:
Avg score: 60.33
Avg lives: 3.33

=====
=====
Max score: 120.67, from 1000000 | break-and-follow
Max lives: 5.00, from 50000 | break-and-follow
=====
Min score: 13.00, from 500000 | break
Min lives: 0

In [5]:
# average by alg and steps
score_avgs = []
lives_avgs = []
combination = []
for a in alg_values:
    for step in step_values:
        for r in rew_values:
            ds = getDs(r)
            combination.append(str(step) + " | " + a + " | " + r)
            print(combination[-1] + ":")
            filtered = ds[(ds.algorithm.str.contains("_" + str(step)) & ds.algorithm.str.contains(a))]

            s = filtered["score"].mean()
            l = filtered["lives"].mean()
            score_avgs.append(s)
            lives_avgs.append(l)

            #print("Avg score: %1.2f" % s)
            #print("Avg lives: %1.2f" % l)
            #print("")
        #print("=====")
    
printMaxMin(score_avgs, lives_avgs, combination)

=====
Max score: 155.00, from 1000000 | ppo | break-and-follow
Max lives: 5.00, from 10000 | ppo | follow
=====
Min score: 0.00, from 10000 | a2c | break
Min lives: 0.00, from 10000 | a2c | break
[nan nan nan nan nan]
[nan nan nan nan nan]


In [6]:
130.25 / 7

18.607142857142858