In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [2]:
import sys
sys.path.append("/project/")

from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter, PercentFormatter
import seaborn as sns
import matplotlib.lines as mlines


from utils import get_df

In [3]:
ALGO_LIST = [
    "mcpg_me",
    "dcg_me",
    "dcg_me_pg_steps",
    "dcg_me_batch_size",
    "dcg_me_cr_steps",
    "pga_me",
    "pga_me_pg_steps",
    "pga_me_batch_size",
    "pga_me_cr_steps",
    "me",
    "memes",
]

In [4]:
def filter(df_row):
    if df_row["algo"] == "dcg_me":
        if df_row["num_critic_training_steps"] != 3000:
            return "dcg_me_cr_steps"
        
        if df_row["num_pg_training_steps"] != 150:
            return "dcg_me_pg_steps"
        
        if df_row["training_batch_size"] != 100:
            return "dcg_me_batch_size"
        
    
    if df_row["algo"] == "pga_me":
        if df_row["num_critic_training_steps"] != 3000:
            return "pga_me_cr_steps"
        
        if df_row["num_pg_training_steps"] != 150:
            return "pga_me_pg_steps"
        
        if df_row["training_batch_size"] != 100:
            return "pga_me_batch_size"
            
    return df_row["algo"]

In [5]:
results_dir = Path("scal_test_ac/output/")
EPISODE_LENGTH = 250
df = get_df(results_dir, EPISODE_LENGTH)
df['algo'] = df.apply(filter, axis=1)
df = df[df["algo"].isin(ALGO_LIST)]
df = df[df["num_evaluations"] <= 1_005_000]

ant_omni_250
ant_uni_250
walker2d_uni_250


In [6]:
PGA_LIST = [
    "pga_me",
    "pga_me_pg_steps",
    "pga_me_batch_size",
    "pga_me_cr_steps",
]

DCG_LIST = [
    "dcg_me",
    "dcg_me_pg_steps",
    "dcg_me_batch_size",
    "dcg_me_cr_steps",
]

REST_LIST = [
    "mcpg_me",
    "me",
    "memes"
]

In [7]:
idx = df.groupby(["env", "algo", "run"])["iteration"].idxmax()
df_last_iteration = df.loc[idx]
df = df_last_iteration[['env', 'algo', 'time', 'qd_score', 'batch_size']]

In [8]:
df

Unnamed: 0,env,algo,time,qd_score,batch_size
26424,ant_omni_250,dcg_me,2204.731689,986976.37500,256
38424,ant_omni_250,dcg_me,2201.303223,955488.68750,256
50424,ant_omni_250,dcg_me,2201.743408,969590.43750,256
62424,ant_omni_250,dcg_me,2204.287842,976348.37500,256
74424,ant_omni_250,dcg_me,2223.974609,974977.37500,256
...,...,...,...,...,...
1354661,walker2d_uni_250,pga_me_cr_steps,2134.184570,480805.21875,512
1225444,walker2d_uni_250,pga_me_pg_steps,6573.014648,563690.43750,4096
1227189,walker2d_uni_250,pga_me_pg_steps,3144.781738,541048.62500,2048
1230680,walker2d_uni_250,pga_me_pg_steps,1275.929932,543838.93750,1024


In [8]:
pga_df = df[df["algo"].isin(PGA_LIST)]
dcg_df = df[df["algo"].isin(DCG_LIST)]
rest_df = df[df["algo"].isin(REST_LIST)]

In [9]:
pga_df[['qd_score', 'time']] = pga_df.groupby(['env'])[['qd_score', 'time']].transform(
    lambda x: (x - x.min()) / (x.max() - x.min())
)

pga_df = pga_df.groupby(['env', 'algo', 'batch_size'])[['qd_score', 'time']].median().reset_index()

pga_df['time'] = 1 - pga_df['time']

pga_df['scalability_score'] = pga_df['qd_score'] * pga_df['time']

pga_df = pga_df.groupby(['algo', 'batch_size'])[['scalability_score']].mean().reset_index()

#pga_df = pga_df.loc[pga_df.groupby(['env'])['scalability_score'].idxmax()]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pga_df[['qd_score', 'time']] = pga_df.groupby(['env'])[['qd_score', 'time']].transform(


In [10]:
pga_df

Unnamed: 0,algo,batch_size,scalability_score
0,pga_me,256,0.390125
1,pga_me,512,0.512589
2,pga_me,1024,0.550851
3,pga_me,2048,0.50076
4,pga_me,4096,0.348388
5,pga_me_batch_size,512,0.463893
6,pga_me_batch_size,1024,0.358222
7,pga_me_batch_size,2048,0.328766
8,pga_me_batch_size,4096,0.000622
9,pga_me_cr_steps,512,0.279792


In [11]:
pga_df = pga_df.loc[pga_df['scalability_score'].idxmax()]

In [12]:
pga_df

algo                   pga_me
batch_size               1024
scalability_score    0.550851
Name: 2, dtype: object

In [13]:
dcg_df[['qd_score', 'time']] = dcg_df.groupby(['env'])[['qd_score', 'time']].transform(
    lambda x: (x - x.min()) / (x.max() - x.min())
)

dcg_df = dcg_df.groupby(['env', 'algo', 'batch_size'])[['qd_score', 'time']].median().reset_index()

dcg_df['time'] = 1 - dcg_df['time']

dcg_df['scalability_score'] = dcg_df['qd_score'] * dcg_df['time']

dcg_df = dcg_df.groupby(['algo', 'batch_size'])[['scalability_score']].mean().reset_index()

#dcg_df = dcg_df.loc[dcg_df.groupby(['env'])['scalability_score'].idxmax()]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dcg_df[['qd_score', 'time']] = dcg_df.groupby(['env'])[['qd_score', 'time']].transform(


In [14]:
dcg_df

Unnamed: 0,algo,batch_size,scalability_score
0,dcg_me,256,0.283689
1,dcg_me,512,0.487358
2,dcg_me,1024,0.743094
3,dcg_me,2048,0.741805
4,dcg_me,4096,0.470108
5,dcg_me_batch_size,512,0.388002
6,dcg_me_batch_size,1024,0.544021
7,dcg_me_batch_size,2048,0.212655
8,dcg_me_batch_size,4096,0.039571
9,dcg_me_cr_steps,512,0.295384


In [15]:
dcg_df = dcg_df.loc[dcg_df['scalability_score'].idxmax()]

In [16]:
dcg_df

algo                   dcg_me
batch_size               1024
scalability_score    0.743094
Name: 2, dtype: object

In [17]:
rest_df[['qd_score', 'time']] = rest_df.groupby(['env', 'algo'])[['qd_score', 'time']].transform(
    lambda x: (x - x.min()) / (x.max() - x.min())
)

rest_df = rest_df.groupby(['env', 'algo', 'batch_size'])[['qd_score', 'time']].median().reset_index()

rest_df['time'] = 1 - rest_df['time']

rest_df['scalability_score'] = rest_df['qd_score'] * rest_df['time']

rest_df = rest_df.groupby(['algo', 'batch_size'])[['scalability_score']].mean().reset_index()

#rest_df = rest_df.loc[rest_df.groupby(['env', 'algo'])['scalability_score'].idxmax()]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rest_df[['qd_score', 'time']] = rest_df.groupby(['env', 'algo'])[['qd_score', 'time']].transform(


In [18]:
rest_df

Unnamed: 0,algo,batch_size,scalability_score
0,mcpg_me,256,0.022586
1,mcpg_me,512,0.212335
2,mcpg_me,1024,0.480144
3,mcpg_me,2048,0.565302
4,mcpg_me,4096,0.65557
5,me,256,0.029966
6,me,512,0.281644
7,me,1024,0.36242
8,me,2048,0.53619
9,me,4096,0.566902


In [19]:
rest_df = rest_df.loc[rest_df.groupby(['algo'])['scalability_score'].idxmax()]

In [20]:
rest_df

Unnamed: 0,algo,batch_size,scalability_score
4,mcpg_me,4096,0.65557
9,me,4096,0.566902
10,memes,16,0.334903


In [12]:
df[['qd_score', 'time']] = df.groupby(['env'])[['qd_score', 'time']].transform(
    lambda x: (x - x.min()) / (x.max() - x.min())
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[['qd_score', 'time']] = df.groupby(['env'])[['qd_score', 'time']].transform(


In [8]:
df[df['env']=='ant_uni_250'][df['algo'] == "mcpg_me"]

  df[df['env']=='ant_uni_250'][df['algo'] == "mcpg_me"]


Unnamed: 0,env,algo,time,qd_score,batch_size
3924,ant_uni_250,mcpg_me,0.942984,0.834222,256
15924,ant_uni_250,mcpg_me,0.998477,0.496438,256
27924,ant_uni_250,mcpg_me,1.0,0.717133,256
39924,ant_uni_250,mcpg_me,0.939764,0.422556,256
51924,ant_uni_250,mcpg_me,0.943728,0.295597,256
61961,ant_uni_250,mcpg_me,0.393231,0.414854,512
67961,ant_uni_250,mcpg_me,0.425022,0.350813,512
73961,ant_uni_250,mcpg_me,0.392915,0.47206,512
79961,ant_uni_250,mcpg_me,0.393448,0.019581,512
85961,ant_uni_250,mcpg_me,0.425869,0.0,512


In [10]:
df = df.groupby(['env', 'algo', 'batch_size'])[['qd_score', 'time']].median().reset_index()

In [10]:
df

Unnamed: 0,env,algo,batch_size,qd_score,time
0,ant_omni_250,dcg_me,256,0.975481,0.989342
1,ant_omni_250,dcg_me,512,0.958362,0.422470
2,ant_omni_250,dcg_me,1024,0.950429,0.154078
3,ant_omni_250,dcg_me,2048,0.790351,0.018137
4,ant_omni_250,dcg_me,4096,0.208019,0.000461
...,...,...,...,...,...
67,walker2d_uni_250,pga_me,1024,0.474255,0.103168
68,walker2d_uni_250,pga_me,2048,0.437494,0.047219
69,walker2d_uni_250,pga_me,4096,0.248359,0.002351
70,walker2d_uni_250,pga_me_batch_size,4096,,


In [11]:
df['time'] = 1 - df['time']

In [8]:
df

Unnamed: 0,env,algo,batch_size,qd_score,time
0,ant_omni_250,dcg_me,256,1.0,0.135792
1,ant_omni_250,dcg_me,512,0.989157,0.557055
2,ant_omni_250,dcg_me,1024,0.984132,0.756506
3,ant_omni_250,dcg_me,2048,0.882738,0.857528
4,ant_omni_250,dcg_me,4096,0.513884,0.870664
5,ant_omni_250,mcpg_me,256,0.745888,0.834048
6,ant_omni_250,mcpg_me,512,0.706045,0.918493
7,ant_omni_250,mcpg_me,1024,0.672867,0.956411
8,ant_omni_250,mcpg_me,2048,0.804323,0.973242
9,ant_omni_250,mcpg_me,4096,0.800098,0.978872


In [12]:
df['scalability_score'] = df['qd_score'] * df['time']

In [13]:
df

Unnamed: 0,env,algo,batch_size,qd_score,time,scalability_score
0,ant_omni_250,dcg_me,256,0.975481,0.010658,0.010397
1,ant_omni_250,dcg_me,512,0.958362,0.57753,0.553483
2,ant_omni_250,dcg_me,1024,0.950429,0.845922,0.803988
3,ant_omni_250,dcg_me,2048,0.790351,0.981863,0.776017
4,ant_omni_250,dcg_me,4096,0.208019,0.999539,0.207923
5,ant_omni_250,mcpg_me,256,0.64029,0.060575,0.038786
6,ant_omni_250,mcpg_me,512,0.552199,0.608182,0.335837
7,ant_omni_250,mcpg_me,1024,0.478842,0.854072,0.408966
8,ant_omni_250,mcpg_me,2048,0.769488,0.963219,0.741186
9,ant_omni_250,mcpg_me,4096,0.760148,0.999731,0.759944


In [14]:
df = df.loc[df.groupby(['env', 'algo'])['scalability_score'].idxmax()]

In [15]:
df

Unnamed: 0,env,algo,batch_size,qd_score,time,scalability_score
2,ant_omni_250,dcg_me,1024,0.950429,0.845922,0.803988
9,ant_omni_250,mcpg_me,4096,0.760148,0.999731,0.759944
13,ant_omni_250,me,2048,0.371024,0.94353,0.350072
19,ant_omni_250,pga_me,4096,0.418602,0.998446,0.417952
23,ant_uni_250,dcg_me,2048,0.975815,0.97624,0.95263
29,ant_uni_250,mcpg_me,4096,0.691865,0.991346,0.685877
34,ant_uni_250,me,4096,0.612798,0.999185,0.612299
37,ant_uni_250,pga_me,1024,0.796593,0.888004,0.707378
42,walker2d_uni_250,dcg_me,1024,0.962435,0.858681,0.826424
47,walker2d_uni_250,mcpg_me,1024,0.674561,0.909277,0.613362
