In [25]:
import pandas as pd
import os
import wandb

# Load data from WANDB/Disk

In [26]:
# Local CSV cache of the run histories, and the W&B project they are pulled from.
DATA_FILE = "data/vecDQN.csv"
WANDB_NAME = "rogercreus/vec-DQN"

if os.path.exists(DATA_FILE):
    # A cache file is present: reuse it instead of querying W&B again.
    data = pd.read_csv(DATA_FILE)
else:
    api = wandb.Api()
    runs = api.runs(WANDB_NAME)

    # Collect one history frame per run and concatenate once at the end;
    # concatenating inside the loop re-copies all previous rows on every iteration.
    run_histories = []
    for run in runs:
        run_history = run.history(
            samples=500,
            x_axis='global_step',
            pandas=True
        )
        run_history['run'] = run.name
        run_histories.append(run_history)

    # pd.concat raises on an empty list, so fall back to an empty frame when there are no runs.
    data = pd.concat(run_histories) if run_histories else pd.DataFrame()

    # Write the cache only once, after every run has been fetched. The branch above
    # trusts any existing file, so a file written mid-loop would be mistaken for a
    # complete download if the loop were interrupted.
    if run_histories:
        os.makedirs(os.path.dirname(DATA_FILE) or '.', exist_ok=True)
        data.to_csv(DATA_FILE)

data.head()

Unnamed: 0.1,Unnamed: 0,metrics/feature_matrix_approximate_rank,train/avg_ep_return,schedule/epsilon,metrics/lambda_N,metrics/feature_matrix_effective_rank,train/loss,train/hns,metrics/feature_matrix_srank,train_step,...,charts/q_values,charts/avg_episodic_return,charts/episodic_return,charts/episodic_length,charts/learning_rate,charts/per_beta,losses/q_loss,charts/epsilon,charts/global_step,charts/train_step
0,0,183.0,140.699997,0.96832,0.0,203.545654,0.077786,-0.0048,445.0,1.0,...,,,,,,,,,,
1,1,97.0,149.300003,0.948544,0.0,161.822205,0.089153,0.000855,449.0,99879.0,...,,,,,,,,,,
2,2,72.0,172.699997,0.928768,0.0,148.913483,0.108907,0.016243,455.0,199759.0,...,,,,,,,,,,
3,3,45.0,175.0,0.908991,0.0,131.72731,0.958015,0.017755,455.0,299639.0,...,,,,,,,,,,
4,4,34.0,192.300003,0.889215,0.0,123.729546,1.763952,0.029131,456.0,399519.0,...,,,,,,,,,,


In [27]:
# Show the column labels of the loaded frame.
data.columns

Index(['Unnamed: 0', 'metrics/feature_matrix_approximate_rank',
       'train/avg_ep_return', 'schedule/epsilon', 'metrics/lambda_N',
       'metrics/feature_matrix_effective_rank', 'train/loss', 'train/hns',
       'metrics/feature_matrix_srank', 'train_step',
       'metrics/change_of_policy_due_to_representation',
       'metrics/representations_l2_distance', 'metrics/lambda_2',
       'metrics/feature_matrix_rank', '_step',
       'metrics/feature_cov_matrix_rank_torch', 'metrics/q_value_stability',
       'metrics/lambda_1', 'global_step', 'metrics/representation_stability',
       'metrics/policy_churn', 'metrics/change_of_policy_due_to_q_values',
       'metrics/feature_norm', 'metrics/feature_cov_matrix_rankMe',
       'metrics/dead_neurons_cnn', 'metrics/feature_mean', 'sps', '_runtime',
       'metrics/feature_std', '_timestamp',
       'metrics/representations_cosine_similarity', 'metrics/dead_neurons_mlp',
       'train/q_values', 'run', 'losses/policy_loss', 'losses/value_

# Preprocess the data

In [28]:

# Remove old run naming: keep only runs whose name starts with "algo:".
# na=False makes a missing run name count as "no match" instead of putting NaN into
# the boolean mask, and .copy() yields an independent frame so the column
# assignments below never write into a slice of the loaded frame.
data = data[data['run'].str.startswith('algo:', na=False)].copy()

# Strip these tags from the run names (literal match, not a regular expression).
for obsolete_tag in ('_objective:q_lambda', '_rolloutLength:80'):
    data['run'] = data['run'].str.replace(obsolete_tag, '', regex=False)

# parse algorithm: first "_"-separated token, text after the ":"
data['algorithm'] = data['run'].apply(lambda x: x.split('_')[0].split(':')[1])

# separate [ppo,pqn], [r2d2] and dqn data into independent frames
ppo_pqn_data = data[data['algorithm'].isin(['ppo', 'pqn'])].copy()
async_data = data[data['algorithm'].isin(['r2d2'])].copy()
data = data[data['algorithm'] == 'dqn'].copy()


def _add_default_run_tags(name):
    """Return `name` with the useTargetNet/useLN, shuffled and encoder tags inserted where absent.

    The steps run in a fixed order because each one looks for the tag that the
    previous step may just have inserted.
    """
    # add targetNet and LN to run names that don't have them
    if '_useTargetNet' not in name:
        name = name.replace('-v5', '-v5_useTargetNet:True_useLN:False')

    # add shuffled to run names where useLN is directly followed by nenvs or encoder
    for use_ln in ('False', 'True'):
        tag = '_useLN:' + use_ln
        if (tag + '_nenvs' in name) or (tag + '_encoder' in name):
            name = name.replace(tag, tag + '_shuffled:False')

    # add encoder to run names where shuffled is directly followed by nenvs
    for shuffled in ('False', 'True'):
        tag = '_shuffled:' + shuffled
        if tag + '_nenvs' in name:
            name = name.replace(tag, tag + '_encoder:nature')

    return name


data['run'] = data['run'].apply(_add_default_run_tags)

# dqn data: after normalisation every run name has the same positional layout.
# Token 1 is the bare objective; tokens 2..12 are "key:value" pairs in this order.
tagged_fields = [
    'env', 'useTargetNet', 'useLN', 'shuffled', 'encoder', 'nenvs',
    'batchSize', 'numGradSteps', 'RR', 'DRR', 'seed',
]
run_tokens = data['run'].apply(lambda name: name.split('_'))
data['objective'] = run_tokens.apply(lambda tokens: tokens[1])
for position, field in enumerate(tagged_fields, start=2):
    # Bind `position` as a default argument so each lambda keeps its own index.
    data[field] = run_tokens.apply(lambda tokens, p=position: tokens[p].split(':')[1])

def _seed_from_token(token):
    """Return the digits that end `token` (e.g. "s3" -> "3", "seed:12" -> "12").

    Falls back to the last character when the token does not end in a digit.
    Taking only the last character unconditionally would truncate multi-digit seeds.
    """
    digits = token[len(token.rstrip('0123456789')):]
    return digits or token[-1]


# ppo/pqn data: e.g. algo:ppo_env:MsPacman-v5_s3_1733625007
# .assign builds a new frame, so nothing is written into a slice of `data`.
ppo_pqn_data = ppo_pqn_data.assign(
    env=ppo_pqn_data['run'].apply(lambda x: x.split('_')[1].split(':')[1]),
    seed=ppo_pqn_data['run'].apply(lambda x: _seed_from_token(x.split('_')[2])),
)

# async data: e.g. algo:r2d2_env:MsPacman-v5_seed:3_1736095304
async_data = async_data.assign(
    env=async_data['run'].apply(lambda x: x.split('_')[1].split(':')[1]),
    seed=async_data['run'].apply(lambda x: _seed_from_token(x.split('_')[2])),
)
async_data = async_data[async_data['env'] != 'craftax']

# 'run' is kept for now so the shifts below can be done per run; it is dropped before saving.
async_value_columns = ['learner/steps', 'actor/steps', 'charts/episode_return']
async_data = async_data[['run', 'algorithm', 'env', 'seed'] + async_value_columns]

# Each logged row carries only one of the three value columns; the other two are null.
# Keep every row that has at least one of them.
async_data = async_data.dropna(subset=async_value_columns, how='all')

# Line the three values up on one row by pulling the two preceding rows' values down:
# actor/steps from one row above and charts/episode_return from two rows above.
# NOTE(review): this assumes rows arrive in return -> actor -> learner order; confirm.
# The shift is done per run so a row never receives values logged by another run.
per_run = async_data.groupby('run', sort=False)
async_data = async_data.assign(**{
    'actor/steps': per_run['actor/steps'].shift(1),
    'charts/episode_return': per_run['charts/episode_return'].shift(2),
})

# Only rows where all three values are present survive.
async_data = async_data.drop(columns='run').dropna()
async_data.to_csv('data/async.csv')

In [29]:
# Preview the first rows of `data`.
data.head()

Unnamed: 0.1,Unnamed: 0,metrics/feature_matrix_approximate_rank,train/avg_ep_return,schedule/epsilon,metrics/lambda_N,metrics/feature_matrix_effective_rank,train/loss,train/hns,metrics/feature_matrix_srank,train_step,...,useTargetNet,useLN,shuffled,encoder,nenvs,batchSize,numGradSteps,RR,DRR,seed
0,0,183.0,140.699997,0.96832,0.0,203.545654,0.077786,-0.0048,445.0,1.0,...,True,False,False,nature,1,32,2,2.0,64.0,2
1,1,97.0,149.300003,0.948544,0.0,161.822205,0.089153,0.000855,449.0,99879.0,...,True,False,False,nature,1,32,2,2.0,64.0,2
2,2,72.0,172.699997,0.928768,0.0,148.913483,0.108907,0.016243,455.0,199759.0,...,True,False,False,nature,1,32,2,2.0,64.0,2
3,3,45.0,175.0,0.908991,0.0,131.72731,0.958015,0.017755,455.0,299639.0,...,True,False,False,nature,1,32,2,2.0,64.0,2
4,4,34.0,192.300003,0.889215,0.0,123.729546,1.763952,0.029131,456.0,399519.0,...,True,False,False,nature,1,32,2,2.0,64.0,2


In [30]:
############ DQN data
# One output row per unique combination of these keys.
run_columns = ['algorithm', 'objective', 'useTargetNet', 'useLN', 'shuffled', 'encoder', 'nenvs', 'batchSize', 'numGradSteps', 'DRR', 'RR', 'env', 'seed', 'global_step']

# Average the numeric columns per key combination, order the rows by the keys, then
# keep the keys plus every column whose name contains 'metrics' or 'train/hns'.
data = (
    data
    .groupby(run_columns)
    .mean(numeric_only=True)
    .reset_index()
    .sort_values(by=run_columns)
    .pipe(lambda frame: frame[
        run_columns
        + [name for name in frame.columns if 'metrics' in name or 'train/hns' in name]
    ])
)

########### PPO and PQN data
# Same aggregation on a shorter key list; only the keys and train/hns are kept and saved.
ppo_pqn_data = (
    ppo_pqn_data
    .groupby(['algorithm', 'env', 'seed', 'global_step'])
    .mean(numeric_only=True)
    .reset_index()
    .sort_values(by=['algorithm', 'env', 'seed', 'global_step'])
    .pipe(lambda frame: frame[['algorithm', 'env', 'seed', 'global_step', "train/hns"]])
)
ppo_pqn_data.to_csv('data/ppo_pqn.csv')

# Label with run names

In [31]:
# Fields read from a row, in the order they are read and compared.
_RULE_FIELDS = (
    'objective', 'useTargetNet', 'useLN', 'shuffled', 'encoder',
    'DRR', 'RR', 'nenvs', 'batchSize', 'numGradSteps',
)

# Placeholder meaning "this rule does not look at this field".
_ANY = None

# (plot name, expected value for each entry of _RULE_FIELDS). Rules are tried top to
# bottom and the first one whose non-wildcard values all match decides the name.
_PLOT_NAME_RULES = (
    ("DQN",                                     ("td1",     "True",  "False", "False", "nature", "8.0",   "0.25", "1",   "32",   "1")),
    ("DQN-RandomBatchSize",                     ("td1",     "True",  "False", "False", "nature", "8.0",   "0.25", "1",   "-1",   "1")),
    ("Vec-DQN-smallBatch",                      ("td1",     "True",  "False", "False", "nature", "64.0",  "8.0",  "4",   "32",   "8")),
    ("Broken-DQN",                              ("td1",     "True",  "False", "False", "nature", "64.0",  "2.0",  "1",   "32",   "2")),
    ("Vec-DQN",                                 ("td1",     "True",  "False", "False", "nature", "8.0",   "1.0",  "128", "1024", "1")),
    ("Vec-DQN-RandomBatchSize",                 ("td1",     "True",  "False", "False", "nature", "8.0",   "1.0",  "128", "-1",   "1")),
    ("Broken-Vec-DQN",                          ("td1",     "True",  "False", "False", "nature", "64.0",  "2.0",  "128", "4096", "2")),
    ("DQN-Rollout",                             ("rollout", "True",  "False", "False", "nature", _ANY,    _ANY,   "1",   "1",    "1")),
    ("Broken-DQN-Rollout",                      ("rollout", "True",  "False", "False", "nature", "160.0", "2.0",  "1",   "1",    "2")),
    ("Vec-DQN-Rollout",                         ("rollout", "True",  "False", "False", "nature", "20.0",  "1.0",  "128", "32",   "1")),
    ("Broken-Vec-DQN-Rollout",                  ("rollout", "True",  "False", "False", "nature", _ANY,    "2.0",  "128", "50",   "2")),
    ("DQN-NoTargetNet",                         ("td1",     "False", "False", "False", "nature", "8.0",   "0.25", "1",   "32",   "1")),
    ("DQN-NoTargetNet+LN",                      ("td1",     "False", "True",  "False", "nature", "8.0",   "0.25", "1",   "32",   "1")),
    ("Vec-DQN-Rollout-NoTargetNet",             ("rollout", "False", "False", "False", "nature", "20.0",  "1.0",  "128", "32",   "1")),
    ("Vec-DQN-Rollout-NoTargetNet+LN",          ("rollout", "False", "True",  "False", "nature", "20.0",  "1.0",  "128", "32",   "1")),
    ("Vec-DQN-Rollout-Shuffled",                ("rollout", "True",  "False", "True",  "nature", "20.0",  "1.0",  "128", "32",   "1")),
    ("Vec-DQN-Rollout-Shuffled-NoTargetNet+LN", ("rollout", "False", "True",  "True",  "nature", "20.0",  "1.0",  "128", "32",   "1")),
    ("Vec-DQN-Rollout-Shuffled-NoTargetNet",    ("rollout", "False", "False", "True",  "nature", "20.0",  "1.0",  "128", "32",   "1")),
    ("DQN-ResNet",                              ("td1",     "True",  "False", "False", "resnet", _ANY,    _ANY,   _ANY,  _ANY,   _ANY)),
    ("DQN-ResNet-NoTargetNet+LN",               ("td1",     "False", "True",  "False", "resnet", _ANY,    _ANY,   _ANY,  _ANY,   _ANY)),
    ("Vec-DQN-Rollout-ResNet",                  ("rollout", "True",  "False", "False", "resnet", _ANY,    _ANY,   _ANY,  _ANY,   _ANY)),
    ("Vec-DQN-Rollout-ResNet-NoTargetNet+LN",   ("rollout", "False", "True",  "False", "resnet", _ANY,    _ANY,   _ANY,  _ANY,   _ANY)),
)


def map_run_name_to_plot_name(row):
    """Return the plot label for one row of hyper-parameter settings.

    Args:
        row: anything indexable by the field names in `_RULE_FIELDS`
            (for example a DataFrame row). All ten fields are read up front,
            so a missing field raises even if no rule would need it.

    Returns:
        The name of the first rule in `_PLOT_NAME_RULES` whose expected values all
        equal the row's values, or "Unknown" when no rule matches. Expected values
        are strings, so the row's values must be strings to match.
    """
    observed = [row[field] for field in _RULE_FIELDS]

    for plot_name, expected in _PLOT_NAME_RULES:
        if all(want is _ANY or have == want for have, want in zip(observed, expected)):
            return plot_name

    return "Unknown"

# Label every row with its plot name (computed row by row from the hyper-parameter columns).
data['plot_name'] = data.apply(map_run_name_to_plot_name, axis=1)

# Persist the labelled frame, then list the distinct labels that were assigned.
data.to_csv("data/vecDQN_cleaned.csv")
print(data['plot_name'].unique())

['Vec-DQN-Rollout-NoTargetNet' 'Vec-DQN-Rollout-Shuffled-NoTargetNet'
 'Vec-DQN-Rollout-NoTargetNet+LN' 'Vec-DQN-Rollout-ResNet-NoTargetNet+LN'
 'Vec-DQN-Rollout-Shuffled-NoTargetNet+LN' 'DQN-Rollout'
 'Broken-DQN-Rollout' 'Vec-DQN-Rollout' 'Broken-Vec-DQN-Rollout'
 'Vec-DQN-Rollout-ResNet' 'Vec-DQN-Rollout-Shuffled' 'DQN-NoTargetNet'
 'DQN-NoTargetNet+LN' 'DQN-ResNet-NoTargetNet+LN' 'DQN-RandomBatchSize'
 'DQN' 'Broken-DQN' 'Vec-DQN-RandomBatchSize' 'Vec-DQN' 'Broken-Vec-DQN'
 'Vec-DQN-smallBatch' 'DQN-ResNet']
