## Data Scaling Results on Decagon

In [36]:
# imports, parameters, environment, etc.
import torch
import torch.nn as nn
from utilities.ogbench_utilities import *
from diffusion_planner import DiffusionPlannerConfig
from diffusion_planner import eval_model as diffusion_planer_eval
from utilities.cleandiffuser.invdynamic import MlpInvDynamic
from utilities.cleandiffuser.diffusion import ContinuousDiffusionSDE
from utilities.cleandiffuser.diffusion import DiscreteDiffusionSDE
from utilities.cleandiffuser.nn_diffusion import JannerUNet1d,CNN1dShiftEq,ConvNext1dShiftEq
from utilities.cleandiffuser.nn_condition import MLPCondition
from utilities.cleandiffuser.classifier import CumRewClassifier
from utilities.cleandiffuser.nn_classifier import HalfJannerUNet1d
from utilities.decagon_env import *
from utilities.toy_env_utilities import ToyEnvInvDyn
from utilities.ogbench_utilities import *
import matplotlib.pyplot as plt
from matplotlib import colormaps
from diffusion_planner import get_trajectory


# ---- notebook-wide sampling / rollout parameters ----
env = 'gridland'  # placeholder string; rebound to a Decagon instance below
n_size, memory = 5, 1
horizon = 512 # for generation so must be power of 2
num_envs, num_episodes = 250, 1
temperature = 0.5
n_exec_steps = 512
render = True

# ---- planner config, environment and the saved train/test splits ----
import numpy as np 
config = DiffusionPlannerConfig()
config.horizon, config.gen_horizon = 500, 512
env = Decagon()
env.reset()
save_directory = 'decagon_data'

# both splits are pickled object arrays stored under save_directory
training_split, testing_split = (
    np.load(save_directory + file_suffix, allow_pickle=True)
    for file_suffix in ('/train_data.npy', '/test_data.npy')
)

# sanity check: the landmarks stored with the first training entry next to the
# landmarks the environment recomputes from that entry's trajectory
print(training_split[0]['landmarks'])
print(env.get_trajectory_landmarks(training_split[0]['trajectory']))

print('Training Length:',len(training_split))
print('Testing Length:',len(testing_split))

['H' 'I' 'F' 'D' 'C']
['H' 'I' 'F' 'D' 'C']
Training Length: 24193
Testing Length: 6047


In [37]:
# U-Net unconditional, full data
#
# Rebuilds the unconditional diffusion planner, loads the checkpoint named
# below, draws samples with get_trajectory, and counts how many of the sampled
# landmark sequences also occur in the training / testing splits.

training_split = np.load(save_directory + '/train_data.npy',allow_pickle=True)
testing_split = np.load(save_directory + '/test_data.npy',allow_pickle=True)
config = DiffusionPlannerConfig()
config.guidance = 'none'
config.num_episodes = 1
config.w_cfg = 1.0
config.num_envs = num_envs
obs_dim = 2
# all-zero mask -- presumably means no entry is held fixed while sampling; TODO confirm
fix_mask = torch.zeros((config.gen_horizon, obs_dim))
# --------------- Network Architecture -----------------
if config.use_shift_equivariant_arch:
    nn_diffusion = CNN1dShiftEq(obs_dim,
                                kernel_expansion_rate=config.kernel_expansion_rate,
                                model_dim = config.model_dim,
                                emb_dim = config.emb_dim,
                                encode_position = config.add_positional_encoding)
else:
    # NOTE(review): emb_dim is taken from config.model_dim here, whereas the
    # branch above uses config.emb_dim. Left unchanged because the checkpoint
    # loaded below was presumably trained with this setting -- confirm.
    nn_diffusion = JannerUNet1d(
        obs_dim, model_dim=config.model_dim, emb_dim=config.model_dim, dim_mult=[1, 2, 2, 2],
        timestep_emb_type="positional", attention=config.self_attention, kernel_size=5,
        use_timestep_emb=config.use_timestep_embeddings)


classifier = None
# the condition network is only built when the guidance mode contains 'cfg'
if 'cfg' in config.guidance:
    nn_condition = MLPCondition(
    in_dim=config.lang_enc_size, out_dim=config.emb_dim, hidden_dims=[config.emb_dim, ], act=nn.SiLU(), dropout=config.label_dropout)
else:
    nn_condition = None
# uniform (all-ones) loss weights with the same shape as fix_mask
loss_weight = torch.ones((config.gen_horizon, obs_dim))
agent = DiscreteDiffusionSDE(
        nn_diffusion = nn_diffusion, 
        nn_condition = nn_condition,
        classifier = classifier, 
        fix_mask=fix_mask, 
        loss_weight=loss_weight, 
        ema_rate=config.ema_rate,
        device=config.device,
        diffusion_steps=config.diffusion_steps, 
        predict_noise=config.predict_noise)

# loads agent
agent.load('trained_models/DP-UNet-Uncond-Full-decagon-2a272be8diffusion_ckpt_200000.pt')

# Index every landmark sequence of each split once, as a hashable tuple, so
# that each sample needs two set lookups instead of a scan over both splits.
train_landmarks = {tuple(entry['landmarks']) for entry in training_split}
test_landmarks = {tuple(entry['landmarks']) for entry in testing_split}

total_train = 0
total_test = 0
total_not_train = 0
# no conditioning input is passed (None); element [0] of the result is the
# collection of sampled trajectories iterated below
sampled_trajectories = get_trajectory(agent,config,None)[0]
for sample_trajectory in sampled_trajectories:
    # landmark sequence corresponding to the sampled trajectory
    landmark_tuple = env.get_trajectory_landmarks(sample_trajectory)
    landmark_key = tuple(landmark_tuple)
    in_training = landmark_key in train_landmarks
    in_testing = landmark_key in test_landmarks
    if in_training:
        total_train += 1
    else:
        total_not_train += 1
    # counted independently of the training check above
    if in_testing:
        total_test += 1
print('# In Train:',total_train)
print('# Not In Train:',total_not_train)
print('# In Test:',total_test)
# fraction of samples whose landmark sequence is absent from the training
# split; nan instead of a ZeroDivisionError when nothing was sampled
total_samples = total_train + total_not_train
print('Ratio:',total_not_train/total_samples if total_samples else float('nan'))

# In Train: 173
# Not In Train: 77
# In Test: 35
Ratio: 0.308


In [38]:
# U-Net unconditional, medium
#
# Rebuilds the unconditional diffusion planner, loads the checkpoint named
# below, draws samples with get_trajectory, and counts how many of the sampled
# landmark sequences also occur in the training / testing splits.

training_split = np.load(save_directory + '/train_data_medium.npy',allow_pickle=True)
# testing_split is not reloaded in this cell; the value left behind by an
# earlier cell is reused.
config = DiffusionPlannerConfig()
config.guidance = 'none'
config.num_episodes = 1
config.w_cfg = 1.0
config.num_envs = num_envs
obs_dim = 2
# all-zero mask -- presumably means no entry is held fixed while sampling; TODO confirm
fix_mask = torch.zeros((config.gen_horizon, obs_dim))
# --------------- Network Architecture -----------------
if config.use_shift_equivariant_arch:
    nn_diffusion = CNN1dShiftEq(obs_dim,
                                kernel_expansion_rate=config.kernel_expansion_rate,
                                model_dim = config.model_dim,
                                emb_dim = config.emb_dim,
                                encode_position = config.add_positional_encoding)
else:
    # NOTE(review): emb_dim is taken from config.model_dim here, whereas the
    # branch above uses config.emb_dim. Left unchanged because the checkpoint
    # loaded below was presumably trained with this setting -- confirm.
    nn_diffusion = JannerUNet1d(
        obs_dim, model_dim=config.model_dim, emb_dim=config.model_dim, dim_mult=[1, 2, 2, 2],
        timestep_emb_type="positional", attention=config.self_attention, kernel_size=5,
        use_timestep_emb=config.use_timestep_embeddings)


classifier = None
# the condition network is only built when the guidance mode contains 'cfg'
if 'cfg' in config.guidance:
    nn_condition = MLPCondition(
    in_dim=config.lang_enc_size, out_dim=config.emb_dim, hidden_dims=[config.emb_dim, ], act=nn.SiLU(), dropout=config.label_dropout)
else:
    nn_condition = None
# uniform (all-ones) loss weights with the same shape as fix_mask
loss_weight = torch.ones((config.gen_horizon, obs_dim))
agent = DiscreteDiffusionSDE(
        nn_diffusion = nn_diffusion, 
        nn_condition = nn_condition,
        classifier = classifier, 
        fix_mask=fix_mask, 
        loss_weight=loss_weight, 
        ema_rate=config.ema_rate,
        device=config.device,
        diffusion_steps=config.diffusion_steps, 
        predict_noise=config.predict_noise)

# loads agent
agent.load('trained_models/DP-UNet-Uncond-Medium-decagon-8822f635diffusion_ckpt_200000.pt')

# Index every landmark sequence of each split once, as a hashable tuple, so
# that each sample needs two set lookups instead of a scan over both splits.
train_landmarks = {tuple(entry['landmarks']) for entry in training_split}
test_landmarks = {tuple(entry['landmarks']) for entry in testing_split}

total_train = 0
total_test = 0
total_not_train = 0
# no conditioning input is passed (None); element [0] of the result is the
# collection of sampled trajectories iterated below
sampled_trajectories = get_trajectory(agent,config,None)[0]
for sample_trajectory in sampled_trajectories:
    # landmark sequence corresponding to the sampled trajectory
    landmark_tuple = env.get_trajectory_landmarks(sample_trajectory)
    landmark_key = tuple(landmark_tuple)
    in_training = landmark_key in train_landmarks
    in_testing = landmark_key in test_landmarks
    if in_training:
        total_train += 1
    else:
        total_not_train += 1
    # counted independently of the training check above
    if in_testing:
        total_test += 1
print('# In Train:',total_train)
print('# Not In Train:',total_not_train)
print('# In Test:',total_test)
# fraction of samples whose landmark sequence is absent from the training
# split; nan instead of a ZeroDivisionError when nothing was sampled
total_samples = total_train + total_not_train
print('Ratio:',total_not_train/total_samples if total_samples else float('nan'))

# In Train: 170
# Not In Train: 80
# In Test: 19
Ratio: 0.32


In [39]:
# U-Net unconditional, SMM
#
# Rebuilds the unconditional diffusion planner, loads the checkpoint named
# below, draws samples with get_trajectory, and counts how many of the sampled
# landmark sequences also occur in the training / testing splits.

training_split = np.load(save_directory + '/train_data_smm.npy',allow_pickle=True)
# testing_split is not reloaded in this cell; the value left behind by an
# earlier cell is reused.
config = DiffusionPlannerConfig()
config.guidance = 'none'
config.num_episodes = 1
config.w_cfg = 1.0
config.num_envs = num_envs
obs_dim = 2
# all-zero mask -- presumably means no entry is held fixed while sampling; TODO confirm
fix_mask = torch.zeros((config.gen_horizon, obs_dim))
# --------------- Network Architecture -----------------
if config.use_shift_equivariant_arch:
    nn_diffusion = CNN1dShiftEq(obs_dim,
                                kernel_expansion_rate=config.kernel_expansion_rate,
                                model_dim = config.model_dim,
                                emb_dim = config.emb_dim,
                                encode_position = config.add_positional_encoding)
else:
    # NOTE(review): emb_dim is taken from config.model_dim here, whereas the
    # branch above uses config.emb_dim. Left unchanged because the checkpoint
    # loaded below was presumably trained with this setting -- confirm.
    nn_diffusion = JannerUNet1d(
        obs_dim, model_dim=config.model_dim, emb_dim=config.model_dim, dim_mult=[1, 2, 2, 2],
        timestep_emb_type="positional", attention=config.self_attention, kernel_size=5,
        use_timestep_emb=config.use_timestep_embeddings)


classifier = None
# the condition network is only built when the guidance mode contains 'cfg'
if 'cfg' in config.guidance:
    nn_condition = MLPCondition(
    in_dim=config.lang_enc_size, out_dim=config.emb_dim, hidden_dims=[config.emb_dim, ], act=nn.SiLU(), dropout=config.label_dropout)
else:
    nn_condition = None
# uniform (all-ones) loss weights with the same shape as fix_mask
loss_weight = torch.ones((config.gen_horizon, obs_dim))
agent = DiscreteDiffusionSDE(
        nn_diffusion = nn_diffusion, 
        nn_condition = nn_condition,
        classifier = classifier, 
        fix_mask=fix_mask, 
        loss_weight=loss_weight, 
        ema_rate=config.ema_rate,
        device=config.device,
        diffusion_steps=config.diffusion_steps, 
        predict_noise=config.predict_noise)

# loads agent
agent.load('trained_models/DP-UNet-Uncond-SMM-decagon-e64cda3adiffusion_ckpt_200000.pt')

# Index every landmark sequence of each split once, as a hashable tuple, so
# that each sample needs two set lookups instead of a scan over both splits.
train_landmarks = {tuple(entry['landmarks']) for entry in training_split}
test_landmarks = {tuple(entry['landmarks']) for entry in testing_split}

total_train = 0
total_test = 0
total_not_train = 0
# no conditioning input is passed (None); element [0] of the result is the
# collection of sampled trajectories iterated below
sampled_trajectories = get_trajectory(agent,config,None)[0]
for sample_trajectory in sampled_trajectories:
    # landmark sequence corresponding to the sampled trajectory
    landmark_tuple = env.get_trajectory_landmarks(sample_trajectory)
    landmark_key = tuple(landmark_tuple)
    in_training = landmark_key in train_landmarks
    in_testing = landmark_key in test_landmarks
    if in_training:
        total_train += 1
    else:
        total_not_train += 1
    # counted independently of the training check above
    if in_testing:
        total_test += 1
print('# In Train:',total_train)
print('# Not In Train:',total_not_train)
print('# In Test:',total_test)
# fraction of samples whose landmark sequence is absent from the training
# split; nan instead of a ZeroDivisionError when nothing was sampled
total_samples = total_train + total_not_train
print('Ratio:',total_not_train/total_samples if total_samples else float('nan'))

# In Train: 232
# Not In Train: 18
# In Test: 2
Ratio: 0.072


In [40]:
# U-Net unconditional, SM
#
# Rebuilds the unconditional diffusion planner, loads the checkpoint named
# below, draws samples with get_trajectory, and counts how many of the sampled
# landmark sequences also occur in the training / testing splits.

training_split = np.load(save_directory + '/train_data_sm.npy',allow_pickle=True)
# testing_split is not reloaded in this cell; the value left behind by an
# earlier cell is reused.
config = DiffusionPlannerConfig()
config.guidance = 'none'
config.num_episodes = 1
config.w_cfg = 1.0
config.num_envs = num_envs
obs_dim = 2
# all-zero mask -- presumably means no entry is held fixed while sampling; TODO confirm
fix_mask = torch.zeros((config.gen_horizon, obs_dim))
# --------------- Network Architecture -----------------
if config.use_shift_equivariant_arch:
    nn_diffusion = CNN1dShiftEq(obs_dim,
                                kernel_expansion_rate=config.kernel_expansion_rate,
                                model_dim = config.model_dim,
                                emb_dim = config.emb_dim,
                                encode_position = config.add_positional_encoding)
else:
    # NOTE(review): emb_dim is taken from config.model_dim here, whereas the
    # branch above uses config.emb_dim. Left unchanged because the checkpoint
    # loaded below was presumably trained with this setting -- confirm.
    nn_diffusion = JannerUNet1d(
        obs_dim, model_dim=config.model_dim, emb_dim=config.model_dim, dim_mult=[1, 2, 2, 2],
        timestep_emb_type="positional", attention=config.self_attention, kernel_size=5,
        use_timestep_emb=config.use_timestep_embeddings)


classifier = None
# the condition network is only built when the guidance mode contains 'cfg'
if 'cfg' in config.guidance:
    nn_condition = MLPCondition(
    in_dim=config.lang_enc_size, out_dim=config.emb_dim, hidden_dims=[config.emb_dim, ], act=nn.SiLU(), dropout=config.label_dropout)
else:
    nn_condition = None
# uniform (all-ones) loss weights with the same shape as fix_mask
loss_weight = torch.ones((config.gen_horizon, obs_dim))
agent = DiscreteDiffusionSDE(
        nn_diffusion = nn_diffusion, 
        nn_condition = nn_condition,
        classifier = classifier, 
        fix_mask=fix_mask, 
        loss_weight=loss_weight, 
        ema_rate=config.ema_rate,
        device=config.device,
        diffusion_steps=config.diffusion_steps, 
        predict_noise=config.predict_noise)

# loads agent
# NOTE(review): this cell loads the "..._ckpt_latest.pt" file, while the other
# cells in this notebook load "..._ckpt_200000.pt" -- confirm both refer to a
# comparable training step before comparing the numbers.
agent.load('trained_models/DP-UNet-Uncond-SM-decagon-f2faaa21diffusion_ckpt_latest.pt')

# Index every landmark sequence of each split once, as a hashable tuple, so
# that each sample needs two set lookups instead of a scan over both splits.
train_landmarks = {tuple(entry['landmarks']) for entry in training_split}
test_landmarks = {tuple(entry['landmarks']) for entry in testing_split}

total_train = 0
total_test = 0
total_not_train = 0
# no conditioning input is passed (None); element [0] of the result is the
# collection of sampled trajectories iterated below
sampled_trajectories = get_trajectory(agent,config,None)[0]
for sample_trajectory in sampled_trajectories:
    # landmark sequence corresponding to the sampled trajectory
    landmark_tuple = env.get_trajectory_landmarks(sample_trajectory)
    landmark_key = tuple(landmark_tuple)
    in_training = landmark_key in train_landmarks
    in_testing = landmark_key in test_landmarks
    if in_training:
        total_train += 1
    else:
        total_not_train += 1
    # counted independently of the training check above
    if in_testing:
        total_test += 1
print('# In Train:',total_train)
print('# Not In Train:',total_not_train)
print('# In Test:',total_test)
# fraction of samples whose landmark sequence is absent from the training
# split; nan instead of a ZeroDivisionError when nothing was sampled
total_samples = total_train + total_not_train
print('Ratio:',total_not_train/total_samples if total_samples else float('nan'))

# In Train: 248
# Not In Train: 2
# In Test: 0
Ratio: 0.008


In [41]:
# U-Net unconditional, SSM
#
# Rebuilds the unconditional diffusion planner, loads the checkpoint named
# below, draws samples with get_trajectory, and counts how many of the sampled
# landmark sequences also occur in the training / testing splits.

training_split = np.load(save_directory + '/train_data_ssm.npy',allow_pickle=True)
# testing_split is not reloaded in this cell; the value left behind by an
# earlier cell is reused.
config = DiffusionPlannerConfig()
config.guidance = 'none'
config.num_episodes = 1
config.w_cfg = 1.0
config.num_envs = num_envs
obs_dim = 2
# all-zero mask -- presumably means no entry is held fixed while sampling; TODO confirm
fix_mask = torch.zeros((config.gen_horizon, obs_dim))
# --------------- Network Architecture -----------------
if config.use_shift_equivariant_arch:
    nn_diffusion = CNN1dShiftEq(obs_dim,
                                kernel_expansion_rate=config.kernel_expansion_rate,
                                model_dim = config.model_dim,
                                emb_dim = config.emb_dim,
                                encode_position = config.add_positional_encoding)
else:
    # NOTE(review): emb_dim is taken from config.model_dim here, whereas the
    # branch above uses config.emb_dim. Left unchanged because the checkpoint
    # loaded below was presumably trained with this setting -- confirm.
    nn_diffusion = JannerUNet1d(
        obs_dim, model_dim=config.model_dim, emb_dim=config.model_dim, dim_mult=[1, 2, 2, 2],
        timestep_emb_type="positional", attention=config.self_attention, kernel_size=5,
        use_timestep_emb=config.use_timestep_embeddings)


classifier = None
# the condition network is only built when the guidance mode contains 'cfg'
if 'cfg' in config.guidance:
    nn_condition = MLPCondition(
    in_dim=config.lang_enc_size, out_dim=config.emb_dim, hidden_dims=[config.emb_dim, ], act=nn.SiLU(), dropout=config.label_dropout)
else:
    nn_condition = None
# uniform (all-ones) loss weights with the same shape as fix_mask
loss_weight = torch.ones((config.gen_horizon, obs_dim))
agent = DiscreteDiffusionSDE(
        nn_diffusion = nn_diffusion, 
        nn_condition = nn_condition,
        classifier = classifier, 
        fix_mask=fix_mask, 
        loss_weight=loss_weight, 
        ema_rate=config.ema_rate,
        device=config.device,
        diffusion_steps=config.diffusion_steps, 
        predict_noise=config.predict_noise)

# loads agent
agent.load('trained_models/DP-UNet-Uncond-SSM-decagon-c590c495diffusion_ckpt_200000.pt')

# Index every landmark sequence of each split once, as a hashable tuple, so
# that each sample needs two set lookups instead of a scan over both splits.
train_landmarks = {tuple(entry['landmarks']) for entry in training_split}
test_landmarks = {tuple(entry['landmarks']) for entry in testing_split}

total_train = 0
total_test = 0
total_not_train = 0
# no conditioning input is passed (None); element [0] of the result is the
# collection of sampled trajectories iterated below
sampled_trajectories = get_trajectory(agent,config,None)[0]
for sample_trajectory in sampled_trajectories:
    # landmark sequence corresponding to the sampled trajectory
    landmark_tuple = env.get_trajectory_landmarks(sample_trajectory)
    landmark_key = tuple(landmark_tuple)
    in_training = landmark_key in train_landmarks
    in_testing = landmark_key in test_landmarks
    if in_training:
        total_train += 1
    else:
        total_not_train += 1
    # counted independently of the training check above
    if in_testing:
        total_test += 1
print('# In Train:',total_train)
print('# Not In Train:',total_not_train)
print('# In Test:',total_test)
# fraction of samples whose landmark sequence is absent from the training
# split; nan instead of a ZeroDivisionError when nothing was sampled
total_samples = total_train + total_not_train
print('Ratio:',total_not_train/total_samples if total_samples else float('nan'))

# In Train: 250
# Not In Train: 0
# In Test: 0
Ratio: 0.0


In [42]:
# U-Net unconditional, small
#
# Rebuilds the unconditional diffusion planner, loads the checkpoint named
# below, draws samples with get_trajectory, and counts how many of the sampled
# landmark sequences also occur in the training / testing splits.

training_split = np.load(save_directory + '/train_data_small.npy',allow_pickle=True)
# testing_split is not reloaded in this cell; the value left behind by an
# earlier cell is reused.
config = DiffusionPlannerConfig()
config.guidance = 'none'
config.num_episodes = 1
config.w_cfg = 1.0
config.num_envs = num_envs
obs_dim = 2
# all-zero mask -- presumably means no entry is held fixed while sampling; TODO confirm
fix_mask = torch.zeros((config.gen_horizon, obs_dim))
# --------------- Network Architecture -----------------
if config.use_shift_equivariant_arch:
    nn_diffusion = CNN1dShiftEq(obs_dim,
                                kernel_expansion_rate=config.kernel_expansion_rate,
                                model_dim = config.model_dim,
                                emb_dim = config.emb_dim,
                                encode_position = config.add_positional_encoding)
else:
    # NOTE(review): emb_dim is taken from config.model_dim here, whereas the
    # branch above uses config.emb_dim. Left unchanged because the checkpoint
    # loaded below was presumably trained with this setting -- confirm.
    nn_diffusion = JannerUNet1d(
        obs_dim, model_dim=config.model_dim, emb_dim=config.model_dim, dim_mult=[1, 2, 2, 2],
        timestep_emb_type="positional", attention=config.self_attention, kernel_size=5,
        use_timestep_emb=config.use_timestep_embeddings)


classifier = None
# the condition network is only built when the guidance mode contains 'cfg'
if 'cfg' in config.guidance:
    nn_condition = MLPCondition(
    in_dim=config.lang_enc_size, out_dim=config.emb_dim, hidden_dims=[config.emb_dim, ], act=nn.SiLU(), dropout=config.label_dropout)
else:
    nn_condition = None
# uniform (all-ones) loss weights with the same shape as fix_mask
loss_weight = torch.ones((config.gen_horizon, obs_dim))
agent = DiscreteDiffusionSDE(
        nn_diffusion = nn_diffusion, 
        nn_condition = nn_condition,
        classifier = classifier, 
        fix_mask=fix_mask, 
        loss_weight=loss_weight, 
        ema_rate=config.ema_rate,
        device=config.device,
        diffusion_steps=config.diffusion_steps, 
        predict_noise=config.predict_noise)

# loads agent
agent.load('trained_models/DP-UNet-Uncond-Small-decagon-c400557ediffusion_ckpt_200000.pt')

# Index every landmark sequence of each split once, as a hashable tuple, so
# that each sample needs two set lookups instead of a scan over both splits.
train_landmarks = {tuple(entry['landmarks']) for entry in training_split}
test_landmarks = {tuple(entry['landmarks']) for entry in testing_split}

total_train = 0
total_test = 0
total_not_train = 0
# no conditioning input is passed (None); element [0] of the result is the
# collection of sampled trajectories iterated below
sampled_trajectories = get_trajectory(agent,config,None)[0]
for sample_trajectory in sampled_trajectories:
    # landmark sequence corresponding to the sampled trajectory
    landmark_tuple = env.get_trajectory_landmarks(sample_trajectory)
    landmark_key = tuple(landmark_tuple)
    in_training = landmark_key in train_landmarks
    in_testing = landmark_key in test_landmarks
    if in_training:
        total_train += 1
    else:
        total_not_train += 1
    # counted independently of the training check above
    if in_testing:
        total_test += 1
print('# In Train:',total_train)
print('# Not In Train:',total_not_train)
print('# In Test:',total_test)
# fraction of samples whose landmark sequence is absent from the training
# split; nan instead of a ZeroDivisionError when nothing was sampled
total_samples = total_train + total_not_train
print('Ratio:',total_not_train/total_samples if total_samples else float('nan'))

# In Train: 250
# Not In Train: 0
# In Test: 0
Ratio: 0.0
