In [1]:
# gets the environment set up 
from utilities.decagon_env import Decagon
import numpy as np
from tqdm import tqdm

# sets up env
env = Decagon()
HOLDOUT_SPLIT = 0.2
RANDOM_SEED = 42
tolerance = 0.01
gaussian_scale = 0.05 # random noise during action collection
save_directory = 'decagon_data'

# generates the landmark tuples
landmarks = list(env.point_dict.keys())
landmark_tuples = []
for landmark_1 in landmarks:
    for landmark_2 in landmarks:
        for landmark_3 in landmarks:
            for landmark_4 in landmarks:
                for landmark_5 in landmarks:
                    if len(set((landmark_1,landmark_2,landmark_3,landmark_4,landmark_5))) == 5:
                        landmark_tuples.append(tuple((landmark_1,landmark_2,landmark_3,landmark_4,landmark_5)))

# shuffles list and generates train/test split
rng = np.random.default_rng(seed=RANDOM_SEED)


# creates storing stuff 
trajectory_storage = [] # list of trajectories, each of which has state observations and matching string
skip_1 = 0
skip_2 = 0
# for each tuple generates a sequence
for landmark_tuple in tqdm(landmark_tuples):
    storage_dict = {}

    # state storage
    state_storage = []
    # resets env with approrpriate goal
    state, _ = env.reset(options={'start_point':landmark_tuple[0]})
    landmark_index = 1
    at_next_landmark = False
    next_landmark = landmark_tuple[landmark_index]
    landmark_coords = env.point_dict[next_landmark]
    # storage for plan sketcher
    landmark_storage = [landmark_tuple[0]]
    distance_storage = []
    distance = 0
    completed = False
    # main loop
    for i in range(512):
        state_storage.append(state)
        action = landmark_coords - state
        action = action + rng.normal(loc=0,scale=gaussian_scale,size=action.shape)
        state, _, terminated, truncated, info = env.step(action)
        at_next_landmark = np.linalg.norm(state-landmark_coords) < tolerance
        distance += 1
        if at_next_landmark:
            # plan sketcher storage
            if not completed:
                landmark_storage.append(landmark_tuple[landmark_index])
                distance_storage.append(distance)
                distance = 0

            landmark_index += 1
            if landmark_index > 4:
                landmark_index = 4
                completed = True
            at_next_landmark = False
            next_landmark = landmark_tuple[landmark_index]
            landmark_coords = env.point_dict[next_landmark]

            
    if len(landmark_storage) == 3: # for cases where last 2 landmarks are missed - should be rare
        skip_2 += 1
    if len(landmark_storage) == 4: # for cases where last landmark was missed
        skip_1 += 1 
    

    storage_dict.update({
                         'trajectory':np.array(state_storage),
                         'landmarks': np.array(landmark_storage),
                         'distances': np.array(distance_storage)
                         })
    trajectory_storage.append(storage_dict)


# training/testing splits
rng.shuffle(trajectory_storage)
training_split = trajectory_storage[:1-int(HOLDOUT_SPLIT*len(trajectory_storage))]
testing_split = trajectory_storage[1-int(HOLDOUT_SPLIT*len(trajectory_storage)):]
# saves stuff


print(skip_2)
print(skip_1)

100%|██████████| 30240/30240 [14:01<00:00, 35.94it/s]

0
0





In [2]:
np.save(save_directory + '/train_data',training_split)
np.save(save_directory + '/test_data',testing_split)

In [3]:
# making smaller data splits

save_directory = 'decagon_data'


training_split = np.load(save_directory + '/train_data.npy',allow_pickle=True)
print(len(training_split))
training_split_medium = training_split[:2500]
training_split_small = training_split[:250]
training_split_tiny = training_split[:25]

np.save(save_directory + '/train_data_medium',training_split_medium)
np.save(save_directory + '/train_data_small',training_split_small)
np.save(save_directory + '/train_data_tiny',training_split_tiny)

24193


In [4]:
# weirder data splits
import numpy as np
save_directory = 'decagon_data'
training_split = np.load(save_directory + '/train_data.npy',allow_pickle=True)
print(len(training_split))
training_split_ssm = training_split[:444]
training_split_sm = training_split[:790]
training_split_smm = training_split[:1405]

np.save(save_directory + '/train_data_ssm',training_split_ssm)
np.save(save_directory + '/train_data_sm',training_split_sm)
np.save(save_directory + '/train_data_smm',training_split_smm)

24193
