In [None]:
from os.path import join
import os
import pickle

import numpy as np
import torch
import matplotlib.pyplot as plt

from src.env.MiniFurnace import MiniFurnace

from torch.utils.data import DataLoader, TensorDataset

# Use the GPU when PyTorch reports CUDA as available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# Environment instance that the trajectories in this notebook are generated from.
env = MiniFurnace()
# Number of random trajectories to generate for each data split.
num_TRAIN_trajectory = 10
num_VALIDATION_trajectory = 5
num_TEST_trajectory = 5

### Data generation function from environment

In [None]:
def generate_random_trajectory(env, random_seed=1, T=100):
    """Roll out ``T`` uniformly random actions on ``env`` and record the result.

    The global NumPy RNG is seeded with ``random_seed`` before the rollout, so
    the sampled actions are reproducible for a given seed (note that this
    reseeds NumPy's global state for the caller as well).

    Args:
        env: Environment exposing ``reset() -> (obs, action)``,
            ``step(action) -> obs``, an ``action_space`` mapping with
            ``'low'`` / ``'high'`` bounds, and an ``action_dim`` attribute.
        random_seed: Seed passed to ``np.random.seed``.
        T: Number of random actions to apply after the reset.

    Returns:
        ``(observations, actions)``: the ``T + 1`` observations and ``T + 1``
        actions (the pair returned by ``reset`` followed by one entry per
        step), each joined with ``np.concatenate`` along axis 0.
        Presumably every entry is ``(1, dim)`` so the results are
        ``(T + 1, dim)`` -- TODO confirm against the environment.
    """
    np.random.seed(random_seed)

    # The pair returned by reset() opens both histories.
    initial_obs, initial_action = env.reset()
    observations = [initial_obs]
    actions = [initial_action]

    for _ in range(T):
        # Sample one action row uniformly inside the environment's bounds.
        action = np.random.uniform(
            low=env.action_space['low'],
            high=env.action_space['high'],
            size=(1, env.action_dim),
        )
        observations.append(env.step(action))
        actions.append(action)

    stacked_obs = np.concatenate(observations)
    stacked_actions = np.concatenate(actions)
    return stacked_obs, stacked_actions

### Generate train data from environment

In [None]:
# Generate the training split: one random trajectory per seed 0 .. num_TRAIN_trajectory - 1,
# so the data set is reproducible.
train_obs, train_action = [], []
for i in range(num_TRAIN_trajectory):
    list_o, list_a = generate_random_trajectory(env=env, random_seed=i)
    train_obs.append(list_o)
    train_action.append(list_a)

# Per-trajectory arrays are stored as lists under fixed keys.
train_data = {
    'traj_obs': train_obs,
    'traj_action': train_action,
}

# open(..., 'wb') does not create missing parent directories, so make sure
# the output folder exists before writing.
os.makedirs('data', exist_ok=True)
with open('data/train_data.pkl', 'wb') as f:
    pickle.dump(train_data, f)

In [None]:
# Shape of the action array of the last training trajectory. Read from the
# stored list instead of the `list_a` loop variable, which the validation and
# test cells overwrite.
train_action[-1].shape

In [None]:
# Shape of the observation array of the last training trajectory. Read from
# the stored list instead of the `list_o` loop variable, which the validation
# and test cells overwrite.
train_obs[-1].shape

### Generate validation data from environment

In [None]:
# Generate the validation split: seeds start at 100, which keeps them disjoint
# from the training seeds (0 .. num_TRAIN_trajectory - 1) for the sizes configured here.
val_obs, val_action = [], []
for i in range(num_VALIDATION_trajectory):
    list_o, list_a = generate_random_trajectory(env=env, random_seed=i + 100)
    val_obs.append(list_o)
    val_action.append(list_a)

# Per-trajectory arrays are stored as lists under fixed keys.
val_data = {
    'traj_obs': val_obs,
    'traj_action': val_action,
}

# open(..., 'wb') does not create missing parent directories, so make sure
# the output folder exists before writing.
os.makedirs('data', exist_ok=True)
with open('data/val_data.pkl', 'wb') as f:
    pickle.dump(val_data, f)

### Generate test data from environment

In [None]:
# Generate the test split: seeds start at 10000, well away from the seed
# ranges used for the training and validation splits.
test_obs, test_action = [], []
for i in range(num_TEST_trajectory):
    list_o, list_a = generate_random_trajectory(env=env, random_seed=i + 10000)
    test_obs.append(list_o)
    test_action.append(list_a)

# Per-trajectory arrays are stored as lists under fixed keys.
test_data = {
    'traj_obs': test_obs,
    'traj_action': test_action,
}

# open(..., 'wb') does not create missing parent directories, so make sure
# the output folder exists before writing.
os.makedirs('data', exist_ok=True)
with open('data/test_data.pkl', 'wb') as f:
    pickle.dump(test_data, f)

### Generate reference (objective) trajectory data

In [None]:
# Upper and lower plateau levels of the reference trajectory.
max_value, min_value = 0.1, -0.1
# Number of samples in each segment of the reference trajectory.
period = 20

In [None]:
# Build the reference signal from seven segments of `period` samples each,
# identical across all `env.state_dim` rows:
#   zero -> ramp up -> hold max -> ramp down -> hold min -> ramp back -> zero
traj1 = np.zeros((env.state_dim, period))
traj2 = np.tile(np.linspace(0, max_value, period), (env.state_dim, 1))
traj3 = np.full((env.state_dim, period), max_value, dtype=float)
traj4 = np.tile(np.linspace(max_value, min_value, period), (env.state_dim, 1))
traj5 = np.full((env.state_dim, period), min_value, dtype=float)
traj6 = np.tile(np.linspace(min_value, 0, period), (env.state_dim, 1))
traj7 = np.zeros((env.state_dim, period))

# Join the segments along the sample axis -> (env.state_dim, 7 * period).
reference_trajectory = np.hstack(
    [traj1, traj2, traj3, traj4, traj5, traj6, traj7]
)

In [None]:
# np.save does not create missing parent directories, so make sure the output
# folder exists before writing the reference trajectory.
os.makedirs('data', exist_ok=True)
np.save('data/reference_trajectory.npy', reference_trajectory)

In [None]:
# Plot row 0 of the reference trajectory (the first state dimension) against
# the sample index, with labels so the figure stands on its own.
fig, ax = plt.subplots()
ax.plot(reference_trajectory[0])
# Trailing semicolon suppresses the textual repr of the return value.
ax.set(title='Reference trajectory (state dimension 0)', xlabel='step', ylabel='reference value');