In [None]:
import time

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import pandas as pd

from src.env.MiniFurnace import MiniFurnace

# Tutorial of the environment

In [None]:
# Create the MiniFurnace environment with its default constructor arguments.
# The resulting `env` object is shared by every cell that follows.
env = MiniFurnace()

+ dimension of state($x$):  $5$
+ dimension of action($u$): $3$
+ The state order ($I$) and action order ($J$) are unknown. We will try several combinations of these orders to find suitable values.
+ The environment is fully observable, with $o_t=s_t$
+ In order to limit the complexity of the task, the environment was designed to be fully deterministic
+ The environment dynamics are unknown.
    * We will find a model that approximates the environment's dynamics by training a MultiSteplinear model.

## Attributes of the environment

+ observation_space: [$-2, 2$]
+ action_space: [$-1, 1$]
+ state_dim: $5$
+ action_dim: $3$

In [None]:
# Report the observation space exposed by the environment.
print(f'The observation space of environment is {env.observation_space}')

In [None]:
# Report the action space exposed by the environment.
print(f'The action space of environment is {env.action_space}')

In [None]:
# Report the observation dimension, read from env.state_dim.
print(f'The dimension of observation is {env.state_dim}')

In [None]:
# Report the action dimension, read from env.action_dim.
print(f'The dimension of action is {env.action_dim}')

## The functions of the environment 

+ step($u$): advance the dynamics to the next timestep
    * the shape of $u$ must be (1, action_dim)
    * $u$ must be a numpy array
    * otherwise, an error will be raised
    * return: the next timestep's observation
+ reset(): reset the observations and actions to their initial values
    * return: the initial observation and the initial action (at $t=0$)
+ get_obs(): get current observation tensor
    * return: current timestep's observation
+ get_action(): get current action tensor
    * return: current timestep's action

### env.step(u)

In [None]:
# Build a random action of shape (1, action_dim). The width is taken from the
# environment instead of a hard-coded 3 so the cell stays valid if the action
# dimension changes.
# NOTE(review): np.random.rand samples from [0, 1), i.e. only the non-negative
# half of the documented [-1, 1] action range; this is kept as in the original.
# assumes env.action_dim is a plain integer — TODO confirm
u = np.random.rand(1, env.action_dim)

# Advance the environment by one step and show the value step() returns.
next_x = env.step(u)
print(next_x)

### env.get_action()

In [None]:
# Display what env.get_action() returns. The label reads "action" to match the
# call; it previously said "observation", copied from the get_obs cell.
print('get current action {}'.format(env.get_action()))

### env.get_obs()

In [None]:
# Display what env.get_obs() returns.
print(f'get current observation {env.get_obs()}')

### env.reset()

In [None]:
# Reset the environment; the result is unpacked into an observation and an action.
initial_obs, initial_action = env.reset()

# Show each returned value together with its shape.
print(f'initial observation is {initial_obs}, and the shape is {initial_obs.shape}')
print(f'initial action is {initial_action}, and the shape is {initial_action.shape}')