Test PPO model

In [None]:

#!/usr/bin/env python3
import os
import gym
from gym import wrappers
from lib import model
import numpy as np
import torch
import ptan
import pandas as pd

# Gym environment id handed to gym.make() in main().
ENV_ID = "RocketLander-v0"
# Path of the saved actor state dict; a falsy value makes main() skip loading weights.
MODEL_TO_LOAD = "D:/Coding/SpaceXReinforcementLearning/rocket_saved_network/PPO/actorbest_-3.092_1600000.dat"
# Directory passed to gym's Monitor wrapper; a falsy value (None) disables the wrapper.
RECORD_RUN = None #record dir
# When True, main() logs per-step states and actions to an Excel workbook under excel_logs.
SAVE_RUN = True #save states & actions into excel file
# When True, main() may ask the environment to apply random x/y disturbances after a step.
SIMULATE_WIND = False

def main():
    """Play one episode with the PPO actor and optionally log it to Excel.

    Builds the environment named by ``ENV_ID`` (wrapped in ``wrappers.Monitor``
    when ``RECORD_RUN`` is set), loads the actor weights from ``MODEL_TO_LOAD``
    when it is set, and then steps the environment until it reports ``done``.
    At every step the action is sampled as ``mu + exp(logstd) * noise`` with
    standard-normal noise and clipped to ``[-1, 1]``.

    When ``SAVE_RUN`` is true, the values read from ``env.get_states_value()``
    and the sampled actions are written to
    ``excel_logs/ppo_<total_reward>_<total_steps>.xlsx`` on the sheets
    ``state`` and ``action``.  When ``SIMULATE_WIND`` is true, random x/y
    disturbances are requested from the environment after a step.

    Prints the number of steps taken and the accumulated reward; returns None.
    """
    device = torch.device("cpu")
    env = gym.make(ENV_ID)
    if RECORD_RUN:
        env = wrappers.Monitor(env, RECORD_RUN)

    net = model.ModelActor(env.observation_space.shape[0], env.action_space.shape[0]).to(device)
    if MODEL_TO_LOAD:
        # map_location keeps a checkpoint that was saved from a GPU run
        # loadable on this CPU-only device.
        net.load_state_dict(torch.load(MODEL_TO_LOAD, map_location=device))

    # The log-std parameter never changes during evaluation, so convert it to
    # a NumPy standard deviation once instead of on every step.
    logstd = net.logstd.data.cpu().numpy()
    action_std = np.exp(logstd)

    obs = env.reset()
    total_reward = 0.0
    total_steps = 0
    left_or_right_movement = np.random.randint(0, 2)

    # A 10-element observation additionally gets the velocity entries logged.
    # Decide this once so that collection and export can never disagree.
    has_velocities = len(obs) == 10
    state_indices = [0, 1, 2, 3, 4]
    state_columns = ['x_pos', 'y_pos', 'theta', 'engine_throttle', 'engine_gimbal']
    if has_velocities:
        state_indices += [7, 8, 9]
        state_columns += ['vel_x', 'vel_y', 'ang_vel']
    action_columns = ['gimbal', 'throttle', 'side_thruster']

    if SAVE_RUN:
        os.makedirs("excel_logs", exist_ok=True)
        state_rows = []
        action_rows = []

    while True:
        env.render()
        obs_v = ptan.agent.float32_preprocessor([obs]).to(device)
        states = env.get_states_value()
        # Pure inference: no autograd graph is needed.
        with torch.no_grad():
            mu_v = net(obs_v)[0]
        mu = mu_v.squeeze(dim=0).cpu().numpy()
        rnd = np.random.normal(size=logstd.shape)
        action = np.clip(mu + action_std * rnd, -1, 1)

        if SAVE_RUN:
            state_rows.append(tuple(states[i] for i in state_indices))
            action_rows.append((action[0], action[1], action[2]))

        if np.isscalar(action):
            action = [action]
        obs, reward, done, _ = env.step(action)
        total_reward += reward
        total_steps += 1

        # Simulate wind.
        # NOTE: `states` was read before env.step(), so this condition looks
        # at the pre-step values. Entries 5 and 6 are presumably ground-contact
        # flags -- TODO confirm against the environment implementation.
        if SIMULATE_WIND and states[5] == 0 and states[6] == 0:
            env.apply_random_x_disturbance(epsilon=0.005, left_or_right=left_or_right_movement)
            env.apply_random_y_disturbance(epsilon=0.005)

        if done:
            if SAVE_RUN:
                state_dat = pd.DataFrame(state_rows, columns=state_columns)
                act_dat = pd.DataFrame(action_rows, columns=action_columns)
                # os.path.join avoids the invalid "\p" escape of a hand-written
                # Windows path and puts the file inside excel_logs on every OS.
                log_path = os.path.join("excel_logs", f"ppo_{total_reward}_{total_steps}.xlsx")
                with pd.ExcelWriter(log_path) as writer:
                    state_dat.to_excel(writer, sheet_name="state")
                    act_dat.to_excel(writer, sheet_name="action")

            env.render(close=True)
            break
    print("In %d steps we got %.3f reward" % (total_steps, total_reward))


In [None]:
# Run a single evaluation episode with the settings defined at the top of the notebook.
main()