## Open the **Runtime > Change runtime type** menu and enable a GPU accelerator

In [None]:
# `os` is imported here because this cell runs before the main import cell;
# without it the makedirs call below fails with NameError on a fresh kernel.
import os

from google.colab import drive

# Mount Google Drive so checkpoints and logs survive the Colab VM being recycled.
drive.mount('/content/gdrive')

# Make sure the root folder for saved DDPG models exists on the mounted drive.
os.makedirs("/content/gdrive/MyDrive/colab_model/rocket/DDPG/", exist_ok=True)

Installing required libraries

In [None]:
# Remove the CUDA / NVIDIA packages that ship with the VM image.
# NOTE(review): none of these commands pass -y, so apt may stop and ask for
# confirmation — confirm that is intended for an unattended run.
!apt-get --purge remove cuda nvidia* libnvidia-*
# Purge every package still listed by dpkg whose line matches "cuda-",
# one package name per dpkg invocation.
!dpkg -l | grep cuda- | awk '{print $2}' | xargs -n1 dpkg --purge
!apt-get remove cuda-*
# Drop dependencies that are no longer needed, then refresh the package index.
!apt autoremove
!apt-get update

In [None]:
# Download NVIDIA's CUDA 10.0.130 repository package for Ubuntu 18.04
# (--no-clobber skips the download when the file is already present).
!wget  --no-clobber https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-repo-ubuntu1804_10.0.130-1_amd64.deb
# Register the repository and fetch its signing key.
!dpkg -i cuda-repo-ubuntu1804_10.0.130-1_amd64.deb
!sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
# Refresh the index so the new repository is visible, then install CUDA 10.0
# (presumably the version required by the tensorflow==1.15 pin in the next cell — confirm).
!apt-get update
!apt-get install cuda-10-0

In [None]:
# System packages: OpenGL bindings, ffmpeg and the Xvfb virtual framebuffer
# (Xvfb is what pyvirtualdisplay drives later in the notebook).
!apt install python-opengl
!apt install ffmpeg
!apt install xvfb

# Python packages. pyglet and gym are uninstalled first so that the pinned
# versions installed below replace whatever the image ships with.
!pip install cvxpy
!pip install box2d-py
!pip uninstall pyglet -y
!pip uninstall gym -y
!pip install tensorflow==1.15
!pip install pyglet==1.3.2
!pip install gym==0.9.4
!pip install pyvirtualdisplay

## Restart the runtime before running the cells below, so that the packages installed above take effect

In [None]:
# Fetch the "paper-training" branch of the rocket-lander repository.
!git clone -b paper-training https://github.com/naufalhisyam/rocket-lander.git
# %cd (rather than !cd) so the directory change persists for the following cells;
# the project imports and the reward-log paths below rely on this location.
%cd /content/rocket-lander
# List the checkout as a quick sanity check.
!ls

In [None]:
from pyvirtualdisplay import Display

# Start an invisible (visible=0) 1000x800 virtual display.
# Presumably needed so the simulator's rendering stack can initialise on the
# headless Colab VM — confirm whether it is still required with 'Render': False.
display = Display(visible=0, size=(1000, 800))
display.start()

In [None]:
%tensorflow_version 1.x
import os
import numpy as np
from numpy.core.numeric import False_
import pandas as pd
import tensorflow as tf

from control_and_ai.DDPG.ddpg import DDPG
from control_and_ai.DDPG.utils import Utils
from control_and_ai.DDPG.exploration import OUPolicy

from constants import *
from constants import DEGTORAD
from environments.rocketlander import RocketLander, get_state_sample

# --- Run configuration -------------------------------------------------------
# To continue training from a saved checkpoint, set both RETRAIN and TEST to False.
RETRAIN = True       # weights are restored when this is False
TEST = False         # when True, train() skips agent updates and checkpoint saves

NUM_EPISODES = 300   # number of episodes train() iterates over
SAVE_REWARD = True   # export the per-episode reward log as .xlsx
NAME = "test"        # model name; used for the checkpoint folder and the log file name

model_dir = '/content/gdrive/MyDrive/colab_model/rocket/DDPG/' + NAME

# --- Agent inputs ------------------------------------------------------------
# Bounds handed to the agent for the three action components; the last one is
# 15 degrees expressed in radians.
action_bounds = [1, 1, 15*DEGTORAD]

# Three identically configured exploration-noise policies
# (presumably one per action component — confirm against DDPG).
eps = [OUPolicy(0, 0.2, 0.4) for _ in range(3)]

# --- Environment -------------------------------------------------------------
# NOTE(review): 'Episodes' here is 500 while train() loops over NUM_EPISODES
# (300) — confirm which of the two the environment actually honours.
simulation_settings = {
    'Side Engines': True,
    'Clouds': True,
    'Vectorized Nozzle': True,
    'Graph': False,
    'Render': False,
    'Starting Y-Pos Constant': 1,
    'Initial Force': 'random',
    'Rows': 1,
    'Columns': 2,
    'Episodes': 500,
}
env = RocketLander(simulation_settings)

# --- Agent -------------------------------------------------------------------
agent = DDPG(
    action_bounds,
    eps,
    env.observation_space.shape[0],  # observation size, for the first model
    actor_learning_rate=0.0001,
    critic_learning_rate=0.001,
    retrain=RETRAIN,
    log_dir="./logs",
    model_dir=model_dir,
    batch_size=100,
    gamma=0.99,
)

In [None]:
def train(env, agent):
    """Run the DDPG episode loop and optionally export the per-episode rewards.

    Relies on notebook-level names defined in the configuration cell:
    ``NUM_EPISODES``, ``TEST``, ``SAVE_REWARD``, ``NAME`` and ``model_dir``.

    Args:
        env: environment exposing ``observation_space``, ``reset()``,
            ``step(action)`` and the two ``apply_random_*_disturbance`` methods.
        agent: agent exposing ``get_action``, ``update``, ``log_data`` and
            ``save_model``.

    Side effects:
        * Saves a checkpoint through ``agent.save_model`` every 50 episodes
          unless ``TEST`` is set.
        * When ``SAVE_REWARD`` is set and at least one episode ran, writes an
          .xlsx reward log under ``/content/rocket-lander/excel_logs/eps-rewards/``
          and copies that folder's contents into ``model_dir``.
    """
    # Local import: only the final copy step needs it.
    import subprocess

    obs_size = env.observation_space.shape[0]
    max_steps = 500  # hard cap on steps per episode
    # Single source of truth for the log folder, so the directory that gets
    # created is always the one that is written to, whatever the current
    # working directory is.
    reward_log_dir = "/content/rocket-lander/excel_logs/eps-rewards/"

    # Build the state normalizer from sampled states.
    util = Utils()
    state_samples = get_state_sample(samples=5000, normal_state=True)
    util.create_normalizer(state_sample=state_samples)

    rew = []
    ep = []

    for episode in range(1, NUM_EPISODES + 1):
        total_reward = 0

        state = env.reset()
        state = util.normalize(state)

        # Direction flag for the x disturbance, drawn once per episode.
        left_or_right_barge_movement = np.random.randint(0, 2)

        for t in range(max_steps):
            old_state = state
            # Infer an action; the second argument is True while training.
            action = agent.get_action(np.reshape(state, (1, obs_size)), not TEST)

            # Take it.
            state, reward, done, _ = env.step(action[0])
            state = util.normalize(state)
            total_reward += reward

            # NOTE(review): the contact flags are read from the *normalized*
            # state — confirm that normalization leaves 0 meaning "no contact".
            if state[LEFT_GROUND_CONTACT] == 0 and state[RIGHT_GROUND_CONTACT] == 0:
                env.apply_random_x_disturbance(epsilon=0.005, left_or_right=left_or_right_barge_movement)
                env.apply_random_y_disturbance(epsilon=0.005)

            if not TEST:
                # Update Q values from this transition.
                agent.update(old_state, action[0], np.array(reward), state, done)

            if done:
                break

        agent.log_data(total_reward, episode)

        if episode % 50 == 0 and not TEST:
            print('Saved model at episode', episode)
            agent.save_model(episode)
        if SAVE_REWARD:
            rew.append(total_reward)
            ep.append(episode)
        print("Episode:\t{0}\tReward:\t{1}".format(episode, total_reward))

    # Skip the export when no episode ran: the file name needs rew[-1].
    if SAVE_REWARD and rew:
        os.makedirs(reward_log_dir, exist_ok=True)
        reward_data = pd.DataFrame(list(zip(ep, rew)), columns=['episode', 'reward'])
        xlsx_path = os.path.join(
            reward_log_dir,
            f"DDPG_eps-rewards_{NAME}_{rew[-1]}_{len(ep)}.xlsx")
        with pd.ExcelWriter(xlsx_path) as writer:
            reward_data.to_excel(writer, sheet_name=f"{NAME}_eps-rewards")

        # Same operation as `cp -a <log_dir>/. <model_dir>`, but passed as an
        # argument list so a model name containing quotes or spaces cannot
        # break the command, and a failed copy is reported instead of ignored.
        copy_result = subprocess.run(
            ["cp", "-a", os.path.join(reward_log_dir, "."), model_dir],
            stderr=subprocess.PIPE,
            universal_newlines=True)
        if copy_result.returncode != 0:
            print("Warning: could not copy reward logs to {0}: {1}".format(
                model_dir, copy_result.stderr.strip()))

In [None]:
# Start training with the environment and agent built in the configuration cell.
train(env, agent)