# Dueling DQN with prioritized replay in the Robotics Toolbox for Python and Swift


In [1]:
import torch
import json
import pickle
import random
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from tqdm import trange
from IPython.display import clear_output
from scipy.signal import convolve

# `gaussian` lives in `scipy.signal.windows`; the `scipy.signal.gaussian` alias
# was removed in SciPy 1.13, so import from the canonical location first and
# fall back to the legacy alias only for very old SciPy releases.
try:
    from scipy.signal.windows import gaussian
except ImportError:
    from scipy.signal import gaussian
import os
import io
import base64
import time
import glob

from IPython.display import HTML

import data_panda as rbt
# Device string handed to the agent constructor later in the notebook.
device = 'cpu'
# Render matplotlib figures inline in the notebook output.
%matplotlib inline


# Joint ranges: j3 from -0.08 to 3.75; j2 from -0.07 to -3.0; j1 from -1.8 to 1.76

### Robot and environment

Panda robot and obstacle


    Observation:
        Type: Box(3)
        Num     Observation      Min      Max

        0       Joint1 (j1)     -1.7      1.7
        1       Joint2 (j2)     -3.0      0.0
        2       Joint3 (j3)      0.0      3.7
        
    Actions:
        Type: Discrete, 3^3 = 27 actions (one of three moves for each of the 3 joints)
        
        Num   Three actions for each joint
         0     do nothing on joint j
         1     increment angle of joint j
        -1     decrement angle of joint j

        #j3 range: 0.0 to 3.7
        #j2 range: 0.0 to -3.0
        #j1 range: -1.7 to 1.7



        



In [2]:
# Length of the observation vector (the environment reports three joint values).
state_shape = 3

# Simulation environment wrapping the Panda robot.
env = rbt.Panda_RL()

# The network layout is read from the YAML config. epsilon=0 is passed to the
# agent -- NOTE(review): presumably this disables random exploration; confirm
# against DuelingDQNAgent in data_panda.
config = rbt.load_config("config_dueling.yaml")
agent = rbt.DuelingDQNAgent(
    state_shape,
    device,
    layers=config["dueling_layers"],
    epsilon=0,
)

env.renderize = True  # start/stop robot viewing

In [3]:
# Display the robot model held by the environment (prints its link/joint table).
env.panda

ERobot: panda (by Franka Emika), 7 joints (RRRRRRR), 1 gripper, geometry, collision
┌─────┬──────────────┬───────┬─────────────┬────────────────────────────────────────────────┐
│link │     link     │ joint │   parent    │              ETS: parent to link               │
├─────┼──────────────┼───────┼─────────────┼────────────────────────────────────────────────┤
│   0 │ panda_link0  │       │ BASE        │                                                │
│   1 │ panda_link1  │     0 │ panda_link0 │ SE3(0, 0, 0.333) ⊕ Rz(q0)                      │
│   2 │ panda_link2  │     1 │ panda_link1 │ SE3(-90°, -0°, 0°) ⊕ Rz(q1)                    │
│   3 │ panda_link3  │     2 │ panda_link2 │ SE3(0, -0.316, 0; 90°, -0°, 0°) ⊕ Rz(q2)       │
│   4 │ panda_link4  │     3 │ panda_link3 │ SE3(0.0825, 0, 0; 90°, -0°, 0°) ⊕ Rz(q3)       │
│   5 │ panda_link5  │     4 │ panda_link4 │ SE3(-0.0825, 0.384, 0; -90°, -0°, 0°) ⊕ Rz(q4) │
│   6 │ panda_link6  │     5 │ panda_link5 │ SE3(90°, -0°, 0°) ⊕ Rz(q5

### Main loop



Initializing with a given state

In [4]:
# NOTE(review): presumably launches the simulator scene used for rendering -- confirm in data_panda.
env.start_scene()

In [5]:
# Seven-element joint vector (one value per Panda joint).
# NOTE(review): no cell visible in this notebook reads `q_far` -- confirm it is still needed.
q_far = np.array([0.0, -0.8, 0.0, -0.0698, 0.0, 3.3825, 0.0])

In [6]:
# Run a single evaluation game capped at 100 steps and display the returned tuple.
# NOTE(review): the meaning of each tuple element is defined in data_panda.evaluate -- confirm there.
rbt.evaluate(env, agent, n_games=1, greedy=False, t_max=100)

pybullet build time: Oct 28 2022 16:11:27


(660.2687703402628, 99.0, 0.0, 0.0, 1.8795566196116154, [['Running', '']])

In [7]:
# Optional overrides of `env.reset_j1` (disabled).
# NOTE(review): presumably the joint-1 interval sampled on reset -- confirm in data_panda.
# env.reset_j1=[-1.7,1.7]
# env.reset_j1=[-1.7,-1.]


# Reset the environment; the displayed value is the new 3-element observation.
env.reset()

array([-1.6 , -2.6 ,  0.71])

In [8]:
# One entry per controlled joint: 0 = do nothing, 1 = increment, -1 = decrement.
a = np.array([0, -1, 1])

# Apply the action once and display the tuple returned by the environment.
env.step(a)

(array([-1.6 , -2.61,  0.72]), -20.668002602340454, False, ['Running', ''])

In [9]:
# Place the robot directly at the goal configuration, then nudge joint 3 one
# step up and one step back down, printing the step result and the summed
# fitness terms after each move.
env.panda.q = env.q_goal
for action in ([0, 0, 1], [0, 0, -1]):
    print(env.step(action))
    print(sum(env.fitness()))


(array([ 0.35, -0.84,  3.7 ]), 10.689578002188107, False, ['Running', ''])
0.010005201172680325
(array([ 0.35, -0.84,  3.69]), 100, True, ['Done', 'Completed'])
2.1073424255447017e-08


In [33]:
# NOTE(review): `folder` is presumably the directory holding the saved model -- confirm in data_panda.
folder = "new_2"
# env.reset_j1=[-1.7,-1.]

# Play one episode of at most 600 steps; a summary (score, steps, minimum
# distance, status) is printed when it ends.
agent.play(env, folder, tmax=600)

Final score:2937.821683087388 in 600 steps, minimum distance 0.0020300392991623234
Status: Running 


In [36]:
# Same run directory, capped at 300 steps.
# NOTE(review): `model` presumably selects which saved checkpoint is loaded -- confirm in data_panda.
agent.play(env, folder, tmax=300, model="other")


Final score:2775.5891783840175 in 300 steps, minimum distance 0.03384482789943642
Status: Running 


In [90]:
# Switch to a different run directory; later cells reuse this `folder` value.
folder = "obs_10_12_13"
# env.reset_j1=[-1.7,-1.]

# Play one episode of at most 400 steps and print the summary.
agent.play(env, folder, tmax=400)

Final score:1579.0398777921607 in 263 steps, minimum distance 1.1552150760724987e-05
Status: Done Completed


In [91]:
# Repeat the 400-step episode with model="other".
# NOTE(review): presumably an alternative saved checkpoint -- confirm in data_panda.
agent.play(env, folder, tmax=400, model="other")

Final score:1538.8585640526774 in 272 steps, minimum distance 0.0017304747049568894
Status: Done Completed


In [87]:
#torch.save(agent.state_dict(), "model_ok_2.pt")

In [92]:
# Repeat the 400-step episode with model="last".
# NOTE(review): presumably the most recently saved checkpoint -- confirm in data_panda.
agent.play(env, folder, tmax=400, model="last")

Final score:1556.115741654673 in 274 steps, minimum distance 0.0020009016994586766
Status: Done Completed


Activate the ceiling obstacle

In [51]:
# Turn on the ceiling obstacle flag, then close and reopen the scene.
# NOTE(review): the restart is presumably needed for the flag to take effect -- confirm in data_panda.
env.ceil = True
env.close_scene()
env.start_scene()

**Let us record a video of the trained agent**

In [None]:
# # Animate learned policy
# save_dir='./videos/'
# #env = make_env(env_name)
# generate_animation(env, agent, save_dir=save_dir)
# [filepath] = glob.glob(os.path.join(save_dir, '*.mp4'))

# display_animation(filepath)

In [None]:
#TODO

# Run this on another environment in OpenAI Gym
# Create a robotic environment with more actions
#