# Dueling DQN with prioritized replay in  Robot Toolbox for Python and Swift.


In [1]:
import torch
import json
import pickle
import random
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from tqdm import trange
from IPython.display import clear_output
from scipy.signal import convolve, gaussian
import os
import io
import base64
import time
import glob

from IPython.display import HTML

import data_panda as rbt
device = 'cpu'
%matplotlib inline


#j3 range -0.08 a 3.75  #j2 range -0.07 a -3. #j1 range -1.8 a 1.76

### Robot and enviroment

Panda robot and obstacle


    Observation:
        Type: Box(4)
        Num     Observation               Min                     Max

        0       Joint1                   -4.8                    4.8
        1       Joint2                    -Inf                    Inf
        2       Joint3                -0.418 rad (-24 deg)    0.418 rad (24 deg)
        
    Actions:
        Type: Discrete 3^3 27 actions
        
        Num   Three actions for each joint
         0     do n joint j
         1     increment angle of joint j
        -1     decrement angle of joint j

        #j3 range 0.0 a 3.7
        #j2 range 0.0 a -3.
        #j1 range -1.7 a 1.7



        



In [2]:
state_shape = 3
env=rbt.Panda_RL()
config = rbt.load_config("config_dueling.yaml")
agent=rbt.DuelingDQNAgent(state_shape, device,layers=config["dueling_layers"], epsilon=0)
env.renderize=True #start/stop robot viewing

In [3]:
env.panda

ERobot: panda (by Franka Emika), 7 joints (RRRRRRR), 1 gripper, geometry, collision
┌─────┬──────────────┬───────┬─────────────┬────────────────────────────────────────────────┐
│link │     link     │ joint │   parent    │              ETS: parent to link               │
├─────┼──────────────┼───────┼─────────────┼────────────────────────────────────────────────┤
│   0 │ panda_link0  │       │ BASE        │                                                │
│   1 │ panda_link1  │     0 │ panda_link0 │ SE3(0, 0, 0.333) ⊕ Rz(q0)                      │
│   2 │ panda_link2  │     1 │ panda_link1 │ SE3(-90°, -0°, 0°) ⊕ Rz(q1)                    │
│   3 │ panda_link3  │     2 │ panda_link2 │ SE3(0, -0.316, 0; 90°, -0°, 0°) ⊕ Rz(q2)       │
│   4 │ panda_link4  │     3 │ panda_link3 │ SE3(0.0825, 0, 0; 90°, -0°, 0°) ⊕ Rz(q3)       │
│   5 │ panda_link5  │     4 │ panda_link4 │ SE3(-0.0825, 0.384, 0; -90°, -0°, 0°) ⊕ Rz(q4) │
│   6 │ panda_link6  │     5 │ panda_link5 │ SE3(90°, -0°, 0°) ⊕ Rz(q5

### Main loop



Intializing with a given state

In [4]:
env.start_scene()

In [5]:
q_far=np.array([ 0., -0.8 ,  0. , -0.0698,  0.,  3.3825,  0.    ])

In [6]:
rbt.evaluate(env, agent, n_games=1, greedy=False, t_max=100)

pybullet build time: Oct 28 2022 16:11:27


(82.06092599791205,
 13.0,
 1.0,
 0.0,
 2.14601889016358,
 [['Termination', 'Collided']])

In [7]:
# env.reset_j1=[-1.7,1.7]
# env.reset_j1=[-1.7,-1.]


env.reset()

array([-1.6 , -2.6 ,  0.71])

In [8]:
a=np.array([0,-1,1])

env.step(a)

(array([-1.6 , -2.61,  0.72]), -20.24427988614672, False, ['Running', ''])

In [14]:
env.panda.q=env.q_goal
env.sig_R=1
print(env.step([1,1,-1]))
print(env.fitness())
# print(env.step([0,0,1]))
# print(env.fitness())

(array([ 0.36, -0.83,  3.68]), -5.0, False, ['Running', ''])
(3.3422562987665234, 2.49)


In [16]:
env.Pg

array([[ 8.80000000e-02],
       [-2.16027695e-17],
       [ 8.22600000e-01]])

In [12]:
env.get_position()

array([5.71989244e-01, 1.08164922e-17, 1.03964954e+00])

In [15]:
env.get_current_RP()

(array([[-5.62233992e-01, -5.62233992e-01,  6.06453523e-01],
        [ 7.07106781e-01, -7.07106781e-01,  7.37577446e-18],
        [ 4.28827399e-01,  4.28827399e-01,  7.95118937e-01]]),
 array([[5.71989244e-01],
        [1.08164922e-17],
        [1.03964954e+00]]))

In [20]:
env.set_goal()

In [19]:
env.distance()

3.253498010697011

In [None]:

print(env.step([0,1,-1]))

#print(env.fitness())
#j=[(-1.8,1.76), (-3,0),(0,3.75)]




In [11]:
env.get_q()

[0.36, -0.83, 3.68]

In [12]:
folder="new_19"
#env.reset_j1=[-1.7,-1.]
agent.play(env,folder,tmax=500)

FileNotFoundError: [Errno 2] No such file or directory: 'runs/new_19/best-model-rw.pt'

In [None]:
agent.play(env,folder,tmax=500,model="other")


In [None]:
agent.play(env,folder,tmax=500,model="last")

In [None]:
DIR='runs/results_25/'
print(glob.glob(DIR+'/*.pt'))

**Let us record a video of trained agent**

In [None]:
# # Animate learned policy
# save_dir='./videos/'
# #env = make_env(env_name)
# generate_animation(env, agent, save_dir=save_dir)
# [filepath] = glob.glob(os.path.join(save_dir, '*.mp4'))

# display_animation(filepath)

In [None]:
#TODO

# Run this on another environment in OpenAI Gym
# Create a robotic environment with more actions
#