In [1]:
## This contains the test procedure for vision to weights
## Author : Avadesh Meduri
## Date : 31/05/2022
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
import os
import time
import pathlib

import sys

# Make the sibling "python" directory (one level above the notebook's working
# directory) importable.
python_path = pathlib.Path('.').absolute().parent/'python'
# Guarded so that re-running this cell does not push the same entry onto
# sys.path again; `sys` is used directly instead of the undocumented `os.sys`.
if str(python_path) not in sys.path:
    sys.path.insert(1, str(python_path))

In [3]:
## This is a demo of the Kuka reaching a desired point with diff_qp
## Author : Avadesh Meduri
## Date : 25/02/2022
import time
import numpy as np
import pinocchio as pin
from robot_properties_kuka.config import IiwaConfig
from vocam.diff_pin_costs import DiffFrameTranslationCost, DiffFrameVelocityCost

import meshcat
import meshcat.transformations as tf
import meshcat.geometry as g

from torch.utils.data import Dataset
import torch
import torchvision
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils import data
from torchvision.io import read_image
from torch.utils.data import DataLoader, RandomSampler, Sampler
import time
from PIL import Image
from skimage.io import imread
import numba
from torchvision.transforms import ToTensor, ToPILImage, Resize
from IPython.display import display, clear_output
from matplotlib import pyplot as plt

from vocam.inverse_qp import IOC

import torch
from torch.autograd import Function
from vocam.forward_pass import IOCForwardPassWithoutVision
from vocam.nets import Net

from vocam.qpnet import QPNet

In [4]:
# Build the robot wrapper and keep handles on its pinocchio model and data.
robot = IiwaConfig.buildRobotWrapper()
model = robot.model
# NOTE: this rebinds `data`, hiding the `from torch.utils import data` import
# for the rest of the notebook; later cells pass this `data` to pinocchio.
data = robot.data

# Index of the frame named "EE" (presumably the end effector).
f_id = model.getFrameId("EE")

In [5]:
# Meshcat visualizer built from the robot's kinematic, collision and visual models.
viz = pin.visualize.MeshcatVisualizer(robot.model, robot.collision_model, robot.visual_model)
# open=False is passed; the recorded cell output prints a URL to open manually.
viz.initViewer(open=False)
# Load the robot geometry into the viewer (presumably requires the viewer created above).
viz.loadViewerModel()

You can open the visualizer by visiting the following URL:
http://127.0.0.1:7001/static/


In [6]:
class BoxDataSet(Dataset):
    """Image / position pairs collected from one or more recordings.

    For every entry ``f`` of ``fnames`` the labels are read from the
    ``"position"`` array of ``../vision/position_data/data<f>.npz`` and the
    frames from ``../vision/image_data/data<f>/color_<k>.jpg`` (plus
    ``depth_<k>.jpg`` when ``rgbd`` is set).  Labels of all recordings are
    stacked into ``y_train`` and normalised as ``(y - mean) / std``.

    Attributes
    ----------
    y_train : normalised labels of all recordings, stacked in ``fnames`` order.
    y_len   : ``[0, last global index of recording 0, last global index of
              recording 1, ...]``; used to find which recording a sample is in.
    img_dir : image directory of each recording.
    mean, std : normalisation statistics actually applied to ``y_train``.
    data    : the ``.npz`` archive of the last recording that was loaded.
    """

    def __init__(self, fnames, mean = None, std = None, rgbd = True, resize = (224,224)):
        """Load the labels of every recording and normalise them.

        Parameters
        ----------
        fnames : sequence of recording identifiers (each is passed to ``str``).
        mean, std : normalisation statistics. They are only used when BOTH are
            ``numpy.ndarray``; any other type (``None``, list, tensor, ...)
            makes the statistics be recomputed from the loaded labels.
        rgbd : if true, ``get_data`` stacks the depth frame onto the colour frame.
        resize : target size given to ``transforms.Resize``; only applied by
            ``get_data`` when ``rgbd`` is false.

        Raises
        ------
        ValueError : if ``fnames`` is empty.
        """
        if len(fnames) == 0:
            raise ValueError("BoxDataSet needs at least one recording in fnames")

        self.rgbd = rgbd
        self.resize = resize
        self.y_len = [0]
        self.img_dir = []

        per_run = []
        for i, fname in enumerate(fnames):
            self.img_dir.append("../vision/image_data/data" + str(fname))
            self.data = np.load("../vision/position_data/data" + str(fname) + ".npz")
            positions = torch.tensor(self.data["position"]).float()
            per_run.append(positions)
            # y_len stores the LAST global index of each recording, hence the
            # single "-1" applied to the first recording only.
            if i == 0:
                self.y_len.append(len(positions) - 1)
            else:
                self.y_len.append(self.y_len[-1] + len(positions))
        self.y_train = per_run[0] if len(per_run) == 1 else torch.vstack(per_run)

        if isinstance(mean, np.ndarray) and isinstance(std, np.ndarray):
            print("using given mean")
            self.mean = mean
            self.std = std
        else:
            self.mean = torch.mean(self.y_train, axis = 0)
            self.std = torch.std(self.y_train, axis = 0)
            print(self.mean, self.std)
        self.y_train = (self.y_train - self.mean)/self.std

    def _locate(self, gidx):
        """Map a global sample index to ``(recording index, frame index)``.

        ``y_len[b]`` is the last global index of recording ``b - 1``, so
        recording ``b >= 1`` starts at ``y_len[b] + 1`` while recording 0
        starts at 0.  (Subtracting ``y_len[0] + 1`` for recording 0 as well
        would pair sample ``g`` with frame ``g - 1`` and load frame 0 twice.)
        """
        b_idx = max(int(np.searchsorted(self.y_len, gidx)) - 1, 0)
        start = 0 if b_idx == 0 else self.y_len[b_idx] + 1
        return b_idx, int(gidx - start)

    def get_data(self, gidx):
        """Return ``(image, label)`` for global sample index ``gidx``.

        The colour frame (with the depth frame stacked along the first axis
        when ``rgbd`` is set) is cropped to a 180x180 window whose top-left
        corner is at row 50, column 100.  Without ``rgbd`` the crop is then
        resized to ``self.resize``.  The image is returned as float with a
        leading axis of size 1; the label is the normalised row
        ``y_train[gidx]``.
        """
        b_idx, idx = self._locate(gidx) # which dir to look into, relative idx
        frame_dir = self.img_dir[b_idx]

        image = ToTensor()(imread(frame_dir + "/color_" + str(idx) + ".jpg"))
        if self.rgbd:
            d_image = ToTensor()(imread(frame_dir + "/depth_" + str(idx) + ".jpg"))
            image = torch.vstack((image, d_image))

        image = transforms.functional.crop(image,  50, 100, 180, 180)
        if not self.rgbd:
            image = transforms.Resize(self.resize)(image)

        label = self.y_train[gidx]

        return image.float()[None,:,:,:], label



In [7]:
class C_Net_encoder(nn.Module):
    """Convolutional encoder mapping a 4-channel image to a 3-vector.

    ``forward`` returns both the 3-dimensional prediction and the flattened
    feature vector that feeds the fully connected head.  ``fc1`` expects 512
    input features, so the input must be sized such that ``conv42`` produces
    a 1x1 spatial map.
    """

    def __init__(self):
        super().__init__()
        # Block 1
        self.conv11 = nn.Conv2d(in_channels=4, out_channels=64, kernel_size=3)
        self.conv12 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3)

        # One parameter-free pooling layer shared by every block.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Block 2
        self.conv21 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)
        self.conv22 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3)

        # Block 3 (conv33 is registered but never called in forward; it is kept
        # so the module's parameter set stays the same)
        self.conv31 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3)
        self.conv32 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3)
        self.conv33 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3)

        # Block 4
        self.conv41 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3)
        self.conv42 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3)

        # Fully connected head
        self.fc1 = nn.Linear(in_features=512, out_features=512)
        self.fc3 = nn.Linear(in_features=512, out_features=3)

    def forward(self, x):
        """Return ``(prediction, encoding)`` for a batch of images ``x``."""
        # (layer, whether max-pooling follows its ReLU), in execution order.
        pipeline = (
            (self.conv11, False),
            (self.conv12, True),
            (self.conv21, False),
            (self.conv22, True),
            (self.conv31, True),
            (self.conv32, True),
            (self.conv41, True),
            (self.conv42, False),
        )
        for conv, pooled in pipeline:
            x = F.relu(conv(x))
            if pooled:
                x = self.pool(x)

        # Flatten everything except the batch axis; this is the encoding.
        enc = torch.flatten(x, 1)
        hidden = F.relu(self.fc1(enc))
        return self.fc3(hidden), enc

In [14]:
# --- Problem dimensions and solver settings ---------------------------------
nq = model.nq
nv = model.nv
q0 = [np.pi/16.0, -np.pi/16.0, 0, 0, 0, 0, 0]
# Default initial state: q0 followed by zero velocities. The rollout cell
# overwrites x_init on its first iteration.
x_init = np.concatenate([q0, pin.utils.zero(model.nv)])

n_col = 5  # presumably the number of collocation points of the trajectory
u_max = [2.5,2.5,2.5, 1.5, 1.5, 1.5, 1.0]  # forwarded to the forward pass as u_max
# Length of the flat solution vector: n_col blocks of 3*nq values followed by
# a final block of 2*nq values.
n_vars = 3*nq*n_col+2*nq
dt = 0.05

isvec = True
lr = 1e-1
max_eps = 100


# --- Network taking the state plus a 3-vector as input ----------------------
nn_dir = "../models/qpnet_91.pt"
net= QPNet(2*nq + 3, 2*n_vars).eval()
net.load(nn_dir)

# To plan with `net` instead of the encoder-based network, build the forward
# pass from it:
# iocfp = IOCForwardPassWithoutVision(net, u_max=u_max)

dtc = DiffFrameTranslationCost.apply

# --- Vision part ------------------------------------------------------------
indices = [1,2,3,4,5,6,7,8,9]
# Label normalisation statistics handed to the dataset (as numpy arrays, so
# the dataset uses them instead of recomputing its own).
mean = np.array([0.3068, 0.1732, 0.4015])
std = np.array([0.1402, 0.2325, 0.1624])
dl = BoxDataSet(indices, mean = mean, std = std, rgbd = True, resize = (224,224))

encoder = C_Net_encoder()
# NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
encoder.load_state_dict(torch.load("../vision/models/cnn5", map_location=torch.device('cpu')))
# The encoder is only used for inference in this notebook, so put it in eval
# mode like the two QPNets.
encoder.eval()

## EncoderNet: takes the state plus the 512-dimensional image encoding as input
nn_dir = "../models/e2eNet4"
encoder_net= QPNet(2*nq + 512, 2*n_vars).eval()
encoder_net.load(nn_dir)
iocfp = IOCForwardPassWithoutVision(encoder_net, u_max=u_max)


using given mean


In [15]:
# buffer_size: number of iterations of the rollout loop below.
# n_mpc: a new target image is drawn every n_mpc iterations.
buffer_size, n_mpc = 10000, 25
# how close the ee should be to the ball to be accepted as a good data point
d_tol = 0.5

# Mean-squared error between the encoder's prediction and the label.
loss = torch.nn.MSELoss()

# Reference joint configurations, one per row; the rollout loop samples its
# initial configurations around row 0.
q_des_arr = np.array([
    [2.1789238e-02, 3.3214998e-01, -1.4518893e-04, -8.7141126e-01,
     6.0329604e-01, -1.3965217e-03, 1.4794523e-04],
    [1.3737, 0.9711, 1.6139, 1.2188, 1.5669, 0.1236, 0.2565],
])

In [16]:
# The target marker's geometry and material never change, so it is created
# once; only its transform is updated when a new target is drawn.
viz.viewer["box"].set_object(g.Sphere(0.05),
                 g.MeshLambertMaterial(
                     color=0xff22dd,
                     reflectivity=0.8))

# Position of the (q, dq) pair of block n_col-1 inside the flat solution
# vector; it becomes the initial state of the next solve.
next_state = slice(3*nq*(n_col-1), 3*nq*(n_col-1) + 2*nq)

for k in range(buffer_size):

    reset_state = False
    if k % n_mpc == 0:
        # Every n_mpc iterations: draw a random sample, encode its image and
        # move the target marker.
        with torch.no_grad():
            image, label = dl.get_data(np.random.randint(dl.y_train.shape[0]))
            pred_loc, encoding = encoder(image)
            error = loss(label.unsqueeze(0), pred_loc).numpy()
            # Undo the label normalisation.
            pred_loc = pred_loc*std + mean
            x_des = label*std + mean
            x_des[0] += 0.3

        viz.viewer["box"].set_transform(tf.translation_matrix(x_des.detach().numpy()))

        # Restart from a random state on the first iteration and, afterwards,
        # with probability 1/2 whenever the target changes.
        reset_state = np.random.randint(2) == 0 or k == 0

    if reset_state:
        x_init = np.zeros(2*nq)
        x_init[0:nq] = q_des_arr[0] + 0.3*2*(np.random.rand(nq) - 0.5)
        # Scalar draws: assigning a size-1 array to a single element is
        # deprecated in recent NumPy releases.
        x_init[0] -= 2*0.5*(np.random.rand() - 0.5)
        x_init[2] -= 2*0.3*(np.random.rand() - 0.5)
        x_init[nq:] = 0.7*2*(np.random.rand(nv) - 0.5)
    else:
        # Continue from the previous solution. This also covers a target change
        # without a reset, which previously reused the stale x_init of the
        # preceding iteration.
        x_init = x_pred[next_state]

    x_pred = iocfp.predict_encoder(x_init, encoding)
#     x_pred = iocfp.predict(x_init[0:nq], x_init[nq:2*nq], x_des)

    print("Index :" + str(k) + "/" + str(buffer_size) + " Encoder error : " + str(np.round(error,4)), end = '\r', flush = True)

    # Play back the predicted trajectory, one configuration per block.
    for i in range(n_col+1):
        q = x_pred[3*nq*i:3*nq*i + nq]
        dq = x_pred[3*nq*i + nq:3*nq*i + 2*nq]

        # Updates `data` in place; the result is not read in this cell.
        pin.forwardKinematics(model, data, q, dq, np.zeros(nv))
        pin.updateFramePlacements(model, data)

        viz.display(q)
        time.sleep(0.01)

Index :143/10000 Encoder error : 0.0028

KeyboardInterrupt: 