In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
from torchvision import models, datasets, transforms
from torch.autograd import Variable
import numpy as np
import os
import collections
import pandas as pd
import random
from PIL import Image, ImageDraw
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from IoU import IoU
from Network import ActorNetwork, CriticNetwork
from replay_buffer import ReplayBuffer

In [2]:
def ReadData(csv_dir, img_dir, image_sets , test = None):
    """Collect (image path, class index, xmin, ymin, xmax, ymax) tuples.

    For every class name in ``image_sets`` one CSV file is read from
    ``csv_dir``: ``train_<class>.csv`` when ``test`` is None, otherwise
    ``val_<class>.csv``. Each CSV must provide the columns ``fname``,
    ``xmin``, ``ymin``, ``xmax`` and ``ymax``.

    Args:
        csv_dir: directory holding the per-class CSV files.
        img_dir: directory prepended (with '/') to every ``fname``.
        image_sets: sequence of class names; the position of a name is
            used as its integer class index.
        test: any non-None value selects the ``val_`` files.

    Returns:
        A list with one tuple per CSV row, in class order then row order.
    """
    prefix = '/train_' if test is None else '/val_'
    samples = []
    for class_idx, class_name in enumerate(image_sets):
        frame = pd.read_csv(csv_dir + prefix + class_name + '.csv')
        rows = zip(frame.fname.values, frame.xmin.values, frame.ymin.values,
                   frame.xmax.values, frame.ymax.values)
        samples.extend(
            (img_dir + '/' + fname, class_idx, x0, y0, x1, y1)
            for fname, x0, y0, x1, y1 in rows
        )
    return samples

# Location of the dataset on the author's machine. The path is absolute and
# machine-specific; change it before running elsewhere.
root_dir = '/home/minty/dataset/VOCdevkit/VOC2012/'
# Per-class CSV annotation files (train_<class>.csv / val_<class>.csv) read by ReadData.
csv_dir = os.path.join(root_dir,'csvs')
# Directory the CSV 'fname' entries are resolved against.
img_dir = os.path.join(root_dir,'JPEGImages')
# Class names; the list position is the integer class index produced by ReadData.
image_sets =  ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',\
      'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', \
      'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']
# test=None selects the train_<class>.csv files.
imgs = ReadData(csv_dir,img_dir,image_sets, test = None)
# NOTE(review): ReadData returns a single list, so the two-name unpacking in
# the commented line below would not work as written.
#test_imgs, test_box = ReadData(csv_dir, img_dir, image_sets, test = True)

# Image-only preprocessing applied by Data.__getitem__.
#
# No geometric augmentation (random flip / random crop) is applied here:
# Data.__getitem__ returns the annotated box exactly as read from the CSV, so
# any transform that moves pixels would leave the ground-truth box pointing at
# the wrong region of the image. Add such augmentation only together with a
# matching box transform.
input_transform = transforms.Compose([
    transforms.ToTensor(),
    # Per-channel mean/std normalisation of the RGB tensor.
    transforms.Normalize(mean = [ 0.485, 0.456, 0.406 ],
                        std = [ 0.229, 0.224, 0.225 ]),
])

class Data(data.Dataset):
    """Dataset over the (path, class, xmin, ymin, xmax, ymax) tuples from ReadData.

    ``target_transform`` and ``test`` are stored but not used by any method
    of this class.
    """

    def __init__(self, imgs, image_sets, input_transform = None,
                 target_transform = None, test = None):
        self.imgs = imgs
        # Number of classes, taken from the length of the class-name list.
        self.classes = len(image_sets)
        self.input_transform = input_transform
        self.target_transform = target_transform
        self.test = test

    def __getitem__(self, index):
        """Return (image, box, label, path) for sample ``index``.

        ``box`` is a 1x4 LongTensor [xmin, ymin, xmax, ymax] and ``label`` a
        1x1 LongTensor holding the class index. The image is loaded as RGB
        and passed through ``input_transform`` when one was given; the box
        is returned unchanged regardless of the transform.
        """
        path, class_idx, x0, y0, x1, y1 = self.imgs[index]
        label = torch.LongTensor([[class_idx]])
        image = Image.open(path).convert('RGB')
        box = torch.LongTensor([[x0, y0, x1, y1]])
        if self.input_transform is not None:
            image = self.input_transform(image)
        return image, box, label, path

    def __len__(self):
        """Number of annotated samples."""
        return len(self.imgs)

# Training dataset built from the tuples returned by ReadData above.
img_data = Data(imgs, image_sets,  input_transform = input_transform, target_transform=None\
                , test=None)
#test_data = Data(test_imgs, image_sets,  input_transform = input_transform, target_transform=None\
              #  , test=True)
# Sanity check: number of annotated samples.
print(len(img_data.imgs))
#print(len(test_data.imgs))

In [4]:
# One image per batch, shuffled. The cells below read the module-level
# names `img` and `box` produced here.
img_batch = data.DataLoader(img_data, batch_size=1 ,shuffle=True, num_workers = 2)
# next(iter(...)) builds a fresh iterator, so with shuffle=True this draws one random sample.
img, box, label , fn = next(iter(img_batch))
print img, box, label, fn


( 0 , 0 ,.,.) = 
 -0.4054 -0.3541 -0.3541  ...  -1.5528 -1.6213 -1.6213
 -0.4226 -0.3712 -0.3369  ...  -1.5185 -1.5185 -1.5014
 -0.4054 -0.3883 -0.3369  ...  -1.3987 -1.4158 -1.3987
           ...             ⋱             ...          
 -0.4739 -0.8335 -0.2856  ...  -0.9363 -1.5528 -1.7754
 -0.5424 -0.3883 -0.3027  ...  -0.8678 -1.5357 -1.4672
 -0.5938 -0.5767 -0.3198  ...  -1.0390 -1.3302 -1.0733

( 0 , 1 ,.,.) = 
 -0.2850 -0.2850 -0.2850  ...  -1.9482 -1.9832 -1.9832
 -0.3025 -0.3025 -0.2675  ...  -2.0182 -2.0357 -2.0182
 -0.2850 -0.2675 -0.2675  ...  -2.0182 -2.0357 -2.0182
           ...             ⋱             ...          
 -1.1604 -1.6331 -1.3354  ...  -0.8627 -1.5280 -1.7731
 -1.1078 -1.1078 -1.3004  ...  -0.7752 -1.4405 -1.3529
 -1.2129 -1.1954 -1.1429  ...  -0.9503 -1.2304 -0.9328

( 0 , 2 ,.,.) = 
 -0.2358 -0.2184 -0.2184  ...  -1.7347 -1.7347 -1.6999
 -0.2532 -0.2358 -0.2010  ...  -1.7696 -1.7347 -1.6824
 -0.2358 -0.2184 -0.2010  ...  -1.7173 -1.6999 -1.6476
           

In [5]:
# Load a whole pickled model object from disk.
# NOTE(review): torch.load unpickles arbitrary objects; only load checkpoint
# files from a trusted source.
resnet = torch.load('resnet184classification.pth')


# Truncated network: stem plus layer1 and layer2 only. Not referenced by any
# other visible cell (only by the commented experiment below).
model = torch.nn.Sequential(
               resnet.conv1,
               resnet.bn1,
               resnet.relu,
               resnet.maxpool,
               resnet.layer1,
               resnet.layer2)
#model = resnet.conv1
#print model(Variable(img)).size()
# Feature extractor used by Initial/Sample/Cat_State: every child module of
# the loaded network except the last two.
Extractor = nn.Sequential(*list(resnet.children())[:-2])
#print Extractor(Variable(img))
#have the same result
#print resnet.conv1(Variable(img)).size()

In [6]:
def Transition(box, image=None):
    """Crop the region described by ``box`` out of an image batch.

    Args:
        box: sequence or 1-D tensor ``[xmin, ymin, xmax, ymax]`` in pixel
            coordinates (both corners inclusive).
        image: 4-D tensor laid out as (batch, channel, height, width).
            When omitted, the module-level ``img`` is used, which is what
            the one-argument callers in this notebook rely on.

    Returns:
        ``image[:, :, ymin:ymax+1, xmin:xmax+1]``.

    The y-range indexes dim 2 (height) and the x-range indexes dim 3
    (width), matching how Action and Initial derive x extents from
    ``img.size(3)`` and y extents from ``img.size(2)``.
    """
    if image is None:
        image = img
    xmin, ymin, xmax, ymax = [int(v) for v in box]
    return image[:, :, ymin:ymax + 1, xmin:xmax + 1]
#print Transition(box).size()
#box_img = Extractor(Variable(Transition(box)))
#print box_img.size()

def roi_pooling(box_img):
    """Max-pool a 4-D feature map with a window derived from its own size.

    Per spatial axis of extent ``n`` the window is ``ceil(n / 2)`` with
    stride ``floor(n / 2)`` and no padding. An axis of extent 1 is handled
    separately with window 2, stride 1 and padding 1.

    Args:
        box_img: 4-D tensor; dims 2 and 3 are the pooled axes.

    Returns:
        The pooled tensor.
    """
    def _axis_params(extent):
        # Returns (kernel, stride, padding) for one spatial axis.
        if extent == 1:
            return 2, 1, 1
        half = extent / 2.0
        return int(np.ceil(half)), int(np.floor(half)), 0

    k_rows, s_rows, p_rows = _axis_params(box_img.size(2))
    k_cols, s_cols, p_cols = _axis_params(box_img.size(3))
    pool = nn.MaxPool2d(kernel_size=(k_rows, k_cols), stride=(s_rows, s_cols),
                        padding=(p_rows, p_cols), dilation=(1, 1))
    return pool(box_img)
#print roi_pooling(box_img).size()


In [7]:
def Cat_State(box_img, img):
    """Build the state vector from box features and whole-image features.

    Args:
        box_img: feature map of the cropped box (already run through Extractor).
        img: raw image tensor; it is wrapped in a Variable and run through
            Extractor here.

    Returns:
        A tensor of shape (1, K): the flattened pooled box features followed
        by the flattened pooled whole-image features.
    """
    box_features = roi_pooling(box_img).view(-1)
    image_features = roi_pooling(Extractor(Variable(img))).view(-1)
    joined = torch.cat((box_features, image_features))
    return joined.unsqueeze(0)

In [8]:
#hyper-parameters
# Mixing factor of the soft target update in CreateDict / CheckGradientUpdate.
tau = 0.001
# Learning rates of the Adam optimizers for the actor and the critic.
actor_lr = 0.01
critic_lr = 0.001
# Discount factor applied to the bootstrapped target Q-value.
gamma = 0.99
# Capacity passed to ReplayBuffer.
buffer_size = 100
# Number of transitions sampled from the replay buffer per update.
minibatch_size = 8
# NOTE(review): max_episodes and max_steps are not referenced by any visible
# cell; the loops below use literal counts instead.
max_episodes = 5
max_steps = 10
#concatenation of the feature vectors of global image(2048) and box image(2048)
state_dim = 4096 
# Nine outputs, one per action dispatched by Generate (8 box moves + Trigger).
action_dim = 9
# Amount subtracted from the reward for each box correction made in Sample.
Penalty = 0.1
# Seed passed to ReplayBuffer.
random_seed = 1234

In [9]:
# Online actor and critic networks (classes come from the project-local Network module).
actor = ActorNetwork(state_dim, action_dim)
critic = CriticNetwork(state_dim, action_dim)

In [10]:
# A translation step is 1/A_STEP of the image size along the moved axis.
A_STEP = 20
# Enlarge / Shrink scale the box size by (1 +/- SCALE_STEP).
SCALE_STEP = 0.4
#step may be a variable?
class Action(object):
    """Box transformations available to the agent.

    Every action method returns ``(new_box, trigger_flag)`` where ``new_box``
    is a LongTensor holding ``[xmin, ymin, xmax, ymax]`` and ``trigger_flag``
    is ``None`` for ordinary moves and ``True`` for ``Trigger``.

    Args:
        box: four integer-like values ``[xmin, ymin, xmax, ymax]``.
        img: 4-D image tensor; ``img.size(3)`` is taken as the width and
            ``img.size(2)`` as the height.
    """

    def __init__(self, box, img):
        # Keep the image so Trigger works on the image this instance was
        # built for rather than on a module-level global.
        self.img = img
        # Explicit floor division keeps all box arithmetic in integers.
        self.x_step = img.size(3) // A_STEP
        self.y_step = img.size(2) // A_STEP
        self.enlarge = 1 + SCALE_STEP
        self.shrink = 1 - SCALE_STEP
        self.xmin, self.ymin, self.xmax, self.ymax = [int(v) for v in box]
        self.center_x = (self.xmax + self.xmin) // 2
        self.center_y = (self.ymax + self.ymin) // 2
        self.width = self.xmax - self.xmin
        self.height = self.ymax - self.ymin
        # Horizontal amount removed/added on each side by Narrow/Stretch.
        self.narrow = self.width // 5

    @staticmethod
    def _as_box(xmin, ymin, xmax, ymax):
        """Pack four coordinates into a 1x4 LongTensor."""
        return torch.LongTensor([[xmin, ymin, xmax, ymax]])

    def _scaled(self, factor):
        """Box with width and height scaled by ``factor`` around the center."""
        half_w = int(factor * self.width) // 2
        half_h = int(factor * self.height) // 2
        return self._as_box(self.center_x - half_w, self.center_y - half_h,
                            self.center_x + half_w, self.center_y + half_h)

    def Up(self):
        """Shift the box towards smaller y by one y-step."""
        return self._as_box(self.xmin, self.ymin - self.y_step,
                            self.xmax, self.ymax - self.y_step), None

    def Down(self):
        """Shift the box towards larger y by one y-step."""
        return self._as_box(self.xmin, self.ymin + self.y_step,
                            self.xmax, self.ymax + self.y_step), None

    def Left(self):
        """Shift the box towards smaller x by one x-step."""
        return self._as_box(self.xmin - self.x_step, self.ymin,
                            self.xmax - self.x_step, self.ymax), None

    def Right(self):
        """Shift the box towards larger x by one x-step."""
        return self._as_box(self.xmin + self.x_step, self.ymin,
                            self.xmax + self.x_step, self.ymax), None

    def Enlarge(self):
        """Scale the box by (1 + SCALE_STEP) around its center."""
        return self._scaled(self.enlarge), None

    def Shrink(self):
        """Scale the box by (1 - SCALE_STEP) around its center."""
        return self._scaled(self.shrink), None

    def Narrow(self):
        """Move both vertical edges inwards by a fifth of the width."""
        return self._as_box(self.xmin + self.narrow, self.ymin,
                            self.xmax - self.narrow, self.ymax), None

    def Stretch(self):
        """Move both vertical edges outwards by a fifth of the width."""
        return self._as_box(self.xmin - self.narrow, self.ymin,
                            self.xmax + self.narrow, self.ymax), None

    def Trigger(self):
        """Restart from a fresh initial box on this instance's image.

        Note that the returned box comes straight from Initial and is 1-D
        (shape (4,)), unlike the 1x4 boxes of the other actions.
        """
        new_box, _ = Initial(self.img)
        return new_box, True
       

In [22]:
def Sample(box_, box, trigger, Trigger, Steps,img):
    """Validate a proposed box, score it and build the next state.

    Reads the module-level names ``action_bound``, ``ground_truth`` and
    ``Penalty`` in addition to its arguments.

    Args:
        box_: the box before the action (used only as the IoU baseline).
        box: the box proposed by the action, indexable as
            [xmin, ymin, xmax, ymax].
        trigger: ``None`` for an ordinary move; any other value marks a
            Trigger action.
        Trigger: running count of Trigger actions in this episode.
        Steps: running count of ordinary moves in this episode.
        img: image tensor handed to Action and Cat_State.

    Returns:
        ``(next_state, reward, terminal, Trigger, Steps)``; ``terminal`` is
        ``True`` when the episode ends (both counters are then reset to 0)
        and ``None`` otherwise.
    """
    terminal = None
    reward = 0    
    # Repair loop: enlarge/stretch boxes that are too small and push boxes
    # that touch or cross a bound back inside, charging Penalty per repair.
    # NOTE(review): Right and Left (or Down and Up) undo each other when a
    # box touches both opposite bounds, so this loop does not appear to
    # terminate for a box spanning the whole image -- confirm.
    while True:
        width = box[2] - box[0]
        height = box[3] - box[1]
        area = width *  height
        if area <= 20 or height < 5:
            box = Action(box,img).Enlarge()[0].squeeze(0)
            reward -= Penalty
        if width < 5:
            box = Action(box, img).Stretch()[0].squeeze(0)
            reward -= Penalty
        #penalty for surpassing boundary
        if box[0] <= action_bound[0]:
            box = Action(box, img).Right()[0].squeeze(0)
            reward -= Penalty
        if box[1] <= action_bound[1]:
            box = Action(box, img).Down()[0].squeeze(0)
            reward -= Penalty
        if box[2] >= action_bound[2]:
            box = Action(box, img).Left()[0].squeeze(0)
            reward -= Penalty
        if box[3] >= action_bound[3]:
            box = Action(box, img).Up()[0].squeeze(0)
            reward -= Penalty
        # `area` is the value computed at the top of this iteration, i.e.
        # before any of the repairs above were applied.
        if area > 20 and box[0] > action_bound[0] and box[1] > action_bound[1] and box[2] < action_bound[2] and box[3] < action_bound[3]:
            break
        
    # +1 / 0 / -1 depending on whether the IoU with the ground truth went up,
    # stayed the same or went down relative to the previous box.
    reward += np.sign(IoU(box, ground_truth) - IoU(box_, ground_truth))
    
    # Transition is called with the box only, so it crops the module-level
    # `img`, not necessarily the `img` argument of this function.
    box_img = Extractor(Variable(Transition(box)))
    next_state = Cat_State(box_img,img)
    if trigger is not None:
        # Trigger action: +/-3 depending on the IoU threshold of 0.6.
        if IoU(box, ground_truth) > 0.6:
            reward += 3.0
        else:
            reward -= 3.0
        Trigger += 1
    else:
        Steps += 1
    # Episode ends after 4 triggers or 10 ordinary moves.
    if Trigger >= 4 or Steps >=10:
        terminal = True
        Trigger = 0
        Steps = 0
    else:
        terminal = None
    return next_state, reward, terminal, Trigger, Steps

In [23]:
def Initial(img):
    """Pick a random starting box and compute its state.

    One of five regions of the image is chosen uniformly: the center, or one
    of the four corner quadrants (inset by 1/20 of the image size from the
    image border).

    Args:
        img: 4-D image tensor; ``img.size(3)`` is the width and
            ``img.size(2)`` the height.

    Returns:
        ``(box, state)`` where ``box`` is a 1-D LongTensor
        ``[xmin, ymin, xmax, ymax]`` and ``state`` is the output of
        Cat_State for that box.
    """
    width, height = img.size(3), img.size(2)
    # random.randint is inclusive on both ends: 0..4 gives each of the five
    # regions the same probability.
    region = random.randint(0, 4)
    half_w, half_h = width // 2, height // 2
    margin_w, margin_h = width // 20, height // 20
    if region == 0:  # center
        coords = [half_w - width // 4, half_h - height // 4,
                  half_w + width // 4, half_h + height // 4]
    elif region == 1:  # upper left corner
        coords = [margin_w, margin_h, half_w, half_h]
    elif region == 2:  # bottom left corner
        coords = [margin_w, half_h, half_w, height - margin_h]
    elif region == 3:  # upper right corner
        coords = [half_w, margin_h, width - margin_w, half_h]
    else:  # bottom right corner
        coords = [half_w, half_h, width - margin_w, height - margin_h]
    box = torch.LongTensor(coords)

    box_img = Extractor(Variable(Transition(box)))
    state = Cat_State(box_img, img)
    return box, state
#action_list = ['Up', 'Down', 'Left', 'Right', 'Enlarge' , 'Shrink', 'Narrow', 'Stretch', 'Trigger']
def Generate(a, box, img):
    """Apply action number ``a`` to ``box`` and return the Action result.

    Indices 0-7 map, in order, to Up, Down, Left, Right, Enlarge, Shrink,
    Narrow and Stretch; any other value runs Trigger.

    Returns:
        The ``(new_box, trigger_flag)`` pair of the selected Action method.
    """
    moves = ('Up', 'Down', 'Left', 'Right',
             'Enlarge', 'Shrink', 'Narrow', 'Stretch')
    for index, method_name in enumerate(moves):
        if a == index:
            return getattr(Action(box, img), method_name)()
    return Action(box, img).Trigger()
        

In [24]:
def CreateDict(target, net):
    """Build the soft-updated state dict for a target network.

    For every key of ``net.state_dict()`` the result holds
    ``(1 - tau) * target_value + tau * net_value``, with ``tau`` read from
    module scope. Neither network is modified; the caller loads the result
    with ``target.load_state_dict``.
    """
    online = net.state_dict()
    slow = target.state_dict()
    blended = collections.OrderedDict()
    for key in online.keys():
        blended[key] = (1 - tau) * slow[key] + tau * online[key]
    return blended

In [17]:
def CheckGradientUpdate(target, net):
    """Blend the gradients of ``net`` into the gradients of ``target``.

    Parameters are paired positionally. For each pair:
      * if the online parameter has no gradient, the pair is skipped;
      * if the target parameter has no gradient yet, it receives a copy of
        the online gradient;
      * otherwise the target gradient becomes
        ``(1 - tau) * target_grad + tau * online_grad`` (``tau`` is read
        from module scope).

    Args:
        target: network whose ``.grad`` attributes are written.
        net: network whose ``.grad`` attributes are read.
    """
    for param, shared_param in zip(net.parameters(), target.parameters()):
        if param.grad is None:
            # Nothing was back-propagated into this parameter.
            continue
        if shared_param.grad is None:
            # Assign to the attribute (rebinding the loop variable would be a
            # no-op) and copy so the two networks do not share one buffer.
            shared_param.grad = param.grad.detach().clone()
        else:
            shared_param.grad = (1 - tau) * shared_param.grad + tau * param.grad

In [18]:
#pipeline
# Replay memory (project-local class) plus separate Adam optimizers for the
# online actor and critic. The *_target networks are updated only through
# CreateDict / load_state_dict; no optimizer is attached to them.
replay_buffer = ReplayBuffer(buffer_size, random_seed)
optimizer_a = optim.Adam(actor.parameters(), lr = actor_lr)
actor_target = ActorNetwork(state_dim, action_dim)
critic_target = CriticNetwork(state_dim, action_dim)
optimizer_c = optim.Adam(critic.parameters(), lr = critic_lr)
loss_func = nn.MSELoss().cuda()

# All four networks are moved to the GPU; a CUDA device is required from here on.
actor.cuda()
critic.cuda()
actor_target.cuda()
critic_target.cuda()
# Per-step critic losses appended by the training loop.
loss=[]

# The string below is a disabled alternative (one optimizer with two
# parameter groups); as a bare string expression it has no effect.
"""optimizer = optim.Adam([
                {'params': actor.parameters()},
                {'params': critic.parameters(), 'lr': critic_lr}
            ], lr = actor_lr)"""

"optimizer = optim.Adam([\n                {'params': actor.parameters()},\n                {'params': critic.parameters(), 'lr': critic_lr}\n            ], lr = actor_lr)"

In [None]:
# Manual single-episode setup: draw one sample and reset the episode counters.
img, box, label ,fn= next(iter(img_batch))
# box is indexed as [batch, 0, coord]; take the four coordinates of the only sample.
ground_truth = box[0,0,:]
Trigger = 0
Steps = 0
box_, s = Initial(img)
# Bounds in box order [xmin, ymin, xmax, ymax]: width is dim 3, height is dim 2.
action_bound = torch.LongTensor([0,0,img.size(3),img.size(2)])

In [None]:
# Inspect the initial box, the image size and the ground-truth box.
print box_, img.size(), ground_truth

In [None]:
a_output = actor(s.cuda())
_ , action = torch.max(a_output.data,1)
print action
box, trigger = Generate(action[0], box_, img)
s2, r, t, Trigger, Steps = Sample(box_.squeeze(0), box.squeeze(0), trigger, Trigger, Steps)

target = torch.autograd.Variable(torch.randn(1, 1).unsqueeze(0))
output = critic(s.cuda(), a_output)
print output.data
#update target networks
critic_target.load_state_dict(CreateDict(critic_target, critic))
actor_target.load_state_dict(CreateDict(actor_target, actor))

optimizer_a.zero_grad()
optimizer_c.zero_grad()
#optimizer.zero_grad()

loss_ = loss_func(output,target.cuda())
loss_.backward(retain_graph = True)
optimizer_c.step()
optimizer_a.step()
#optimizer.step()



In [None]:
# Repeats the update of the previous cell on the same state `s` and the same
# `a_output`, then prints the actor's new arg-max action so the effect of the
# step can be observed. The target is random noise.
target = torch.autograd.Variable(torch.randn(1, 1).unsqueeze(0))
output = critic(s.cuda(), a_output)
print output.data
#update target networks
critic_target.load_state_dict(CreateDict(critic_target, critic))
actor_target.load_state_dict(CreateDict(actor_target, actor))

optimizer_a.zero_grad()
optimizer_c.zero_grad()
#optimizer.zero_grad()

loss_ = loss_func(output,target.cuda())
# a_output was produced in the previous cell; its graph must still exist here.
loss_.backward(retain_graph = True)
optimizer_c.step()
optimizer_a.step()
#optimizer.step()
#print('state: {}, action: {}'.format(s, a_output))
_ , action = torch.max(actor(s.cuda()).data,1)
#print actor.state_dict()['model.0.weight']
#print actor.state_dict()['model.0.bias']
print 'action:', action



In [None]:
# NOTE(review): predicted_q_value is first assigned inside the training loop
# in a later cell, so this cell fails on a fresh top-to-bottom run.
print predicted_q_value

In [None]:
for i in xrange(5):
    img, box, label ,fn= next(iter(img_batch))
    ground_truth = box[0,0,:]
    Trigger = 0
    Steps = 0
    box_, s = Initial(img)
    action_bound = torch.LongTensor([0, 0, img.size(3),img.size(2)])
    count = 0

    while True:
        count = count + 1
        a_output = actor(s.cuda())
        _ , action = torch.max(a_output.data,1)
        print action
        
        box, trigger = Generate(action[0], box_, img)
        s2, r, t, Trigger, Steps = Sample(box_.squeeze(0), box.squeeze(0), trigger, Trigger, Steps,img)
       # if Trigger != 0:
            
        replay_buffer.add(s, a_output, r , t, s2)
        if replay_buffer.size() > minibatch_size:
            s_batch, a_batch, r_batch, t_batch, s2_batch \
            = replay_buffer.sample_batch(minibatch_size)
            
            r_batch_tensor = torch.FloatTensor(minibatch_size)
            y = Variable(torch.FloatTensor(minibatch_size))
            
            for k in range(minibatch_size):
                s2 = s2_batch[k].unsqueeze(0)
                target_a = actor_target(s2.cuda())
                target_q = critic_target(s2.cuda(), target_a)
                target_q = target_q.squeeze(0)
                r_batch_tensor[k] = r_batch[k]

                if t_batch[k] is not None:
                    y[k] = r_batch_tensor[k]
                else:
                    y[k] = r_batch_tensor[k] + gamma * target_q
            
            for m in range(minibatch_size):
                state = s_batch[m].unsqueeze(0)
                state = state.cuda()
                #action = a_batch[8 * m: 8 * (m+1)].unsqueeze(0)
                action = actor(state)
                predicted_q_value = y[m].cuda()
                target = torch.autograd.Variable(predicted_q_value.data.unsqueeze(0))

                output = critic(state, action)
                #update target networks
                critic_target.load_state_dict(CreateDict(critic_target, critic))
                actor_target.load_state_dict(CreateDict(actor_target, actor))

                optimizer_a.zero_grad()
                optimizer_c.zero_grad()
                #optimizer.zero_grad()

                loss_ = loss_func(output,target)
                loss_.backward(retain_graph = True)
                # check the target networks
                CheckGradientUpdate(actor_target, actor)
                CheckGradientUpdate(critic_target, critic)
                torch.nn.utils.clip_grad_norm(actor.parameters(), 50)
                optimizer_c.step()
                optimizer_a.step()
                #optimizer.step()
                #print output.data

            loss.append(loss_.data[0])
            print('step: {}, loss: {}'.format(count + 1, loss_.data[0]))
        if t is True:
            print "one img terminated"
            break
        s = s2
        

In [19]:
class Net(nn.Module):
    """Two-layer Q-network: state_dim -> 10 -> action_dim with a ReLU between.

    ``state_dim`` and ``action_dim`` are read from module scope. Both weight
    matrices are re-initialised from a normal distribution (mean 0, std 0.1);
    biases keep the nn.Linear default initialisation.
    """

    def __init__(self, ):
        super(Net, self).__init__()
        self.fc1 = self._make_linear(state_dim, 10)
        self.out = self._make_linear(10, action_dim)

    @staticmethod
    def _make_linear(in_features, out_features):
        """Create a Linear layer whose weights are drawn from N(0, 0.1)."""
        layer = nn.Linear(in_features, out_features)
        layer.weight.data.normal_(0, 0.1)   # initialization
        return layer

    def forward(self, x):
        """Return one value per action for the given state batch."""
        return self.out(F.relu(self.fc1(x)))

In [None]:
# Inspect the current initial box, its state vector and the image size.
print box_,s
print img.size()

In [20]:
class DQN(object):
    """Minimal DQN agent with a fixed-size in-memory replay table.

    Each memory row is laid out as [state | action | reward | next_state],
    i.e. ``state_dim * 2 + 2`` columns. The table holds 50 rows and is
    overwritten cyclically. No terminal flag is stored.
    """
    def __init__(self):
        # Online and target Q-networks, both on the GPU.
        self.eval_net, self.target_net = Net().cuda(), Net().cuda()

        self.learn_step_counter = 0                                     # for target updating
        self.memory_counter = 0                                         # for storing memory
        self.memory = Variable(torch.zeros(50, state_dim * 2 + 2))    # initialize memory
        self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=critic_lr)
        self.loss_func = nn.MSELoss()

    def choose_action(self, x):
        """Epsilon-greedy action: arg-max of eval_net with probability 0.9,
        otherwise a uniformly random action index. Returns a CUDA LongTensor
        in both branches."""
        #x = Variable(torch.unsqueeze(torch.FloatTensor(x), 0))
        # input only one sample
        if np.random.uniform() < 0.9:   # greedy
            a_output = self.eval_net.forward(x.cuda())
            _ , action = torch.max(a_output.data,1)
            #action = torch.LongTensor(([action.numpy()[0]]))   # return the argmax
        else:   # random
            action = np.random.randint(0, action_dim)
            action = torch.LongTensor(([action])).cuda()
        return action

    def store_transition(self, s, a, r, s_):
        """Write one (s, a, r, s_) row into the cyclic memory table."""
        # First element of the action tensor as a plain number.
        a = a.cpu().numpy()[0]
        # Concatenate along dim 1 in the row layout [s | a | r | s_].
        transition = torch.cat((s.cuda(), Variable(torch.FloatTensor(([a]))).unsqueeze(0).cuda(), \
                                Variable(torch.FloatTensor(([r])).unsqueeze(0)).cuda()\
                                , s_.cuda()),1)
        # replace the old memory with new memory
        index = self.memory_counter % 50 #buffer_size
        # NOTE(review): `transition` is on the GPU while self.memory was
        # created on the CPU -- confirm this assignment behaves as intended.
        self.memory[index, :] = transition
        self.memory_counter += 1

    def learn(self):
        """Run one Q-learning update on 10 rows sampled from the 50-row memory."""
        # target parameter update
        # Hard copy of the online weights into the target net every 10 calls.
        if self.learn_step_counter % 10 == 0:
            self.target_net.load_state_dict(self.eval_net.state_dict())
        self.learn_step_counter += 1

        # sample batch transitions
        # Indices are drawn over all 50 rows; the caller in this notebook
        # only calls learn() once memory_counter exceeds 50.
        sample_index = np.random.choice(50, 10)
        b_memory = self.memory[sample_index, :]
        b_s = b_memory[:, :state_dim].cuda()
        # Actions were stored as floats; convert back to integer indices for gather.
        b_a = b_memory[:, state_dim:state_dim+1].data.numpy().astype(int)
        b_a = Variable(torch.from_numpy(b_a)).cuda()
        b_r = b_memory[:, state_dim+1:state_dim+2].cuda()
        b_s_ = b_memory[:, -state_dim:].cuda()

        # q_eval w.r.t the action in experience
        q_eval = self.eval_net(b_s).gather(1, b_a)  # shape (batch, 1)
        
        q_next = self.target_net(b_s_).detach()     # detach from graph, don't backpropagate
        # No terminal mask is applied: every row bootstraps from q_next.
        q_target = b_r + gamma * q_next.max(1)[0].unsqueeze(1)   # shape (batch, 1)
        q_target = torch.autograd.Variable(q_target.data)
        loss = self.loss_func(q_eval, q_target)

        self.optimizer.zero_grad()
        loss.backward(retain_graph = True)
        self.optimizer.step()
        

In [None]:
# Manual single-episode setup: draw one sample and reset the episode counters.
img, box, label ,fn= next(iter(img_batch))
ground_truth = box[0,0,:]
Trigger = 0
Steps = 0
box_, s = Initial(img)
# Bounds must follow the box order [xmin, ymin, xmax, ymax]: the x limit is
# the image width (dim 3) and the y limit is the image height (dim 2).
action_bound = torch.LongTensor([0,0,img.size(3),img.size(2)])

In [None]:
#test for DQN
#a = dqn.choose_action(s)
#test for DDPG
a_output = actor(s.cuda())
_ , a = torch.max(a_output.data,1)

print a
# take action
box, trigger = Generate(a[0], box_, img)
s_, r, t, Trigger, Steps = Sample(box_.squeeze(0), box.squeeze(0), trigger, Trigger, Steps)
#DQN
#box, trigger = Generate(a.cpu().numpy()[0] , box_, img)
#s_, r, terminal, Trigger, Steps = Sample(box_.squeeze(0), box.squeeze(0), trigger, Trigger, Steps)


In [None]:
dqn = DQN()
print('\nCollecting experience...')
for i_episode in range(10):
    img, box, label ,fn= next(iter(img_batch))
    ground_truth = box[0,0,:]
    Trigger = 0
    Steps = 0
    box_, s = Initial(img)
    action_bound = torch.LongTensor([0,0,img.size(2),img.size(3)])
    ep_r = 0
    while True:
        a = dqn.choose_action(s)
        print a
        # take action
        box, trigger = Generate(a.cpu().numpy()[0] , box_, img)
        s_, r, terminal, Trigger, Steps = Sample(box_.squeeze(0),\
                                                 box.squeeze(0), trigger, Trigger, Steps, img)

        # modify the reward
        """x, x_dot, theta, theta_dot = s_
        r1 = (env.x_threshold - abs(x)) / env.x_threshold - 0.8
        r2 = (env.theta_threshold_radians - abs(theta)) / env.theta_threshold_radians - 0.5
        r = r1 + r2"""

        dqn.store_transition(s, a, r, s_)

        ep_r += r
        if dqn.memory_counter > 50:
            print "\nStarting training..."
            dqn.learn()
            if terminal is True:
                print('Ep: ', i_episode,
                      '| Ep_r: ', round(ep_r, 2))

        if terminal is True:
            print ("Trigger:{} Steps:{}".format(Trigger, Steps))
            break
        s = s_


Collecting experience...

 3
[torch.cuda.LongTensor of size 1 (GPU 0)]


 3
[torch.cuda.LongTensor of size 1 (GPU 0)]


 3
[torch.cuda.LongTensor of size 1 (GPU 0)]


 3
[torch.cuda.LongTensor of size 1 (GPU 0)]


 3
[torch.cuda.LongTensor of size 1 (GPU 0)]


 3
[torch.cuda.LongTensor of size 1 (GPU 0)]


 3
[torch.cuda.LongTensor of size 1 (GPU 0)]


 8
[torch.cuda.LongTensor of size 1 (GPU 0)]


 3
[torch.cuda.LongTensor of size 1 (GPU 0)]


 3
[torch.cuda.LongTensor of size 1 (GPU 0)]


 3
[torch.cuda.LongTensor of size 1 (GPU 0)]

Trigger:0 Steps:0

 3
[torch.cuda.LongTensor of size 1 (GPU 0)]


 3
[torch.cuda.LongTensor of size 1 (GPU 0)]


 3
[torch.cuda.LongTensor of size 1 (GPU 0)]


 3
[torch.cuda.LongTensor of size 1 (GPU 0)]


 3
[torch.cuda.LongTensor of size 1 (GPU 0)]


 3
[torch.cuda.LongTensor of size 1 (GPU 0)]


 3
[torch.cuda.LongTensor of size 1 (GPU 0)]


 3
[torch.cuda.LongTensor of size 1 (GPU 0)]


 3
[torch.cuda.LongTensor of size 1 (GPU 0)]


 3
[torch.cuda.

In [None]:
def PlotBox(img, box):
    """Draw ``box`` as a red outline on ``img`` and display the result.

    Args:
        img: PIL image; the rectangle is drawn onto it in place.
        box: rectangle coordinates as accepted by ``ImageDraw.rectangle``.
    """
    ImageDraw.Draw(img).rectangle(box, outline = 'red')
    plt.imshow(img)
    plt.show()

In [None]:
# Draw one sample and show its ground-truth box on the untransformed image.
img, box, label ,fn= next(iter(img_batch))
img_e = Image.open(fn[0])
ground_truth = box[0,0,:]
box = (ground_truth[0],ground_truth[1],ground_truth[2],ground_truth[3])
PlotBox(img_e ,box)
# Bounds must follow the box order [xmin, ymin, xmax, ymax]: the x limit is
# the image width (dim 3) and the y limit is the image height (dim 2).
action_bound = torch.LongTensor([0,0,img.size(3),img.size(2)])

In [None]:
# Start a manual rollout: random initial box, then the actor's arg-max action.
initial_box, s = Initial(img)

#a = dqn.choose_action(s)
#newbox, trigger = Generate(a.cpu().numpy()[0], initial_box, img)
a_output = actor(s.cuda())
_ , a = torch.max(a_output.data,1)

print a
# take action
newbox, trigger = Generate(a[0], initial_box, img)

In [None]:
# Inspect the bounds used by Sample.
print action_bound

In [None]:
# Score the first action. Sample takes the image as its sixth argument.
s2, r, t, Trigger, Steps = Sample(initial_box.squeeze(0), newbox.squeeze(0), trigger, Trigger, Steps, img)
newbox = newbox.squeeze(0)

In [None]:
#print newbox
#a = dqn.choose_action(s2)
a_output = actor(s2.cuda())
_ , a = torch.max(a_output.data,1)
print a
#newbox1, trigger = Generate(a.cpu().numpy()[0], newbox, img)
newbox1, trigger = Generate(a[0], newbox, img)
s2, r, t, Trigger, Steps = Sample(newbox.squeeze(0), newbox1.squeeze(0), trigger, Trigger, Steps)
print r, Steps, s2, newbox1


In [None]:
# Show the box proposed by the second action on the untransformed image.
# This is the box returned by Generate, before any correction made inside Sample.
newbox = newbox1.squeeze(0)
box = (newbox[0],newbox[1],newbox[2],newbox[3])
PlotBox(img_e ,box)
print box

In [None]:
# Raw actor output and the index of its largest entry.
print a_output
_, action = torch.max(a_output.data,1)
print action

In [None]:
# Persist the four networks as whole pickled module objects (not state
# dicts), so loading them requires the Network module to be importable.
torch.save(actor, 'actor1.pth')
torch.save(critic, 'critic1.pth')
torch.save(actor_target, 'actor_target1.pth')
torch.save(critic_target, 'critic_target1.pth')

In [None]:
def extract_grads(network):
    """Return every parameter gradient of ``network`` as a numpy array times 2.

    The result is a list in ``network.parameters()`` order. Every parameter
    must already have a gradient that can be converted with ``.numpy()``.
    """
    doubled = []
    for param in network.parameters():
        doubled.append(param.grad.data.numpy() * 2)
    return doubled


def update(network, grad_in, grad_out):
    """Backward-hook style callback that recomputes the doubled gradients.

    The result is only bound to the local name ``grad_out``; nothing is
    returned and the caller's objects are not modified.
    """
    grad_out = extract_grads(network)

# NOTE(review): neither `network` nor `self` is defined at module level in
# the visible notebook, so this line raises NameError as written.
network.register_backward_hook(self.update)

In [None]:
# img, box, label = next(iter(img_batch))
box_, s = Initial(img)
action,t = Generate(0, box_)
print s
#b = Action(box_)
#print b.a()

In [None]:
# the Ornstein-Uhlenbeck stochastic process for control tasks
class OrnsteinUhlenbeckActionNoise:
    """Ornstein-Uhlenbeck process used as temporally correlated action noise.

    Each call advances the process by one step of size ``dt``:
    ``x <- x + theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)``.

    Args:
        mu: numpy array the process reverts to; its shape is the noise shape.
        sigma: scale of the random term.
        theta: strength of the pull towards ``mu``.
        dt: step size.
        x0: optional starting value; zeros shaped like ``mu`` when None.
    """

    def __init__(self, mu, sigma = 0.3, theta=.15, dt=1e-2, x0=None):
        self.mu = mu
        self.sigma = sigma
        self.theta = theta
        self.dt = dt
        self.x0 = x0
        self.reset()

    def __call__(self):
        """Advance the process one step and return the new value."""
        drift = self.theta * (self.mu - self.x_prev) * self.dt
        diffusion = self.sigma * np.sqrt(self.dt) * np.random.normal(size=self.mu.shape)
        x = self.x_prev + drift + diffusion
        self.x_prev = x
        return x

    def reset(self):
        """Return the process to ``x0`` (or to zeros when no ``x0`` was given)."""
        if self.x0 is not None:
            self.x_prev = self.x0
        else:
            self.x_prev = np.zeros_like(self.mu)

    def __repr__(self):
        return 'OrnsteinUhlenbeckActionNoise(mu={}, sigma={})'.format(self.mu, self.sigma)

In [None]:
#reference for mutiple branches
class mm(nn.Module):
    """Toy two-branch network: a shared 4->3 layer feeding two separate heads.

    ``forward`` runs each of its two inputs through the shared layer ``n``;
    the first goes on through head ``m`` (3->2), the second through head
    ``m2`` (3->4).
    """

    def __init__(self):
        super(mm, self).__init__()
        self.n = nn.Linear(4,3)
        self.m = nn.Linear(3,2)
        self.m2 = nn.Linear(3,4)

    def forward(self, input, input2):
        """Return ``(m(n(input)), m2(n(input2)))``."""
        shared_first = self.n(input)
        shared_second = self.n(input2)
        return self.m(shared_first), self.m2(shared_second)

# Reference sketch only, not runnable as written: `mm(input)` calls the
# constructor (which takes no arguments) instead of an instance, and
# `target`, `target2` and `loss2` are not defined in the visible notebook.
o1, o2 = mm(input)
o = o1 + o2
# loss

## Or you can do

# Back-propagate two separate losses in a single call.
l1 = loss(o1, target)
l2 = loss2(o2, target2)
torch.autograd.backward([l1, l2])