## Setup

In [2]:
import torch
import torch.nn as nn
from torch.autograd import Variable

In [3]:
import sys
sys.path.insert(0, '/home/ravi/Desktop/eelfff')

In [4]:
import copy
import datetime
from datetime import datetime
from FireSimulator import *
from FireSimulatorUtilities import *
import glob
import itertools
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle
import time

%matplotlib inline
%load_ext autoreload
%autoreload 2

## Reward function

In [5]:
def eelfff_reward(traj, img_st, center, close_pos, id_compare):
    """Score one agent move from ``traj[0]`` to ``traj[1]``.

    The reward is the sum of three independent terms:

    1. Cell term -- based on the value of ``img_st`` at the cell the agent
       moves onto (the image is indexed relative to the agent, with the
       agent's current cell at ``img_dim//2``):
         * value 1 with at least one 4-neighbour equal to 0: +1, else -2
         * value 0 with at least one 8-neighbour equal to 1 or 2: +0.5, else -1
         * any other value: no contribution
    2. Proximity term (only when ``id_compare`` is truthy) -- -10 if the new
       position is within distance 1 of ``close_pos``; +1 if the old position
       was within distance 1 and the new one is not.
    3. Rotation term -- +1 when the negated 2-D cross product of the
       (centre -> agent) direction and the move direction is >= 0.

    Parameters
    ----------
    traj : sequence of (x, y) positions; only the first two entries are used.
    img_st : 2-D array indexed as ``[row, col]``.
        Presumably 0/1/2 encode healthy/burning/burnt cells -- confirm
        against the simulator.
    center : scalar used as both the x and y coordinate of the reference point.
    close_pos : position compared against ``traj`` in the proximity term.
    id_compare : bool enabling the proximity term.

    Returns
    -------
    int or float : the accumulated reward.
    """
    reward = 0
    fire_neigh = [(-1,0),(0,-1),(1,0),(0,1)]
    move_neigh = [(-1,0),(1,0),(-1,1),(0,1),(1,1),(-1,-1),(0,-1),(1,-1)] #excluded (0,0)
    img_dim = img_st.shape[0]
    n_rows, n_cols = img_st.shape[:2]

    x1,y1 = traj[0]
    x2,y2 = traj[1]

    # image rows grow downwards while y grows upwards, hence the sign flip
    r = -y2 + y1 + img_dim//2
    c =  x2 - x1 + img_dim//2

    def count_neighbours(offsets, values):
        # Neighbours outside the image are ignored instead of wrapping around
        # through negative indices or raising IndexError.
        total = 0
        for (dr,dc) in offsets:
            rn = r + dr
            cn = c + dc
            if 0 <= rn < n_rows and 0 <= cn < n_cols and img_st[rn,cn] in values:
                total += 1
        return total

    cell = img_st[r,c]
    if cell == 1:
        if count_neighbours(fire_neigh, (0,)) > 0:
            reward += 1
        else:
            reward += -2
    elif cell == 0:
        if count_neighbours(move_neigh, (1,2)) > 0:
            reward += 0.5
        else:
            reward += -1

    # proximity term: distances are only evaluated when id_compare is set
    if id_compare:
        dist_after = np.linalg.norm(traj[1]-close_pos,2)
        if dist_after <= 1:
            reward += -10
        elif np.linalg.norm(traj[0]-close_pos,2)<=1 and dist_after > 1:
            reward += 1

    move_vec = np.array([x2-x1,y2-y1])
    # only diagonal moves need normalising; zero moves are left as the zero vector
    if (x2-x1) != 0 and (y2-y1) != 0:
        move_vec = move_vec / np.linalg.norm(move_vec,2)

    cen_vec = np.array([x1-center,y1-center])
    cen_norm = np.linalg.norm(cen_vec,2)
    # An agent exactly on the centre has no defined rotation direction; the
    # original division produced NaN there (and therefore no bonus), so the
    # term is simply skipped.
    if cen_norm > 0:
        cen_vec = cen_vec / cen_norm
        # explicit z-component of the 2-D cross product (np.cross on
        # 2-element vectors is deprecated in NumPy 2.0)
        score = -1*(cen_vec[0]*move_vec[1] - cen_vec[1]*move_vec[0])
        if score >= 0:
            reward += 1

    return reward

## DQN architecture

In [6]:
# Sanity check: displays whether PyTorch can see a CUDA device.
torch.cuda.is_available()

True

In [7]:
# Tensor type used for the model and its inputs. Falls back to CPU tensors
# when no CUDA device is available so the notebook still runs end to end.
dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

In [8]:
class eelfff(nn.Module):
    """Fully connected Q-network: feature vector in, one Q-value per action out.

    The input vector has ``img_dim**2 + 2 + 1 + 2`` entries: the flattened
    image patch, a 2-D rotation vector, one id-comparison flag and a 2-D
    relative position to another agent.

    Parameters
    ----------
    img_dim : int
        Side length of the square image patch that is flattened into the input.
    hidden_dim : int
        Width of both hidden layers (default 2048, the original fixed value).
    num_actions : int
        Number of Q-values produced (default 9, the original fixed value).

    The layers live in ``self.net`` at the same ``nn.Sequential`` indices as
    before, so existing checkpoints keep the same ``state_dict`` keys.
    """

    def __init__(self, img_dim=8, hidden_dim=2048, num_actions=9):
        super(eelfff, self).__init__()
        self.img_dim = img_dim

        # inputs: image + rot vec + id compare + pos-other_pos
        feat_dim = self.img_dim**2 + 2 + 1 + 2
        self.net = nn.Sequential(
                                nn.Linear(feat_dim, hidden_dim),
                                nn.ReLU(inplace=True),
                                nn.Linear(hidden_dim, hidden_dim),
                                nn.ReLU(inplace=True),
                                nn.Linear(hidden_dim, num_actions)
                            )

    def forward(self, feat):
        """Return the Q-values for a batch of feature vectors (batch on dim 0)."""
        return self.net(feat)


In [9]:
# Smoke test: build a network for a 3x3 patch, push a random batch through it
# and report the output size and the elapsed time.
# time.clock() was removed in Python 3.8; perf_counter() measures elapsed time.
tic = time.perf_counter()
N = 4
img_dim = 3

model = eelfff(img_dim).type(dtype)
feat = Variable(torch.randn(N,img_dim**2+2+1+2)).type(dtype)
Q = model(feat)
toc = time.perf_counter()

print(Q.size())
print("%0.2fs = %0.2fm elapsed for this test" %(toc-tic,(toc-tic)/60))

torch.Size([4, 9])
396.41s = 6.61m elapsed for this test


## Load the network

In [10]:
# Fresh network sized for a 3x3 image patch, converted to the working tensor type.
img_dim = 3
model = eelfff(img_dim=img_dim)
model = model.type(dtype)

In [11]:
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
filename = '/home/ravi/Desktop/eelfff/networks/simple_ext-26-Aug-2017-15:17.pth.tar'

# Deserialize every storage onto the CPU first so that a checkpoint saved from
# a GPU also loads on a machine without CUDA; load_state_dict then copies the
# values into the model's own parameters, wherever they live.
checkpoint = torch.load(filename, map_location=lambda storage, loc: storage)
model.load_state_dict(checkpoint['state_dict'])

## Benchmark network solution

In [12]:
# Benchmark results, keyed by simulation index; filled in by the benchmark loop.
results = {}

In [19]:
# --- Benchmark configuration ---

# size argument passed to FireSimulator
grid_size = 50
# number of agents simulated
num_agents = 10
# position an agent is sent to once its capacity is used up
base_station = np.array([5,5])

# number of control actions an agent can apply before it is reset to the base station
capacity = 10

# None -> the simulator is built without explicit initial fires and stepped
# once to start a fire; otherwise this is passed as FireSimulator's fire_init.
# The commented-out code below is a disabled alternative that builds an
# explicit list of initial fire cells.
fire_init = None
# x = math.ceil(grid_size / 2)
# deltas = [q for q in range(-5,5,1)]
# neighbors = itertools.product(deltas,deltas)
# for (dx,dy) in neighbors:
#     xn = x + dx
#     yn = x + dy
#     fire_init.append((xn,yn))

# passed to sim.step as `dbeta` -- NOTE(review): meaning of the 0.15/0.2763 ratio is not visible here
dp = 0.15/0.2763
# the simulator is advanced once every `repeat_lim` agent iterations
repeat_lim = 6
# scalar used as both x and y coordinate of the reference point agents move toward / circle
center = (grid_size+1)/2
# initial agent coordinates are a random spawn_loc entry plus a random perturbs entry;
# for grid_size = 50 these are [8, 24, 40] and the integers -8..8
spawn_loc = np.arange(grid_size//3//2,grid_size,grid_size//3)
perturbs = np.arange(-grid_size//3//2+1,grid_size//3//2+1,1)

# one simulation is run per seed
seeds = [1000]

In [20]:
# Benchmark driver: for every seed, run one simulation until it ends. Each
# agent first follows a heuristic that moves it toward `center`; once it has
# reached a cell in state 1 or 2 it switches to the trained network. Summary
# statistics and per-agent reward totals are stored in `results`.
st = datetime.today().strftime('%Y-%m-%d %H:%M:%S')
print('[%s] start' % st)

# time.clock() was removed in Python 3.8; perf_counter() measures elapsed time.
tic = time.perf_counter()
for i,s in enumerate(seeds):
    np.random.seed(1000+s)
    results[i] = {}

    # initialize simulator
    if fire_init is None:
        sim = FireSimulator(grid_size, rng=s)
        sim.step([]) # start fire in domain
    else:
        sim = FireSimulator(grid_size, rng=s, fire_init=fire_init)

    num_init_fires = len(sim.fires)

    # initialize agent position
    n = num_agents
    agent_steps = 0
    agent_pos = np.random.choice(spawn_loc, (n,2)) + np.random.choice(perturbs, (n,2))
    agent_pos = np.squeeze(agent_pos).astype(np.int32)

    # per-agent bookkeeping:
    #   'sf'          - True once the agent's own cell was in state 1 or 2
    #                   (presumably "seen fire" -- confirm); selects the network policy
    #   'cap'         - remaining control actions before a reset to the base station
    #   'reward'      - accumulated eelfff_reward while 'sf' is set
    #   'rewardsteps' - number of steps that contributed to 'reward'
    agent_data = {}
    for k in range(n):
        agent_data[k] = {}
        agent_data[k]['sf'] = False
        agent_data[k]['cap'] = capacity
        agent_data[k]['reward'] = 0
        agent_data[k]['rewardsteps'] = 0

    # control actions collected since the last simulator update
    control = []
    repeat_ctr = 1

    new_agent_pos = np.zeros((n,2)).astype(np.int32)

    while not sim.end:
        # calculate action for each agent
        for k in range(num_agents):
            img, img_st, _ = CreateImageBW(sim.state, agent_pos[k,:])
            # NOTE(review): 8//2 assumes an 8x8 image from CreateImageBW -- confirm
            if img_st[8//2,8//2] in [1,2]:
                agent_data[k]['sf'] = True

            # closest other agent; ties in distance fall back to the lower index
            dists = [(np.linalg.norm(agent_pos[k,:]-p,2),j,p) for j,p in enumerate(agent_pos) if j!=k]
            min_dist, min_id, min_pos = min(dists)

            if not agent_data[k]['sf']:
                # heuristic policy: pick the action whose resulting position has the
                # smallest Manhattan distance to the centre; the small incentive
                # favours actions listed earlier when distances are equal
                dists = []
                for idx,a in enumerate([2,5,7,4,1,3,8,6]):
                    new_pos = actions_to_trajectory(agent_pos[k,:],[a])[1]
                    incntv = -(8-idx)*0.1
                    dists.append((np.abs(center-new_pos[0])+np.abs(center-new_pos[1])+incntv,new_pos,a))

                score, pos, action = min(dists)
                traj = actions_to_trajectory(agent_pos[k,:], [action])

            else:
                # network policy: build the feature vector expected by the model
                # unit vector from the centre to the agent, rotated by 90 degrees
                rot_vec = agent_pos[k,:] - center
                rot_vec = rot_vec / np.linalg.norm(rot_vec,2)
                rot_vec = np.array([rot_vec[1],-rot_vec[0]])

                # offset to the closest agent; normalised only when both components are non-zero
                pos_vec = agent_pos[k,:] - min_pos
                if pos_vec[0]!=0 and pos_vec[1]!= 0:
                    pos_vec = pos_vec / np.linalg.norm(pos_vec,2)

                # features: 3x3 patch taken from rows/cols 3..5 of the image,
                # rotation vector, id-comparison flag, offset to the closest agent
                state = np.concatenate((img[3:6,3:6].reshape((img_dim**2,)), rot_vec,
                                        np.asarray(k>min_id)[np.newaxis], pos_vec))

                state = Variable(torch.from_numpy(state)).type(dtype)
                Q = model(state.unsqueeze(0))[0].data.cpu().numpy()
                # greedy action
                action = np.argmax(Q)
                traj = actions_to_trajectory(agent_pos[k,:], [action])

            # rewards are only tracked while the agent is flagged 'sf'
            if agent_data[k]['sf']:
                reward = eelfff_reward(traj, img_st, center, min_pos, k>min_id)
                agent_data[k]['reward'] += reward
                agent_data[k]['rewardsteps'] += 1

            # generate control from trajectory
            # account for capacity constraint
            agent_control = FindGridIntersections(sim.state, traj)
            for el in agent_control:
                agent_data[k]['cap'] -= 1
                control.extend([el])
                if agent_data[k]['cap'] <= 0:
                    break

            # drop duplicate control entries
            control = list(set(control))

            # update agent location
            if agent_data[k]['cap'] <= 0:
                # out of capacity: reset flags and send the agent to the base station
                agent_data[k]['sf'] = False
                agent_data[k]['cap'] = capacity
                new_agent_pos[k,:] = base_station
            else:
                new_agent_pos[k,:] = [traj[-1][0], traj[-1][1]]

        # update simulator periodically
        if repeat_ctr % repeat_lim == 0:
            sim.step(control, dbeta=dp)
            control = []
        repeat_ctr += 1

        agent_steps += 1

        # update agent position
        agent_pos = new_agent_pos
        new_agent_pos = np.zeros((n,2)).astype(np.int32)

    if (i+1) % 10 == 0:
        st = datetime.today().strftime('%Y-%m-%d %H:%M:%S')
        print("[%s] finished %d simulations" % (st,i+1))

    # store simulation result
    results[i]['frac_healthy'] = sim.stats[0]/np.sum(sim.stats)  # fraction of healthy trees
    results[i]['totalsteps'] = sim.iter
    results[i]['agentsteps'] = agent_steps
    for k in range(num_agents):
        results[i][k] = {}
        results[i][k]['reward'] = agent_data[k]['reward']
        results[i][k]['rewardsteps'] = agent_data[k]['rewardsteps']

# refresh the timestamp; previously the value was computed but discarded, so
# the "finish" line repeated the start time
st = datetime.today().strftime('%Y-%m-%d %H:%M:%S')
print('[%s] finish' % st)

toc = time.perf_counter()
print("%0.2fs = %0.2fm elapsed" % (toc-tic,(toc-tic)/60))

[2018-02-21 19:32:38] start
[2018-02-21 19:32:38] finish
11.09s = 0.18m elapsed


In [21]:
# Persist the results; the file name encodes grid size, agent count,
# initial fire count and number of seeds.
pkl_name = 'simple_ext_capped_g%d+a%d+f%d+s%d.pkl' % (
    grid_size,
    num_agents,
    num_init_fires,
    len(seeds),
)
with open(pkl_name, 'wb') as fp:
    pickle.dump(results, fp)

In [22]:
# Display the collected benchmark results.
results

{0: {0: {'reward': 1375.0, 'rewardsteps': 941},
  1: {'reward': 1327.0, 'rewardsteps': 924},
  2: {'reward': 1314.0, 'rewardsteps': 956},
  3: {'reward': 1381.5, 'rewardsteps': 941},
  'frac_healthy': 0.026800000000000001,
  5: {'reward': 1390.5, 'rewardsteps': 936},
  6: {'reward': 1343.0, 'rewardsteps': 960},
  7: {'reward': 1285.0, 'rewardsteps': 953},
  8: {'reward': 1244.0, 'rewardsteps': 955},
  9: {'reward': 1196.5, 'rewardsteps': 949},
  'agentsteps': 1026,
  4: {'reward': 1380.0, 'rewardsteps': 947},
  'totalsteps': 172}}

In [18]:
# Debug check: is the simulator using its default fire initialisation?
# NOTE(review): this cell's execution count (18) predates the configuration
# cell (19), so its recorded output may not reflect the current fire_init.
fire_init is None

False