## Setup

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T

import numpy as np

import timeit

In [4]:
from datetime import datetime
from FireSimulator import *
from FireSimulatorUtilities import *
import glob
import itertools
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import os
import pickle
import time

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0)
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

%load_ext autoreload
%autoreload 2

## Utility functions

In [6]:
class Flatten(nn.Module):
    def forward(self, x):
        N, C, H, w = x.size() 
        return x.view(N, -1)

## Load data

In [7]:
directory = os.path.join(os.getcwd(), 'data')

pattern = os.path.join(directory,'states_seed_*')

data = {}
k = 0
total_states = 0
for file in glob.glob(pattern):
    fh = open(file, 'rb')
    sub_data = pickle.load(fh)
    data[k] = sub_data
    fh.close()
    k += 1
    total_states += sub_data.shape[0]
    
print('loaded %d sims from file' %(k))
print('for a total of %d states' %(total_states))

loaded 50 sims from file
for a total of 7066 states


## Network datatype [cpu/gpu]

In [8]:
torch.cuda.is_available()

True

In [9]:
# dtype = torch.FloatTensor
dtype = torch.cuda.FloatTensor

## Build the network

In [10]:
class eelfff_net(nn.Module):
    """
    network to approximate Q function
    """
    def __init__(self, act_seq=8, img_dim=8):
        self.act_seq = act_seq
        self.img_dim = img_dim
        
        C, H, W = 1, img_dim, img_dim
        hidden_dim = 2048
        
        # conv layer settings
        nf1 = 128; nf2 = 128
        fs1 = 2; fs2 = 2
        cv_s1 = 1; cv_s2 = 1
        cv_p1 = 0; cv_p2 = 0
        
        # pool layer settings
        p_sz1 = 4; p_sz2 = 2
        p_st1 = 1; p_st2 = 1
        
        # calculate affine layer size
        Hp1 = 1 + (H + 2*cv_p1 - fs1) // cv_s1
        Wp1 = Hp1
        Hpp1 = 1 + (Hp1 - p_sz1) // p_st1
        Wpp1 = Hpp1
        
        Hp2 = 1 + (Hpp1 + 2*cv_p2 - fs2) // cv_s2
        Wp2 = Hp2
        Hpp2 = 1 + (Hp2 - p_sz2) // p_st2
        Wpp2 = Hpp2
        
        #aff_flat_size = nf2*Hpp2*Wpp2 + 2*act_seq
        aff_flat_size = nf1*Hpp1*Wpp1 + 2*(act_seq+1)
        
        super(eelfff_net, self).__init__()
        # cnn structure
        self.cnn = nn.Sequential(
                        nn.Conv2d(C, nf1, kernel_size=fs1, stride=cv_s1, padding=cv_p1),
                        nn.ReLU(inplace=True),
                        nn.MaxPool2d(p_sz1,stride=p_st1),
                        #nn.Conv2d(nf1, nf2, kernel_size=fs2, stride=cv_s2, padding=cv_p2),
                        #nn.ReLU(inplace=True),
                        #nn.MaxPool2d(p_sz2,stride=p_st2),
                        Flatten()
                    )
        
        # nonlinear structure
        self.aff = nn.Sequential(
                        nn.Linear(aff_flat_size, hidden_dim),
                        nn.ReLU(inplace=True),
                        nn.Linear(hidden_dim, 5*act_seq)
                    )
        
    def forward(self, img, act):
        img_exp = img.unsqueeze(1)
        feat = self.cnn(img_exp)
        feat = torch.cat((feat, act), dim=1)
        Q = self.aff(feat)
        
        return Q.view(N,5,self.act_seq)

test implementation of network with random data

In [12]:
tic = time.clock()
N = 4
img_dim = 8
act_seq = 8
model = eelfff_net(img_dim, act_seq).type(dtype)

img = torch.randn(N,img_dim,img_dim).type(dtype)
act = torch.randn(N,2*(act_seq+1)).type(dtype)

img_var = Variable(img)
act_var = Variable(act)

Q = model(img_var, act_var)
toc = time.clock()

print(Q.size())
print("%0.2fs = %0.2fm elapsed for this test" %(toc-tic,(toc-tic)/60))

torch.Size([4, 5, 8])
25.17s = 0.42m elapsed for this test


## Define a reward function

In [13]:
def eelfff_reward(states, trajs, fire_flags, other_trajs):
    N = states.shape[0]
    grid_size = states[0].shape[0]
    center = math.ceil(grid_size/2)
    neighbors = [(-1,0),(0,-1),(1,0),(0,1)]
    #reward = Variable(torch.zeros(1), requires_grad=True).type(dtype)
    reward = 0
    
    for n in range(N):
        st = states[n,:,:]
        traj = trajs[n]
        other_traj = other_trajs[n]
        has_fires = fire_flags[n]
        
        # reward for treating fires and boundary fires
        # that weren't already treated by the agent
        if has_fires:
            treated = []
            for (x,y) in traj:
                r = y_to_row(grid_size,y)
                c = x_to_col(x)

                # reward for treating a fire
                if st[r,c] == 1 and (x,y) not in treated:
                    reward += 1

                    counter = 0 
                    for (dc,dr) in neighbors:
                        rn = r + dr
                        cn = c + dc
                        if rn>=0 and rn<grid_size and cn>=0 and cn<grid_size and st[rn,cn] == 0:
                            counter += 1

                    # bonus for treating a boundary fire
                    if counter >= 2:
                        reward += 4
                        
                    treated.append((x,y))
           
        # reward for approaching center [if no fires in image]
        else:
            for k in range(len(traj)-1):
                x1, y1 = traj[k]
                x2, y2 = traj[k+1]
                if np.abs(x2-center)+np.abs(y2-center) < np.abs(x1-center)+np.abs(y1-center):
                    reward += 1.0/(len(traj)-1)
    
        # penalty for intersecting with 'nearest agent'
        if not set(traj).isdisjoint(other_traj):
            reward += -2*len(set(traj).intersection(other_traj))            
    
    return reward/N

test reward function with random data

In [14]:
tic = time.clock()

states = np.zeros((3,5,5)).astype(np.uint8)
states[:,2,2] = 1
trajs = []
trajs.append([(5,5),(5,5),(4,4)])
trajs.append([(3,3),(3,3),(3,3)])
trajs.append([(5,5),(4,4),(3,3)])
other_trajs = []
other_trajs.append([(1,1),(1,2),(1,1)])
other_trajs.append([(1,1),(1,2),(1,3)])
other_trajs.append([(1,1),(2,2),(3,3)])
fire_flags = np.zeros(3, dtype=bool)
fire_flags[1:-1] = True

reward = eelfff_reward(states, trajs, fire_flags, other_trajs)
print('minibatch reward: %0.2f' %reward)

toc = time.clock()
print("%0.2fs = %0.2fm elapsed for this test" %(toc-tic,(toc-tic)/60))

minibatch reward: 1.50
0.00s = 0.00m elapsed for this test


## Train the network

In [20]:
# simulator and network parameters
seeds = range(1)
grid_size = 50
num_agents = [2,5,10,25]
D = []
dp = 0.15/0.2763
act_seq = 8
img_dim = 8

# agent initialization parameters
spawn_loc = np.arange(grid_size//3//2,grid_size,grid_size//3)
perturbs = np.arange(-grid_size//3//2+1,grid_size//3//2+1,1)

# create network instance
model = eelfff_net(act_seq=act_seq, img_dim=img_dim)
model.train()

# optimizer and its parameters
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-2)

In [44]:
# run simulator many times
for s in seeds:
    np.random.seed(s)
    
    # initialize simulator
    sim = FireSimulator(grid_size, rng=s)
    
    # initialize agent positions
    n = np.squeeze(np.random.choice(num_agents, 1))
    agent_pos = np.random.choice(spawn_loc, (n,2)) + np.random.choice(perturbs, (n,2))
        
    # run to termination
    while not sim.end:
        
        control = []
        new_agent_pos = np.zeros((n,2))
        
        # generate control
        for i in range(n):
            # generate image
            img, img_st, all_hlthy = CreateImageBW(sim.state, agent_pos[i,:])
            #print(all_healthy)
            #plt.imshow(img, cmap='gray', vmin=0, vmax=255)
            
            # find nearest neighbor and their trajectory
            
            # generate trajectory using network
            
            # find control from trajectory
            
            # add to replay memory
            5/0
        
        # step simulator
        sim.step(control, dbeta=dp)
        
        # update agent position
        agent_pos = new_agent_pos
        
    # create minibatch from replay memory and update network
    
    # drop from memory if too many elements
        

ZeroDivisionError: division by zero

In [42]:
all_hlthy

True

In [35]:
img

array([[255, 255, 255, 255, 255, 255, 255, 255],
       [255,   0, 255, 255, 255, 255, 255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255]], dtype=uint8)