In [1]:
import gym
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical
import time
import numpy as np

In [2]:
#Hyperparameters
learning_rate = 0.0005
gamma         = 0.98
lmbda         = 0.95
eps_clip      = 0.1
K_epoch       = 2
T_horizon     = 20

In [7]:
class Agent(nn.Module):
    def __init__(self,input_dim,output_dim):
        super(Agent,self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.data = []
        
        self.layer_1 = nn.Linear(input_dim,64)
        self.lstm_layer = nn.LSTM(64,32)
        self.actor = nn.Linear(32,output_dim)
        self.critic = nn.Linear(32,1)
        
        self.optimizer = optim.Adam(self.parameters(),lr = learning_rate)
        
    def get_action(self,x,hidden):
        x = F.relu(self.layer_1(x))
        x = x.view(-1,1,64)
        x, lstm_hidden = self.lstm_layer(x,hidden)
        
        x = self.actor(x)
        prob = F.softmax(x,dim = 2)
        return prob, lstm_hidden
    
    def get_value(self,x,hidden):
        x = F.relu(self.layer_1(x))
        x = x.view(-1,1,64)
        x, lstm_hidden = self.lstm_layer(x,hidden)
        v = self.critic(x)
        return v
    
    def put_data(self,x):
        self.data.append(x)
    
    def make_batch(self):
        state_lst, action_lst,reward_lst,next_state_lst,prob_lst,hidden_in_lst,\
        hidden_out_lst, done_lst = [],[],[],[],[],[],[],[]
        for transition in self.data:
            state,action,reward,next_state, prob,hidden_in,hidden_out,done = transition
            
            state_lst.append(state)
            action_lst.append([action])
            reward_lst.append([reward])
            next_state_lst.append(next_state)
            prob_lst.append([prob])
            hidden_in_lst.append(hidden_in)
            hidden_out_lst.append(hidden_out)
            done_mask = 0 if done else 1
            done_lst.append([done_mask])
            
        state = torch.tensor(state_lst,dtype = torch.float)
        action = torch.tensor(action_lst)
        reward = torch.tensor(reward_lst)
        next_state = torch.tensor(next_state_lst,dtype = torch.float)
        prob = torch.tensor(prob_lst, dtype=torch.float)
        done_mask = torch.tensor(done_lst,dtype = torch.float)
        self.data = []
        return state,action,reward,next_state,prob,hidden_in_lst[0],hidden_out_lst[0],done_mask ##test

    def train(self):
        state,action,reward,next_state,prob,(hidden_in_1,hidden_in_2),\
        (hidden_out_1,hidden_out_2),done_mask = self.make_batch()
        
        first_hidden = (hidden_in_1.detach(),hidden_in_2.detach())
        second_hidden = (hidden_out_1.detach(),hidden_out_2.detach())
        for i in range(K_epoch):
            td_error = self.get_value(next_state,second_hidden).squeeze(1) ##test
            td_target = reward + gamma * td_error * done_mask
            td_value = self.get_value(state,first_hidden).squeeze(1)
            delta = td_target - td_value
            delta = delta.detach().numpy()
            
            advantage_lst = []
            advantage = 0.0
            for item in delta[::-1]:
                advantage = gamma * lmbda * advantage + item[0] ##test
                advantage_lst.append([advantage])
            advantage_lst.reverse()
            advantage = torch.tensor(advantage_lst, dtype = torch.float)
            
            action_prob, _ = self.get_action(state, first_hidden)
            action_selected = action_prob.squeeze(1).gather(1,action)
            ratio = torch.exp(torch.log(action_selected) - torch.log(prob))
            
            surr_1 = ratio * advantage
            surr_2 = torch.clamp(ratio, 1 - eps_clip, 1 + eps_clip)
            loss = -torch.min(surr_1,surr_2) + F.smooth_l1_loss(td_value,td_target.detach())
            self.optimizer.zero_grad()
            loss.mean().backward(retain_graph = True)
            self.optimizer.step()

In [8]:
import numpy as np
import os
from Building import Building
#from Agent import Agent
import time
#====================================================================================


#====================================================================================
#Building Setting
lift_num = 1
buliding_height = 5
max_people_in_floor = 8
max_people_in_elevator = 10

add_people_at_step = 25
add_people_prob = 0.8

#Create building with 4 elevators, height 10, max people 30 in each floor
building = Building(lift_num, buliding_height, max_people_in_floor,max_people_in_elevator)

#Agent controls each elevator
#agent = Agent(buliding_height, lift_num, 4)
#agent.reload(280)
#The goal is to bring down all the people in the building to the ground floor

epochs = 1000
max_steps = 100
global_step = 0
T_horizon = 20
reward_list = []
print_interval = 20

In [9]:
model = Agent((buliding_height)+ max_people_in_elevator + (lift_num *2),4)
print_interval = 20
ave_reward = 0 

In [33]:
epochs = 100000

In [34]:
for epoch in range(epochs):
    building.empty_building()
    h_out = (torch.zeros([1, 1, 32], dtype=torch.float), torch.zeros([1, 1, 32], dtype=torch.float))
    while building.target == 0 :
        building.generate_people(add_people_prob)
    first_state = building.target
    state = building.get_state()
    done = False
    global_step = 0
    while not done:
        for t in range(T_horizon):
            h_in = h_out
            global_step += 1
            if (global_step % 25 == 0) & global_step > 0 :
                #building.generate_people(add_people_prob/2)
                pass
            action_prob, h_out = model.get_action(torch.from_numpy(np.array(state)).float(), h_in)
            action_prob = action_prob.view(-1)
            m = Categorical(action_prob)
            action = m.sample().item()
            building.perform_action([action])
            reward = building.get_reward() 
            
            next_state = building.get_state()
            finished = next_state.copy()
            del finished[-2:]
            if (sum(finished) == 0.0) :
                reward = 100. #* building.target
                done = True
            #print(sum(finished))
            #print('global_step : ',global_step,'state : ',state, 'action : ', action, 'reward : ',reward/float(first_state), 'done : ',done)
            #print('global_step : ',global_step,'state : ',state, 'action : ', action, 'reward : ',reward/10., 'done : ',done)
            #model.put_data((state, action, reward/float(first_state), next_state, action_prob[action].item(), done))
            #model.put_data((state, action, reward/10.0, next_state, action_prob[action].item(), done))
            model.put_data((state, action, reward/10.0, next_state, action_prob[action].item(), h_in, h_out, done))
            state = next_state
            
            if done or (global_step > 300):
                done = True
                break

        model.train()
    ave_reward += global_step 
    #print("Epoch: %d Step: %d Average Reward: %.4f"%(epoch, global_step, ave_reward/global_step))
    if epoch%print_interval==0 and epoch!=0:
        print("# of episode :{}, avg score : {:.1f}".format(epoch, ave_reward/print_interval))
        ave_reward = 0
    if (epoch % 1000 == 0 )& (epoch != 0):
        torch.save(model.state_dict(), './model_weights/recurrent_model_'+str(epoch))
    reward_list.append(global_step)

# of episode :20, avg score : 68.5
# of episode :40, avg score : 42.0
# of episode :60, avg score : 43.2
# of episode :80, avg score : 37.6
# of episode :100, avg score : 44.0
# of episode :120, avg score : 41.2
# of episode :140, avg score : 40.2
# of episode :160, avg score : 35.2
# of episode :180, avg score : 40.0
# of episode :200, avg score : 36.6
# of episode :220, avg score : 33.6
# of episode :240, avg score : 41.1
# of episode :260, avg score : 59.4
# of episode :280, avg score : 43.2
# of episode :300, avg score : 40.2
# of episode :320, avg score : 38.5
# of episode :340, avg score : 41.7
# of episode :360, avg score : 44.5
# of episode :380, avg score : 44.5
# of episode :400, avg score : 43.0
# of episode :420, avg score : 43.4
# of episode :440, avg score : 44.8
# of episode :460, avg score : 49.5
# of episode :480, avg score : 38.1
# of episode :500, avg score : 45.8
# of episode :520, avg score : 42.2
# of episode :540, avg score : 44.6
# of episode :560, avg score : 3

# of episode :4480, avg score : 33.3
# of episode :4500, avg score : 46.6
# of episode :4520, avg score : 38.6
# of episode :4540, avg score : 40.8
# of episode :4560, avg score : 40.9
# of episode :4580, avg score : 36.0
# of episode :4600, avg score : 34.9
# of episode :4620, avg score : 45.9
# of episode :4640, avg score : 34.3
# of episode :4660, avg score : 41.4
# of episode :4680, avg score : 40.7
# of episode :4700, avg score : 44.2
# of episode :4720, avg score : 36.3
# of episode :4740, avg score : 36.6
# of episode :4760, avg score : 41.2
# of episode :4780, avg score : 40.9
# of episode :4800, avg score : 40.2
# of episode :4820, avg score : 42.9
# of episode :4840, avg score : 38.2
# of episode :4860, avg score : 39.9
# of episode :4880, avg score : 41.5
# of episode :4900, avg score : 40.5
# of episode :4920, avg score : 43.4
# of episode :4940, avg score : 48.0
# of episode :4960, avg score : 42.4
# of episode :4980, avg score : 39.0
# of episode :5000, avg score : 40.7
#

# of episode :8920, avg score : 34.8
# of episode :8940, avg score : 39.1
# of episode :8960, avg score : 38.2
# of episode :8980, avg score : 42.0
# of episode :9000, avg score : 37.2
# of episode :9020, avg score : 37.3
# of episode :9040, avg score : 38.3
# of episode :9060, avg score : 39.6
# of episode :9080, avg score : 32.6
# of episode :9100, avg score : 34.4
# of episode :9120, avg score : 41.2
# of episode :9140, avg score : 44.5
# of episode :9160, avg score : 38.5
# of episode :9180, avg score : 33.3
# of episode :9200, avg score : 38.8
# of episode :9220, avg score : 41.7
# of episode :9240, avg score : 40.1
# of episode :9260, avg score : 35.5
# of episode :9280, avg score : 41.9
# of episode :9300, avg score : 38.0
# of episode :9320, avg score : 34.4
# of episode :9340, avg score : 35.5
# of episode :9360, avg score : 38.5
# of episode :9380, avg score : 38.0
# of episode :9400, avg score : 37.0
# of episode :9420, avg score : 39.0
# of episode :9440, avg score : 38.8
#

# of episode :13280, avg score : 34.2
# of episode :13300, avg score : 37.2
# of episode :13320, avg score : 37.5
# of episode :13340, avg score : 34.6
# of episode :13360, avg score : 35.1
# of episode :13380, avg score : 37.7
# of episode :13400, avg score : 35.4
# of episode :13420, avg score : 33.5
# of episode :13440, avg score : 37.0
# of episode :13460, avg score : 31.1
# of episode :13480, avg score : 30.6
# of episode :13500, avg score : 41.9
# of episode :13520, avg score : 31.2
# of episode :13540, avg score : 48.6
# of episode :13560, avg score : 34.4
# of episode :13580, avg score : 35.0
# of episode :13600, avg score : 35.3
# of episode :13620, avg score : 28.5
# of episode :13640, avg score : 33.5
# of episode :13660, avg score : 33.2
# of episode :13680, avg score : 39.0
# of episode :13700, avg score : 30.8
# of episode :13720, avg score : 30.4
# of episode :13740, avg score : 27.2
# of episode :13760, avg score : 36.6
# of episode :13780, avg score : 38.6
# of episode

# of episode :17600, avg score : 41.2
# of episode :17620, avg score : 30.6
# of episode :17640, avg score : 31.1
# of episode :17660, avg score : 30.2
# of episode :17680, avg score : 37.4
# of episode :17700, avg score : 31.4
# of episode :17720, avg score : 35.1
# of episode :17740, avg score : 26.6
# of episode :17760, avg score : 40.1
# of episode :17780, avg score : 34.9
# of episode :17800, avg score : 26.6
# of episode :17820, avg score : 32.9
# of episode :17840, avg score : 33.0
# of episode :17860, avg score : 33.0
# of episode :17880, avg score : 33.6
# of episode :17900, avg score : 34.7
# of episode :17920, avg score : 36.6
# of episode :17940, avg score : 33.0
# of episode :17960, avg score : 31.3
# of episode :17980, avg score : 28.4
# of episode :18000, avg score : 31.6
# of episode :18020, avg score : 37.3
# of episode :18040, avg score : 32.1
# of episode :18060, avg score : 35.2
# of episode :18080, avg score : 28.9
# of episode :18100, avg score : 31.1
# of episode

# of episode :21920, avg score : 31.6
# of episode :21940, avg score : 34.9
# of episode :21960, avg score : 28.4
# of episode :21980, avg score : 32.4
# of episode :22000, avg score : 37.9
# of episode :22020, avg score : 29.1
# of episode :22040, avg score : 32.4
# of episode :22060, avg score : 33.2
# of episode :22080, avg score : 31.2
# of episode :22100, avg score : 31.7
# of episode :22120, avg score : 33.5
# of episode :22140, avg score : 32.0
# of episode :22160, avg score : 31.3
# of episode :22180, avg score : 33.0
# of episode :22200, avg score : 26.5
# of episode :22220, avg score : 32.8
# of episode :22240, avg score : 32.8
# of episode :22260, avg score : 30.4
# of episode :22280, avg score : 31.9
# of episode :22300, avg score : 34.5
# of episode :22320, avg score : 35.3
# of episode :22340, avg score : 28.2
# of episode :22360, avg score : 29.6
# of episode :22380, avg score : 35.2
# of episode :22400, avg score : 29.1
# of episode :22420, avg score : 32.0
# of episode

# of episode :26240, avg score : 36.9
# of episode :26260, avg score : 32.8
# of episode :26280, avg score : 53.0
# of episode :26300, avg score : 35.7
# of episode :26320, avg score : 31.8
# of episode :26340, avg score : 32.8
# of episode :26360, avg score : 30.0
# of episode :26380, avg score : 30.1
# of episode :26400, avg score : 32.6
# of episode :26420, avg score : 30.5
# of episode :26440, avg score : 33.1
# of episode :26460, avg score : 30.1
# of episode :26480, avg score : 28.4
# of episode :26500, avg score : 31.6
# of episode :26520, avg score : 28.4
# of episode :26540, avg score : 29.0
# of episode :26560, avg score : 30.9
# of episode :26580, avg score : 30.1
# of episode :26600, avg score : 29.2
# of episode :26620, avg score : 32.6
# of episode :26640, avg score : 31.2
# of episode :26660, avg score : 29.6
# of episode :26680, avg score : 29.9
# of episode :26700, avg score : 29.6
# of episode :26720, avg score : 32.4
# of episode :26740, avg score : 31.2
# of episode

# of episode :30560, avg score : 28.1
# of episode :30580, avg score : 35.0
# of episode :30600, avg score : 30.4
# of episode :30620, avg score : 27.6
# of episode :30640, avg score : 30.1
# of episode :30660, avg score : 31.8
# of episode :30680, avg score : 33.1
# of episode :30700, avg score : 30.1
# of episode :30720, avg score : 31.4
# of episode :30740, avg score : 30.3
# of episode :30760, avg score : 29.4
# of episode :30780, avg score : 27.5
# of episode :30800, avg score : 28.7
# of episode :30820, avg score : 31.4
# of episode :30840, avg score : 28.6
# of episode :30860, avg score : 30.4
# of episode :30880, avg score : 29.7
# of episode :30900, avg score : 29.4
# of episode :30920, avg score : 34.7
# of episode :30940, avg score : 32.1
# of episode :30960, avg score : 30.9
# of episode :30980, avg score : 27.1
# of episode :31000, avg score : 30.9
# of episode :31020, avg score : 31.6
# of episode :31040, avg score : 37.2
# of episode :31060, avg score : 28.4
# of episode

# of episode :34880, avg score : 27.1
# of episode :34900, avg score : 25.2
# of episode :34920, avg score : 28.8
# of episode :34940, avg score : 26.9
# of episode :34960, avg score : 25.4
# of episode :34980, avg score : 28.6
# of episode :35000, avg score : 31.3
# of episode :35020, avg score : 32.9
# of episode :35040, avg score : 28.8
# of episode :35060, avg score : 27.9
# of episode :35080, avg score : 28.6
# of episode :35100, avg score : 27.8
# of episode :35120, avg score : 28.1
# of episode :35140, avg score : 28.6
# of episode :35160, avg score : 27.1
# of episode :35180, avg score : 26.2
# of episode :35200, avg score : 27.9
# of episode :35220, avg score : 28.2
# of episode :35240, avg score : 28.4
# of episode :35260, avg score : 47.6
# of episode :35280, avg score : 35.9
# of episode :35300, avg score : 35.1
# of episode :35320, avg score : 32.4
# of episode :35340, avg score : 29.3
# of episode :35360, avg score : 29.9
# of episode :35380, avg score : 29.9
# of episode

# of episode :39200, avg score : 30.8
# of episode :39220, avg score : 27.9
# of episode :39240, avg score : 29.2
# of episode :39260, avg score : 28.2
# of episode :39280, avg score : 26.8
# of episode :39300, avg score : 26.9
# of episode :39320, avg score : 26.4
# of episode :39340, avg score : 32.3
# of episode :39360, avg score : 30.6
# of episode :39380, avg score : 25.5
# of episode :39400, avg score : 26.4
# of episode :39420, avg score : 27.6
# of episode :39440, avg score : 27.4
# of episode :39460, avg score : 31.4
# of episode :39480, avg score : 27.9
# of episode :39500, avg score : 28.2
# of episode :39520, avg score : 27.9
# of episode :39540, avg score : 28.5
# of episode :39560, avg score : 31.0
# of episode :39580, avg score : 27.6
# of episode :39600, avg score : 27.6
# of episode :39620, avg score : 29.2
# of episode :39640, avg score : 26.1
# of episode :39660, avg score : 27.8
# of episode :39680, avg score : 26.2
# of episode :39700, avg score : 28.4
# of episode

# of episode :43520, avg score : 26.8
# of episode :43540, avg score : 27.4
# of episode :43560, avg score : 26.1
# of episode :43580, avg score : 25.8
# of episode :43600, avg score : 28.8
# of episode :43620, avg score : 30.9
# of episode :43640, avg score : 29.0
# of episode :43660, avg score : 27.9
# of episode :43680, avg score : 24.8
# of episode :43700, avg score : 26.1
# of episode :43720, avg score : 30.2
# of episode :43740, avg score : 26.6
# of episode :43760, avg score : 25.8
# of episode :43780, avg score : 29.2
# of episode :43800, avg score : 30.8
# of episode :43820, avg score : 30.8
# of episode :43840, avg score : 26.4
# of episode :43860, avg score : 25.9
# of episode :43880, avg score : 25.9
# of episode :43900, avg score : 28.1
# of episode :43920, avg score : 30.9
# of episode :43940, avg score : 26.4
# of episode :43960, avg score : 31.4
# of episode :43980, avg score : 29.8
# of episode :44000, avg score : 29.4
# of episode :44020, avg score : 27.1
# of episode

# of episode :47840, avg score : 27.5
# of episode :47860, avg score : 27.9
# of episode :47880, avg score : 25.3
# of episode :47900, avg score : 27.6
# of episode :47920, avg score : 24.8
# of episode :47940, avg score : 24.4
# of episode :47960, avg score : 23.9
# of episode :47980, avg score : 26.1
# of episode :48000, avg score : 24.9
# of episode :48020, avg score : 24.8
# of episode :48040, avg score : 25.6
# of episode :48060, avg score : 21.3
# of episode :48080, avg score : 24.8
# of episode :48100, avg score : 27.4
# of episode :48120, avg score : 24.2
# of episode :48140, avg score : 26.4
# of episode :48160, avg score : 26.2
# of episode :48180, avg score : 27.3
# of episode :48200, avg score : 25.5
# of episode :48220, avg score : 26.1
# of episode :48240, avg score : 25.4
# of episode :48260, avg score : 25.6
# of episode :48280, avg score : 25.2
# of episode :48300, avg score : 24.9
# of episode :48320, avg score : 29.2
# of episode :48340, avg score : 25.4
# of episode

# of episode :52160, avg score : 24.7
# of episode :52180, avg score : 26.9
# of episode :52200, avg score : 24.4
# of episode :52220, avg score : 26.4
# of episode :52240, avg score : 23.1
# of episode :52260, avg score : 41.7
# of episode :52280, avg score : 24.1
# of episode :52300, avg score : 25.3
# of episode :52320, avg score : 25.9
# of episode :52340, avg score : 21.3
# of episode :52360, avg score : 24.5
# of episode :52380, avg score : 27.8
# of episode :52400, avg score : 24.4
# of episode :52420, avg score : 26.8
# of episode :52440, avg score : 25.0
# of episode :52460, avg score : 25.2
# of episode :52480, avg score : 24.9
# of episode :52500, avg score : 26.2
# of episode :52520, avg score : 26.4
# of episode :52540, avg score : 25.6
# of episode :52560, avg score : 27.2
# of episode :52580, avg score : 22.2
# of episode :52600, avg score : 24.4
# of episode :52620, avg score : 25.9
# of episode :52640, avg score : 23.2
# of episode :52660, avg score : 26.7
# of episode

# of episode :56480, avg score : 23.1
# of episode :56500, avg score : 23.4
# of episode :56520, avg score : 21.4
# of episode :56540, avg score : 24.7
# of episode :56560, avg score : 26.4
# of episode :56580, avg score : 50.5
# of episode :56600, avg score : 25.1
# of episode :56620, avg score : 27.8
# of episode :56640, avg score : 27.6
# of episode :56660, avg score : 25.2
# of episode :56680, avg score : 31.9
# of episode :56700, avg score : 25.0
# of episode :56720, avg score : 24.9
# of episode :56740, avg score : 24.5
# of episode :56760, avg score : 24.4
# of episode :56780, avg score : 24.1
# of episode :56800, avg score : 21.6
# of episode :56820, avg score : 21.8
# of episode :56840, avg score : 37.2
# of episode :56860, avg score : 32.1
# of episode :56880, avg score : 25.4
# of episode :56900, avg score : 24.7
# of episode :56920, avg score : 24.9
# of episode :56940, avg score : 26.1
# of episode :56960, avg score : 24.1
# of episode :56980, avg score : 25.1
# of episode

# of episode :60800, avg score : 21.4
# of episode :60820, avg score : 21.2
# of episode :60840, avg score : 21.6
# of episode :60860, avg score : 20.6
# of episode :60880, avg score : 22.2
# of episode :60900, avg score : 22.2
# of episode :60920, avg score : 20.6
# of episode :60940, avg score : 19.9
# of episode :60960, avg score : 22.6
# of episode :60980, avg score : 20.8
# of episode :61000, avg score : 22.9
# of episode :61020, avg score : 22.6
# of episode :61040, avg score : 21.9
# of episode :61060, avg score : 25.0
# of episode :61080, avg score : 22.1
# of episode :61100, avg score : 23.7
# of episode :61120, avg score : 22.4
# of episode :61140, avg score : 23.8
# of episode :61160, avg score : 22.1
# of episode :61180, avg score : 29.0
# of episode :61200, avg score : 23.9
# of episode :61220, avg score : 20.6
# of episode :61240, avg score : 18.6
# of episode :61260, avg score : 23.3
# of episode :61280, avg score : 21.9
# of episode :61300, avg score : 20.9
# of episode

# of episode :65120, avg score : 21.9
# of episode :65140, avg score : 21.6
# of episode :65160, avg score : 22.4
# of episode :65180, avg score : 21.5
# of episode :65200, avg score : 22.9
# of episode :65220, avg score : 21.1
# of episode :65240, avg score : 21.3
# of episode :65260, avg score : 21.6
# of episode :65280, avg score : 18.9
# of episode :65300, avg score : 21.2
# of episode :65320, avg score : 21.8
# of episode :65340, avg score : 19.5
# of episode :65360, avg score : 23.8
# of episode :65380, avg score : 20.3
# of episode :65400, avg score : 21.4
# of episode :65420, avg score : 20.9
# of episode :65440, avg score : 19.8
# of episode :65460, avg score : 20.9
# of episode :65480, avg score : 21.5
# of episode :65500, avg score : 21.1
# of episode :65520, avg score : 21.4
# of episode :65540, avg score : 21.6
# of episode :65560, avg score : 19.1
# of episode :65580, avg score : 19.9
# of episode :65600, avg score : 21.9
# of episode :65620, avg score : 21.9
# of episode

# of episode :69440, avg score : 27.5
# of episode :69460, avg score : 20.2
# of episode :69480, avg score : 22.2
# of episode :69500, avg score : 22.1
# of episode :69520, avg score : 19.3
# of episode :69540, avg score : 20.1
# of episode :69560, avg score : 22.2
# of episode :69580, avg score : 22.8
# of episode :69600, avg score : 19.6
# of episode :69620, avg score : 20.7
# of episode :69640, avg score : 22.2
# of episode :69660, avg score : 20.9
# of episode :69680, avg score : 19.9
# of episode :69700, avg score : 23.5
# of episode :69720, avg score : 21.8
# of episode :69740, avg score : 19.8
# of episode :69760, avg score : 23.9
# of episode :69780, avg score : 20.6
# of episode :69800, avg score : 23.8
# of episode :69820, avg score : 18.4
# of episode :69840, avg score : 23.4
# of episode :69860, avg score : 20.5
# of episode :69880, avg score : 21.2
# of episode :69900, avg score : 24.6
# of episode :69920, avg score : 20.9
# of episode :69940, avg score : 28.9
# of episode

# of episode :73760, avg score : 20.6
# of episode :73780, avg score : 19.5
# of episode :73800, avg score : 20.6
# of episode :73820, avg score : 20.9
# of episode :73840, avg score : 20.8
# of episode :73860, avg score : 20.0
# of episode :73880, avg score : 21.2
# of episode :73900, avg score : 19.5
# of episode :73920, avg score : 19.9
# of episode :73940, avg score : 18.4
# of episode :73960, avg score : 21.6
# of episode :73980, avg score : 20.8
# of episode :74000, avg score : 20.4
# of episode :74020, avg score : 17.9
# of episode :74040, avg score : 20.0
# of episode :74060, avg score : 20.4
# of episode :74080, avg score : 19.9
# of episode :74100, avg score : 21.1
# of episode :74120, avg score : 19.9
# of episode :74140, avg score : 21.4
# of episode :74160, avg score : 22.1
# of episode :74180, avg score : 18.9
# of episode :74200, avg score : 19.0
# of episode :74220, avg score : 21.2
# of episode :74240, avg score : 19.7
# of episode :74260, avg score : 19.3
# of episode

# of episode :78080, avg score : 20.1
# of episode :78100, avg score : 23.1
# of episode :78120, avg score : 24.0
# of episode :78140, avg score : 21.9
# of episode :78160, avg score : 24.1
# of episode :78180, avg score : 17.9
# of episode :78200, avg score : 21.9
# of episode :78220, avg score : 21.1
# of episode :78240, avg score : 20.4
# of episode :78260, avg score : 17.2
# of episode :78280, avg score : 20.8
# of episode :78300, avg score : 22.9
# of episode :78320, avg score : 20.8
# of episode :78340, avg score : 19.2
# of episode :78360, avg score : 17.9
# of episode :78380, avg score : 20.3
# of episode :78400, avg score : 20.1
# of episode :78420, avg score : 19.7
# of episode :78440, avg score : 18.4
# of episode :78460, avg score : 18.6
# of episode :78480, avg score : 18.8
# of episode :78500, avg score : 20.5
# of episode :78520, avg score : 20.4
# of episode :78540, avg score : 19.1
# of episode :78560, avg score : 28.6
# of episode :78580, avg score : 21.4
# of episode

# of episode :82400, avg score : 20.6
# of episode :82420, avg score : 19.4
# of episode :82440, avg score : 19.6
# of episode :82460, avg score : 18.9
# of episode :82480, avg score : 18.1
# of episode :82500, avg score : 20.4
# of episode :82520, avg score : 21.9
# of episode :82540, avg score : 21.5
# of episode :82560, avg score : 19.9
# of episode :82580, avg score : 21.6
# of episode :82600, avg score : 23.2
# of episode :82620, avg score : 20.8
# of episode :82640, avg score : 20.2
# of episode :82660, avg score : 21.4
# of episode :82680, avg score : 21.4
# of episode :82700, avg score : 17.4
# of episode :82720, avg score : 21.8
# of episode :82740, avg score : 20.9
# of episode :82760, avg score : 18.9
# of episode :82780, avg score : 19.8
# of episode :82800, avg score : 21.2
# of episode :82820, avg score : 20.4
# of episode :82840, avg score : 22.3
# of episode :82860, avg score : 20.5
# of episode :82880, avg score : 19.2
# of episode :82900, avg score : 20.1
# of episode

# of episode :86720, avg score : 20.1
# of episode :86740, avg score : 19.9
# of episode :86760, avg score : 19.4
# of episode :86780, avg score : 22.5
# of episode :86800, avg score : 20.2
# of episode :86820, avg score : 21.9
# of episode :86840, avg score : 19.4
# of episode :86860, avg score : 17.9
# of episode :86880, avg score : 20.6
# of episode :86900, avg score : 19.1
# of episode :86920, avg score : 20.4
# of episode :86940, avg score : 19.8
# of episode :86960, avg score : 19.5
# of episode :86980, avg score : 20.1
# of episode :87000, avg score : 18.6
# of episode :87020, avg score : 20.8
# of episode :87040, avg score : 19.5
# of episode :87060, avg score : 18.6
# of episode :87080, avg score : 22.1
# of episode :87100, avg score : 19.9
# of episode :87120, avg score : 19.1
# of episode :87140, avg score : 20.6
# of episode :87160, avg score : 19.5
# of episode :87180, avg score : 20.5
# of episode :87200, avg score : 19.7
# of episode :87220, avg score : 18.4
# of episode

# of episode :91040, avg score : 19.7
# of episode :91060, avg score : 18.6
# of episode :91080, avg score : 20.2
# of episode :91100, avg score : 18.5
# of episode :91120, avg score : 19.7
# of episode :91140, avg score : 19.8
# of episode :91160, avg score : 20.4
# of episode :91180, avg score : 18.9
# of episode :91200, avg score : 19.6
# of episode :91220, avg score : 18.2
# of episode :91240, avg score : 21.4
# of episode :91260, avg score : 18.6
# of episode :91280, avg score : 19.8
# of episode :91300, avg score : 19.9
# of episode :91320, avg score : 20.2
# of episode :91340, avg score : 17.9
# of episode :91360, avg score : 18.0
# of episode :91380, avg score : 21.9
# of episode :91400, avg score : 20.9
# of episode :91420, avg score : 17.5
# of episode :91440, avg score : 19.8
# of episode :91460, avg score : 19.7
# of episode :91480, avg score : 20.2
# of episode :91500, avg score : 20.5
# of episode :91520, avg score : 20.7
# of episode :91540, avg score : 17.2
# of episode

# of episode :95360, avg score : 18.3
# of episode :95380, avg score : 21.6
# of episode :95400, avg score : 20.1
# of episode :95420, avg score : 19.1
# of episode :95440, avg score : 21.2
# of episode :95460, avg score : 19.1
# of episode :95480, avg score : 18.1
# of episode :95500, avg score : 19.6
# of episode :95520, avg score : 19.2
# of episode :95540, avg score : 20.4
# of episode :95560, avg score : 19.8
# of episode :95580, avg score : 18.4
# of episode :95600, avg score : 20.0
# of episode :95620, avg score : 22.8
# of episode :95640, avg score : 20.0
# of episode :95660, avg score : 21.8
# of episode :95680, avg score : 21.3
# of episode :95700, avg score : 18.4
# of episode :95720, avg score : 19.4
# of episode :95740, avg score : 21.4
# of episode :95760, avg score : 20.4
# of episode :95780, avg score : 18.7
# of episode :95800, avg score : 18.9
# of episode :95820, avg score : 19.1
# of episode :95840, avg score : 18.5
# of episode :95860, avg score : 17.5
# of episode

# of episode :99680, avg score : 20.2
# of episode :99700, avg score : 17.6
# of episode :99720, avg score : 18.6
# of episode :99740, avg score : 19.1
# of episode :99760, avg score : 19.5
# of episode :99780, avg score : 20.4
# of episode :99800, avg score : 19.6
# of episode :99820, avg score : 18.8
# of episode :99840, avg score : 19.4
# of episode :99860, avg score : 19.8
# of episode :99880, avg score : 17.9
# of episode :99900, avg score : 19.1
# of episode :99920, avg score : 19.1
# of episode :99940, avg score : 20.4
# of episode :99960, avg score : 25.1
# of episode :99980, avg score : 19.8


In [35]:
torch.save(model.state_dict(), './model_weights/recurrent_model_'+str(100000))