In [1]:
from model import DQN
import os
import minerl
import gym
import torch
import torch.nn.functional as F
import torch.optim as optim
import ray
import pandas as pd
import gc
import asyncio
from _collections import deque
from utils import *
import random
import copy

def learner_append_sample(memory, model, target_model, state, action, reward, next_state, done):
    # Caluclating Priority (TD Error)
    target = model(state.float()).data.cpu()
    old_val = target[0][action].cpu()
    target_val = target_model(next_state.float()).data.cpu()
    if done:
        target[0][action] = reward
    else:
        target[0][action] = reward + 0.99 * torch.max(target_val)

    error = abs(old_val - target[0][action])
    error = error.cpu()
    memory.add.remote(error, [state, action, reward, next_state, done])





In [2]:
@ray.remote
class Actor:
    def __init__(self, learner, actor_idx, startEpsilon, endEpsilon, paramServer, action_size):
        # environment initialization
        self.actor_idx = actor_idx
        self.env = gym.make("MineRLTreechop-v0")
        self.port_number = int("12340") + actor_idx
        print("actor environment %d initialize successfully" % self.actor_idx)
        self.env.make_interactive(port=self.port_number, realtime=False)
        self.shared_network_cpu = ray.get(learner.get_network.remote())
        # self.shared_memory = ray.get(shared_memory_id)
        # print("shared memory assign successfully")
        
        # network initalization
        self.actor_network = DQN(action_size).cpu()
        self.actor_target_network = DQN(action_size).cpu()
        self.actor_network.load_state_dict(self.shared_network_cpu.state_dict())
        self.actor_target_network.load_state_dict(self.actor_network.state_dict())
        print("actor network %d initialize successfully" % self.actor_idx)

        self.initialized = False
        self.epi_counter = 0
        # exploring info
        self.startEpsilon = startEpsilon
        self.endEpsilon = endEpsilon
        self.max_episodes = 100

        self.paramServer = paramServer
        
    
    # 1. 네트워크 파라미터 복사
    # 2. 환경 탐험 (초기화, 행동)
    # 3. 로컬버퍼에 저장
    # 4. priority 계산
    # 5. 글로벌 버퍼에 저장
    # 6. 주기적으로 네트워크 업데이트

    def get_initialized(self):
        return self.initialized

    def get_counter(self):
        return self.epi_counter

    # 각 환경 인스턴스에서 각 엡실론에 따라 탐험을 진행한다.
    # 탐험 과정에서 local buffer에 transition들을 저장한다.
    # local buffer의 개수가 특정 개수 이상이면 global buffer에 추가해준다.

    def explore(self, learner, shared_memory):
        
        self.initialized = True
        stepDrop = (self.startEpsilon - self.endEpsilon) / self.max_episodes
        epsilon = self.startEpsilon
        total_steps = 0
        
        episodes = [x for x in range(self.max_episodes)]
        train_stats = pd.DataFrame(index=episodes, columns=['rewards'])
        
        for num_epi in range(self.max_episodes):
            obs = self.env.reset()
            state = converter(obs).cpu().float()
            done = False
            total_reward = 0
            steps = 0
            if (epsilon > self.endEpsilon):
                epsilon -= stepDrop
                
            while not done:
                steps += 1
                total_steps += 1
                a_out = self.actor_network.sample_action(state, epsilon)
                action_index = a_out
                action = make_action2(self.env, action_index)
                obs_prime, reward, done, info = self.env.step(action)
                total_reward += reward
                state_prime = converter(obs_prime)

                self.actor_append_sample(shared_memory, self.actor_network, self.actor_target_network, \
                                       state, action_index, reward, state_prime, done)

                state = state_prime.float().cpu()
                if done:
                    break
                    
            # pandas로 리워드 기록하기
            print("%d episode is done" % num_epi)
            print("total rewards : %d " % total_reward)
            train_stats.loc[num_epi]['rewards'] = total_reward
            train_stats.to_csv('train_stat_minerl_agent {}.csv'.format(str(self.actor_idx)))
            
  
            self.pull_parameters(learner) 
            print("actor network is updated ")
            print("actor target_network is updated")
    
    def pull_parameters(self, learner):
        ray.get(self.paramServer.pull_parameters.remote(learner)) 
        policy_params, target_params = ray.get(self.paramServer.return_parameters.remote())
        self.actor_network.load_state_dict(policy_params)
        self.actor_target_network.load_state_dict(target_params)
        
    def env_close(self):
        self.env.close()        

    def actor_append_sample(self, memory, model, target_model, state, action, reward, next_state, done):
        # Caluclating Priority (TD Error)
        target = model(state.float()).data.cpu()
        old_val = target[0][action].cpu()
        target_val = target_model(next_state.float()).data.cpu()
        if done:
            target[0][action] = reward
        else:
            target[0][action] = reward + 0.99 * torch.max(target_val)

        error = abs(old_val - target[0][action])
        error = error.cpu()
        memory.add.remote(error, [state, action, reward, next_state, done])






In [3]:
@ray.remote
class ParameterServer:
    def __init__(self, action_size):
        self.policy_params = DQN(action_size).state_dict()
        self.target_params = DQN(action_size).state_dict()
    
    def pull_parameters(self, learner):
        learner.push_parameters.remote(self.policy_params, self.target_params)
        return 1
    
    def return_parameters(self):
        return self.policy_params, self.target_params
    

In [4]:
@ray.remote(num_gpus=1)
class Learner:
    def __init__(self, network, batch_size, paramServer, action_size):
        self.learner_network = DQN(action_size).cuda().float()
        self.learner_target_network = DQN(action_size).cuda().float()
        self.learner_network.load_state_dict(network.state_dict())
        self.learner_target_network.load_state_dict(network.state_dict())
        self.shared_network = DQN(action_size).cpu()
        self.shared_target_network = DQN(action_size).cpu()
        
        self.paramServer = paramServer
        
        self.count = 0
        self.batch_size = batch_size
        self.max_counts= 100000

    # 1. sampling
    # 2. calculate gradient
    # 3. weight update
    # 4. compute priorities
    # 5. priorities of buffer update
    # 6. remove old memory
    
    def push_parameters(self, server_policy_params, server_target_params):
        self.shared_network.load_state_dict(self.learner_network.state_dict())
        self.shared_target_network.load_state_dict(self.learner_target_network.state_dict())
        policy_net_params = self.shared_network.state_dict()
        target_net_params = self.shared_target_network.state_dict()
        server_policy_params = policy_net_params
        server_target_params = target_net_params
        
    def count(self):
        return self.count
    
    def get_network(self):
        self.shared_network.load_state_dict(self.learner_network.state_dict())
        print("return learner network")
        return self.shared_network
    
    def get_target_network(self):
        self.shared_target_network.load_state_dict(self.learner_target_network.state_dict())
        return self.shared_target_network

    def update_network(self, memory, batch_size, optimizer):
        print("started")

        
        counts = [x for x in range(self.max_counts)]
        train_stats = pd.DataFrame(index=counts, columns=['loss'])
        while(self.count < 10000000):
            agent_batch, agent_idxs, agent_weights = ray.get(memory.sample.remote(batch_size))
            state_list = []
            action_list = []
            reward_list = []
            next_state_list = []
            done_mask_list = []

            #print("agent batch len : {} ".format(str(len(agent_batch))))
            for agent_transition in agent_batch:
                s, a, r, s_prime, done_mask = agent_transition
                state_list.append(s)
                action_list.append([a])
                reward_list.append([r])
                next_state_list.append(s_prime)
                done_mask_list.append([done_mask])

            s = torch.stack(state_list).float().cuda()
            a = torch.tensor(action_list, dtype=torch.int64).cuda()
            r = torch.tensor(reward_list).cuda()
            s_prime = torch.stack(next_state_list).float().cuda()
            done_mask = torch.tensor(done_mask_list).float().cuda()

            q_vals = self.learner_network(s)
            state_action_values = q_vals.gather(1, a)

            # comparing the q values to the values expected using the next states and reward
            next_state_values = self.learner_target_network(s_prime).max(1)[0].unsqueeze(1)
            target = r + (next_state_values * gamma * done_mask)

            # calculating the q loss, n-step return lossm supervised_loss
            is_weights = torch.FloatTensor(agent_weights).to(device)
            q_loss = (is_weights * F.mse_loss(state_action_values, target)).mean()
            #supervised_loss = margin_loss(q_vals, a, 1, 1)

            loss = q_loss #+ supervised_loss
            errors = torch.abs(state_action_values - target).data.cpu().detach()
            errors = errors.numpy()
            # update priority
            for i in range(batch_size):
                idx = agent_idxs[i]
                memory.update.remote(idx, errors[i])

            train_stats.loc[self.count ]['loss'] = float(loss.item())
            train_stats.to_csv('train_stat_minerl_learner.csv')

            # optimization step and logging
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            self.count +=1
            if(self.count % 50 == 0 and self.count != 0):
                self.learner_target_network.load_state_dict(self.learner_network.state_dict())
                print("Count : {} leaner_target_network updated".format(self.count))
                
            if(self.count % 10 == 0 and self.count!= 0):
                print("Count : {} leaner_network updated".format(self.count))
                torch.save(self.learner_network.state_dict(),"apex_dqfd_learner_9action.pth")
                print("learner model saved")
                




In [5]:
ray.init()

#하이퍼 파라미터
learning_rate = 0.0001
gamma = 0.999
buffer_limit = 50000
L1 = 0.9
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

root_path = os.curdir
model_path = root_path + '/dqn_model/'
action_size = 9

2021-06-10 18:08:05,470	INFO services.py:1269 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


In [6]:
policy_net = DQN(action_size).cuda()
target_net = DQN(action_size).cuda()
target_net.load_state_dict(policy_net.state_dict())
memory = Memory.remote(40000)
optimizer = optim.Adam(policy_net.parameters(), lr=learning_rate, weight_decay=1e-5)

# Copy network params from pretrained Agent
#model_path = './dqn_model/apex_dqfd_learner.pth'
#policy_net.load_state_dict(torch.load(model_path, map_location='cuda:0'))
#target_net.load_state_dict(policy_net.state_dict())


In [7]:
params_server = ParameterServer.remote(action_size)

In [8]:
# learner network initialzation
batch_size = 512

learner = Learner.remote(policy_net, batch_size, params_server, action_size)

In [9]:
# actor network, environments initialization
# Generating each own instances

actor1 = Actor.remote(learner, 0, 0.1, 0.01, params_server, action_size)
actor2 = Actor.remote(learner, 1, 0.05, 0.01, params_server,action_size)
#actor3 = Actor.remote(learner, 2, 0.1, 0.01, params_server, action_size)
actor_list = [actor1, actor2]
#actor_list = [actor1, actor2, actor3]

[2m[36m(pid=7999)[0m Memory is initialized




[2m[36m(pid=7996)[0m return learner network
[2m[36m(pid=7998)[0m actor environment 0 initialize successfully
[2m[36m(pid=7998)[0m actor network 0 initialize successfully
[2m[36m(pid=7996)[0m return learner network
[2m[36m(pid=7993)[0m actor environment 1 initialize successfully
[2m[36m(pid=7993)[0m actor network 1 initialize successfully


Traceback (most recent call last):
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/ray/_private/monitor.py", line 284, in run
    self._run()
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/ray/_private/monitor.py", line 175, in _run
    self.update_load_metrics()
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/ray/_private/monitor.py", line 140, in update_load_metrics
    request, timeout=4)
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/grpc/_channel.py", line 923, in __call__
    return _end_unary_response_blocking(state, call, False, None)
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/grpc/_channel.py", line 826, in _end_unary_response_blocking
    raise _InactiveRpcError(state)
grpc._channel._InactiveRpcError: <_InactiveRpcError of RPC that terminated with:
	status = StatusCode.DEADLINE_EXCEEDED
	details = "Deadline Exceeded"
	debug_error_string = "{"created":"@162

In [10]:
explore = [actor.explore.remote(learner, memory) for actor in actor_list]


[2m[36m(pid=7993)[0m MineRL agent is public, connect on port 12341 with Minecraft 1.11
[2m[36m(pid=7998)[0m MineRL agent is public, connect on port 12340 with Minecraft 1.11


[2m[36m(pid=7993)[0m 0 episode is done
[2m[36m(pid=7993)[0m total rewards : 0 
[2m[36m(pid=7993)[0m actor network is updated 
[2m[36m(pid=7993)[0m actor target_network is updated


2021-06-11 15:13:48,971	ERROR worker.py:1056 -- Possible unhandled error from worker: [36mray::Learner.push_parameters()[39m (pid=7996, ip=192.168.0.22)
  File "python/ray/_raylet.pyx", line 505, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 449, in ray._raylet.execute_task.function_executor
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/ray/_private/function_manager.py", line 556, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "<ipython-input-4-7598620ccee7>", line 25, in push_parameters
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1052, in load_state_dict
    self.__class__.__name__, "\n\t".join(error_msgs)))
RuntimeError: Error(s) in loading state_dict for DQN:
	While copying the parameter named "conv1.weight", whose dimensions in the model are torch.Size([32, 3, 8, 8]) and whose dimensions in the checkpoint are torch.Size([32, 3, 8, 8]), an exce

[2m[36m(pid=7998)[0m 0 episode is done
[2m[36m(pid=7998)[0m total rewards : 0 
[2m[36m(pid=7998)[0m actor network is updated 
[2m[36m(pid=7998)[0m actor target_network is updated


[2m[36m(pid=7993)[0m MineRL agent is public, connect on port 12341 with Minecraft 1.11
2021-06-11 15:14:05,902	ERROR worker.py:1056 -- Possible unhandled error from worker: [36mray::Learner.push_parameters()[39m (pid=7996, ip=192.168.0.22)
  File "python/ray/_raylet.pyx", line 505, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 449, in ray._raylet.execute_task.function_executor
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/ray/_private/function_manager.py", line 556, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "<ipython-input-4-7598620ccee7>", line 25, in push_parameters
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1052, in load_state_dict
    self.__class__.__name__, "\n\t".join(error_msgs)))
RuntimeError: Error(s) in loading state_dict for DQN:
	While copying the parameter named "conv1.weight", whose dimensions in the model are torch.Size([3

[2m[36m(pid=7993)[0m 1 episode is done
[2m[36m(pid=7993)[0m total rewards : 2 
[2m[36m(pid=7993)[0m actor network is updated 
[2m[36m(pid=7993)[0m actor target_network is updated
[2m[36m(pid=7998)[0m 1 episode is done
[2m[36m(pid=7998)[0m total rewards : 5 
[2m[36m(pid=7998)[0m actor network is updated 
[2m[36m(pid=7998)[0m actor target_network is updated


2021-06-11 15:15:50,398	ERROR worker.py:1056 -- Possible unhandled error from worker: [36mray::Learner.push_parameters()[39m (pid=7996, ip=192.168.0.22)
  File "python/ray/_raylet.pyx", line 505, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 449, in ray._raylet.execute_task.function_executor
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/ray/_private/function_manager.py", line 556, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "<ipython-input-4-7598620ccee7>", line 25, in push_parameters
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1052, in load_state_dict
    self.__class__.__name__, "\n\t".join(error_msgs)))
RuntimeError: Error(s) in loading state_dict for DQN:
	While copying the parameter named "conv1.weight", whose dimensions in the model are torch.Size([32, 3, 8, 8]) and whose dimensions in the checkpoint are torch.Size([32, 3, 8, 8]), an exce

[2m[36m(pid=7993)[0m MineRL agent is public, connect on port 12341 with Minecraft 1.11
[2m[36m(pid=7998)[0m MineRL agent is public, connect on port 12340 with Minecraft 1.11


[2m[36m(pid=7993)[0m 2 episode is done
[2m[36m(pid=7993)[0m total rewards : 1 
[2m[36m(pid=7993)[0m actor network is updated 
[2m[36m(pid=7993)[0m actor target_network is updated
[2m[36m(pid=7998)[0m 2 episode is done
[2m[36m(pid=7998)[0m total rewards : 7 
[2m[36m(pid=7998)[0m actor network is updated 
[2m[36m(pid=7998)[0m actor target_network is updated


2021-06-11 15:17:29,913	ERROR worker.py:1056 -- Possible unhandled error from worker: [36mray::Learner.push_parameters()[39m (pid=7996, ip=192.168.0.22)
  File "python/ray/_raylet.pyx", line 505, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 449, in ray._raylet.execute_task.function_executor
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/ray/_private/function_manager.py", line 556, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "<ipython-input-4-7598620ccee7>", line 25, in push_parameters
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1052, in load_state_dict
    self.__class__.__name__, "\n\t".join(error_msgs)))
RuntimeError: Error(s) in loading state_dict for DQN:
	While copying the parameter named "conv1.weight", whose dimensions in the model are torch.Size([32, 3, 8, 8]) and whose dimensions in the checkpoint are torch.Size([32, 3, 8, 8]), an exce

[2m[36m(pid=7993)[0m MineRL agent is public, connect on port 12341 with Minecraft 1.11
[2m[36m(pid=7998)[0m MineRL agent is public, connect on port 12340 with Minecraft 1.11


[2m[36m(pid=7993)[0m 3 episode is done
[2m[36m(pid=7993)[0m total rewards : 5 
[2m[36m(pid=7993)[0m actor network is updated 
[2m[36m(pid=7993)[0m actor target_network is updated


2021-06-11 15:19:11,776	ERROR worker.py:1056 -- Possible unhandled error from worker: [36mray::Learner.push_parameters()[39m (pid=7996, ip=192.168.0.22)
  File "python/ray/_raylet.pyx", line 505, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 449, in ray._raylet.execute_task.function_executor
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/ray/_private/function_manager.py", line 556, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "<ipython-input-4-7598620ccee7>", line 25, in push_parameters
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1052, in load_state_dict
    self.__class__.__name__, "\n\t".join(error_msgs)))
RuntimeError: Error(s) in loading state_dict for DQN:
	While copying the parameter named "conv1.weight", whose dimensions in the model are torch.Size([32, 3, 8, 8]) and whose dimensions in the checkpoint are torch.Size([32, 3, 8, 8]), an exce

[2m[36m(pid=7998)[0m 3 episode is done
[2m[36m(pid=7998)[0m total rewards : 4 
[2m[36m(pid=7998)[0m actor network is updated 
[2m[36m(pid=7998)[0m actor target_network is updated


2021-06-11 15:19:17,342	ERROR worker.py:1056 -- Possible unhandled error from worker: [36mray::Learner.push_parameters()[39m (pid=7996, ip=192.168.0.22)
  File "python/ray/_raylet.pyx", line 505, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 449, in ray._raylet.execute_task.function_executor
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/ray/_private/function_manager.py", line 556, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "<ipython-input-4-7598620ccee7>", line 25, in push_parameters
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1052, in load_state_dict
    self.__class__.__name__, "\n\t".join(error_msgs)))
RuntimeError: Error(s) in loading state_dict for DQN:
	While copying the parameter named "conv1.weight", whose dimensions in the model are torch.Size([32, 3, 8, 8]) and whose dimensions in the checkpoint are torch.Size([32, 3, 8, 8]), an exce

[2m[36m(pid=7993)[0m 4 episode is done
[2m[36m(pid=7993)[0m total rewards : 2 
[2m[36m(pid=7993)[0m actor network is updated 
[2m[36m(pid=7993)[0m actor target_network is updated


2021-06-11 15:20:48,489	ERROR worker.py:1056 -- Possible unhandled error from worker: [36mray::Learner.push_parameters()[39m (pid=7996, ip=192.168.0.22)
  File "python/ray/_raylet.pyx", line 505, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 449, in ray._raylet.execute_task.function_executor
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/ray/_private/function_manager.py", line 556, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "<ipython-input-4-7598620ccee7>", line 25, in push_parameters
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1052, in load_state_dict
    self.__class__.__name__, "\n\t".join(error_msgs)))
RuntimeError: Error(s) in loading state_dict for DQN:
	While copying the parameter named "conv1.weight", whose dimensions in the model are torch.Size([32, 3, 8, 8]) and whose dimensions in the checkpoint are torch.Size([32, 3, 8, 8]), an exce

[2m[36m(pid=7998)[0m 4 episode is done
[2m[36m(pid=7998)[0m total rewards : 0 
[2m[36m(pid=7998)[0m actor network is updated 
[2m[36m(pid=7998)[0m actor target_network is updated


2021-06-11 15:20:54,216	ERROR worker.py:1056 -- Possible unhandled error from worker: [36mray::Learner.push_parameters()[39m (pid=7996, ip=192.168.0.22)
  File "python/ray/_raylet.pyx", line 505, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 449, in ray._raylet.execute_task.function_executor
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/ray/_private/function_manager.py", line 556, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "<ipython-input-4-7598620ccee7>", line 25, in push_parameters
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1052, in load_state_dict
    self.__class__.__name__, "\n\t".join(error_msgs)))
RuntimeError: Error(s) in loading state_dict for DQN:
	While copying the parameter named "conv1.weight", whose dimensions in the model are torch.Size([32, 3, 8, 8]) and whose dimensions in the checkpoint are torch.Size([32, 3, 8, 8]), an exce

[2m[36m(pid=7993)[0m 5 episode is done
[2m[36m(pid=7993)[0m total rewards : 0 
[2m[36m(pid=7993)[0m actor network is updated 
[2m[36m(pid=7993)[0m actor target_network is updated
[2m[36m(pid=7998)[0m 5 episode is done
[2m[36m(pid=7998)[0m total rewards : 5 
[2m[36m(pid=7998)[0m actor network is updated 
[2m[36m(pid=7998)[0m actor target_network is updated


2021-06-11 15:22:31,307	ERROR worker.py:1056 -- Possible unhandled error from worker: [36mray::Learner.push_parameters()[39m (pid=7996, ip=192.168.0.22)
  File "python/ray/_raylet.pyx", line 505, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 449, in ray._raylet.execute_task.function_executor
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/ray/_private/function_manager.py", line 556, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "<ipython-input-4-7598620ccee7>", line 25, in push_parameters
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1052, in load_state_dict
    self.__class__.__name__, "\n\t".join(error_msgs)))
RuntimeError: Error(s) in loading state_dict for DQN:
	While copying the parameter named "conv1.weight", whose dimensions in the model are torch.Size([32, 3, 8, 8]) and whose dimensions in the checkpoint are torch.Size([32, 3, 8, 8]), an exce

[2m[36m(pid=7993)[0m MineRL agent is public, connect on port 12341 with Minecraft 1.11
[2m[36m(pid=7998)[0m MineRL agent is public, connect on port 12340 with Minecraft 1.11


[2m[36m(pid=7993)[0m 6 episode is done
[2m[36m(pid=7993)[0m total rewards : 3 
[2m[36m(pid=7993)[0m actor network is updated 
[2m[36m(pid=7993)[0m actor target_network is updated
[2m[36m(pid=7998)[0m 6 episode is done
[2m[36m(pid=7998)[0m total rewards : 6 
[2m[36m(pid=7998)[0m actor network is updated 
[2m[36m(pid=7998)[0m actor target_network is updated


2021-06-11 15:24:10,116	ERROR worker.py:1056 -- Possible unhandled error from worker: [36mray::Learner.push_parameters()[39m (pid=7996, ip=192.168.0.22)
  File "python/ray/_raylet.pyx", line 505, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 449, in ray._raylet.execute_task.function_executor
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/ray/_private/function_manager.py", line 556, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "<ipython-input-4-7598620ccee7>", line 25, in push_parameters
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1052, in load_state_dict
    self.__class__.__name__, "\n\t".join(error_msgs)))
RuntimeError: Error(s) in loading state_dict for DQN:
	While copying the parameter named "conv1.weight", whose dimensions in the model are torch.Size([32, 3, 8, 8]) and whose dimensions in the checkpoint are torch.Size([32, 3, 8, 8]), an exce

[2m[36m(pid=7993)[0m MineRL agent is public, connect on port 12341 with Minecraft 1.11
[2m[36m(pid=7998)[0m MineRL agent is public, connect on port 12340 with Minecraft 1.11


[2m[36m(pid=7993)[0m 7 episode is done
[2m[36m(pid=7993)[0m total rewards : 7 
[2m[36m(pid=7993)[0m actor network is updated 
[2m[36m(pid=7993)[0m actor target_network is updated


2021-06-11 15:25:57,794	ERROR worker.py:1056 -- Possible unhandled error from worker: [36mray::Learner.push_parameters()[39m (pid=7996, ip=192.168.0.22)
  File "python/ray/_raylet.pyx", line 505, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 449, in ray._raylet.execute_task.function_executor
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/ray/_private/function_manager.py", line 556, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "<ipython-input-4-7598620ccee7>", line 25, in push_parameters
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1052, in load_state_dict
    self.__class__.__name__, "\n\t".join(error_msgs)))
RuntimeError: Error(s) in loading state_dict for DQN:
	While copying the parameter named "conv1.weight", whose dimensions in the model are torch.Size([32, 3, 8, 8]) and whose dimensions in the checkpoint are torch.Size([32, 3, 8, 8]), an exce

[2m[36m(pid=7998)[0m 7 episode is done
[2m[36m(pid=7998)[0m total rewards : 7 
[2m[36m(pid=7998)[0m actor network is updated 
[2m[36m(pid=7998)[0m actor target_network is updated


2021-06-11 15:26:03,693	ERROR worker.py:1056 -- Possible unhandled error from worker: [36mray::Learner.push_parameters()[39m (pid=7996, ip=192.168.0.22)
  File "python/ray/_raylet.pyx", line 505, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 449, in ray._raylet.execute_task.function_executor
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/ray/_private/function_manager.py", line 556, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "<ipython-input-4-7598620ccee7>", line 25, in push_parameters
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1052, in load_state_dict
    self.__class__.__name__, "\n\t".join(error_msgs)))
RuntimeError: Error(s) in loading state_dict for DQN:
	While copying the parameter named "conv1.weight", whose dimensions in the model are torch.Size([32, 3, 8, 8]) and whose dimensions in the checkpoint are torch.Size([32, 3, 8, 8]), an exce

[2m[36m(pid=7993)[0m 8 episode is done
[2m[36m(pid=7993)[0m total rewards : 0 
[2m[36m(pid=7993)[0m actor network is updated 
[2m[36m(pid=7993)[0m actor target_network is updated


2021-06-11 15:27:40,952	ERROR worker.py:1056 -- Possible unhandled error from worker: [36mray::Learner.push_parameters()[39m (pid=7996, ip=192.168.0.22)
  File "python/ray/_raylet.pyx", line 505, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 449, in ray._raylet.execute_task.function_executor
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/ray/_private/function_manager.py", line 556, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "<ipython-input-4-7598620ccee7>", line 25, in push_parameters
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1052, in load_state_dict
    self.__class__.__name__, "\n\t".join(error_msgs)))
RuntimeError: Error(s) in loading state_dict for DQN:
	While copying the parameter named "conv1.weight", whose dimensions in the model are torch.Size([32, 3, 8, 8]) and whose dimensions in the checkpoint are torch.Size([32, 3, 8, 8]), an exce

[2m[36m(pid=7998)[0m 8 episode is done
[2m[36m(pid=7998)[0m total rewards : 8 
[2m[36m(pid=7998)[0m actor network is updated 
[2m[36m(pid=7998)[0m actor target_network is updated


2021-06-11 15:27:47,375	ERROR worker.py:1056 -- Possible unhandled error from worker: [36mray::Learner.push_parameters()[39m (pid=7996, ip=192.168.0.22)
  File "python/ray/_raylet.pyx", line 505, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 449, in ray._raylet.execute_task.function_executor
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/ray/_private/function_manager.py", line 556, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "<ipython-input-4-7598620ccee7>", line 25, in push_parameters
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1052, in load_state_dict
    self.__class__.__name__, "\n\t".join(error_msgs)))
RuntimeError: Error(s) in loading state_dict for DQN:
	While copying the parameter named "conv1.weight", whose dimensions in the model are torch.Size([32, 3, 8, 8]) and whose dimensions in the checkpoint are torch.Size([32, 3, 8, 8]), an exce

In [11]:
update = learner.update_network.remote(memory,batch_size, optimizer)


[2m[36m(pid=5612)[0m started


[2m[36m(pid=5612)[0m   action="store_true",


In [12]:
ray.get(memory.size.remote())

1128

[2m[36m(pid=5612)[0m Count : 10 leaner_network updated
[2m[36m(pid=5612)[0m learner model saved
[2m[36m(pid=5612)[0m Count : 20 leaner_network updated
[2m[36m(pid=5612)[0m learner model saved
[2m[36m(pid=5612)[0m Count : 30 leaner_network updated
[2m[36m(pid=5612)[0m learner model saved
[2m[36m(pid=5612)[0m Count : 40 leaner_network updated
[2m[36m(pid=5612)[0m learner model saved
[2m[36m(pid=5612)[0m Count : 50 leaner_target_network updated
[2m[36m(pid=5612)[0m Count : 50 leaner_network updated
[2m[36m(pid=5612)[0m learner model saved
[2m[36m(pid=5612)[0m Count : 60 leaner_network updated
[2m[36m(pid=5612)[0m learner model saved
[2m[36m(pid=5612)[0m Count : 70 leaner_network updated
[2m[36m(pid=5612)[0m learner model saved
[2m[36m(pid=5612)[0m Count : 80 leaner_network updated
[2m[36m(pid=5612)[0m learner model saved
[2m[36m(pid=5612)[0m Count : 90 leaner_network updated
[2m[36m(pid=5612)[0m learner model saved


[2m[36m(pid=5615)[0m   is_weight = np.power(self.tree.n_entries * sampling_probabilities, -self.beta)
[2m[36m(pid=5615)[0m   is_weight /= (is_weight.max() + 1e-5)
2021-06-10 18:01:41,322	ERROR worker.py:1056 -- Possible unhandled error from worker: [36mray::Learner.update_network()[39m (pid=5612, ip=192.168.0.22)
  File "python/ray/_raylet.pyx", line 505, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 449, in ray._raylet.execute_task.function_executor
  File "/home/kukjin/anaconda3/envs/minerl/lib/python3.7/site-packages/ray/_private/function_manager.py", line 556, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "<ipython-input-4-7598620ccee7>", line 60, in update_network
TypeError: cannot unpack non-iterable int object


[2m[36m(pid=5611)[0m 0 episode is done
[2m[36m(pid=5611)[0m total rewards : 0 
[2m[36m(pid=5611)[0m actor network is updated 
[2m[36m(pid=5611)[0m actor target_network is updated
[2m[36m(pid=5613)[0m 0 episode is done
[2m[36m(pid=5613)[0m total rewards : 0 
[2m[36m(pid=5613)[0m actor network is updated 
[2m[36m(pid=5613)[0m actor target_network is updated


[2m[36m(pid=5611)[0m MineRL agent is public, connect on port 12341 with Minecraft 1.11
[2m[36m(pid=5613)[0m MineRL agent is public, connect on port 12340 with Minecraft 1.11


[2m[36m(pid=5611)[0m 1 episode is done
[2m[36m(pid=5611)[0m total rewards : 15 
[2m[36m(pid=5611)[0m actor network is updated 
[2m[36m(pid=5611)[0m actor target_network is updated
[2m[36m(pid=5613)[0m 1 episode is done
[2m[36m(pid=5613)[0m total rewards : 9 
[2m[36m(pid=5613)[0m actor network is updated 
[2m[36m(pid=5613)[0m actor target_network is updated


[2m[36m(pid=5611)[0m MineRL agent is public, connect on port 12341 with Minecraft 1.11
[2m[36m(pid=5613)[0m MineRL agent is public, connect on port 12340 with Minecraft 1.11


[2m[36m(pid=5611)[0m 2 episode is done
[2m[36m(pid=5611)[0m total rewards : 8 
[2m[36m(pid=5611)[0m actor network is updated 
[2m[36m(pid=5611)[0m actor target_network is updated
[2m[36m(pid=5613)[0m 2 episode is done
[2m[36m(pid=5613)[0m total rewards : 0 
[2m[36m(pid=5613)[0m actor network is updated 
[2m[36m(pid=5613)[0m actor target_network is updated


[2m[36m(pid=5611)[0m MineRL agent is public, connect on port 12341 with Minecraft 1.11
[2m[36m(pid=5613)[0m MineRL agent is public, connect on port 12340 with Minecraft 1.11


In [None]:
actor1.env_close.remote()

In [None]:
actor2.env_close.remote()

In [None]:
actor3.env_close.remote()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt


In [None]:
def visualize(datapath, index, agent):
    data = pd.read_csv(datapath)
    plt.figure(figsize=(14, 10))
    plt.subplot(index)
    plt.title("Total Rewards : agent {}".format(agent))
    plt.xlabel("Episodes")
    plt.ylabel("Total Rewards");
    x = range(1, len(data['rewards'].values) + 1)
    plt.plot(data.index, data['rewards'].values, '-b', label='rewards');

In [None]:
data0 = "./trainstat_1/train_stat_minerl_agent 0.csv"
agent0 = 0
index0 = 311

data1 = "./trainstat_1/train_stat_minerl_agent 1.csv"
agent1 = 1
index1 = 312

data2 = "./trainstat_1/train_stat_minerl_agent 2.csv"
agent2 = 2
index2 = 313

visualize(data0, index0, agent0)
visualize(data1, index1, agent1)
visualize(data2, index2, agent2)

data = pd.read_csv("./trainstat_1/train_stat_minerl_learner.csv")
plt.figure(figsize=(20, 10))
plt.subplot(111)
plt.title("loss")
plt.xlabel("steps")
plt.ylabel("Total loss");
x = range(1, len(data['loss'].values) + 1)
plt.plot(data.index, data['loss'].values, '-g', label='loss');


plt.show()


In [None]:
data0 = "./trainstat_2/train_stat_minerl_agent 0.csv"
agent0 = 0
index0 = 311

data1 = "./trainstat_2/train_stat_minerl_agent 1.csv"
agent1 = 1
index1 = 312

data2 = "./trainstat_2/train_stat_minerl_agent 2.csv"
agent2 = 2
index2 = 313

visualize(data0, index0, agent0)
visualize(data1, index1, agent1)
visualize(data2, index2, agent2)

data = pd.read_csv("./trainstat_2/train_stat_minerl_learner.csv")
plt.figure(figsize=(20, 10))
plt.subplot(111)
plt.title("loss")
plt.xlabel("steps")
plt.ylabel("Total loss");
x = range(1, len(data['loss'].values) + 1)
plt.plot(data.index, data['loss'].values, '-g', label='loss');


plt.show()


In [None]:
data0 = "./trainstat_4/train_stat_minerl_agent 0.csv"
agent0 = 0
index0 = 311

data1 = "./trainstat_4/train_stat_minerl_agent 1.csv"
agent1 = 1
index1 = 312

data2 = "./trainstat_4/train_stat_minerl_agent 2.csv"
agent2 = 2
index2 = 313

visualize(data0, index0, agent0)
visualize(data1, index1, agent1)
visualize(data2, index2, agent2)

plt.show()

data = pd.read_csv("./trainstat_4/train_stat_minerl_learner.csv")
plt.figure(figsize=(20, 10))
plt.subplot(111)
plt.title("loss")
plt.xlabel("steps")
plt.ylabel("Total loss");
x = range(1, len(data['loss'].values) + 1)
plt.plot(data.index, data['loss'].values, '-g', label='loss');
