In [None]:
import gfootball.env as football_env
import time
import pprint
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical

from FeatureEncoder import *
from ppoLstm import *

import torch.multiprocessing as mp 
import time
        
def actor(actor_num, center_model, data_queue, signal_queue, rollout_len):
    print("actor {} started".format(actor_num))
    #11_vs_11_easy_stochastic
    #academy_empty_goal_close 300 epi done
    #academy_empty_goal 450 epi done
    model = PPO()
    env = football_env.create_environment(env_name="academy_empty_goal", representation="raw", stacked=False, logdir='/tmp/football', write_goal_dumps=False, write_full_episode_dumps=False, render=False)
    fe = FeatureEncoder()
    
    n_epi = 0
    score = 0
    rollout = []
    
    while True:
        env.reset()
        done = False
        steps = 0
#         score = 0
        n_epi += 1
        h_out = (torch.zeros([1, 1, 256], dtype=torch.float), torch.zeros([1, 1, 256], dtype=torch.float))
        
        while not done:
            t1 = time.time()
            while signal_queue.qsize() > 0:
                time.sleep(0.02)
            else:
                model.load_state_dict(center_model.state_dict())
                
            obs = env.observation()
            state_dict = fe.encode(obs[0])
            player_state = torch.from_numpy(state_dict["player"]).float().unsqueeze(0).unsqueeze(0)
            ball_state = torch.from_numpy(state_dict["ball"]).float().unsqueeze(0).unsqueeze(0)
            left_team_state = torch.from_numpy(state_dict["left_team"]).float().unsqueeze(0).unsqueeze(0)
            right_team_state = torch.from_numpy(state_dict["right_team"]).float().unsqueeze(0).unsqueeze(0)
            
            h_in = h_out

            state_dict_tensor = {
              "player" : player_state,
              "ball" : ball_state,
              "left_team" : left_team_state,
              "right_team" : right_team_state,
              "hidden" : h_in
            }

            with torch.no_grad():
                prob, _, h_out = model(state_dict_tensor)
            m = Categorical(prob)
            a = m.sample().item()

            obs, rew, done, info = env.step(a)
            state_prime_dict = fe.encode(obs[0])
            
            (h1_in, h2_in) = h_in
            (h1_out, h2_out) = h_out
            h_in_np = (h1_in.numpy(), h2_in.numpy())
            h_out_np = (h1_out.numpy(), h2_out.numpy())
            state_dict["hidden"] = h_in_np
            state_prime_dict["hidden"] = h_out_np

            transition = (state_dict, a, rew, state_prime_dict, prob[0][0][a].item(), done)
            rollout.append(transition)

            if len(rollout) == rollout_len:
                data_queue.put(rollout)
                rollout = []
                
            state_dict = state_prime_dict

            steps += 1
            score += rew

            if done:
                if n_epi % 10 == 0 and actor_num == 0:
                    print("%d, Done, Step %d Reward: %f" % (n_epi, steps, score))
                    score = 0   


def learner(center_model, queue, signal_queue, batch_size, buffer_size):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = PPO(device)
    model.load_state_dict(center_model.state_dict())
    model.to(device)
    
    print("learner start")
    
    while True:
        if queue.qsize() > batch_size*buffer_size:
            signal_queue.put(1)
            data = []
            for j in range(buffer_size):
                mini_batch_np = []
                for i in range(batch_size):
                    rollout = queue.get()
                    mini_batch_np.append(rollout)
                mini_batch = model.make_batch(mini_batch_np)
                data.append(mini_batch)
            model.train_net(data)
            center_model.load_state_dict(model.state_dict())
            
            if queue.qsize() > batch_size*buffer_size:
                print(" data remaining. queue size : ", queue.qsize())
            signal_queue.get()
            
        else:
            time.sleep(0.1)
    

if __name__ == '__main__':
    # hyperparameters
    num_processes = 6
    batch_size = 16 # 16   # learner
    buffer_size = 5 # 5    # learner
    rollout_len = 10       # actor
       
    np.set_printoptions(precision=3)
    np.set_printoptions(suppress=True)
    pp = pprint.PrettyPrinter(indent=4)
    torch.set_num_threads(1)
    
    center_model = PPO()
    center_model.share_memory()
    data_queue = mp.Queue()
    signal_queue = mp.Queue()
    processes = []
    
    p = mp.Process(target=learner, args=(center_model, data_queue, signal_queue, batch_size, buffer_size))
    p.start()
    processes.append(p)
    for rank in range(num_processes):
        p = mp.Process(target=actor, args=(rank, center_model, data_queue, signal_queue, rollout_len))
        p.start()
        processes.append(p)
        
    for p in processes:
        p.join()
        
        


actor 0 started
actor 1 started
actor 2 started
actor 3 startedactor 4 started

actor 5 started
learner start
 data remaining. queue size :  389
 data remaining. queue size :  309
 data remaining. queue size :  229
 data remaining. queue size :  149
10, Done, Step 188 Reward: 0.000000
20, Done, Step 111 Reward: 1.000000
30, Done, Step 136 Reward: 0.000000
40, Done, Step 177 Reward: 0.000000
50, Done, Step 65 Reward: 0.000000
60, Done, Step 49 Reward: 1.000000
70, Done, Step 43 Reward: 0.000000
80, Done, Step 55 Reward: 0.000000
90, Done, Step 113 Reward: -1.000000
100, Done, Step 78 Reward: 1.000000
110, Done, Step 63 Reward: 0.000000
120, Done, Step 127 Reward: 0.000000
130, Done, Step 44 Reward: 1.000000
140, Done, Step 66 Reward: -1.000000
150, Done, Step 44 Reward: 0.000000
160, Done, Step 35 Reward: -1.000000
170, Done, Step 54 Reward: 1.000000
180, Done, Step 331 Reward: 0.000000
190, Done, Step 34 Reward: 2.000000
200, Done, Step 125 Reward: -2.000000
210, Done, Step 401 Reward:

In [1]:
x = torch.tensor(1.0)
x = x+1
print(x)

NameError: name 'torch' is not defined