In [2]:
'''
This notebook generates the expert demonstration data used to train a BC imitator or to pretrain a GAIL imitator.
'''

import argparse
import tempfile
import os.path as osp
import gym
import logging
from tqdm import tqdm
import tensorflow as tf

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


In [3]:
def generate_expert_data_with_rewards(scenario_file, num_interactions=1000, file_name="expert_data", act_ndim=19):
    """Record a fixed number of environment steps from a Scenic scenario and save them.

    A ``GFScenicEnv`` is built from ``scenario_file`` with
    ``use_scenic_behavior_in_step=True`` and stepped ``num_interactions`` times.
    For each step, the observation seen *before* the step and the action
    reported in ``info["action_taken"]`` are stored. The action passed to
    ``env.step`` is only a random sample from the action space; the recorded
    action is whatever the environment reports it actually took.

    Episode returns are accumulated and stored only when an episode finishes,
    so the steps of a trailing, unfinished episode are kept in the
    observations/actions but contribute no entry to the rewards.

    Relies on ``tqdm`` being imported at module level.

    Args:
        scenario_file: Path to the ``.scenic`` scenario passed to ``buildScenario``.
        num_interactions: Number of environment steps to record.
        file_name: Destination handed to ``np.savez_compressed``.
        act_ndim: Width of the one-hot action encoding.

    Returns:
        Tuple ``(expert_observations, acts_oh, expert_rewards)``: the stacked
        observations, the one-hot encoded actions of shape
        ``(num_steps, act_ndim)``, and the per-episode returns.
    """
    import numpy as np
    from scenic.simulators.gfootball.rl_interface import GFScenicEnv
    from scenic.simulators.gfootball.utilities.scenic_helper import buildScenario

    gf_env_settings = {
        "stacked": True,
        "rewards": 'scoring',
        "representation": 'extracted',
        "players": ["agent:left_players=1"],
        "action_set": "default",  # alternatives seen in this project: "default", "v2"
    }

    scenario = buildScenario(scenario_file)
    env = GFScenicEnv(initial_scenario=scenario, gf_env_settings=gf_env_settings, use_scenic_behavior_in_step=True, constraints_checking=True)

    expert_observations = []
    expert_actions = []
    expert_rewards = []

    obs = env.reset()
    episode_return = 0
    for _ in tqdm(range(num_interactions)):
        # Store the observation the action was taken from, not the resulting one.
        expert_observations.append(obs)

        obs, reward, done, info = env.step(env.action_space.sample())
        episode_return += reward
        expert_actions.append(info["action_taken"])

        if done:
            obs = env.reset()
            expert_rewards.append(episode_return)
            episode_return = 0

    expert_observations = np.array(expert_observations)
    # Force an integer dtype: an empty list would otherwise become a float
    # array, which cannot be used as an index below.
    acts = np.asarray(expert_actions, dtype=np.int64)

    acts_oh = np.zeros((acts.shape[0], act_ndim))
    acts_oh[np.arange(acts.shape[0]), acts] = 1

    expert_rewards = np.array(expert_rewards)
    num_episodes = expert_rewards.shape[0]
    # With no finished episode the mean is undefined; report NaN explicitly
    # instead of triggering NumPy's "mean of empty slice" warning.
    mean_reward = expert_rewards.mean() if num_episodes else float("nan")

    print("Expert observation shape: ", expert_observations.shape)
    print("Expert actions shape: ", acts_oh.shape)
    print("Num Episode: ", num_episodes)
    print("Mean Reward: ", mean_reward)

    np.savez_compressed(
        file_name,
        acs=acts_oh,
        obs=expert_observations,
        num_epi=num_episodes,
        mean_reward=mean_reward,
        rewards=expert_rewards
    )
    return expert_observations, acts_oh, expert_rewards



In [4]:
def generate_expert_successful_data(scenario_file, num_interactions=1000, file_name="expert_data", act_ndim=19):
    """Collect only the steps of successful episodes from a Scenic scenario and save them.

    A ``GFScenicEnv`` is built from ``scenario_file`` with
    ``use_scenic_behavior_in_step=True``. Episodes are rolled out one at a
    time; the observations (taken *before* each step) and the actions reported
    in ``info["action_taken"]`` are buffered, and the buffer is kept only when
    the finished episode's total reward is positive.

    Collection stops once at least ``num_interactions`` steps have been kept.
    Because whole episodes are appended, the result may contain more than
    ``num_interactions`` steps. The loop does not terminate if no episode ever
    ends with a positive return.

    Relies on ``tqdm`` being imported at module level.

    Args:
        scenario_file: Path to the ``.scenic`` scenario passed to ``buildScenario``.
        num_interactions: Minimum number of successful steps to collect.
        file_name: Destination handed to ``np.savez_compressed``.
        act_ndim: Width of the one-hot action encoding.

    Returns:
        Tuple ``(expert_observations, acts_oh, expert_rewards)``: the stacked
        observations and one-hot actions of the kept episodes, and the returns
        of those episodes.
    """
    import numpy as np
    from scenic.simulators.gfootball.rl_interface import GFScenicEnv
    from scenic.simulators.gfootball.utilities.scenic_helper import buildScenario

    gf_env_settings = {
        "stacked": True,
        "rewards": 'scoring',
        "representation": 'extracted',
        "players": ["agent:left_players=1"],
        "action_set": "default",  # alternatives seen in this project: "default", "v2"
    }

    scenario = buildScenario(scenario_file)
    env = GFScenicEnv(initial_scenario=scenario, gf_env_settings=gf_env_settings, use_scenic_behavior_in_step=True, constraints_checking=True)

    expert_observations = []
    expert_actions = []
    expert_rewards = []

    obs_buf, acts_buf = [], []   # steps of the episode currently in progress
    policy_rews = []             # return of every finished episode, kept or not
    tr = 0

    with tqdm(total=num_interactions) as pbar:
        while len(expert_observations) < num_interactions:
            # An empty buffer marks the start of a new episode; this is the
            # only place the environment is reset, so each episode boundary
            # costs exactly one reset.
            if len(obs_buf) == 0:
                obs = env.reset()

            obs_buf.append(obs)
            obs, reward, done, info = env.step(env.action_space.sample())
            tr += reward
            acts_buf.append(info["action_taken"])

            if not done:
                continue

            policy_rews.append(tr)

            if tr > 0:
                kept_before = len(expert_observations)
                expert_observations.extend(obs_buf)
                expert_actions.extend(acts_buf)
                expert_rewards.append(tr)
                # Advance the bar by the newly kept steps, capped so the
                # counter never runs past its total.
                pbar.update(min(len(obs_buf), num_interactions - kept_before))

            obs_buf, acts_buf = [], []
            tr = 0

    print("Collection Done")

    expert_observations = np.array(expert_observations)
    # Force an integer dtype: an empty list would otherwise become a float
    # array, which cannot be used as an index below.
    acts = np.asarray(expert_actions, dtype=np.int64)

    acts_oh = np.zeros((acts.shape[0], act_ndim))
    acts_oh[np.arange(acts.shape[0]), acts] = 1

    expert_rewards = np.array(expert_rewards)
    num_expert_episodes = expert_rewards.shape[0]
    # Means are undefined when nothing was collected; report NaN explicitly
    # instead of triggering NumPy's "mean of empty slice" warning.
    mean_expert_reward = expert_rewards.mean() if num_expert_episodes else float("nan")
    mean_policy_reward = np.mean(policy_rews) if policy_rews else float("nan")

    print("Expert observation shape: ", expert_observations.shape)
    print("Expert actions shape: ", acts_oh.shape)
    print("Num Expert Episode: ", num_expert_episodes)
    print("Mean Expert Reward: ", mean_expert_reward)

    print("Num Trajectories Collected: ", len(policy_rews))
    print("Mean Policy Reward: ", mean_policy_reward)

    np.savez_compressed(
        file_name,
        acs=acts_oh,
        obs=expert_observations,
        num_epi=num_expert_episodes,
        mean_reward=mean_expert_reward,
        rewards=expert_rewards,
        policy_mean_reward=mean_policy_reward,
        policy_total_trajectories=len(policy_rews)
    )
    return expert_observations, acts_oh, expert_rewards



In [6]:
# Collect at least 500 steps from successful (positive-return) episodes of the
# pass-and-shoot scenario and save them under ../_data.
scenario = "../_scenarios/academy/wb/pass_n_shoot_wb.scenic"

num_interactions=500
# The sample budget is embedded in the file name; later cells rebuild the same
# path from `num_interactions`.
expert_file_name = f"../_data/pns_success_{num_interactions}.npz"

obs, act, rew = generate_expert_successful_data(scenario_file=scenario, num_interactions=num_interactions, file_name=expert_file_name)



  0%|          | 0/500 [00:00<?, ?it/s]

pygame 2.0.1 (SDL 2.0.14, Python 3.9.1)
Hello from the pygame community. https://www.pygame.org/contribute.html
Environment will ignore actions passed to step() and take action provided by Scenic


973it [00:13, 74.69it/s]                         


Collection Done
Expert observation shape:  (508, 72, 96, 16)
Expert actions shape:  (508, 19)
Num Expert Episode:  15
Mean Expert Reward:  1.0
Num Trajectories Collected:  31
Mean Policy Reward:  0.4838709677419355


In [7]:
# Disabled cell: the code is wrapped in a string literal, so nothing below is
# executed. The TypeError shown as this cell's output is from an earlier run
# of the un-disabled code.
"""
from gfrl.common.mybase.cloning.dataset import GFDset
expert_file_name = f"../_data/pns_success_{num_interactions}.npz"
dataset = GFDset(expert_file_name)
print(dataset.num_epi, dataset.mean_reward, dataset.size, dataset.acts.shape)
"""

TypeError: __init__() missing 2 required positional arguments: 'labels' and 'info'

In [9]:
# Load the saved successful-episode data and print a summary of each split.
# Depends on `num_interactions` set in an earlier cell (hidden notebook state).
path = f"../_data/pns_success_{num_interactions}.npz"

from gfrl.common.mybase.cloning.dataset import get_datasets
#path = "/home/ubuntu/ScenicGFootBall/training/gfrl/_data/sc4rl_fg11v1_rns_success_10000.npz"
# validation_ratio=0.0: the recorded output shows an empty validation split.
tds, vds = get_datasets(path, validation_ratio=0.0)

print("train")
print(tds.summary())
print()

print("validation")
print(vds.summary())
print()

train
Obs Shape: (508, 72, 96, 16)
Act Shape: (508, 19)
num_epi: 15
mean_reward: 1.0
policy_mean_reward: 0.4838709677419355
policy_total_trajectories: 31


validation
Obs Shape: (0, 72, 96, 16)
Act Shape: (0, 19)
num_epi: 15
mean_reward: 1.0
policy_mean_reward: 0.4838709677419355
policy_total_trajectories: 31




In [5]:
#import numpy as np
#td = np.load(expert_file_name)

In [6]:
#list(td.keys())

['acs', 'obs', 'num_epi', 'mean_reward', 'rewards']

In [8]:
# Record 5000 raw environment steps (no filtering by episode outcome) from the
# pass-and-shoot scenario, then reload the saved file and print its metadata.
# This cell overwrites `num_interactions`, `expert_file_name`, `obs`, `act`
# and `rew` from earlier cells.
scenario = "../_scenarios/academy/wb/pass_n_shoot_wb.scenic"

num_interactions=5000
expert_file_name = f"../_data/pns_{num_interactions}.npz"

obs, act, rew = generate_expert_data_with_rewards(scenario_file=scenario, num_interactions=num_interactions, file_name=expert_file_name)

# NOTE(review): another cell's recorded output shows a TypeError about missing
# 'labels' and 'info' arguments for the same single-argument GFDset call —
# presumably this usage predates an API change. Confirm it still works, or
# load the file through get_datasets instead.
from gfrl.common.mybase.cloning.dataset import GFDset
dataset = GFDset(expert_file_name)
print(dataset.num_epi, dataset.mean_reward, dataset.size, dataset.acts.shape)


  0%|          | 0/5000 [00:00<?, ?it/s]

Environment will ignore actions passed to step() and take action provided by Scenic


100%|██████████| 5000/5000 [00:59<00:00, 83.51it/s] 


Expert observation shape:  (5000, 72, 96, 16)
Expert actions shape:  (5000, 19)
Num Episode:  133
Mean Reward:  0.6240601503759399
133 0.6240601503759399 5000 (5000, 19)


In [13]:

# Disabled cell: the 10000-step collection run is wrapped in a string literal,
# so it is not executed; the cell only echoes the string.
"""
scenario = "../_scenarios/academy/wb/pass_n_shoot_wb.scenic"

num_interactions=10000
expert_file_name = f"../_data/pns_{num_interactions}.npz"

obs, act, rew = generate_expert_data_with_rewards(scenario_file=scenario, num_interactions=num_interactions, file_name=expert_file_name)
"""

'\nscenario = "../_scenarios/academy/wb/pass_n_shoot_wb.scenic"\n\nnum_interactions=10000\nexpert_file_name = f"../_data/pns_{num_interactions}.npz"\n\nobs, act, rew = generate_expert_data_with_rewards(scenario_file=scenario, num_interactions=num_interactions, file_name=expert_file_name)\n'

In [12]:
# Disabled cell: the GFDset reload check is wrapped in a string literal, so it
# is not executed; the cell only echoes the string.
"""
from gfrl.common.mybase.cloning.dataset import GFDset
dataset = GFDset(expert_file_name)
print(dataset.num_epi, dataset.mean_reward, dataset.size, dataset.acts.shape)
"""

'\nfrom gfrl.common.mybase.cloning.dataset import GFDset\ndataset = GFDset(expert_file_name)\nprint(dataset.num_epi, dataset.mean_reward, dataset.size, dataset.acts.shape)\n'

In [11]:
# Disabled cell: the 20000-step collection run and reload check are wrapped in
# a string literal, so they are not executed; the cell only echoes the string.
"""
scenario = "../_scenarios/academy/wb/pass_n_shoot_wb.scenic"

num_interactions=20000
expert_file_name = f"../_data/pns_{num_interactions}.npz"

obs, act, rew = generate_expert_data_with_rewards(scenario_file=scenario, num_interactions=num_interactions, file_name=expert_file_name)
from gfrl.common.mybase.cloning.dataset import GFDset
dataset = GFDset(expert_file_name)
print(dataset.num_epi, dataset.mean_reward, dataset.size, dataset.acts.shape)
"""

'\nscenario = "../_scenarios/academy/wb/pass_n_shoot_wb.scenic"\n\nnum_interactions=20000\nexpert_file_name = f"../_data/pns_{num_interactions}.npz"\n\nobs, act, rew = generate_expert_data_with_rewards(scenario_file=scenario, num_interactions=num_interactions, file_name=expert_file_name)\nfrom gfrl.common.mybase.cloning.dataset import GFDset\ndataset = GFDset(expert_file_name)\nprint(dataset.num_epi, dataset.mean_reward, dataset.size, dataset.acts.shape)\n'

In [10]:
# Disabled cell: the 50000-step collection run and reload check are wrapped in
# a string literal, so they are not executed; the cell only echoes the string.
"""
scenario = "../_scenarios/academy/wb/pass_n_shoot_wb.scenic"

num_interactions=50000
expert_file_name = f"../_data/pns_{num_interactions}.npz"

obs, act, rew = generate_expert_data_with_rewards(scenario_file=scenario, num_interactions=num_interactions, file_name=expert_file_name)

from gfrl.common.mybase.cloning.dataset import GFDset
dataset = GFDset(expert_file_name)
print(dataset.num_epi, dataset.mean_reward, dataset.size, dataset.acts.shape)
"""

'\nscenario = "../_scenarios/academy/wb/pass_n_shoot_wb.scenic"\n\nnum_interactions=50000\nexpert_file_name = f"../_data/pns_{num_interactions}.npz"\n\nobs, act, rew = generate_expert_data_with_rewards(scenario_file=scenario, num_interactions=num_interactions, file_name=expert_file_name)\n\nfrom gfrl.common.mybase.cloning.dataset import GFDset\ndataset = GFDset(expert_file_name)\nprint(dataset.num_epi, dataset.mean_reward, dataset.size, dataset.acts.shape)\n'