# Install Dependencies

In [None]:
!pip install tensorflow_version==1.15.0
!pip install "gym==0.19"
!pip install stable-baselines[mpi]==2.9.0

# Clone PCGRL Repository 

In [None]:
# Fetch the gym-pcgrl fork; the next cell changes into the cloned `gym-pcgrl` directory.
! git clone https://github.com/maryamkheirkhah/gym-pcgrl.git

In [None]:
import os
# Move into the cloned repository so the editable install below targets it.
# NOTE(review): chdir is not idempotent; re-running this cell from inside
# `gym-pcgrl` looks for a nested `gym-pcgrl` directory.
os.chdir('gym-pcgrl')
# Editable install of the package found in the current directory.
!pip install -e .

# Check Your System Number Of CPUs

In [11]:
import os

# Report how many CPUs the operating system exposes.
cpu_total = os.cpu_count()
print("Number of CPUs:", cpu_total)

Number of CPUs: 8


# Train

In [None]:
#pip install tensorflow==1.15
#Install stable-baselines as described in the documentation
print("first")
import model
from model import FullyConvPolicyBigMap, FullyConvPolicySmallMap, CustomPolicyBigMap, CustomPolicySmallMap
from utils import get_exp_name, max_exp_idx, load_model, make_vec_envs
from stable_baselines import PPO2
from stable_baselines.results_plotter import load_results, ts2xy
import tensorflow as tf
import numpy as np
import os

# Module-level training state, accessed through `global` by the functions below.
log_dir = './'               # directory the callback reads results from and saves models to
n_steps = 0                  # number of callback invocations so far
best_mean_reward = -np.inf   # best mean reward observed so far

def callback(_locals, _globals):
    """
    Callback called at each step (for DQN and others) or after n steps (see ACER or PPO2).

    On every 10th call the results stored in ``log_dir`` are loaded. Once more
    than 100 entries are available, the mean of the last 100 rewards is compared
    with the best mean seen so far and the model is saved into ``log_dir`` as
    ``best_model.pkl`` (new best) or ``latest_model.pkl`` (otherwise).

    Reads and updates the module-level ``n_steps`` and ``best_mean_reward``.

    :param _locals: (dict) must contain the model being trained under ``'self'``
    :param _globals: (dict) unused
    :return: (bool) always True; returning False would stop training early
    """
    global n_steps, best_mean_reward
    # Evaluate only on every 10th call
    if (n_steps + 1) % 10 == 0:
        x, y = ts2xy(load_results(log_dir), 'timesteps')
        if len(x) > 100:
            mean_reward = np.mean(y[-100:])
            print(x[-1], 'timesteps')
            print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(best_mean_reward, mean_reward))

            # New best model, we save the agent here
            if mean_reward > best_mean_reward:
                best_mean_reward = mean_reward
                print("Saving new best model")
                _locals['self'].save(os.path.join(log_dir, 'best_model.pkl'))
            else:
                print("Saving latest model")
                _locals['self'].save(os.path.join(log_dir, 'latest_model.pkl'))
        else:
            # Not enough history yet to compute a meaningful mean
            print('{} monitor entries'.format(len(x)))
    n_steps += 1
    # Returning False will stop training early
    return True


def main(game, representation, experiment, steps, n_cpu, render, logging, **kwargs):
    """
    Train (or resume training of) a PPO2 agent on one game/representation pair.

    The run directory is ``runs/<exp_name>_<n>_log``; the module-level
    ``log_dir`` is rebound to it so that ``callback`` saves models there.

    :param game: (str) game name; 'binary', 'zelda' and 'sokoban' get a preset ``cropped_size``
    :param representation: (str) 'wide' selects the fully convolutional policies,
        anything else the custom policies
    :param experiment: experiment tag forwarded to ``get_exp_name``
    :param steps: total number of training timesteps (cast to int)
    :param n_cpu: forwarded to ``make_vec_envs``
    :param render: (bool) forwarded to ``make_vec_envs`` as ``render``
    :param logging: (bool) when False, no log directory is handed to the
        environments and training runs without ``callback``
    :param kwargs: extra options; ``resume`` (bool, default False) continues the
        latest existing run instead of creating a new one. All entries are also
        forwarded to ``get_exp_name`` and ``make_vec_envs``.
    """
    global log_dir
    env_name = '{}-{}-v0'.format(game, representation)
    exp_name = get_exp_name(game, representation, experiment, **kwargs)
    resume = kwargs.get('resume', False)

    # Sokoban uses the small-map policy variants, every other game the big-map ones.
    small_map = game == "sokoban"
    if representation == 'wide':
        policy = FullyConvPolicySmallMap if small_map else FullyConvPolicyBigMap
    else:
        policy = CustomPolicySmallMap if small_map else CustomPolicyBigMap

    cropped_sizes = {"binary": 28, "zelda": 22, "sokoban": 10}
    if game in cropped_sizes:
        kwargs['cropped_size'] = cropped_sizes[game]

    # A fresh run gets the next index; resuming reuses the one max_exp_idx returned.
    n = max_exp_idx(exp_name)
    if not resume:
        n = n + 1
    log_dir = 'runs/{}_{}_{}'.format(exp_name, n, 'log')

    ppo_model = None
    if not resume:
        os.makedirs(log_dir)
    else:
        ppo_model = load_model(log_dir)

    kwargs = {
        **kwargs,
        'render_rank': 0,
        'render': render,
    }

    # Fix: hand the environments `used_dir` (None when logging is disabled).
    # The original computed it but always passed `log_dir`.
    used_dir = log_dir if logging else None
    env = make_vec_envs(env_name, representation, used_dir, n_cpu, **kwargs)

    if not resume or ppo_model is None:
        ppo_model = PPO2(policy, env, verbose=1, tensorboard_log="./runs")
    else:
        ppo_model.set_env(env)

    if not logging:
        ppo_model.learn(total_timesteps=int(steps), tb_log_name=exp_name)
    else:
        ppo_model.learn(total_timesteps=int(steps), tb_log_name=exp_name, callback=callback)

################################## MAIN ########################################
games =['binary','zelda','sokoban']
representations = ['narrow','turtle','wide']
# Fill these parameters according to your equipments and wishes 
experiment = None
steps = 10000
render = True
logging = True
n_cpu = 1
kwargs = {
    'resume': False
}
for g in games:
    for r in representations:
        if __name__ == '__main__':
            main(g, r, experiment, steps, n_cpu, render, logging, **kwargs)


# Plotting

I used tensorboard for plotting trained models 

In [1]:
%pwd

'f:\\learning\\master\\PCGRL\\gym-pcgrl'

In [15]:
# NOTE(review): `kill` is a POSIX command and 6020 is a hard-coded PID; the recorded
# output of this cell shows the shell used here does not recognize the command.
!kill 6020

'kill' is not recognized as an internal or external command,
operable program or batch file.


# Display Tensorboard inline
Run & Display tensorboard   
**PS.** *Sometimes it does not show up at all; in that case, try uncommenting the reload code, or just run the cell again.*

It's correctly loaded when you see this view
![Tensorboard](https://nextgrid.ai/wp-content/uploads/2019/12/Screenshot-2019-12-27-at-16.40.02.png)

In [2]:
# Directory TensorBoard is pointed at in the launch cell.
# NOTE(review): the training cell writes its TensorBoard logs to "./runs"; confirm this path is the intended one.
logs_base_dir = './real-100-runs/zelda/' # Log DIR


In [3]:
import tensorboard
print(tensorboard.__version__)


2.11.2


In [37]:
# List processes bound to port 6006.
# NOTE(review): `lsof` is POSIX-only (the recorded output shows it is not recognized here),
# and the TensorBoard launch cell in this notebook uses port 8084, not 6006.
!lsof -i :6006


'lsof' is not recognized as an internal or external command,
operable program or batch file.


In [22]:
# NOTE(review): repeat of the earlier `kill` cell; POSIX-only, hard-coded PID,
# and the recorded output shows the command is not recognized by this shell.
!kill 6020

'kill' is not recognized as an internal or external command,
operable program or batch file.


In [4]:
# Often not loading on first try; run the cell again until you see the screen
%load_ext tensorboard

# Serve the logs under `logs_base_dir` on localhost, port 8084.
# NOTE(review): `logs_base_dir` already ends with '/', so the expanded path contains a double slash.
%tensorboard --logdir {logs_base_dir}/ --host localhost --port 8084
#%tensorboard --logdir=data/ --host localhost --port 808

# Reload the extension: the workaround mentioned in the notes for when the UI does not appear.
%reload_ext tensorboard
#!kill 6020


Launching TensorBoard...

# Inference

In [13]:
import model
from stable_baselines import PPO2

import time
from utils import make_vec_envs

%matplotlib inline
import matplotlib.pyplot as plt
from IPython import display

In [14]:
def make_game_env(game, representation,change_percentage):
    """
    Load the saved agent for a game/representation pair and build its environment.

    Side effects: rebinds the module-level names ``model_path``, ``kwargs`` and
    ``agent``, and for the known games patches ``model.FullyConvPolicy`` before
    the agent is loaded.

    :param game: (str) game name ('binary', 'zelda' and 'sokoban' have presets)
    :param representation: (str) representation name
    :param change_percentage: value stored in ``kwargs['change_percentage']``
    :return: the environment returned by ``make_vec_envs``
    """
    global model_path, kwargs,agent
    model_path = 'runs/{}_{}_1_log/best_model.pkl'.format(game, representation)
    kwargs = {'change_percentage': change_percentage, 'verbose': True}

    # Name of the policy class on `model` to expose as FullyConvPolicy, per game.
    policy_attr = {
        "binary": "FullyConvPolicyBigMap",
        "zelda": "FullyConvPolicyBigMap",
        "sokoban": "FullyConvPolicySmallMap",
    }.get(game)
    if policy_attr is not None:
        model.FullyConvPolicy = getattr(model, policy_attr)

    agent = PPO2.load(model_path)

    # Per-game crop size; unknown games leave kwargs untouched.
    crop = {"binary": 28, "zelda": 22, "sokoban": 10}.get(game)
    if crop is not None:
        kwargs['cropped_size'] = crop

    env_id = '{}-{}-v0'.format(game, representation)
    return make_vec_envs(env_id, representation, None, 1, **kwargs)


In [15]:
def show_state(env, step=0, changes=0, total_reward=0, name=""):
    """
    Draw the environment's current frame in figure 10 and refresh the cell output.

    :param env: object whose ``render(mode='rgb_array')`` result is passed to ``plt.imshow``
    :param step: value shown after "Step:" in the title
    :param changes: value shown after "Changes:" in the title
    :param total_reward: value shown after "Total Reward:" in the title
    :param name: prefix of the title
    """
    caption = "{} | Step: {} Changes: {} Total Reward: {}".format(name, step, changes, total_reward)
    plt.figure(10)
    plt.clf()
    plt.title(caption)
    plt.axis('off')
    frame = env.render(mode='rgb_array')
    plt.imshow(frame)
    # Replace the previous output instead of stacking a new image per step.
    display.clear_output(wait=True)
    display.display(plt.gcf())
    
def infer(env, agent, **kwargs):
    """
    Run one episode with ``agent`` on ``env`` and summarize it.

    Each intermediate step is drawn with ``show_state``; the step that ends the
    episode is not drawn.

    :param env: environment providing ``reset()`` and ``step(action)``
    :param agent: object whose ``predict(obs)`` returns ``(action, state)``
    :param kwargs: ``verbose`` (print the final info dict) and
        ``change_percentage`` (returned unchanged)
    :return: (iterations, changes, total reward of the first env, change_percentage)
    """
    observation = env.reset()
    cumulative = 0
    while True:
        chosen, _ = agent.predict(observation)
        observation, step_reward, finished, info = env.step(chosen)
        cumulative += step_reward
        if finished:
            break
        show_state(env, info[0]['iterations'], info[0]['changes'], cumulative)

    final_info = info[0]
    if kwargs.get('verbose', False):
        print(final_info)
    return (
        final_info['iterations'],
        final_info['changes'],
        cumulative[0],
        kwargs.get('change_percentage'),
    )


In [None]:
# Sweep change_percentage from 0.0 to 1.0 for every game/representation pair and
# record 40 inference episodes per setting.
games = ['zelda', 'sokoban']
representations = ['turtle', 'wide']
arr_all_game = []  # every batch of results across all games (read by later cells)
for game in games:
    # (representation, results) pairs of the current game only, so each result
    # file holds just its own game's rows and every row keeps its own label.
    game_rows = []
    for representation in representations:
        for c in range(0, 11):
            print(game, representation)
            # make_game_env also rebinds the globals `agent` and `kwargs` used below.
            env = make_game_env(game, representation, c / 10)
            total_rewards_arr = []
            for i in range(40):
                r = infer(env, agent, **kwargs)
                print(r)
                total_rewards_arr.append(r)
            arr_all_game.append(total_rewards_arr)
            game_rows.append((representation, total_rewards_arr))
            # Rewrite the file after every batch so partial results survive an interruption.
            with open('result-{}.txt'.format(game), 'w') as f:
                for row_label, row in game_rows:
                    f.write(f"{row_label} ")
                    f.write(f"{row}\n")
            
       


# Zelda Wide

In [39]:
# NOTE(review): `sum` is never assigned in the visible notebook and shadows the builtin;
# this cell relies on kernel state from a cell that is no longer present.
print(sum)
# Keep the most recent `total_rewards_arr` under a run-specific name.
rewards_wide_zelda = total_rewards_arr
# NOTE(review): divides by a hard-coded 50, while the visible inference loop runs 40 episodes per batch; confirm.
mean_rewards_wide_zelda=sum/50
print(rewards_wide_zelda)
print(mean_rewards_wide_zelda)

[2436.]
[array([34.], dtype=float32), array([18.], dtype=float32), array([53.], dtype=float32), array([53.], dtype=float32), array([58.], dtype=float32), array([47.], dtype=float32), array([35.], dtype=float32), array([42.], dtype=float32), array([55.], dtype=float32), array([38.], dtype=float32), array([49.], dtype=float32), array([80.], dtype=float32), array([63.], dtype=float32), array([52.], dtype=float32), array([33.], dtype=float32), array([51.], dtype=float32), array([41.], dtype=float32), array([56.], dtype=float32), array([45.], dtype=float32), array([49.], dtype=float32), array([71.], dtype=float32), array([31.], dtype=float32), array([46.], dtype=float32), array([69.], dtype=float32), array([59.], dtype=float32), array([86.], dtype=float32), array([61.], dtype=float32), array([51.], dtype=float32), array([67.], dtype=float32), array([48.], dtype=float32), array([53.], dtype=float32), array([47.], dtype=float32), array([46.], dtype=float32), array([57.], dtype=float32), array

# Sokoban Wide

In [34]:
# NOTE(review): `sum` is never assigned in the visible notebook and shadows the builtin;
# this cell relies on kernel state from a cell that is no longer present.
print(sum)
# Keep the most recent `total_rewards_arr` under a run-specific name.
rewards_wide_sokoban = total_rewards_arr
# NOTE(review): divides by a hard-coded 50, while the visible inference loop runs 40 episodes per batch; confirm.
mean_rewards_wide_sokoban=sum/50
print(rewards_wide_sokoban)
print(mean_rewards_wide_sokoban)

[1061.]
[array([8.], dtype=float32), array([22.], dtype=float32), array([23.], dtype=float32), array([25.], dtype=float32), array([21.], dtype=float32), array([26.], dtype=float32), array([-1.], dtype=float32), array([22.], dtype=float32), array([17.], dtype=float32), array([12.], dtype=float32), array([6.], dtype=float32), array([28.], dtype=float32), array([4.], dtype=float32), array([11.], dtype=float32), array([38.], dtype=float32), array([21.], dtype=float32), array([25.], dtype=float32), array([14.], dtype=float32), array([27.], dtype=float32), array([22.], dtype=float32), array([21.], dtype=float32), array([23.], dtype=float32), array([11.], dtype=float32), array([40.], dtype=float32), array([16.], dtype=float32), array([19.], dtype=float32), array([16.], dtype=float32), array([22.], dtype=float32), array([43.], dtype=float32), array([35.], dtype=float32), array([15.], dtype=float32), array([25.], dtype=float32), array([34.], dtype=float32), array([20.], dtype=float32), array([2

# Sokoban Narrow

In [30]:
# NOTE(review): `sum` is never assigned in the visible notebook and shadows the builtin;
# this cell relies on kernel state from a cell that is no longer present.
print(sum)
# Keep the most recent `total_rewards_arr` under a run-specific name.
rewards_narrow_sokoban = total_rewards_arr
# NOTE(review): divides by a hard-coded 50, while the visible inference loop runs 40 episodes per batch; confirm.
mean_rewards_narrow_sokoban=sum/50
print(rewards_narrow_sokoban)
print(mean_rewards_narrow_sokoban)

[1016.]
[array([32.], dtype=float32), array([10.], dtype=float32), array([32.], dtype=float32), array([35.], dtype=float32), array([13.], dtype=float32), array([42.], dtype=float32), array([34.], dtype=float32), array([19.], dtype=float32), array([15.], dtype=float32), array([12.], dtype=float32), array([22.], dtype=float32), array([7.], dtype=float32), array([26.], dtype=float32), array([23.], dtype=float32), array([28.], dtype=float32), array([16.], dtype=float32), array([17.], dtype=float32), array([13.], dtype=float32), array([17.], dtype=float32), array([7.], dtype=float32), array([22.], dtype=float32), array([5.], dtype=float32), array([20.], dtype=float32), array([22.], dtype=float32), array([14.], dtype=float32), array([30.], dtype=float32), array([7.], dtype=float32), array([30.], dtype=float32), array([20.], dtype=float32), array([12.], dtype=float32), array([25.], dtype=float32), array([37.], dtype=float32), array([42.], dtype=float32), array([30.], dtype=float32), array([18

# binary turtle

In [13]:
# Dump whatever `total_rewards_arr` currently holds, then show the first element of its second entry.
# NOTE(review): depends on which inference run last filled `total_rewards_arr`; the heading is the only link to "binary turtle".
print(total_rewards_arr)
total_rewards_arr[1][0]

[array([75.], dtype=float32), array([54.], dtype=float32), array([145.], dtype=float32), array([20.], dtype=float32), array([144.], dtype=float32), array([-1.], dtype=float32), array([141.], dtype=float32), array([102.], dtype=float32), array([70.], dtype=float32), array([7.], dtype=float32), array([27.], dtype=float32), array([153.], dtype=float32), array([22.], dtype=float32), array([219.], dtype=float32), array([20.], dtype=float32), array([126.], dtype=float32), array([57.], dtype=float32), array([154.], dtype=float32), array([80.], dtype=float32), array([155.], dtype=float32), array([143.], dtype=float32), array([143.], dtype=float32), array([43.], dtype=float32), array([172.], dtype=float32), array([6.], dtype=float32), array([23.], dtype=float32), array([20.], dtype=float32), array([56.], dtype=float32), array([150.], dtype=float32), array([119.], dtype=float32), array([53.], dtype=float32), array([149.], dtype=float32), array([141.], dtype=float32), array([201.], dtype=float32)

54.0

sokoban turtle:

In [26]:
# Dump whatever `total_rewards_arr` currently holds, then show the first element of its second entry.
# NOTE(review): depends on which inference run last filled `total_rewards_arr`; the label above is the only link to "sokoban turtle".
print(total_rewards_arr)
total_rewards_arr[1][0]

[array([23.], dtype=float32), array([21.], dtype=float32), array([13.], dtype=float32), array([24.], dtype=float32), array([19.], dtype=float32), array([10.], dtype=float32), array([24.], dtype=float32), array([15.], dtype=float32), array([35.], dtype=float32), array([29.], dtype=float32), array([7.], dtype=float32), array([9.], dtype=float32), array([16.], dtype=float32), array([24.], dtype=float32), array([32.], dtype=float32), array([0.], dtype=float32), array([15.], dtype=float32), array([19.], dtype=float32), array([30.], dtype=float32), array([40.], dtype=float32), array([20.], dtype=float32), array([32.], dtype=float32), array([20.], dtype=float32), array([25.], dtype=float32), array([14.], dtype=float32), array([26.], dtype=float32), array([7.], dtype=float32), array([11.], dtype=float32), array([45.], dtype=float32), array([8.], dtype=float32), array([24.], dtype=float32), array([34.], dtype=float32), array([49.], dtype=float32), array([24.], dtype=float32), array([16.], dtype

21.0

# Sokoban Turtle Inference

In [25]:
# NOTE(review): `sum` is never assigned in the visible notebook and shadows the builtin;
# this cell relies on kernel state from a cell that is no longer present.
print(sum)
# Keep the most recent `total_rewards_arr` under a run-specific name.
rewards_turtle_sokoban = total_rewards_arr
# NOTE(review): divides by a hard-coded 50, while the visible inference loop runs 40 episodes per batch; confirm.
mean_rewards_turtle_sokoban=sum/50
print(mean_rewards_turtle_sokoban)

[1147.]
[22.94]


# Zelda Turtle Inference

In [None]:
# NOTE(review): `sum` is never assigned in the visible notebook and shadows the builtin;
# this cell relies on kernel state from a cell that is no longer present. The cell has no recorded output.
print(sum)
# Keep the most recent `total_rewards_arr` under a run-specific name.
rewards_turtle_zelda = total_rewards_arr
# NOTE(review): divides by a hard-coded 50, while the visible inference loop runs 40 episodes per batch; confirm.
mean_rewards_turtle_zelda=sum/50
print(mean_rewards_turtle_zelda)

In [14]:
# NOTE(review): `sum` comes from hidden kernel state. The recorded value [4160.] is identical
# to the one shown in the Binary Turtle cell, so it may not belong to a zelda run; confirm.
print(sum)



[4160.]


#  Binary Turtle Inference

In [15]:
# NOTE(review): `sum` is never assigned in the visible notebook and shadows the builtin;
# this cell relies on kernel state from a cell that is no longer present.
print(sum)
# Keep the most recent `total_rewards_arr` under a run-specific name.
rewards_turtle_binary = total_rewards_arr
# NOTE(review): divides by a hard-coded 50, while the visible inference loop runs 40 episodes per batch; confirm.
mean_rewards_turtle_binary=sum/50
print(mean_rewards_turtle_binary)

[4160.]
[83.2]


# Binary Wide Inference

In [7]:
import numpy as np

# Episode rewards of the binary/wide agent, transcribed by hand (one single-element list per episode).
rewards_wide_binary = [[175.],[34.],[106.],[169.],[171.],[38.],[24.],[25.],[155.],[51.],[162.],[36.],[22.],[25.],[156.],[121.],[24.],[34.],[192.],[96.],[151.],[100.],[27.],[170.],[22.],[127.],[42.],[107.],[163.], [124.], [209.], [113.], [150.], [22.], [103.], [30.], [79.], [30.], [50.],[92.],[63.],[175.], [32.], [161.], [22.], [191.], [154.], [22.], [105.], [29.]]
print(rewards_wide_binary)
# Derive the mean from the data instead of a hand-typed total (4681) and count (50).
# np.mean is used rather than the builtin `sum`, which other cells of this notebook shadow.
mean_rewards_wide_binary = float(np.mean(rewards_wide_binary))
print(mean_rewards_wide_binary)

[[175.0], [34.0], [106.0], [169.0], [171.0], [38.0], [24.0], [25.0], [155.0], [51.0], [162.0], [36.0], [22.0], [25.0], [156.0], [121.0], [24.0], [34.0], [192.0], [96.0], [151.0], [100.0], [27.0], [170.0], [22.0], [127.0], [42.0], [107.0], [163.0], [124.0], [209.0], [113.0], [150.0], [22.0], [103.0], [30.0], [79.0], [30.0], [50.0], [92.0], [63.0], [175.0], [32.0], [161.0], [22.0], [191.0], [154.0], [22.0], [105.0], [29.0]]
93.62


# binary Narrow inference

In [39]:
# NOTE(review): `sum`, `rewards_narrow_binary` and `mean_rewards_narrow_binary` are never
# assigned in the visible notebook; this cell only works with leftover kernel state.
print(sum)
# Bare expressions: they have no effect other than raising NameError when the name is missing.
rewards_narrow_binary 
mean_rewards_narrow_binary
print(mean_rewards_narrow_binary)

[4140.]
[82.8]


# Plot Based on Change Percentage

In [None]:
# Rewrites the result file of whatever `game` currently is, from the global `arr_all_game`.
# NOTE(review): duplicates the write inside the inference loop and depends on the leftover
# globals `game` and `representation`; every row gets the current `representation` as its label.
with open('result-{}.txt'.format(game), 'w') as f:
                for line in arr_all_game:
                    f.write(f"{representation} ")
                    f.write(f"{line}\n")

In [None]:
def readFile(game, runs_per_representation=11):
    """
    Read ``result-<game>.txt`` and split its lines into three consecutive groups.

    The file is read from the current working directory. Lines are returned
    exactly as ``readlines()`` yields them (trailing newline included). Groups
    are cut by position, so a short file yields shorter or empty groups instead
    of raising.

    NOTE(review): the groups are assumed to be stored in the order narrow,
    turtle, wide with ``runs_per_representation`` lines each. The inference loop
    visible in this notebook only runs 'turtle' and 'wide', so confirm the file
    layout before relying on these labels.

    :param game: (str) game name used to build the file name
    :param runs_per_representation: (int) number of lines per group (default 11,
        one per change percentage from 0.0 to 1.0)
    :return: (narrow, turtle, wide) lists of lines
    """
    # `with` guarantees the file is closed even if reading fails.
    with open('result-{}.txt'.format(game), 'r') as result_file:
        lines = result_file.readlines()

    size = runs_per_representation
    narrow = lines[0:size]
    turtle = lines[size:2 * size]
    wide = lines[2 * size:3 * size]
    return narrow, turtle, wide

In [None]:
def Plot():
    """
    Read the result file of every game in the module-level ``games`` list.

    NOTE(review): the values returned by ``readFile`` are discarded and nothing
    is drawn yet; this looks like an unfinished stub.
    """
    for game_name in games:
        readFile(game_name)

In [None]:
def reward(game,arr):
    """
    Look up the crop size preset for ``game``.

    NOTE(review): unfinished stub; the looked-up value is not used or returned,
    and ``arr`` is never read, so the function always returns None.

    :param game: (str) game name; 'binary', 'zelda' and 'sokoban' have presets, anything else maps to 0
    :param arr: unused
    """
    cropped_size = {"binary": 28, "zelda": 22, "sokoban": 10}.get(game, 0)
    

In [17]:
# Collect the binary-game reward lists (turtle, wide); both names are assigned in earlier result cells.
binarys = [rewards_turtle_binary,rewards_wide_binary]

In [None]:
import gym
import gym_pcgrl

# Smoke test: drive the sokoban/narrow environment with random actions for up
# to 1000 steps, rendering every step.
env = gym.make('sokoban-narrow-v0')
try:
    obs = env.reset()
    for t in range(1000):
        action = env.action_space.sample()
        # Named `step_reward` (not `reward`) so the `reward` function defined
        # earlier in this notebook is not overwritten.
        obs, step_reward, done, info = env.step(action)
        env.render('human')
        if done:
            print("Episode finished after {} timesteps".format(t+1))
            break
finally:
    # Release the environment's resources even if a step raises.
    env.close()

In [13]:
import numpy as np
# Scratch cell: builds a small array with the 'f' (single-precision float) typecode.
# NOTE(review): the recorded output `1.0` is not produced by this code as shown;
# presumably a trailing expression was removed. Confirm.
x = np.array([1, 2, 3], dtype='f')


1.0

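# Give a negative reward when the agent earns no positive reward
توی ترتل هیچ کاری نکردن هیچ مزیتی نداره!فقط ضرره پس باید منفی بگیره 

(English: In the turtle representation, doing nothing has no benefit; it is only a loss, so it should receive a negative reward.)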