In [1]:
# OPTIONAL: Load the "autoreload" extension so that code can change
%load_ext autoreload

# OPTIONAL: always reload modules so that as you change code in src, it gets loaded
%autoreload 2

# Imports

In [None]:
from os.path import exists
from rl.callbacks import FileLogger, ModelIntervalCheckpoint
from rl.policy import EpsGreedyQPolicy, LinearAnnealedPolicy, BoltzmannQPolicy, MaxBoltzmannQPolicy
from rl.memory import SequentialMemory
from rl.agents import DQNAgent

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Convolution2D, Dense, Flatten

import numpy as np

import matplotlib.pyplot as plt
import json
import argparse
import pandas as pd
import glob

# Env

In [2]:
def env_creator(render_mode="rgb_array", cycles=200):
    """Create a fresh environment via ``src.world.world_utils.env``.

    Args:
        render_mode: Forwarded unchanged as ``render_mode``.
        cycles: Forwarded as ``max_cycles``.

    Returns:
        Whatever ``world_utils.env`` returns.
    """
    # Imported lazily, exactly as before, so the project module is only
    # loaded when an environment is actually requested.
    from src.world import world_utils as _world_utils

    return _world_utils.env(max_cycles=cycles, render_mode=render_mode)

# Model

In [3]:
def build_model(env):
    """Build the convolutional Q-network for ``env``.

    Reads the module-level ``FRAMES`` constant (number of stacked frames) to
    size the input.

    Args:
        env: Environment exposing ``observation_space.shape`` as a
            ``(height, width, channels)`` triple, ``action_space.n`` and
            ``unwrapped.get_action_meanings()``.

    Returns:
        An uncompiled ``Sequential`` model with one output per action.
    """
    height, width, channels = env.observation_space.shape
    actions = env.action_space.n

    print(height, width, channels)
    print(actions)
    print(env.unwrapped.get_action_meanings())

    # 34,812,326 parameters
    model = Sequential()
    model.add(Convolution2D(32, (8, 8), strides=(4, 4), activation='relu',
                            input_shape=(FRAMES, height, width, channels)))
    model.add(Convolution2D(64, (4, 4), strides=(2, 2), activation='relu'))
    model.add(Convolution2D(64, (3, 3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    # Q-values are unbounded action-value estimates, so the head must be
    # linear; a softmax would squash them into a probability distribution.
    model.add(Dense(actions, activation='linear'))
    return model

# Agent

In [4]:
def build_agent(model, actions):
    """Wrap ``model`` in a compiled dueling ``DQNAgent``.

    The exploration policy is selected by the module-level ``POLICY`` string;
    ``NB_STEPS``, ``FRAME_LIMIT``, ``FRAMES``, ``NB_STEPS_WARMUP`` and
    ``LEARNING_RATE`` are likewise read from module scope.

    Args:
        model: Keras model producing one output per action.
        actions: Number of discrete actions.

    Returns:
        The compiled agent.

    Raises:
        ValueError: If ``POLICY`` is not one of the supported names.
    """
    if POLICY == "boltzman":
        policy = BoltzmannQPolicy()
    elif POLICY == "max":
        policy = MaxBoltzmannQPolicy()
    elif POLICY == "annealed":
        policy = LinearAnnealedPolicy(
            EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1,
            value_test=.2, nb_steps=NB_STEPS)
    elif POLICY == "greedy":
        policy = EpsGreedyQPolicy()
    else:
        # Name the bad value so a typo in the config is easy to spot.
        raise ValueError(
            f"Unknown POLICY {POLICY!r}; expected one of "
            "'boltzman', 'max', 'annealed', 'greedy'")

    memory = SequentialMemory(limit=FRAME_LIMIT, window_length=FRAMES)
    agent = DQNAgent(model=model, memory=memory, policy=policy,
                     enable_dueling_network=True, dueling_type='avg',
                     nb_actions=actions, nb_steps_warmup=NB_STEPS_WARMUP)
    # `lr` is a deprecated alias of `learning_rate` in tf.keras optimizers.
    agent.compile(Adam(learning_rate=LEARNING_RATE), metrics=['mae'])
    return agent

def load_agent(env, filename):
    """Build a fresh agent for ``env`` and restore its weights.

    Args:
        env: Environment passed to ``build_model`` and queried for
            ``action_space.n``.
        filename: Weights path handed to ``agent.load_weights``.

    Returns:
        The agent with the stored weights loaded.
    """
    network = build_model(env)
    n_actions = env.action_space.n
    restored = build_agent(network, n_actions)
    restored.load_weights(filename)
    return restored

def train_agent(env, vis=False, agent=None):
    """Train a DQN agent on ``env`` and save its weights and training log.

    Output paths are derived from the module-level ``MODEL_TYPE`` and
    ``POLICY`` (plus ``CONT_PREFIX`` when continuing) and live under
    ``agents/``. Training runs for the module-level ``NB_STEPS``.

    Args:
        env: Environment to train on.
        vis: Forwarded to ``agent.fit`` as ``visualize``.
        agent: Existing agent to keep training. When ``None`` a new model and
            agent are built for ``env``.

    Returns:
        The trained agent.
    """
    from os import makedirs

    if agent is None:
        # Only build a network when we actually need a new agent; a supplied
        # agent already owns its model.
        agent = build_agent(build_model(env), env.action_space.n)
        cont_prefix = ""
    else:
        cont_prefix = CONT_PREFIX

    # Checkpoints and the log are written during training, so make sure the
    # target directory exists before starting a long run.
    makedirs("agents", exist_ok=True)

    stem = f'agents/dqn_{cont_prefix}{MODEL_TYPE}_{POLICY}'
    weights_filename = stem + '_weights.h5f'
    # `{step}` must stay a literal placeholder: the checkpoint callback is
    # given this template and an interval, so it is not formatted here.
    checkpoint_weights_filename = stem + '_weights_{step}.h5f'
    log_filename = stem + '_log.json'

    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=5000),
        FileLogger(log_filename, interval=100),
    ]

    agent.fit(env, callbacks=callbacks, nb_steps=NB_STEPS,
              visualize=vis, verbose=1, log_interval=1000)
    agent.save_weights(weights_filename, overwrite=True)
    return agent

def continue_train_agent(env, filename, vis=False):
    """Resume training from the weights stored at ``filename``.

    Args:
        env: Environment to train on.
        filename: Weights path passed to ``load_agent``.
        vis: Forwarded to ``train_agent``.

    Returns:
        The agent returned by ``train_agent``.
    """
    return train_agent(env, vis=vis, agent=load_agent(env, filename))

# Play

In [5]:
def evaluate_agent(env, agent, eps=20, vis=False):
    """Run test episodes and print the mean episode reward.

    Args:
        env: Environment passed to ``agent.test``.
        agent: Object exposing ``test(env, nb_episodes=..., visualize=...)``.
        eps: Number of test episodes.
        vis: Forwarded as ``visualize``.

    Returns:
        The object returned by ``agent.test`` (its ``history`` dict holds the
        ``'episode_reward'`` list that is averaged here).
    """
    test_history = agent.test(env, visualize=vis, nb_episodes=eps)
    episode_rewards = test_history.history['episode_reward']
    print(np.mean(episode_rewards))
    return test_history

def record_video(eps=50):
    """Play back the next not-yet-recorded agent from ``ranking_path.csv``.

    Walks the ``path`` column of ``ranking_path.csv`` in order, skips entries
    already listed in ``recorded.csv``, evaluates the first remaining one with
    visualisation on, appends it to ``recorded.csv`` and stops. At most one
    agent is processed per call.

    An entry is still marked as recorded when playback is interrupted with
    Ctrl-C, but not when loading or evaluation raises another exception.

    Args:
        eps: Number of episodes to play.
    """
    rank_df = pd.read_csv("ranking_path.csv")
    recorded_df = pd.read_csv("recorded.csv")
    already = set(recorded_df['path'])

    for item in rank_df['path']:
        # Check before creating the env so skipped entries do not leave an
        # unclosed environment behind.
        if item in already:
            continue

        add_to_file = True
        # NOTE(review): neither `gym` nor `ENV_NAME` is defined in the visible
        # notebook -- confirm they exist before running this cell.
        env = gym.make(ENV_NAME)
        try:
            agent = load_agent(env, item)
            evaluate_agent(env, agent, eps=eps, vis=True)
        except KeyboardInterrupt:
            print("Stopped")
        except Exception as e:
            print(e)
            add_to_file = False
        finally:
            # Always release the env, whatever happened during playback.
            env.close()

        print("File: ", item)
        if add_to_file:
            new_recorded_df = pd.DataFrame({'path': [item], 'done': [True]})
            new_recorded_df.to_csv("recorded.csv", index=False, mode='a', header=False)
        break


# Tests

In [6]:
def run_visual_tests(env, eps=10):
    """Watch ranked agents one by one and collect free-text notes.

    Iterates the ``path`` column of ``ranking_path.csv``, skipping entries
    already present in ``notes.csv``. Each agent is evaluated with
    visualisation on; pressing Ctrl-C during playback stops it and prompts for
    a note. After each agent the user is asked whether to continue.

    Notes gathered so far are appended to ``notes.csv`` however the function
    exits, including an error or an interrupt at one of the prompts.

    Args:
        env: Environment used to load and evaluate each agent.
        eps: Number of episodes to play per agent.
    """
    rank_df = pd.read_csv("ranking_path.csv")
    note_df = pd.read_csv("notes.csv")
    already = set(note_df['path'])

    path = []
    note = []

    try:
        for item in rank_df['path']:
            if item in already:
                continue
            print("File: ", item)
            try:
                agent = load_agent(env, item)
                evaluate_agent(env, agent, eps=eps, vis=True)
            except KeyboardInterrupt:
                print("Stopped")
                # Read the note first so both lists stay the same length even
                # if the prompt itself is interrupted.
                entered = input("Enter notes: ")
                path.append(item)
                note.append(entered)
            except Exception as e:
                print(e)
                return

            if input('Do You Want To Continue? ') != 'y':
                break
    finally:
        # Single persistence point for every exit path.
        if path:
            new_note_df = pd.DataFrame({'path': path, 'note': note})
            new_note_df.to_csv("notes.csv", index=False, mode='a', header=False)

def run_tests(env, eps=20, vis=False):
    """Evaluate every saved agent under ``./agents`` and log to ``results.csv``.

    Each ``*.h5f.index`` file found identifies one set of weights. The test
    history of every agent is appended to ``results.csv`` with a ``model``
    column holding the weights path; the header is written only when the file
    does not exist yet.

    Args:
        env: Environment used to load and evaluate each agent.
        eps: Number of test episodes per agent.
        vis: Forwarded to ``evaluate_agent``.
    """
    index_suffix_len = len(".index")
    weight_paths = [
        index_path[:-index_suffix_len]
        for index_path in glob.glob("./agents/*.h5f.index")
    ]

    for filename in weight_paths:
        print(filename)
        agent = load_agent(env, filename)
        outcome = evaluate_agent(env, agent, eps=eps, vis=vis)
        frame = pd.DataFrame(outcome.history)
        frame["model"] = filename
        # Appending to a missing file creates it, so only the header differs.
        needs_header = not exists("results.csv")
        frame.to_csv("results.csv", index=False, mode='a', header=needs_header)

def run_tests_new(env, eps=30, vis=False):
    """Evaluate agents under ``./agents/Done`` that are not in ``results.csv``.

    Weights are discovered recursively via their ``*.h5f.index`` files. Each
    one is named by the part of its path after ``Done`` and its separator, minus
    the ``.h5f`` suffix. Names already present in the ``model`` column of
    ``results.csv`` are skipped; new results are appended to that file.

    Args:
        env: Environment used to load and evaluate each agent.
        eps: Number of test episodes per agent.
        vis: Forwarded to ``evaluate_agent``.
    """
    if exists("results.csv"):
        already = pd.read_csv("results.csv")['model'].unique()
    else:
        already = []

    models = [
        index_path[:-6]
        for index_path in glob.glob('./agents/Done/**/*.h5f.index', recursive=True)
    ]
    names = [weights[weights.index('Done') + 5:-4] for weights in models]

    for weights_path, label in zip(models, names):
        if label in already:
            continue
        agent = load_agent(env, weights_path)
        print(label)
        outcome = evaluate_agent(env, agent, eps=eps, vis=vis)
        frame = pd.DataFrame(outcome.history)
        frame["model"] = label
        # Appending to a missing file creates it, so only the header differs.
        needs_header = not exists("results.csv")
        frame.to_csv("results.csv", index=False, mode='a', header=needs_header)


# Visualize

In [7]:
def combine_logs():
    """Merge every JSON training log in ``./agents/final`` into ``training.csv``.

    Each log is loaded into a DataFrame and tagged with a ``model`` column
    holding its file path; the frames are concatenated and written to
    ``training.csv``.

    Raises:
        FileExistsError: If ``training.csv`` already exists (it is never
            overwritten).
        ValueError: If no JSON logs are found.
    """
    if exists("training.csv"):
        raise FileExistsError(
            "training.csv already exists; remove it to rebuild the combined log")

    # Sorted so the row order of the output does not depend on directory
    # listing order.
    log_paths = sorted(glob.glob("./agents/final/*.json"))
    if not log_paths:
        # pd.concat would raise the same exception type with a message that
        # does not mention the directory.
        raise ValueError("No JSON training logs found in ./agents/final")

    results = []
    for filename in log_paths:
        with open(filename, 'r') as f:
            df = pd.DataFrame(json.load(f))
        df["model"] = filename
        results.append(df)

    results_df = pd.concat(results, ignore_index=True)
    results_df.to_csv("training.csv")


def visualize_log(filename, figsize=None, output=None):
    """Plot every metric in a JSON training log against the episode number.

    One subplot is drawn per key other than ``'episode'``, stacked vertically
    with a shared x axis.

    Args:
        filename: Path to a JSON log containing an ``'episode'`` key.
        figsize: Figure size; defaults to 15 wide by 5 per metric.
        output: If given, the figure is saved to this path instead of shown.

    Raises:
        ValueError: If the log has no ``'episode'`` key or no metric to plot.
    """
    with open(filename, 'r') as log_file:
        data = json.load(log_file)
    if 'episode' not in data:
        raise ValueError(
            f'Log file "{filename}" does not contain the "episode" key.')
    episodes = data['episode']

    # Get value keys. The x axis is shared and is the number of episodes.
    keys = sorted(set(data.keys()).difference({'episode'}))
    if not keys:
        raise ValueError(
            f'Log file "{filename}" contains no metrics besides "episode".')

    if figsize is None:
        figsize = (15., 5. * len(keys))
    # squeeze=False always yields a 2-D array of axes; without it a single
    # metric returns a bare Axes object that cannot be indexed.
    fig, axarr = plt.subplots(len(keys), sharex=True, figsize=figsize,
                              squeeze=False)
    for ax, key in zip(axarr[:, 0], keys):
        ax.plot(episodes, data[key])
        ax.set_ylabel(key)
    axarr[-1, 0].set_xlabel('episodes')
    plt.tight_layout()
    if output is None:
        plt.show()
    else:
        plt.savefig(output)
# Main

In [9]:
# Name of the action the dispatch cell further down should run ("train",
# "continue", "eval", "plot", "test", "testnew", "vtest", "record", "combine").
# The empty string matches none of them.
run_main = ""
# Build the environment with env_creator's defaults
# (render_mode="rgb_array", cycles=200).
env = env_creator()

In [14]:
# Inspect the per-agent observation spaces. The output below shows one flat
# Box of shape (34,) per agent, whereas build_model unpacks a single
# (height, width, channels) triple from `env.observation_space.shape`.
env.observation_spaces

{'adversary_0': Box(-inf, inf, (34,), float32),
 'adversary_1': Box(-inf, inf, (34,), float32),
 'adversary_2': Box(-inf, inf, (34,), float32),
 'agent_0': Box(-inf, inf, (34,), float32)}

In [32]:
# Check the action-space type for one agent (shown below to be a gymnasium
# Discrete space).
type(env.action_spaces['agent_0'])

gymnasium.spaces.discrete.Discrete

In [None]:
#height, width, channels = env.observation_space.shape
#actions = env.action_space.n

In [None]:
# Dispatch on `run_main`. The branches were previously indented inconsistently
# (body at 8 spaces, `elif`/`else` at 4), which made the whole cell an
# IndentationError.
# NOTE(review): `args` (with `.vis` and `.file`) is not defined anywhere in the
# visible notebook; every branch except "vtest", "record" and "combine" needs
# it -- define it or replace it with notebook-level settings before use.
if run_main == "train":
    agent = train_agent(env, vis=args.vis)
elif run_main == "continue":
    agent = continue_train_agent(env, args.file, vis=args.vis)
elif run_main == "eval":
    agent = load_agent(env, args.file)
    # agent = load_agent(env,'./agents/dqn_Qbert-v0_weights_1000.h5f')
    evaluate_agent(env, agent, vis=args.vis)
elif run_main == "plot":
    visualize_log(args.file)
elif run_main == "test":
    run_tests(env, eps=1, vis=args.vis)
elif run_main == "testnew":
    run_tests_new(env, eps=30, vis=args.vis)
elif run_main == "vtest":
    run_visual_tests(env)
elif run_main == "record":
    record_video()
elif run_main == "combine":
    combine_logs()
else:
    print("Incorrect args")

In [None]:
env