# AI in Games, _Reinforcement Learning_<br>Assignment 2, Main (i.e. running all the functions)

## Preparing the context
The following preparations and imports are needed to run and test the main code of this document in the intended context. First, mounting the Google Drive directory and setting the present working directory...

In [5]:
# Mounting the Google Drive folder (run if necessary).
# `force_remount=True` is passed so the mount is redone even if the drive
# was already mounted earlier in the session:
from google.colab import drive
drive.mount('/content/drive/', force_remount=True)
# Saving the present working directory's path; it is later used as the
# search path when loading the assignment notebooks as modules.
# NOTE: Change `pwd` based on your own Google Drive organisation
# NOTE(review): this is a relative path, so it only resolves to the mounted
# drive when the process's current directory is `/content` -- presumably the
# Colab default; confirm if running elsewhere.
pwd = "./drive/MyDrive/ColabNotebooks/AIG-Labs/AIG-Assignment2/"

Mounted at /content/drive/


Installing the module `import_ipynb`, which enables importing Jupyter notebooks as modules...

`!pip install import_ipynb`

Importing the code in notebook `Q1_environment.ipynb`...




In [6]:
import import_ipynb
# Loader restricted to the assignment folder (`pwd`), since the notebooks do
# not live on the default module search path.
N = import_ipynb.NotebookLoader(path=[pwd])
# Load each question's notebook explicitly, in order (Q1 first).
# NOTE(review): presumably `load_module` registers the loaded notebook under
# its name so the `from ... import *` lines below can resolve it -- confirm
# against the import_ipynb documentation.
N.load_module("Q1_environment")
N.load_module("Q2_tabularModelBasedMethods")
N.load_module("Q3_tabularModelFreeMethods")
N.load_module("Q4_nonTabularModelFreeMethods")
N.load_module("Q5_deepReinforcementLearning")
# Pull every public name of the loaded notebooks into this namespace; these
# star-imports are where `main` gets its environment classes and learning
# functions from.
from Q1_environment import *
from Q2_tabularModelBasedMethods import *
from Q3_tabularModelFreeMethods import *
from Q4_nonTabularModelFreeMethods import *
from Q5_deepReinforcementLearning import *

importing Jupyter notebook from ./drive/MyDrive/ColabNotebooks/AIG-Labs/AIG-Assignment2/Q1_environment.ipynb
importing Jupyter notebook from ./drive/MyDrive/ColabNotebooks/AIG-Labs/AIG-Assignment2/Q2_tabularModelBasedMethods.ipynb
importing Jupyter notebook from ./drive/MyDrive/ColabNotebooks/AIG-Labs/AIG-Assignment2/Q3_tabularModelFreeMethods.ipynb
importing Jupyter notebook from ./drive/MyDrive/ColabNotebooks/AIG-Labs/AIG-Assignment2/Q4_nonTabularModelFreeMethods.ipynb
importing Jupyter notebook from ./drive/MyDrive/ColabNotebooks/AIG-Labs/AIG-Assignment2/Q5_deepReinforcementLearning.ipynb


Other necessary imports...

In [7]:
import numpy as np

## Main function

In [22]:
def main():
    """Run every algorithm of the assignment on the small frozen lake.

    Builds one `FrozenLake` environment and then runs, in order: policy
    iteration, value iteration, SARSA, Q-learning, linear SARSA, linear
    Q-learning and deep Q-network learning. After each algorithm a header
    is printed and the resulting policy and value are rendered.

    All environment classes and learning functions used here come from the
    star-imported question notebooks (Q1-Q5).

    Takes no arguments and returns nothing; its only effect is the printed
    and rendered output.
    """
    # Preliminary definitions:
    seed = 0
    lake = LAKE['small']
    env = FrozenLake(lake, slip=0.1, max_steps=16, seed=seed)
    gamma = 0.9
    max_episodes = 4000
    H1 = '\n================================================\n'
    H2 = '------------------------------------\n'

    #================================================

    print(f'{H1}Model-based algorithms')
    # ARGUMENTS:
    # - Environment
    # - Discount factor
    # - Error margin for convergence of value function
    # - Maximum iterations

    print(f'{H2}Policy iteration')
    args = [env, gamma, 0.001, 128]

    # The third returned value is not used here.
    policy, value, _ = policy_iteration(*args)
    env.render(policy, value)

    print(f'{H2}Value iteration')
    policy, value, _ = value_iteration(*args)
    env.render(policy, value)

    #================================================

    print(f'{H1}Tabular model-free algorithms')
    # ARGUMENTS:
    # - Environment
    # - Maximum episodes to iterate over (1 episode ==> play until absorbed)
    # - Initial learning rate
    # - Discount factor
    # - Exploration factor (epsilon)
    # - Pseudorandom number generator seed
    args = [env, max_episodes, 0.5, gamma, 1.0, seed]

    print(f'{H2}SARSA')
    # The random state is reset before every learner so each one starts
    # from the same environment randomness.
    env.resetRandomState()
    policy, value = sarsa(*args)
    env.render(policy, value)

    print(f'{H2}Q-learning')
    env.resetRandomState()
    policy, value = q_learning(*args)
    env.render(policy, value)

    #================================================

    print(f'{H1}Non-tabular model-free algorithms')
    # NOTE: Except for environment, all arguments are the same as before
    # The wrapper is bound to a local name so that decoding and rendering
    # use exactly the environment the learners were trained on (previously
    # `linear_env` was referenced without being defined in this function).
    linear_env = LinearWrapper(env)
    args[0] = linear_env

    print(f'{H2}Linear SARSA')
    env.resetRandomState()
    parameters = linear_sarsa(*args)
    policy, value = linear_env.decode_policy(parameters)
    linear_env.render(policy, value)

    print(f'{H2}Linear Q-learning')
    env.resetRandomState()
    # Decode the parameters learnt by linear Q-learning itself (previously
    # the result was stored in `params` while the stale linear SARSA
    # `parameters` were decoded and rendered again).
    parameters = linear_q_learning(*args)
    policy, value = linear_env.decode_policy(parameters)
    linear_env.render(policy, value)

    #================================================

    print(f'{H1}Deep Q-network learning')
    image_env = FrozenLakeImageWrapper(env)
    # ARGUMENTS:
    # NOTE(review): the DQN seed is the literal 4, not `seed`; kept as in
    # the original since it looks deliberate -- confirm.
    args = [image_env,                   # Wrapped environment
            max_episodes,                # Maximum episodes
            0.001,                       # Learning rate
            gamma,                       # Discount factor
            0.2,                         # Exploration factor (epsilon)
            32,                          # Batch size (random sample size)
            4,                           # Target update frequency
            256,                         # Replay buffer size
            3,                           # Kernel size
            4,                           # Convolution layer output channels
            8,                           # Fully-connected layer output features
            4]                           # Pseudorandom number generator seed

    env.resetRandomState()
    dqn = deep_q_network_learning(*args)
    policy, value = image_env.decode_policy(dqn)
    image_env.render(policy, value)

# Run the main function if the current file is the file being executed:
if __name__ == '__main__': main()


Model-based algorithms
------------------------------------
Policy iteration
Lake:
[['&' '.' '.' '.']
 ['.' '#' '.' '#']
 ['.' '.' '.' '#']
 ['#' '.' '.' '$']]
Policy:
[['_' '>' '_' '<']
 ['_' '^' '_' '^']
 ['>' '>' '_' '^']
 ['^' '>' '>' '^']]
Value:
[[0.403 0.469 0.552 0.480]
 [0.472 0.000 0.637 0.000]
 [0.555 0.654 0.751 0.000]
 [0.000 0.737 0.867 1.000]]
------------------------------------
Value iteration
Lake:
[['&' '.' '.' '.']
 ['.' '#' '.' '#']
 ['.' '.' '.' '#']
 ['#' '.' '.' '$']]
Policy:
[['_' '>' '_' '<']
 ['_' '^' '_' '^']
 ['>' '_' '_' '^']
 ['^' '>' '>' '^']]
Value:
[[0.455 0.504 0.579 0.505]
 [0.508 0.000 0.653 0.000]
 [0.584 0.672 0.768 0.000]
 [0.000 0.771 0.887 1.000]]

Tabular model-free algorithms
------------------------------------
SARSA
Lake:
[['&' '.' '.' '.']
 ['.' '#' '.' '#']
 ['.' '.' '.' '#']
 ['#' '.' '.' '$']]
Policy:
[['_' '>' '_' '<']
 ['_' '^' '_' '^']
 ['>' '_' '_' '^']
 ['^' '>' '>' '^']]
Value:
[[0.411 0.361 0.451 0.306]
 [0.463 0.000 0.559 0.000