# AI that cannot win Connect-Four

In [None]:
# Install Connect-Four environment and training module.
# The lines below are IPython shell/magic commands ("!" and "%"), so this
# cell only works inside a notebook session, not as a plain Python script.
# 1. Clone the project repository into the current working directory.
!git clone https://github.com/nicolas-chan-42/COMP-3359-Project-Deep-Q-RL.git
# 2. Install the checkout in editable mode.
!pip install -e ./COMP-3359-Project-Deep-Q-RL
# 3. Make the checkout the working directory for the rest of the notebook.
#    NOTE(review): the relative directories used later ("saved_models",
#    "saved_plots") are presumably resolved from here -- confirm.
%cd ./COMP-3359-Project-Deep-Q-RL

## Import modules

In [None]:
import os

import gym
import numpy as np

# Must be put before any tensorflow import statement.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

from losing_connect_four.deep_q_networks import PlaceholderSgdDqn, Simple512SgdDqn
from losing_connect_four.player import RandomPlayer, DeepQPlayer, Player
from losing_connect_four.training import (
    train_one_episode, pretrain_v2, load_model_to_players,
    Record, plot_records, create_plot_list, save_model_from_player,
)

---
## Hyper-parameters

In [None]:
"""Hyper-parameters"""
PARAMS = {
    # Id of the Gym environment; read when the environment is created.
    "ENV_NAME": "ConnectFour-v1",

    # Learner settings. Their exact use lives in the project's player and
    # training code; presumably learning rate, replay-buffer capacity and
    # mini-batch size -- confirm there.
    "LR": 1e-3,
    "REPLAY_BUFFER_MAX_LENGTH": 100_000,
    "BATCH_SIZE": 32,

    # Exploration schedule (presumably epsilon-greedy: start value, final
    # value and number of decay steps -- confirm in DeepQPlayer).
    "EPS_START": 1,
    "EPS_END": 0.01,
    "EPS_DECAY_STEPS": 10_000,

    # Presumably the reward discount factor -- confirm.
    "GAMMA": 0.95,

    # Number of episodes the main loops of this notebook iterate over.
    "N_EPISODES": 1_000,

    "EPOCHS_PER_LEARNING": 2,
    "EPOCHS_PER_PRETRAIN_LEARNING": 2,
    "N_STEPS_PER_TARGET_UPDATE": 1_000,

    # Pre-training is run before the main loop only when this is truthy.
    "PRETRAIN": True,
    "PRETRAIN_UTILISATION_RATE": 0.95,
}

## Training Configurations

In [None]:
"""Configurations"""
# Run configuration: where models and plots are stored, which saved models
# to load, and how often the loops print a progress summary.
CONFIG = {
    # Please use "/" only for filepath and directory paths.
    # Use None as placeholder.
    "MODEL_DIR": "saved_models",  # Input directory path here.
    # Two entries; the evaluation section puts the trainee's model in the
    # first slot, so the order presumably follows the player ids -- confirm
    # in load_model_to_players.
    "LOAD_MODEL": [None, None],  # Input filename here.
    # The evaluation section aborts while this is still None.
    "SAVE_MODEL": None,  # Input filename here
    # The training/evaluation loops print a summary every this many episodes.
    "N_EPISODE_PER_PRINT": 100,
    "SAVE_PLOT": "saved_plots",  # Input directory path here.
}

---
## Setup
### Gym Connect-Four Environment

In [None]:
"""Set-up Environment"""
# Status message without a trailing newline; the "\r" at the start of the
# follow-up message moves back to the beginning of the same line.
print("\rMaking Connect-Four Gym Environment...", end="")
# Create the Gym environment named by PARAMS["ENV_NAME"]; `env` is shared by
# every later cell (players, pre-training, training and evaluation).
env = gym.make(PARAMS["ENV_NAME"])
print("\rConnect-Four Gym Environment Made")

### Players

In [None]:
"""Setup Players"""
# Player 1: deep-Q player backed by a Simple512SgdDqn built with momentum=0.
player1: Player = DeepQPlayer(env, PARAMS, Simple512SgdDqn(momentum=0))
# Player 2: random opponent with a fixed seed.
player2: Player = RandomPlayer(env, seed=3407)
# Lookup table keyed by player id (1 and 2); "trainee_id" names the id of the
# player that is pre-trained in the next step.
players = {1: player1, 2: player2,
           "trainee_id": 1}

---
## Model Preparations Before Training
### Pre-train model

In [None]:
"""Pre-train Player"""
# Pre-training is optional: it runs only when PARAMS["PRETRAIN"] is present
# and truthy, and it is applied to the player selected by "trainee_id".
if PARAMS.get("PRETRAIN"):
    # noinspection PyTypeChecker
    pretrain_v2(env, PARAMS, players[players["trainee_id"]])

### Load trained model

In [None]:
"""Load the saved player if requested"""
# Which models get loaded is driven by CONFIG (see "LOAD_MODEL" and
# "MODEL_DIR"). NOTE(review): with the default [None, None] this presumably
# loads nothing -- confirm in load_model_to_players.
load_model_to_players(CONFIG, PARAMS, players)

---
## Prepare record storage

In [None]:
"""Logging"""
# Running totals for the training loop, all starting at zero: steps taken,
# summed episode rewards and number of episodes flagged as losses.
total_step = total_reward = total_losses = 0

# Per-episode histories, one for rewards and one for the loss flag.
reward_records = Record(
    PARAMS, CONFIG, name="Rewards", dtype=np.int32)
loss_records = Record(
    PARAMS, CONFIG, name="Losses", dtype=np.int32)

---
## Training

In [None]:
"""Main training loop"""
print(f"Training through {PARAMS['N_EPISODES']} episodes")
print("-" * 30)

for episode in range(PARAMS["N_EPISODES"]):
    # One-line progress indicator, redrawn in place through the leading "\r".
    print(f"\rIn episode {episode + 1}", end="")

    # Play one episode. The helper returns the reward of that episode together
    # with the updated running step counter.
    episode_reward, total_step = train_one_episode(
        env, PARAMS, players, total_step)

    # An episode is flagged as a "loss" exactly when its reward is positive.
    episode_loss = 1 if episode_reward > 0 else 0

    # Update the running totals.
    total_reward += episode_reward
    total_losses += episode_loss

    # Store the per-episode values in their histories.
    reward_records.add_record(episode, record=episode_reward)
    loss_records.add_record(episode, record=episode_loss)

    # Only every N_EPISODE_PER_PRINT-th episode gets a printed summary.
    if (episode + 1) % CONFIG["N_EPISODE_PER_PRINT"] != 0:
        continue

    # NOTE(review): this header is shorter than the progress line it
    # overwrites; whether stale trailing characters stay visible depends on
    # how the frontend renders "\r" -- confirm.
    print(f"\rEpisode: {episode + 1}")
    print(f"Total Steps: {total_step}")
    print("-" * 25)
    reward_records.print_info(episode)  # Reward summary.
    print("-" * 25)
    loss_records.print_info(episode)  # Losses summary.
    print("=" * 25)

### Print Training statistics

In [None]:
# Print training information.
# The summary lines are assembled first and emitted with one print call; the
# two averages are per-episode means over PARAMS['N_EPISODES'].
print("\n".join((
    "\rIn the end of training,",
    f"Total Steps: {total_step}",
    f"Total Reward: {total_reward}",
    f"Average Reward: {total_reward / PARAMS['N_EPISODES']}",
    f"Total Number of Losses: {total_losses}",
    f"Average Number of Losses: {total_losses / PARAMS['N_EPISODES']}",
    "=" * 30,
)))

### Visualisations

In [None]:
"""Visualize the training results"""
# Turn the two training histories (rewards and losses) into a plot list and
# hand it to the plotting helper together with CONFIG.
# NOTE(review): CONFIG["SAVE_PLOT"] presumably tells plot_records where to
# write the figures -- confirm.
plot_list = create_plot_list([reward_records, loss_records])
plot_records(CONFIG, plot_list)

---
## Save model & summary

In [None]:
"""Save Trained Models and Summary"""
# Saving is driven by CONFIG (see "SAVE_MODEL" and "MODEL_DIR").
# NOTE(review): with "SAVE_MODEL" left as None this presumably writes
# nothing -- confirm in save_model_from_player. The evaluation section
# refuses to run while "SAVE_MODEL" is None.
save_model_from_player(CONFIG, PARAMS, players)

---
---

## Evaluation

In [None]:
# Evaluation reloads the model named by CONFIG["SAVE_MODEL"]. When no filename
# was configured there is nothing to load, so stop the notebook run here.
if CONFIG.get("SAVE_MODEL") is None:
    raise SystemExit(
        "No model was saved after training; nothing to evaluate!")

---
## Setup
### Setting changes

In [None]:
# Number of episodes to run in the evaluation loop.
PARAMS["N_EPISODES"] = 1000
# Load the model saved by the training section into the first slot and nothing
# into the second. NOTE(review): slot order presumably follows the player
# ids (1 = deep-Q player, 2 = random player) -- confirm.
CONFIG["LOAD_MODEL"] = [CONFIG["SAVE_MODEL"], None]

### Players

In [None]:
# Fresh players for evaluation; these rebind the training-time names.
# Player 1 is created with is_eval=True on a PlaceholderSgdDqn.
# NOTE(review): the placeholder network is presumably replaced when the saved
# model is loaded in the next step -- confirm.
player1: Player = DeepQPlayer(env, PARAMS, PlaceholderSgdDqn(momentum=0),
                              is_eval=True)
# Random opponent with a seed different from the one used during training.
player2: Player = RandomPlayer(env, seed=2119)
# Same layout as in training: ids 1 and 2 plus the id of the trainee.
players = {1: player1, 2: player2,
           "trainee_id": 1}

---
## Model Preparations Before Evaluation
### Load Trained Model

In [None]:
"""Load the saved player if requested"""
# CONFIG["LOAD_MODEL"] was pointed at the saved training model just above, so
# this call is what brings the trained model into the evaluation players.
load_model_to_players(CONFIG, PARAMS, players)

---
### Prepare Record Storage

In [None]:
"""Logging"""
# Reset the running totals (steps, summed rewards, loss count) so the
# evaluation statistics start from zero.
total_step = total_reward = total_losses = 0

# Separate per-episode histories for the evaluation run.
eval_reward_records = Record(
    PARAMS, CONFIG, name="Evaluation Rewards", dtype=np.int32)
eval_loss_records = Record(
    PARAMS, CONFIG, name="Evaluation Losses", dtype=np.int32)

---
## Evaluation

In [None]:
"""Main evaluation loop"""
print(f"Evaluating through {PARAMS['N_EPISODES']} episodes")
print("-" * 30)

for episode in range(PARAMS["N_EPISODES"]):
    # One-line progress indicator, redrawn in place through the leading "\r".
    print(f"\rIn evaluation episode {episode + 1}", end="")

    # Play one evaluation episode. The training helper is reused here.
    # NOTE(review): player 1 was built with is_eval=True, which presumably
    # keeps it from learning during these episodes -- confirm.
    episode_reward, total_step = train_one_episode(
        env, PARAMS, players, total_step)

    # An episode is flagged as a "loss" exactly when its reward is positive.
    episode_loss = 1 if episode_reward > 0 else 0

    # Update the running totals.
    total_reward += episode_reward
    total_losses += episode_loss

    # Store the per-episode values in their histories.
    eval_reward_records.add_record(episode, record=episode_reward)
    eval_loss_records.add_record(episode, record=episode_loss)

    # Only every N_EPISODE_PER_PRINT-th episode gets a printed summary.
    if (episode + 1) % CONFIG["N_EPISODE_PER_PRINT"] != 0:
        continue

    # NOTE(review): this header is shorter than the progress line it
    # overwrites; whether stale trailing characters stay visible depends on
    # how the frontend renders "\r" -- confirm.
    print(f"\rEvaluation Episode: {episode + 1}")
    print(f"Total Steps: {total_step}")
    print("-" * 25)
    eval_reward_records.print_info(episode)  # Reward summary.
    print("-" * 25)
    eval_loss_records.print_info(episode)  # Losses summary.
    print("=" * 25)

### Print Evaluation statistics

In [None]:
# Print evaluation information.
# The summary lines are assembled first and emitted with one print call; the
# two averages are per-episode means over PARAMS['N_EPISODES'].
print("\n".join((
    "\rIn the end of evaluation,",
    f"Total Steps: {total_step}",
    f"Total Reward: {total_reward}",
    f"Average Reward: {total_reward / PARAMS['N_EPISODES']}",
    f"Total Number of Losses: {total_losses}",
    f"Average Number of Losses: {total_losses / PARAMS['N_EPISODES']}",
    "=" * 30,
)))

### Visualisations

In [None]:
"""Visualize the training results"""
# These are the EVALUATION histories (eval_reward_records and
# eval_loss_records), although the cell label above still says "training".
# `plot_list` is rebound here, replacing the training plot list.
plot_list = create_plot_list([eval_reward_records, eval_loss_records])
plot_records(CONFIG, plot_list)

---
---
## Colab Data (model & plots) downloads

In [None]:
"""Download files"""
# Optional, Google Colab only: uncomment the lines below to download the
# saved model files and a zip archive of the plot directory.
# NOTE(review): the .txt/.h5/.json files are presumably the ones written by
# save_model_from_player -- confirm the extensions before relying on this.
# model_name = CONFIG["SAVE_MODEL"]
# from google.colab import files
# files.download(f'saved_models/{model_name}.txt')
# files.download(f'saved_models/{model_name}.h5')
# files.download(f'saved_models/{model_name}.json')
# !zip -r "saved_plots/z.zip" "saved_plots"
# files.download("saved_plots/z.zip")