# Training Colab
This notebook is "Colab ready": link the GitHub repository in Google Colab and run it on a Colab GPU to speed up training.

---
## Imports and Setup

In [None]:
# Load the rest of the repo: clone the Backgammon repository into the current
# working directory (IPython `!` shell escape) so that the cells below can add
# ./Backgammon to sys.path and import its modules.
! git clone https://github.com/kel89/Backgammon.git

In [None]:
# Import the self-made modules from the cloned repo
import sys
sys.path.append("./Backgammon")
from backgammon import *
from RandomPlayer import *
from PlayerANN import *

In [3]:
import os

# Set the flag before the numeric libraries are imported in the next cell.
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# abort seen on some local installs - confirm it is still needed.
os.environ.update({'KMP_DUPLICATE_LIB_OK': 'True'})

In [2]:
# Import standard packages
from tqdm.notebook import tqdm
import numpy as np
import tensorflow as tf

# Self-made packages for local use
from backgammon import *
from RandomPlayer import *
from PlayerANN import *

---
## Game Setup and Helpers

In [None]:
def get_player_boards(obj):
    """
    Split the board history of one game between the two players.

    Parameters
    ----------
    obj : dict
        Game output dictionary. The keys read here are:
        'starting_player' and 'winner' (objects with a `.which` string
        attribute) and 'boards' (a sequence of boards).

    Returns
    -------
    dict
        'p1_boards', 'p2_boards' : arrays holding every other board. The
            player whose `.which` matches the starting player receives the
            boards at positions 0, 2, 4, ...; the other player receives
            positions 1, 3, 5, ...
        'p1_outcome', 'p2_outcome' : vectors with one entry per board of the
            matching player, all 1's if that player won and all 0's otherwise.
    """
    # Pull out what we need from the game record
    first_mover = obj['starting_player'].which
    victor = obj['winner'].which
    history = np.array(obj['boards'])

    # Parity (0 or 1) of the positions that belong to each player
    p1_parity = 0 if first_mover == "player 1" else 1
    p2_parity = 1 - p1_parity

    # Select each player's boards with a boolean mask over the positions
    position_parity = np.arange(len(history)) % 2
    p1_boards = history[position_parity == p1_parity]
    p2_boards = history[position_parity == p2_parity]

    # Label every board with the final result of the player it belongs to
    p1_fill, p2_fill = (np.ones, np.zeros) if victor == "player 1" else (np.zeros, np.ones)
    p1_outcome = p1_fill(p1_boards.shape[0])
    p2_outcome = p2_fill(p2_boards.shape[0])

    return {
        "p1_boards" : p1_boards,
        "p2_boards" : p2_boards,
        "p1_outcome": p1_outcome,
        "p2_outcome": p2_outcome
    }

---
## Training

In [None]:
# Training configuration
N_games = 30       # number of games played by the training loop
TRAIN_EVERY = 10   # interval (in games) at which the models are updated

# Per-game trackers, one slot per game, filled in by the training loop
win_tracker, turn_count_tracker = np.zeros(N_games), np.zeros(N_games)

In [4]:
# Create the two network-backed players; the string passed in is the label
# that the rest of the notebook compares against `.which`.
p1, p2 = PlayerANN("player 1"), PlayerANN("player 2")

In [None]:
# Run and Train
# Plays N_games games between p1 and p2. Each player's boards are accumulated
# together with that player's final outcome (1 = won, 0 = lost) and both
# models are updated once every TRAIN_EVERY completed games. Whatever is left
# over after the last full batch is trained on at the final game, so no game
# data is discarded.
cum_boards = []
cum_outcomes = []
cum_boards2 = []
cum_outcomes2 = []
for i in tqdm(range(N_games)):
    # Play the game
    obj = play_game(p1, p2)

    # Get the boards and outcomes
    parsed = get_player_boards(obj)
    p1_boards = parsed['p1_boards']
    p2_boards = parsed['p2_boards']
    p1_outcome = parsed['p1_outcome']
    p2_outcome = parsed['p2_outcome']

    # Track the outcome: 1 when player 1 won, 2 otherwise
    win_tracker[i] = 1 if obj['winner'].which == "player 1" else 2
    turn_count_tracker[i] = obj['turns']

    # Add boards and outcomes to intermediate trackers
    cum_boards.append(p1_boards)
    cum_outcomes.append(p1_outcome)
    cum_boards2.append(p2_boards)
    cum_outcomes2.append(p2_outcome)

    # Train after every TRAIN_EVERY completed games. Counting completed games
    # (i + 1) instead of the 0-based index keeps every batch the same size;
    # the extra check on the last game flushes a trailing partial batch.
    games_played = i + 1
    if games_played % TRAIN_EVERY == 0 or games_played == N_games:
        # Stack the boards and outcomes (the lists are never empty here,
        # because this game's data was appended just above)
        stacked_boards = np.vstack(cum_boards)
        stacked_outcomes = np.concatenate(cum_outcomes)
        stacked_boards2 = np.vstack(cum_boards2)
        stacked_outcomes2 = np.concatenate(cum_outcomes2)

        # Update the model
        p1.update_model(stacked_boards, stacked_outcomes)
        p2.update_model(stacked_boards2, stacked_outcomes2)

        # Reset the accumulators
        cum_boards = []
        cum_outcomes = []
        cum_boards2 = []
        cum_outcomes2 = []

---
## Save Models

In [None]:
# Define the names
# NOTE(review): assumes the cloned repo contains a saved_models/ directory and
# that save_model() accepts a path relative to the working directory - confirm.
model_dir = "Backgammon/saved_models/"
p1_file_name = "p1_model_weights"
p2_file_name = "p2_model_weights"

# Save them: each player gets its own file. Building both paths from the names
# above keeps p1 from being written to p2's file and keeps the directory name
# spelled the same way for both.
p1.save_model(model_dir + p1_file_name)
p2.save_model(model_dir + p2_file_name)