This notebook uses the q_learning_lib.py to perform Q-learning on tic tac toe.

A variation of different hyperparameters are tried, against many different cases.

In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import q_learning_lib as qlb
import json
import os

Assuming alpha = 0.1 and tau = e, lets record what happens in each of the 4 cases of training (optimal, non optimal, x, o)

In [None]:
NUMBER_OF_BATCHES = 10000
BATCH_SIZE = 20

In [None]:
def make_alpha(rate_val):
    """Function factorie for creating different alpha functions"""
    def alpha(turn):
        return rate_val * (BATCH_SIZE * NUMBER_OF_BATCHES - turn) / (NUMBER_OF_BATCHES * BATCH_SIZE)

    return alpha

def make_tau(rate_val):
    """Function factorie for creating different tau functions"""
    def tau(turn):
        return 1 + rate_val * (BATCH_SIZE * NUMBER_OF_BATCHES - turn) / (NUMBER_OF_BATCHES * BATCH_SIZE) 
    return tau

def record_params(results, filename):

    df = pd.DataFrame(np.flip(results, axis=1), columns=['o_win', 'draw', 'x_win'])

    # Save to CSV (index=False prevents it from adding a row-number column)
    df.to_csv(filename, index=False)

In [None]:
alpha_rates = np.linspace(0.5, 0.01, 10)
tau_values = np.linspace(1, 2000, 10)

In [103]:
def try_different_hyperparams(alpha_rates, tau_values, player, strategy):
    """this functions tries a variation of different hyperparameters
    results from every single iteration are saved in a .csv file
    how alpha params, tau values affect the final winrates is recorded in a pandas df and also saved to a file """
 
    df_list = [] #will be turned to a df later
    
    for alpha_rate in alpha_rates: 
        for tau_value in tau_values:

            #trains
            Q_Table_x_optimal, results = qlb.perform_training(player, strategy, NUMBER_OF_BATCHES, BATCH_SIZE, False, make_alpha(alpha_rate), make_tau(tau_value))

            #records the results
            filename = f"parameter_results/results_{player}_{strategy}_opponent_alpha_{alpha_rate}_tau_{tau_value}.csv"
            record_params(results, filename)


            #records end winrates vs hyperparams
            end_winrate = np.mean(results[:, 1][-5:])
            new_results = {'alpha_rate': alpha_rate, 'tau_rate': tau_value, 'final_winrate': end_winrate}
            df_list.append(new_results)
            print(f"alpha = {alpha_rate}, tau = {tau_value}, end winrates = {end_winrate}")

    df = pd.DataFrame(df_list) 
    df.to_csv("parameter_results/hyperparams.csv", index=False)

In [104]:
try_different_hyperparams(alpha_rates, tau_values, 'x', 'perfect')

ValueError: Shape of passed values is (0, 1), indices imply (0, 3)