In [None]:
import os
import numpy as np
from ray import tune
import torch

In [None]:
# Directory of the saved Ray Tune experiment that the following cells inspect.
experiment_path = os.path.join("ray_tuner/run1_0")
loading_message = f"Loading results from {experiment_path}..."
print(loading_message)

def run(config):
    '''
    Training function. Gets dictionary of tunable hyperparameters as an input.

    Reads ``config["alr"]`` (actor learning rate), ``config["clr"]`` (critic
    learning rate), ``config["sched_lmbd"]`` (StepLR decay factor) and
    ``config["e_st"]`` (entropy steepness). All other hyperparameters are fixed
    inside the function. It builds the actor/critic networks, loads the
    pre-trained enemy, wires up optimizers and schedulers, and calls
    ``Agent.learn()``.

    The names ``model``, ``AI``, ``Agent``, ``optim``, ``lr_scheduler``,
    ``script_dir`` and ``times`` are not defined in this cell; they must be
    available as globals when the function is actually executed.

    Returns None.
    '''
    train_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Device: {train_device}")
    play_device = "cpu" # This is much faster on cpu, even if training with GPU, as it minimizes overhead

    # Tunable hyperparameters
    GAMMA=0.9
    LAMBDA_=0.9
    hidden_size = 80
    lr_actor = config["alr"]
    lr_critic = config["clr"]
    n_rollouts = 3
    n_batches = 10
    n_updates_per_iter = 10
    emb_size = 60
    EPS_N = 2_000_000
    EPS_N_normed = int(EPS_N/n_rollouts/n_batches)
    scheduler_lmbd= config["sched_lmbd"]
    eps_clip = 0.13
    entropy_scale = 7.5e-3
    entropy_steepness = config["e_st"]

    # Build a run identifier such as "alr1.0e-03_clr1.0e-04_...".
    # Joining the parts avoids having to trim a trailing separator; the previous
    # [:-2] slice also cut off the last character of the final value, so trials
    # differing only in that character ended up with the same identifier.
    id_parts = []
    for key, value in config.items():
        try:
            id_parts.append(f"{key}{float(value):.1e}")
        except (ValueError, TypeError):
            # If value can't be converted to float, use original value
            id_parts.append(f"{key}{value}")

    identifier = "V1-0_" + "_".join(id_parts)

    # Load training net
    round_n=5
    card_n=5
    bet_n=3
    player_n=2
    n_inputs = card_n + player_n*(1+round_n) + player_n
    n_outputs = 52+bet_n
    Actor = model.Actor(n_inputs,hidden_size,n_outputs,emb_size).to(train_device)
    Critic = model.Critic(n_inputs,hidden_size,emb_size).to(train_device)

    # Load enemy
    enemy_net = model.Actor(n_inputs,hidden_size,n_outputs,emb_size)
    # map_location keeps the load working on CPU-only machines even if the
    # checkpoint was written from a GPU; the enemy plays on play_device anyway.
    load_dict = torch.load(
        os.path.join(script_dir,r"model/trained_actor_V1-0.pt"),
        map_location=play_device,
    )
    enemy_net.load_state_dict(load_dict["model_state"])
    enemy = AI("XXX_SLAYAH_XXX",enemy_net,play_device)

    # Set optimizers; both schedulers decay the learning rate 40 times over the run
    # (step size is clamped to at least 1 iteration).
    scheduler_step = max(1,EPS_N_normed//40)
    actor_optim = optim.Adam(Actor.parameters(),lr=lr_actor,foreach=True)
    critic_optim = optim.Adam(Critic.parameters(),lr=lr_critic,foreach=True)
    actor_scheduler = lr_scheduler.StepLR(actor_optim,step_size=scheduler_step,gamma=scheduler_lmbd)
    critic_scheduler = lr_scheduler.StepLR(critic_optim,step_size=scheduler_step,gamma=scheduler_lmbd)

    ME = Agent(identifier,EPS_N_normed,Actor,Critic,play_device,train_device,actor_optim,critic_optim,actor_scheduler,critic_scheduler,GAMMA,LAMBDA_,eps_clip,enemy,n_rollouts,n_batches,n_updates_per_iter,entropy_scale,entropy_steepness)
    # The value returned by learn() is not used by this function.
    ME.learn()

    # Report the mean of every entry collected in the global `times` mapping.
    for key, value in times.items():
        print(f"{key}: {np.array(value).mean()}")

# Re-open the saved experiment; `run` is handed over as the trainable the
# experiment was created with.
restored_tuner = tune.Tuner.restore(path=experiment_path, trainable=run)
result_grid = restored_tuner.get_results()
# For a tabular overview of the trials: result_grid.get_dataframe()

In [None]:
def _config_label(config):
    """Build a compact legend label from a trial's hyperparameter dict.

    Each entry becomes ``<key><value>``; values that convert to float are shown
    in scientific notation with one decimal, anything else is used verbatim.
    Entries are joined with underscores.
    """
    parts = []
    for key, value in config.items():
        try:
            parts.append(f"{key}{float(value):.1e}")
        except (ValueError, TypeError):
            # If value can't be converted to float, use original value
            parts.append(f"{key}{value}")
    return "_".join(parts)


ax = None
print(f"Results: {len(result_grid)}")
for result in result_grid:
    metrics = result.metrics_dataframe
    # Skip trials without a config or without any reported metrics.
    if result.config is None or metrics is None:
        continue
    # Only trials that reached a winrate above zero are drawn.
    if metrics.winrate.max() > 0.0:
        # With ax=None pandas creates the axes on the first call; later calls
        # draw into the same axes.
        ax = metrics.plot("training_iteration", "winrate", ax=ax, label=_config_label(result.config))

if ax is None:
    # Previously this case crashed with AttributeError on `ax.set_title`.
    print("No trial with a winrate above 0 was found; nothing to plot.")
else:
    ax.set_title("Winrate vs. Training Iteration for All Trials")
    ax.set_ylabel("Winrate")
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

In [None]:
from Tikki import Tikki
from player import player,AI, random_win_player, random_player
import model_relu_deep_skip as model
import os
import numpy as np
import copy
import torch

# Example usage
# Network dimensions; they have to match the checkpoints that are loaded below.
n_players = 2
round_n = 5
card_n = 5
n_bets = 3
n_inputs = card_n + n_players*(1+round_n) + n_players
n_outputs = 52 + n_bets  # previously a hard-coded 52+3

# LOAD ME
Actor = model.Actor(n_inputs,80,n_outputs,60)
# Placeholder: set this to the actor checkpoint file before running the cell.
# With the empty default, torch.load has no file to open.
model_folder_path = ""
file_name = os.path.join(model_folder_path)
# The agent plays on "cpu" (see `me` below), so map the checkpoint there; this
# also lets GPU-saved checkpoints load on machines without CUDA.
load_dict = torch.load(file_name, map_location="cpu")
Actor.load_state_dict(load_dict["model_state"])

# LOAD ENEMY
enemy_net = model.Actor(n_inputs,80, n_outputs,emb_size=60)
# Placeholder: set this to the enemy checkpoint file before running the cell.
model_folder_path = ""
file_name = os.path.join(model_folder_path)
thing = torch.load(file_name, map_location="cpu")
enemy_net.load_state_dict(thing["model_state"])

N_games = 100
winners = []
me = AI("tappaja",Actor,"cpu")
# NOTE(review): the opponent used here is a random player; `enemy_net` loaded
# above is not referenced by the code in this cell.
enemy = random_player("xslayx")

players = [enemy,me]

# Filled by run_agent_simulation() with every action that is one of 52, 53, 54.
passes = []
def run_agent_simulation():
    """Play `N_games` games of Tikki and record the outcomes.

    Works on module-level state: a new game is created from `players` each
    round and stepped with `me` until it reports completion. Every action
    equal to 52, 53 or 54 is appended to `passes`, and for each finished game
    `winners` receives whether `game.game_winner == me`.
    """
    # Built once up front; the membership test below compares against it.
    tracked_actions = torch.tensor([52,53,54])

    for _game_index in range(N_games):
        game = Tikki(players)
        game.new_game()

        while True:
            action, _state, _reward, done, _extra = game.step(me)

            if action in tracked_actions:
                passes.append(action)

            if not done:
                continue

            winners.append(game.game_winner == me)
            break

run_agent_simulation()

# np.array() builds a new array from the list, so no separate deepcopy is needed.
wins = np.array(winners)
wins_mean = wins.mean()
print(wins_mean)

# Share of each recorded action (52, 53, 54) among all recorded actions.
n_recorded = len(passes)
if n_recorded == 0:
    # Without this guard the percentages would be 0/0 (NaN plus a RuntimeWarning).
    print("No actions 52-54 were recorded; skipping the breakdown.")
else:
    recorded = np.array(passes)
    pass_pct, win_pct, two_win_pct = (
        100*(recorded == code).sum()/n_recorded for code in (52, 53, 54)
    )
    print(f"Pass: {pass_pct:.2f}%, Win: {win_pct:.2f}%, 2-Win; {two_win_pct:.2f}%")