In [None]:
import torch
import os
import time
import numpy as np
import rlcard
from rlcard import models
from rlcard.agents.human_agents.uno_human_agent import HumanAgent, _print_action
from rlcard.games.uno.card import UnoCard
import tensorflow as tf
from rlcard.agents import DQNAgent
from rlcard.utils import (
    get_device,
    set_seed,
    tournament,
    reorganize,
    Logger,
    plot_curve,
    print_card
)

In [None]:
# Call print_state from an agent's step function to show the game state while it plays
def print_state(state, action_record):
    ''' Print the moves made since the current player's last action, followed
    by the current player's view of the game

    Args:
        state (dict): Observation to display. The keys read here are
            'current_player', 'hand', 'target', 'num_players', 'num_cards'
            and 'legal_actions'
        action_record (list): Entries whose item 0 is a player id and whose
            item 1 is the action that player chose
    '''
    me = state['current_player']

    # Walk the record from newest to oldest and stop at the current player's
    # own most recent move; everything collected happened after it.
    recent_moves = []
    for entry in reversed(action_record):
        if entry[0] == me:
            break
        recent_moves.append(entry)
    recent_moves.reverse()  # back to chronological order for display

    for entry in recent_moves:
        print('>> Player', entry[0]+1, 'chooses ', end='')
        _print_action(entry[1])
        print('')

    curr_player = me + 1
    print(f'\n=============== Player {curr_player} ===============')
    UnoCard.print_cards(state['hand'])
    print('')

    print('=============== Table Card ===============')
    UnoCard.print_cards(state['target'], wild_color=True)
    print('')

    print('========== Players Card Number ===========')
    for seat in range(state['num_players']):
        if seat == me:
            continue
        print('Player {} has {} cards.'.format(seat+1, state['num_cards'][seat]))

    print('======== Actions You Can Choose =========')
    legal = state['legal_actions']
    for idx, action in enumerate(legal):
        print(str(idx)+': ', end='')
        UnoCard.print_cards(action, wild_color=True)
        # Separator between entries, but not after the final one
        if idx < len(legal) - 1:
            print(', ', end='')
    print('\n')

def print_action(action):
    ''' Display one action by handing it to UnoCard.print_cards with
    wild_color enabled

    Args:
        action (str): The action to display
    '''
    UnoCard.print_cards(action, wild_color=True)

In [None]:
def train(opponent="Random", num_episodes=500, eval_every=10, eval_games=200,
          seed=42, log_dir='uno/dqn_results'):
    ''' Train a DQN agent on Uno, log its evaluation results, plot the
    learning curve and save the agent to disk

    The DQN agent always sits in seat 0; the opponent sits in seat 1.

    Args:
        opponent: The string "Random" to train against a RandomAgent, or an
            already constructed agent object, which is used as-is
        num_episodes (int): Number of training episodes to play
        eval_every (int): Run an evaluation tournament every this many episodes
        eval_games (int): Number of games played in each evaluation tournament
        seed (int): Seed passed to set_seed and to the environment config
        log_dir (str): Directory used for the logger output and the saved model
    '''
    device = get_device()
    set_seed(seed)
    env = rlcard.make('uno', config={'seed': seed})

    agent = DQNAgent(
        num_actions=env.num_actions,
        state_shape=env.state_shape[0],
        mlp_layers=[64, 64],
        device=device,
    )

    # Decide once whether we train against the built-in random opponent; the
    # same flag later selects the file name of the saved model.
    vs_random = opponent == "Random"
    opponent_agent = RandomAgent(env.num_actions) if vs_random else opponent
    env.set_agents([agent, opponent_agent])

    with Logger(log_dir) as logger:
        for episode in range(num_episodes):
            trajectories, payoffs = env.run(is_training=True)
            trajectories = reorganize(trajectories, payoffs)

            # Feed transitions into agent memory, and train the agent.
            # Only seat 0 is fed because that is where the DQN agent sits.
            for ts in trajectories[0]:
                agent.feed(ts)

            # Periodic evaluation; entry 0 of the tournament result belongs to
            # seat 0, i.e. the agent being trained.
            if episode % eval_every == 0:
                logger.log_performance(
                    episode,
                    tournament(env, eval_games)[0],
                )

        # Read the paths while the logger is still open
        csv_path, fig_path = logger.csv_path, logger.fig_path

    # Plot the learning curve
    plot_curve(csv_path, fig_path, 'dqn')

    # Save the whole agent object (not just its weights)
    model_name = 'vs_Random_model.pth' if vs_random else 'vs_pretrained_model.pth'
    save_path = os.path.join(log_dir, model_name)
    torch.save(agent, save_path)
    print('Model saved in', save_path)

In [None]:
def run_game(agents, runs=1, supress_state_print=False, train_dqn=False):
    ''' Play Uno games between the given agents and print a win summary

    Args:
        agents (list): One entry per seat. Each entry is either one of the
            names "human", "random", "custom_agent", "custom_agent_color",
            "custom_agent_number", "custom_agent_special", "dqn", or an
            already constructed agent object, which is used as-is
        runs (int): Number of games to play
        supress_state_print (bool): When False, the state is printed before
            every move (with a one second pause) and the result after every game
        train_dqn (bool): When True, a "dqn" entry first calls train() to
            produce a fresh model; otherwise the previously saved model is loaded

    Raises:
        ValueError: If an entry of agents is a string that is not a known name
    '''
    env = rlcard.make("uno")

    # Constructors are wrapped in lambdas so that a class is only looked up
    # when its name is actually requested.
    builders = {
        "human": lambda: HumanAgent(env.num_actions),
        "random": lambda: RandomAgent(env.num_actions),
        "custom_agent": lambda: UnoAgent(env.num_actions),
        "custom_agent_color": lambda: UnoAgentColorMatching(env.num_actions),
        "custom_agent_number": lambda: UnoAgentNumberMatching(env.num_actions),
        "custom_agent_special": lambda: UnoAgentSpecialCard(env.num_actions),
    }

    # Creating agents from list
    agent_list = []
    for agent in agents:
        if not isinstance(agent, str):
            # Anything that is not a name is taken to be a ready-made agent
            agent_list.append(agent)
        elif agent == "dqn":
            if train_dqn:
                train()
            device = get_device()
            # NOTE: torch.load unpickles a complete agent object here, which can
            # execute arbitrary code; only load model files you trust.
            dqn_agent = torch.load('uno/dqn_results/vs_Random_model.pth', map_location=device)
            dqn_agent.set_device(device)
            agent_list.append(dqn_agent)
        elif agent in builders:
            agent_list.append(builders[agent]())
        else:
            # Dropping the entry silently would leave a seat without an agent
            raise ValueError(
                f'Unknown agent {agent!r}; expected "dqn" or one of {sorted(builders)}'
            )
    env.set_agents(agent_list)

    win_record = []
    for _ in range(runs):
        state, player_id = env.reset()

        # Loop to play the game
        while not env.is_over():
            if not supress_state_print:
                print_state(state['raw_obs'], state['action_record'])
                time.sleep(1)
            acting_agent = env.agents[player_id]
            action, _ = acting_agent.eval_step(state)
            # Environment steps and hands back the next state and player
            state, player_id = env.step(action, acting_agent.use_raw)

        # A positive payoff for seat 0 counts as a Player 1 win; anything else
        # is recorded as a Player 2 win.
        payoffs = env.get_payoffs()
        winner = 1 if payoffs[0] > 0 else 2
        win_record.append(winner)
        if not supress_state_print:
            print('===============     Result     ===============')
            print(f'Player {winner} Wins')
            print('')

    p1_wins = win_record.count(1)
    p2_wins = win_record.count(2)
    print(f'Game history: {win_record}')
    print(f'Player 1 wins: {p1_wins}')
    print(f'Player 2 wins: {p2_wins}')
    if win_record:
        total = len(win_record)
        print(f'Win % (p1:p2): {int(p1_wins/total*100)}:{int(p2_wins/total*100)}')
    else:
        # No games were played (runs == 0), so there is no ratio to report
        print('Win % (p1:p2): n/a')
    

In [None]:
class RandomAgent(object):
    ''' A random agent. Random agents are for running toy examples on the card games
    '''

    def __init__(self, num_actions):
        ''' Initialize the random agent

        Args:
            num_actions (int): The size of the output action space
        '''
        self.use_raw = False
        self.num_actions = num_actions

    @staticmethod
    def step(state):
        ''' Predict the action given the current state when generating training data.

        Args:
            state (dict): A dictionary that represents the current state; only
                the keys of state['legal_actions'] are read

        Returns:
            action (int): The action predicted (randomly chosen) by the random agent
        '''
        return np.random.choice(list(state['legal_actions'].keys()))

    def eval_step(self, state):
        ''' Predict the action given the current state for evaluation.
            Since random agents are not trained, the action is chosen exactly
            as in the step function.

        Args:
            state (dict): A dictionary that represents the current state; the
                keys 'legal_actions' and 'raw_legal_actions' are read

        Returns:
            action (int): The action predicted (randomly chosen) by the random agent
            info (dict): Holds 'probs', a mapping from each raw legal action
                to its (uniform) probability
        '''
        # Materialize the legal action ids once instead of on every iteration
        legal_ids = list(state['legal_actions'].keys())

        # Dense probability vector over the whole action space: uniform over
        # the legal actions, zero elsewhere.
        probs = [0 for _ in range(self.num_actions)]
        for action_id in legal_ids:
            probs[action_id] = 1/len(legal_ids)

        # raw_legal_actions is paired with legal_actions by position
        info = {
            'probs': {
                state['raw_legal_actions'][pos]: probs[action_id]
                for pos, action_id in enumerate(legal_ids)
            }
        }

        return self.step(state), info