In [13]:
!pip install tqdm

Defaulting to user installation because normal site-packages is not writeable


In [24]:
import os
import argparse
import sys

import torch

import rlcard

from rlcard.agents import DQNAgent, RandomAgent
from rlcard.utils import (
    get_device,
    set_seed,
    tournament,
    reorganize,
    Logger,
    plot_curve,
)

import numpy as np
from tqdm import tqdm

In [15]:
# Select the compute device: use the CUDA GPU when torch reports one, else the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Report which device was chosen (and the GPU model name when applicable)
if device.type == "cuda":
    print("Using GPU:", torch.cuda.get_device_name(0))
else:
    print("Using CPU")

Using GPU: NVIDIA GeForce RTX 2080 Ti


In [28]:
# Stack of (previous stdout, devnull handle) pairs pushed by block() and popped by
# unblock(); a stack keeps nested block()/unblock() pairs balanced.
_blocked_stdout = []


def block():
    """Silence everything written to ``sys.stdout`` until ``unblock()`` is called.

    The stream that is active at call time is remembered so that ``unblock()``
    can restore exactly that stream rather than ``sys.__stdout__``, which is the
    interpreter's original stream and not necessarily the one in use (front ends
    and test runners may install their own ``sys.stdout``).
    """
    sink = open(os.devnull, 'w')
    _blocked_stdout.append((sys.stdout, sink))
    sys.stdout = sink


def unblock():
    """Undo the most recent ``block()``.

    Restores the stream that was active before the matching ``block()`` call and
    closes the ``os.devnull`` handle that call opened, so repeated use does not
    leak file descriptors. Calling this without a pending ``block()`` is a no-op.
    """
    if not _blocked_stdout:
        return
    previous, sink = _blocked_stdout.pop()
    sys.stdout = previous
    sink.close()
    
# block()
# print("HI")
# unblock()
    

In [16]:
# Fix the random seed so that training runs are repeatable. `set_seed` is rlcard's
# helper for seeding the global RNGs; the same seed is also handed to the environment.
SEED = 42
set_seed(SEED)

# Initialize the Uno environment
env = rlcard.make('uno', config={'seed': SEED})

In [17]:
# Output directory for this experiment. Later cells pass it to DQNAgent as
# `save_path`, to Logger as its log directory, and join it with 'model.pth'
# when saving the trained agent.
log_dir = './experiments/uno_dqn_result/'

In [18]:
# DQN learner; its input/output sizes are taken from the environment created above
agent = DQNAgent(
    num_actions=env.num_actions,
    state_shape=env.state_shape[0],
    mlp_layers=[128, 128],  # changed to 128
    replay_memory_size=5000,
    update_target_estimator_every=100,
    epsilon_decay_steps=10000,
    learning_rate=0.0005,
    batch_size=32,
    device=device,
    save_path=log_dir,
)

In [19]:
# Number of seats at the table
num_players = 2

# Seat 0 is the DQN agent; every remaining seat is filled with a random player
env.set_agents([
    agent,
    *(RandomAgent(env.num_actions) for _ in range(num_players - 1)),
])

# Start from a freshly reset environment
state = env.reset()

## Training the Agent
To train the agent, you will need to create a training loop. During each iteration of the loop, the agent makes a decision, the environment is updated, and the agent learns from the result.

In [20]:
# Static reward shaping: a fixed bonus per kind of card played, a fixed penalty for drawing
def adjust_rewards(trajectories, payoffs):
    """Return a copy of ``trajectories`` with a fixed per-action reward adjustment.

    Each transition is a ``(state, action, reward, next_state, done)`` sequence.
    Only ``reward`` is changed, based purely on the action id:

    * action 60 (draw a card): -1
    * within each colour block of 15 ids (starting at 0, 15, 30 and 45):
      offsets 0-9 (number cards) +1, offsets 10-12 (action cards) +3,
      offset 13 (wild) +6, offset 14 (wild draw 4) +10
    * any other action id: reward left unchanged

    ``payoffs`` is accepted for signature compatibility and is not used.

    Note: a later cell defines another ``adjust_rewards``; when the notebook is
    run top to bottom that later definition replaces this one.
    """
    draw_action = 60
    colour_bases = (0, 15, 30, 45)  # red, green, blue, yellow
    # (first offset, last offset, bonus) within one colour block
    bands = ((0, 9, 1), (10, 12, 3), (13, 13, 6), (14, 14, 10))

    def shaped(action, reward):
        # The draw penalty is checked first, then the colour blocks in id order
        if action == draw_action:
            return reward - 1
        for base in colour_bases:
            for first, last, bonus in bands:
                if base + first <= action <= base + last:
                    return reward + bonus
        return reward

    return [
        [
            (state, action, shaped(action, reward), next_state, done)
            for state, action, reward, next_state, done in traj
        ]
        for traj in trajectories
    ]

In [21]:
def adjust_rewards(trajectories, payoffs):
    """Return a copy of ``trajectories`` with rewards shaped by hand sizes.

    Each transition is a ``(state, action, reward, next_state, done)`` sequence
    whose ``state['raw_obs']`` provides ``'hand'``, ``'num_cards'`` and
    ``'current_player'``. Only ``reward`` is changed:

    * action 60 (draw): minus ``max(1, 3 - own_hand / 7)``
    * action/wild cards (offsets 10-14 within each colour block of 15 ids):
      plus ``2 * multiplier``, where ``multiplier`` is 1 when the opponent holds
      7 or more cards and rises linearly to 2 as the opponent's hand shrinks to 0
    * number cards (offsets 0-9 within each colour block):
      plus ``1 + (3 - own_hand / 7)``
    * any other action id: reward left unchanged

    ``payoffs`` is accepted for signature compatibility and is not used.
    """
    adjusted_trajectories = []
    for traj in trajectories:
        adjusted_traj = []
        for state, action, reward, next_state, done in traj:
            # Actual game state details
            raw_obs = state['raw_obs']

            # Number of cards in the acting player's hand
            num_cards_player = len(raw_obs['hand'])

            # Card count of the other seat; this lookup only covers a two-player game
            num_cards_opponent = raw_obs['num_cards'][1] if raw_obs['current_player'] == 0 else raw_obs['num_cards'][0]

            if action == 60:  # Draw a card
                reward -= max(1, 3 - num_cards_player / 7)
            elif action in range(60):
                # Ids 0-59 are four colour blocks of 15: offsets 0-9 are number
                # cards, offsets 10-14 are action / wild cards.
                if action % 15 >= 10:
                    # The previous form, max(1, (7 - n) / 7), could never exceed 1
                    # because (7 - n) / 7 <= 1 for every n >= 0, so the opponent's
                    # hand size had no effect. Keep 1 as the floor and add the
                    # closeness term on top of it instead.
                    action_card_reward_multiplier = 1 + max(0, (7 - num_cards_opponent) / 7)
                    reward += 2 * action_card_reward_multiplier
                else:
                    reward += 1 + (3 - num_cards_player / 7)

            adjusted_traj.append((state, action, reward, next_state, done))
        adjusted_trajectories.append(adjusted_traj)
    return adjusted_trajectories


# Building out the simulation

Previously, the code would run a complete simulation from start to finish. However, this restricts the ability to learn from individual moves within the simulation.

In [26]:
episode_num = 25000  # Number of training episodes (one full game each)

evaluate_every = 1000  # Evaluate the agent every X episodes
evaluate_num = 100  # Number of games played in each evaluation

win_reward = 100  # Terminal reward substituted for a positive payoff (a win)
loss_reward = -25  # Terminal reward substituted for any other payoff

with Logger(log_dir) as logger:
    for episode in tqdm(range(episode_num)):
        # Play one complete game and collect the transitions of every seat
        trajectories, payoffs = env.run(is_training=True)

        # Replace the environment's payoffs with the asymmetric win/loss rewards
        for i, payoff in enumerate(payoffs):
            payoffs[i] = win_reward if payoff > 0 else loss_reward

        trajectories = reorganize(trajectories, payoffs)

        # After reorganizing the trajectories, apply the per-move reward shaping
        trajectories = adjust_rewards(trajectories, payoffs)

        # Seat 0 holds the DQN agent, so only its transitions are fed to it
        for ts in trajectories[0]:
            agent.feed(ts)

        if episode % evaluate_every == 0:
            # Silence stdout during the evaluation, and always restore it: an
            # exception or a manual interrupt raised inside the tournament must
            # not leave the notebook with its output swallowed.
            block()
            try:
                logger.log_performance(
                    episode,
                    tournament(
                        env,
                        evaluate_num,
                    )[0]
                )
            finally:
                unblock()

    # Get the paths of the performance CSV and the figure for the plotting cell
    csv_path, fig_path = logger.csv_path, logger.fig_path

  0%|          | 0/25000 [00:00<?, ?it/s]

INFO - Step 3183, rl-loss: 61.45465850830078INFO - Step 3184, rl-loss: 52.54437255859375INFO - Step 3185, rl-loss: 61.61508560180664INFO - Step 3186, rl-loss: 46.972389221191406INFO - Step 3187, rl-loss: 68.94454193115234INFO - Step 3188, rl-loss: 206.26991271972656INFO - Step 3189, rl-loss: 35.135711669921875INFO - Step 3190, rl-loss: 94.83740234375INFO - Step 3191, rl-loss: 40.110862731933594INFO - Step 3192, rl-loss: 43.160823822021484INFO - Step 3193, rl-loss: 216.81182861328125INFO - Step 3194, rl-loss: 241.00839233398438INFO - Step 3195, rl-loss: 190.69671630859375INFO - Step 3196, rl-loss: 162.21092224121094INFO - Step 3197, rl-loss: 277.01214599609375INFO - Step 3198, rl-loss: 44.337459564208984INFO - Step 3199, rl-loss: 45.88159942626953INFO - Step 3200, rl-loss: 187.46588134765625INFO - Step 3201, rl-loss: 225.4972381591797
INFO - Copied model parameters to target network.
INFO - Step 3202, rl-loss: 61.76237487792969

  0%|          | 11/25000 [00:01<44:11,  9.42it/s] 

INFO - Step 3203, rl-loss: 73.74034881591797INFO - Step 3204, rl-loss: 129.84027099609375INFO - Step 3205, rl-loss: 59.21306610107422INFO - Step 3206, rl-loss: 200.42050170898438INFO - Step 3207, rl-loss: 105.14641571044922INFO - Step 3208, rl-loss: 87.26130676269531INFO - Step 3209, rl-loss: 243.77017211914062INFO - Step 3210, rl-loss: 346.0421142578125INFO - Step 3211, rl-loss: 43.248931884765625INFO - Step 3212, rl-loss: 198.2099609375INFO - Step 3213, rl-loss: 707.9168701171875INFO - Step 3214, rl-loss: 192.38290405273438INFO - Step 3215, rl-loss: 176.2235107421875INFO - Step 3216, rl-loss: 146.63587951660156INFO - Step 3217, rl-loss: 132.84085083007812INFO - Step 3218, rl-loss: 560.5632934570312INFO - Step 3219, rl-loss: 95.70018005371094INFO - Step 3220, rl-loss: 208.0276641845703INFO - Step 3221, rl-loss: 196.57847595214844INFO - Step 3222, rl-loss: 47.44309616088867INFO - Step 3223, rl-loss: 174.73789978027344INFO - Step 3224, rl-loss: 231.57675170898438I

  0%|          | 20/25000 [00:02<32:36, 12.77it/s]

INFO - Step 3383, rl-loss: 48.911170959472656INFO - Step 3384, rl-loss: 118.4203872680664INFO - Step 3385, rl-loss: 550.7944946289062INFO - Step 3386, rl-loss: 227.14694213867188INFO - Step 3387, rl-loss: 147.84756469726562INFO - Step 3388, rl-loss: 38.971923828125INFO - Step 3389, rl-loss: 438.03192138671875INFO - Step 3390, rl-loss: 357.01702880859375INFO - Step 3391, rl-loss: 405.2966613769531INFO - Step 3392, rl-loss: 40.974365234375INFO - Step 3393, rl-loss: 40.986026763916016INFO - Step 3394, rl-loss: 103.07438659667969INFO - Step 3395, rl-loss: 229.79039001464844INFO - Step 3396, rl-loss: 161.78677368164062INFO - Step 3397, rl-loss: 49.19243621826172INFO - Step 3398, rl-loss: 104.63958740234375INFO - Step 3399, rl-loss: 86.70748901367188INFO - Step 3400, rl-loss: 89.76404571533203INFO - Step 3401, rl-loss: 41.978851318359375
INFO - Copied model parameters to target network.
INFO - Step 3402, rl-loss: 190.33401489257812INFO - Step 3403, rl-loss: 122.647811889

  0%|          | 28/25000 [00:03<39:20, 10.58it/s]

INFO - Step 3561, rl-loss: 192.22271728515625INFO - Step 3562, rl-loss: 151.12863159179688INFO - Step 3563, rl-loss: 48.768104553222656INFO - Step 3564, rl-loss: 357.47930908203125INFO - Step 3565, rl-loss: 61.47281265258789INFO - Step 3566, rl-loss: 67.99372100830078INFO - Step 3567, rl-loss: 126.28960418701172INFO - Step 3568, rl-loss: 58.43463897705078INFO - Step 3569, rl-loss: 64.36871337890625INFO - Step 3570, rl-loss: 48.2669563293457INFO - Step 3571, rl-loss: 140.428466796875INFO - Step 3572, rl-loss: 76.96734619140625INFO - Step 3573, rl-loss: 489.9808044433594INFO - Step 3574, rl-loss: 98.49095916748047INFO - Step 3575, rl-loss: 309.6670837402344INFO - Step 3576, rl-loss: 74.231689453125INFO - Step 3577, rl-loss: 153.03309631347656INFO - Step 3578, rl-loss: 476.0849914550781INFO - Step 3579, rl-loss: 127.99111938476562INFO - Step 3580, rl-loss: 370.2466125488281INFO - Step 3581, rl-loss: 275.98846435546875INFO - Step 3582, rl-loss: 234.46435546875INFO - 

  0%|          | 37/25000 [00:04<37:22, 11.13it/s]

INFO - Step 3740, rl-loss: 342.9959716796875INFO - Step 3741, rl-loss: 482.5440673828125INFO - Step 3742, rl-loss: 75.27110290527344INFO - Step 3743, rl-loss: 57.358978271484375INFO - Step 3744, rl-loss: 149.8240203857422INFO - Step 3745, rl-loss: 124.48252868652344INFO - Step 3746, rl-loss: 210.42501831054688INFO - Step 3747, rl-loss: 227.38778686523438INFO - Step 3748, rl-loss: 239.8907012939453INFO - Step 3749, rl-loss: 168.64248657226562INFO - Step 3750, rl-loss: 165.64697265625INFO - Step 3751, rl-loss: 193.8618621826172INFO - Step 3752, rl-loss: 36.38011169433594INFO - Step 3753, rl-loss: 62.08038330078125INFO - Step 3754, rl-loss: 43.509178161621094INFO - Step 3755, rl-loss: 61.96166229248047INFO - Step 3756, rl-loss: 52.86485290527344INFO - Step 3757, rl-loss: 61.58356475830078INFO - Step 3758, rl-loss: 59.322078704833984INFO - Step 3759, rl-loss: 48.08494186401367INFO - Step 3760, rl-loss: 92.36627197265625INFO - Step 3761, rl-loss: 192.25025939941406INF

  0%|          | 46/25000 [00:05<34:43, 11.98it/s]

INFO - Step 3918, rl-loss: 252.39883422851562INFO - Step 3919, rl-loss: 41.38932800292969INFO - Step 3920, rl-loss: 199.69204711914062INFO - Step 3921, rl-loss: 169.75173950195312INFO - Step 3922, rl-loss: 57.714881896972656INFO - Step 3923, rl-loss: 152.70709228515625INFO - Step 3924, rl-loss: 239.3529510498047INFO - Step 3925, rl-loss: 189.63259887695312INFO - Step 3926, rl-loss: 202.63015747070312INFO - Step 3927, rl-loss: 202.241943359375INFO - Step 3928, rl-loss: 310.81298828125INFO - Step 3929, rl-loss: 59.19672393798828INFO - Step 3930, rl-loss: 394.26995849609375INFO - Step 3931, rl-loss: 60.45562744140625INFO - Step 3932, rl-loss: 179.05245971679688INFO - Step 3933, rl-loss: 51.398216247558594INFO - Step 3934, rl-loss: 56.61960220336914INFO - Step 3935, rl-loss: 99.94207000732422INFO - Step 3936, rl-loss: 310.3231506347656INFO - Step 3937, rl-loss: 216.7433319091797INFO - Step 3938, rl-loss: 441.2132873535156INFO - Step 3939, rl-loss: 383.5748596191406IN

  0%|          | 53/25000 [00:05<28:53, 14.39it/s]

INFO - Step 4097, rl-loss: 87.38688659667969INFO - Step 4098, rl-loss: 314.84527587890625INFO - Step 4099, rl-loss: 35.81758117675781INFO - Step 4100, rl-loss: 174.98086547851562INFO - Step 4101, rl-loss: 145.16552734375
INFO - Copied model parameters to target network.
INFO - Step 4102, rl-loss: 128.6160125732422INFO - Step 4103, rl-loss: 79.06996154785156INFO - Step 4104, rl-loss: 414.7591857910156INFO - Step 4105, rl-loss: 160.8731231689453INFO - Step 4106, rl-loss: 89.5592041015625INFO - Step 4107, rl-loss: 71.30786895751953INFO - Step 4108, rl-loss: 81.4061279296875INFO - Step 4109, rl-loss: 138.80532836914062INFO - Step 4110, rl-loss: 72.788330078125INFO - Step 4111, rl-loss: 78.43595886230469INFO - Step 4112, rl-loss: 160.55068969726562INFO - Step 4113, rl-loss: 51.728729248046875INFO - Step 4114, rl-loss: 118.01020812988281INFO - Step 4115, rl-loss: 311.61236572265625INFO - Step 4116, rl-loss: 214.94631958007812INFO - Step 4117, rl-loss: 46.59010696411133I

  0%|          | 59/25000 [00:06<47:28,  8.76it/s]

INFO - Step 4276, rl-loss: 80.21424865722656INFO - Step 4277, rl-loss: 71.73915100097656INFO - Step 4278, rl-loss: 229.62905883789062INFO - Step 4279, rl-loss: 203.63917541503906INFO - Step 4280, rl-loss: 323.7493896484375INFO - Step 4281, rl-loss: 45.93803787231445INFO - Step 4282, rl-loss: 213.17881774902344INFO - Step 4283, rl-loss: 294.61993408203125INFO - Step 4284, rl-loss: 64.99214935302734INFO - Step 4285, rl-loss: 390.40625INFO - Step 4286, rl-loss: 60.45489501953125INFO - Step 4287, rl-loss: 730.6761474609375INFO - Step 4288, rl-loss: 155.55136108398438INFO - Step 4289, rl-loss: 159.47398376464844INFO - Step 4290, rl-loss: 203.5487060546875INFO - Step 4291, rl-loss: 193.65797424316406INFO - Step 4292, rl-loss: 67.71263122558594INFO - Step 4293, rl-loss: 88.90097045898438INFO - Step 4294, rl-loss: 313.88275146484375INFO - Step 4295, rl-loss: 85.83805847167969INFO - Step 4296, rl-loss: 148.26329040527344INFO - Step 4297, rl-loss: 110.6671142578125INFO - S

  0%|          | 67/25000 [00:07<45:02,  9.23it/s]

INFO - Step 4454, rl-loss: 59.931549072265625INFO - Step 4455, rl-loss: 61.516380310058594INFO - Step 4456, rl-loss: 80.7306900024414INFO - Step 4457, rl-loss: 392.0959167480469INFO - Step 4458, rl-loss: 55.77306365966797INFO - Step 4459, rl-loss: 315.9853210449219INFO - Step 4460, rl-loss: 198.34873962402344INFO - Step 4461, rl-loss: 112.45957946777344INFO - Step 4462, rl-loss: 104.45378112792969INFO - Step 4463, rl-loss: 75.99278259277344INFO - Step 4464, rl-loss: 232.22396850585938INFO - Step 4465, rl-loss: 120.391845703125INFO - Step 4466, rl-loss: 229.51394653320312INFO - Step 4467, rl-loss: 103.97528839111328INFO - Step 4468, rl-loss: 54.652748107910156INFO - Step 4469, rl-loss: 119.28065490722656INFO - Step 4470, rl-loss: 219.73338317871094INFO - Step 4471, rl-loss: 56.620750427246094INFO - Step 4472, rl-loss: 186.1097412109375INFO - Step 4473, rl-loss: 153.61721801757812INFO - Step 4474, rl-loss: 173.6732177734375INFO - Step 4475, rl-loss: 191.791610717773

  0%|          | 75/25000 [00:07<35:30, 11.70it/s]

INFO - Step 4633, rl-loss: 149.8221435546875INFO - Step 4634, rl-loss: 95.56571197509766INFO - Step 4635, rl-loss: 40.805912017822266INFO - Step 4636, rl-loss: 183.32635498046875INFO - Step 4637, rl-loss: 68.60238647460938INFO - Step 4638, rl-loss: 416.5442810058594INFO - Step 4639, rl-loss: 48.76774978637695INFO - Step 4640, rl-loss: 124.55915069580078INFO - Step 4641, rl-loss: 159.70924377441406INFO - Step 4642, rl-loss: 266.3915710449219INFO - Step 4643, rl-loss: 170.72177124023438INFO - Step 4644, rl-loss: 150.85836791992188INFO - Step 4645, rl-loss: 94.8963851928711INFO - Step 4646, rl-loss: 197.70713806152344INFO - Step 4647, rl-loss: 109.86631774902344INFO - Step 4648, rl-loss: 218.63478088378906INFO - Step 4649, rl-loss: 384.2030029296875INFO - Step 4650, rl-loss: 149.4080047607422INFO - Step 4651, rl-loss: 123.94218444824219INFO - Step 4652, rl-loss: 79.51454162597656INFO - Step 4653, rl-loss: 60.87617874145508INFO - Step 4654, rl-loss: 142.5921630859375

  0%|          | 85/25000 [00:08<32:15, 12.87it/s]

INFO - Step 4811, rl-loss: 177.8228759765625INFO - Step 4812, rl-loss: 296.6627197265625INFO - Step 4813, rl-loss: 65.25407409667969INFO - Step 4814, rl-loss: 330.288818359375INFO - Step 4815, rl-loss: 224.4496307373047INFO - Step 4816, rl-loss: 200.29391479492188INFO - Step 4817, rl-loss: 120.21029663085938INFO - Step 4818, rl-loss: 180.4665985107422INFO - Step 4819, rl-loss: 152.17352294921875INFO - Step 4820, rl-loss: 99.72206115722656INFO - Step 4821, rl-loss: 80.2011489868164INFO - Step 4822, rl-loss: 121.78144836425781INFO - Step 4823, rl-loss: 139.41073608398438INFO - Step 4824, rl-loss: 142.8125INFO - Step 4825, rl-loss: 227.80052185058594INFO - Step 4826, rl-loss: 116.33967590332031INFO - Step 4827, rl-loss: 213.6270751953125INFO - Step 4828, rl-loss: 85.21214294433594INFO - Step 4829, rl-loss: 333.4465026855469INFO - Step 4830, rl-loss: 49.15685272216797INFO - Step 4831, rl-loss: 184.74838256835938INFO - Step 4832, rl-loss: 115.18624877929688INFO - Step

  0%|          | 93/25000 [00:09<36:20, 11.42it/s]

INFO - Step 4991, rl-loss: 107.82937622070312INFO - Step 4992, rl-loss: 145.36099243164062INFO - Step 4993, rl-loss: 52.466331481933594INFO - Step 4994, rl-loss: 82.09112548828125INFO - Step 4995, rl-loss: 60.56389617919922INFO - Step 4996, rl-loss: 122.34686279296875INFO - Step 4997, rl-loss: 61.137996673583984INFO - Step 4998, rl-loss: 51.52610778808594INFO - Step 4999, rl-loss: 162.0121307373047INFO - Step 5000, rl-loss: 111.99047088623047INFO - Step 5001, rl-loss: 194.02340698242188
INFO - Copied model parameters to target network.
INFO - Step 5002, rl-loss: 88.7510986328125INFO - Step 5003, rl-loss: 475.4242248535156INFO - Step 5004, rl-loss: 257.9560241699219INFO - Step 5005, rl-loss: 220.286865234375INFO - Step 5006, rl-loss: 151.00604248046875INFO - Step 5007, rl-loss: 501.87652587890625INFO - Step 5008, rl-loss: 131.39865112304688INFO - Step 5009, rl-loss: 34.992706298828125INFO - Step 5010, rl-loss: 61.83868408203125INFO - Step 5011, rl-loss: 235.62509155

  0%|          | 101/25000 [00:10<39:18, 10.56it/s]

INFO - Step 5169, rl-loss: 97.91119384765625INFO - Step 5170, rl-loss: 309.92156982421875INFO - Step 5171, rl-loss: 95.69326782226562INFO - Step 5172, rl-loss: 84.1171875INFO - Step 5173, rl-loss: 285.19708251953125INFO - Step 5174, rl-loss: 140.20034790039062INFO - Step 5175, rl-loss: 68.65594482421875INFO - Step 5176, rl-loss: 72.7459716796875INFO - Step 5177, rl-loss: 179.98703002929688INFO - Step 5178, rl-loss: 187.90036010742188INFO - Step 5179, rl-loss: 260.0970458984375INFO - Step 5180, rl-loss: 54.80316162109375INFO - Step 5181, rl-loss: 66.06100463867188INFO - Step 5182, rl-loss: 99.04564666748047INFO - Step 5183, rl-loss: 55.69652557373047INFO - Step 5184, rl-loss: 364.3450927734375INFO - Step 5185, rl-loss: 202.3957977294922INFO - Step 5186, rl-loss: 188.4345245361328INFO - Step 5187, rl-loss: 92.56147003173828INFO - Step 5188, rl-loss: 120.25926208496094INFO - Step 5189, rl-loss: 415.08319091796875INFO - Step 5190, rl-loss: 61.866153717041016INFO - St

  0%|          | 107/25000 [00:10<36:38, 11.32it/s]

INFO - Step 5347, rl-loss: 420.58709716796875INFO - Step 5348, rl-loss: 67.91574096679688INFO - Step 5349, rl-loss: 89.86143493652344INFO - Step 5350, rl-loss: 130.46604919433594INFO - Step 5351, rl-loss: 58.989471435546875INFO - Step 5352, rl-loss: 151.7370147705078INFO - Step 5353, rl-loss: 315.8959655761719INFO - Step 5354, rl-loss: 93.79360961914062INFO - Step 5355, rl-loss: 231.35928344726562INFO - Step 5356, rl-loss: 41.92392349243164INFO - Step 5357, rl-loss: 98.35431671142578INFO - Step 5358, rl-loss: 211.34652709960938INFO - Step 5359, rl-loss: 172.5919189453125INFO - Step 5360, rl-loss: 237.66671752929688INFO - Step 5361, rl-loss: 255.45982360839844INFO - Step 5362, rl-loss: 188.81727600097656INFO - Step 5363, rl-loss: 451.16168212890625INFO - Step 5364, rl-loss: 322.8156433105469INFO - Step 5365, rl-loss: 156.51051330566406INFO - Step 5366, rl-loss: 142.09457397460938INFO - Step 5367, rl-loss: 390.0146179199219INFO - Step 5368, rl-loss: 72.81298828125I

  0%|          | 109/25000 [00:10<41:22, 10.03it/s]


KeyboardInterrupt: 

INFO - Step 5525, rl-loss: 237.54005432128906INFO - Step 5526, rl-loss: 247.93051147460938INFO - Step 5527, rl-loss: 73.07599639892578INFO - Step 5528, rl-loss: 76.84397888183594INFO - Step 5529, rl-loss: 66.43702697753906INFO - Step 5530, rl-loss: 96.94234466552734INFO - Step 5531, rl-loss: 259.673828125INFO - Step 5532, rl-loss: 278.6844177246094INFO - Step 5533, rl-loss: 156.12989807128906INFO - Step 5534, rl-loss: 120.23167419433594INFO - Step 5535, rl-loss: 678.0908203125INFO - Step 5536, rl-loss: 253.52102661132812INFO - Step 5537, rl-loss: 258.60516357421875INFO - Step 5538, rl-loss: 289.50445556640625INFO - Step 5539, rl-loss: 142.77752685546875INFO - Step 5540, rl-loss: 279.8894958496094INFO - Step 5541, rl-loss: 138.52671813964844INFO - Step 5542, rl-loss: 90.2581558227539INFO - Step 5543, rl-loss: 158.94236755371094INFO - Step 5544, rl-loss: 185.2816162109375INFO - Step 5545, rl-loss: 74.03421783447266INFO - Step 5546, rl-loss: 249.46209716796875INFO -

## Evaluating the Agent
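After training, evaluate your agent's performance. You can use RLCard's `tournament` function to play the game multiple times and see how well your agent performs. The training loop above already does this periodically and logs the result, so the cell below plots the resulting learning curve and saves the trained model: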

In [None]:
# Plot the learning curve
# `csv_path` and `fig_path` are assigned at the end of the training cell (from
# `logger`), so that cell must have run to completion before this one.
plot_curve(csv_path, fig_path, "dqn")

# Save model
# The whole `agent` object is handed to torch.save (not just a state dict) and is
# written to 'model.pth' inside `log_dir`.
save_path = os.path.join(log_dir, 'model.pth')
torch.save(agent, save_path)
print('Model saved in', save_path)