# Play Blackjack using a Random Agent

In [30]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Install dependencies

In [31]:
#!pip install gymnasium==0.29.1 pygame==2.3.0 pettingzoo==1.24.3 tianshou==0.5.1

# Setup environment

In [32]:
import gymnasium as gym
import tianshou as ts
from gymnasium.wrappers import FlattenObservation

# We keep an original copy of the environment that
# is purely used for sampling as Tianshou modifies
# the action space such that we cannot sample from
# it.
env_for_sampling = gym.make("Blackjack-v1")

def get_env(render_mode = None, seed = 42):
  """
  Build a flattened Blackjack-v1 environment and reset it once with a seed.

  Blackjack has an observation space which is a tuple, consisting of
  the player's sum, the dealer's showing card and whether or not
  the player has a usable ace. This tuple is flattened for Tianshou
  to be able to put it through the deep network layers.

  Args:
    render_mode: Forwarded unchanged to `gym.make`.
    seed: Seed handed to the initial `env.reset` call. Defaults to 42,
      the value that was previously hard-coded, so existing callers see
      no change. Pass a different value (or None) to vary the episodes.

  Returns:
    The `FlattenObservation`-wrapped environment, already reset once.
  """
  env = FlattenObservation(gym.make("Blackjack-v1", render_mode=render_mode))

  # The first reset carries the seed; later resets by the caller pass none.
  env.reset(seed=seed)

  return env

env = get_env()

# Create Policy

In [33]:
from tianshou.policy import BasePolicy
from tianshou.data import Batch


class RandomPolicy(BasePolicy):
  """
  Baseline policy that ignores observations and picks actions at random.

  Actions are drawn from the action space of the module-level
  `env_for_sampling` environment.
  """

  def forward(self, batch, state=None, **kwargs):
    """
    Sample one random action per observation in `batch`.

    The observations are only counted, never inspected, so that the number
    of returned actions matches the number of entries being stepped.
    `state` and any extra keyword arguments are accepted for interface
    compatibility and ignored.
    """
    try:
      n_actions = len(batch.obs)
    except (AttributeError, TypeError):
      # No sized `obs` available: keep the previous behaviour of returning
      # exactly one action.
      n_actions = 1

    actions = [env_for_sampling.action_space.sample() for _ in range(n_actions)]
    return Batch(act=actions)

  def learn(self, batch, **kwargs):
    """
    No-op: a random policy has nothing to train.

    Returns an empty dict of training statistics.
    """
    return {}
  
policy = RandomPolicy()

# Play

Play a number of episodes against the dealer with the random agent and print the results.

In [34]:
# Put the policy into evaluation mode before collecting rollouts.
policy.eval()

# Build the environment inside the factory so the lambda does not close over
# the very name (`env`) that is being rebound to the vectorised wrapper.
env = ts.env.DummyVectorEnv([lambda: get_env(render_mode=None)])
collector = ts.data.Collector(policy, env, exploration_noise=True)
result = collector.collect(n_episode=100, render=None)
rews, lens = result["rews"], result["lens"]

# Classify each episode by the sign of its reward rather than by exact
# equality with +1/-1, so a larger payout (e.g. a natural-blackjack bonus,
# if the environment is configured to pay one) still counts as a win.
won = 0
draw = 0
lost = 0
for res in rews:
  if res > 0:
    won += 1
  elif res < 0:
    lost += 1
  else:
    draw += 1

print("Win: " + str(won) + " lost: " + str(lost) + " draw: " + str(draw))

Win: 28 lost: 70 draw: 2
