# DL4G - Jass Introduction

In this exercise we will look at some properties of the jass kit environment that can be used to develop your own jass agent.

You will need to have numpy installed, as well as the jass-kit environment.

In [79]:
from jass.game.game_util import *
from jass.game.game_sim import GameSim
from jass.game.game_observation import GameObservation
from jass.game.const import *
from jass.game.rule_schieber import RuleSchieber
from jass.agents.agent import Agent
from jass.agents.agent_random_schieber import AgentRandomSchieber
from jass.arena.arena import Arena


Information about the cards is stored as one-hot encoded arrays, there are several tools available to access the information in the cards. 

Lets deal some random cards first.

In [80]:
# Lets set the seed of the random number generater, so that we get the same results
np.random.seed(1)

# This distributes the cards randomly among the 4 players.
hands = deal_random_hand()
print(hands.shape)
print(hands)

(4, 36)
[[0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 1 0 0 0 0 1 1 1 0 0 0 1 0]
 [0 0 1 0 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 1 1 0 0 1]
 [1 1 0 0 0 1 0 0 1 1 0 1 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]


In [81]:
# There is an entry for each player, to access the cards of the first player
cards = hands[0,:]
print(cards)

[0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 1 0 0 0 0 1 1 1 0 0 0 1 0]


In [82]:
# This should be 9 cards
assert(cards.sum() == 9)

# The cards can be converted to other formats for easier reading or processing
print(convert_one_hot_encoded_cards_to_str_encoded_list(cards))

# Each card is encoded as a value between 0 and 35.
print(convert_one_hot_encoded_cards_to_int_encoded_list(cards))


[np.str_('DJ'), np.str_('H6'), np.str_('SK'), np.str_('SJ'), np.str_('S9'), np.str_('CK'), np.str_('CQ'), np.str_('CJ'), np.str_('C7')]
[3, 17, 19, 21, 23, 28, 29, 30, 34]


In [83]:
# There is a method to count colors too
colors = count_colors(cards)
print(colors)

[1 1 3 4]


There is a common jass "rule" to select trump, when you have the "Puur" (Jack of trump) and 3 or more other cards of the same color. 

Task 1: Write a function that returns an array of 4 values that contains a 1 for each color that fulfills the rule or 0 otherwise, i.e. [0 0 0 0] is returned, if you do not have any color with Jack and 3 other cards.


In [84]:
def havePuurWithFour(hand: np.ndarray) -> np.ndarray:
    result = np.zeros(4, dtype=int)
    jack_index = 3  # because your order is [A, K, Q, J, 10, 9, 8, 7, 6]

    for color in range(4):
        start = color * 9
        end = start + 9
        suit_cards = hand[start:end]

        has_puur = suit_cards[jack_index] == 1
        num_cards = np.sum(suit_cards)

        if has_puur and num_cards >= 4:
            result[color] = 1

    return result


In [85]:
print(convert_one_hot_encoded_cards_to_str_encoded_list(cards))
print(count_colors(cards))
print(havePuurWithFour(cards))

[np.str_('DJ'), np.str_('H6'), np.str_('SK'), np.str_('SJ'), np.str_('S9'), np.str_('CK'), np.str_('CQ'), np.str_('CJ'), np.str_('C7')]
[1 1 3 4]
[0 0 0 1]


In [86]:
assert (havePuurWithFour(cards) == [0, 0, 0, 1]).all()
cards_2 = hands[1,:]
assert (havePuurWithFour(cards_2) == [0, 0, 0, 0]).all()

Another possibility to select trump is by assigning a value to each card, depending on whether the color is trump or not. This table is from the Maturawork of Daniel Graf from 2009: "Jassen auf Basis der Spieltheorie".

In [87]:
# Score for each card of a color from Ace to 6

# score if the color is trump
trump_score = [15, 10, 7, 25, 6, 19, 5, 5, 5]
# score if the color is not trump
no_trump_score = [9, 7, 5, 2, 1, 0, 0, 0, 0]
# score if obenabe is selected (all colors)
obenabe_score = [14, 10, 8, 7, 5, 0, 5, 0, 0,]
# score if uneufe is selected (all colors)
uneufe_score = [0, 2, 1, 1, 5, 5, 7, 9, 11]

Task 2: Implement a function that evaluates a hand that is given as a list of 9 cards and with a given trump value and returns a score depending on the table above. For example the score of our hand ['DJ', 'H6', 'SK', 'SJ', 'S9', 'CK', 'CQ', 'CJ', 'C7'] when Club is trump should be:

2 + 0 + 7 + 2 + 0 + 10 + 7 + 25 + 5 = 58

while the score is 70 if Spade is selected, which is better as you have both the jack and the nine.

You can use the arrays offset_of_card and color_of_card to get the offset (Ace, King, etc.) and color of a card.

In [88]:
def calculate_trump_selection_score(cards, trump: int) -> int:
    """
    cards: list of 9 integer-encoded cards (0–35)
    trump: int 0–3 representing the trump color (0=Clubs, 1=Spades, 2=Hearts, 3=Diamonds)
    returns: total score (int)
    """
    trump_score = [15, 10, 7, 25, 6, 19, 5, 5, 5]
    no_trump_score = [9, 7, 5, 2, 1, 0, 0, 0, 0]

    score = 0

    for card in cards:
        color = color_of_card[card]
        offset = offset_of_card[card]

        if color == trump:
            score += trump_score[offset]
        else:
            score += no_trump_score[offset]

    return score
    

In [89]:
card_list = convert_one_hot_encoded_cards_to_int_encoded_list(cards)
assert calculate_trump_selection_score(card_list, CLUBS) == 58
assert calculate_trump_selection_score(card_list, SPADES) == 70

## Agents

In order to play a game you have to program an agent that decides on the action. For that you have to override the methods action_trump and action_play_card.

Task 3: Use the function implemented above to select the best trump value. If the calculated trump value is below a threshold (for example let us take 68, as suggested in the work by Daniel Graf) you should "Schiebe", i.e. pass to your partner if you are still allowed to do that.

The game observation allows you to access the information about your card, and if you are the first or second player to select trump.

For playing a card, we just take a random action.

In [90]:
class MyAgent(Agent):
    def __init__(self):
        super().__init__()
        # we need a rule object to determine the valid cards
        self._rule = RuleSchieber()
        
    def action_trump(self, obs: GameObservation) -> int:
        """
        Determine trump action for the given observation
        Args:
            obs: the game observation, it must be in a state for trump selection

        Returns:
            selected trump as encoded in jass.game.const or jass.game.const.PUSH
        """
       # get your cards as int encoded list
        cards = convert_one_hot_encoded_cards_to_int_encoded_list(obs.hand)

        # compute scores for all four trumps
        scores = [calculate_trump_selection_score(cards, trump) for trump in range(4)]
        best_trump = int(np.argmax(scores))
        best_score = scores[best_trump]

        # threshold logic: 68 suggested by Daniel Graf
        if best_score < 68 and obs.trump is None:
            return PUSH  # pass to partner
        else:
            return best_trump
        

    def action_play_card(self, obs: GameObservation) -> int:
        """
        Determine the card to play.

        Args:
            obs: the game observation

        Returns:
            the card to play, int encoded as defined in jass.game.const
        """
        valid_cards = self._rule.get_valid_cards_from_obs(obs)
        # we use the global random number generator here
        return np.random.choice(np.flatnonzero(valid_cards))
    
    

We can use the game simulation to play a game. We will use that to test our implementation, and then use the arena class to play against other agents

In [91]:
rule = RuleSchieber()
game = GameSim(rule=rule)
agent = MyAgent()

np.random.seed(1)
game.init_from_cards(hands=deal_random_hand(), dealer=NORTH)

In [92]:
obs = game.get_observation()

In [93]:
cards = convert_one_hot_encoded_cards_to_str_encoded_list(obs.hand)
print(cards)
trump = agent.action_trump(obs)
assert trump == HEARTS

[np.str_('DA'), np.str_('DK'), np.str_('D9'), np.str_('D6'), np.str_('HA'), np.str_('HQ'), np.str_('HJ'), np.str_('H8'), np.str_('H7')]


In [94]:
# tell the simulation the selected trump
game.action_trump(trump)

In [95]:
# play the game to the end and print the result
while not game.is_done():
    game.action_play_card(agent.action_play_card(game.get_observation()))

print(game.state.points)

[ 10 147]


Another possibility to test agents locally is to use the arena. Let us play 100 games against the Random Agent and see if our trump methods makes any difference.


In [96]:
arena = Arena(nr_games_to_play=100)
arena.set_players(MyAgent(), AgentRandomSchieber(), MyAgent(), AgentRandomSchieber())

In [97]:
arena.play_all_games()

[........................................]  100/ 100 games played


In [98]:
print(arena.points_team_0.sum(), arena.points_team_1.sum())

8778.0 6922.0


Now you can continue with a rule based implemenation of the card play. Also look at the flask implementation of the service to see how you can get your agent online.

In [99]:
from jass.game.rule_schieber import RuleSchieber
import numpy as np

class MyRuleBasedAgent(MyAgent):
    def __init__(self):
        super().__init__()
        self._rule = RuleSchieber()

    def action_play_card(self, obs):
        valid_cards = self._rule.get_valid_cards_from_obs(obs)
        valid_indices = np.flatnonzero(valid_cards)

        trump = obs.trump
        current_trick = obs.current_trick
        first_card = next((c for c in current_trick if c != -1), None)

        # Helper: get color of any card
        def get_color(card):
            return color_of_card[card]

        # If you are first in trick → just play highest card
        if first_card is None:
            return self._play_highest(valid_indices)

        first_color = get_color(first_card)

        # Separate your valid cards by color
        same_color = [c for c in valid_indices if get_color(c) == first_color]
        trump_cards = [c for c in valid_indices if get_color(c) == trump]
        others = [c for c in valid_indices if c not in same_color + trump_cards]

        # 1️⃣ If you can follow color → play highest in that color
        if same_color:
            return self._play_highest(same_color)

        # 2️⃣ If you cannot follow color but have trump → play the lowest trump (save strong ones)
        if trump_cards:
            return self._play_lowest(trump_cards)

        # 3️⃣ Otherwise → throw lowest non-trump card
        return self._play_lowest(others)

    def _play_highest(self, cards):
        best_card = cards[0]
        best_rank = 10
        for c in cards:
            offset = offset_of_card[c]
            if offset < best_rank:  # smaller offset = stronger rank
                best_card = c
                best_rank = offset
        return best_card

    def _play_lowest(self, cards):
        worst_card = cards[0]
        worst_rank = -1
        for c in cards:
            offset = offset_of_card[c]
            if offset > worst_rank:  # larger offset = weaker card
                worst_card = c
                worst_rank = offset
        return worst_card

In [100]:
arena = Arena(nr_games_to_play=100)
arena.set_players(MyRuleBasedAgent(), AgentRandomSchieber(), MyRuleBasedAgent(), AgentRandomSchieber())
arena.play_all_games()

print("Team 0:", arena.points_team_0.sum(), "Team 1:", arena.points_team_1.sum())


[........................................]  100/ 100 games played
Team 0: 8473.0 Team 1: 7227.0


In [101]:
class MyAdvancedRuleBasedAgent(MyRuleBasedAgent):
    def __init__(self):
        super().__init__()
        self._rule = RuleSchieber()

    def action_play_card(self, obs):
        valid_cards = self._rule.get_valid_cards_from_obs(obs)
        valid_indices = np.flatnonzero(valid_cards)
        trump = obs.trump
        current_trick = obs.current_trick
        first_card = next((c for c in current_trick if c != -1), None)

        def get_color(card):
            return color_of_card[card]

        def get_rank(card):
            return offset_of_card[card]

        # ---- Case 1: You start the trick ----
        if first_card is None:
            # Prefer to start with your highest non-trump card (save trump)
            non_trump_cards = [c for c in valid_indices if get_color(c) != trump]
            if non_trump_cards:
                return self._play_highest(non_trump_cards)
            return self._play_lowest(valid_indices)

        # ---- Case 2: Follow color if possible ----
        first_color = get_color(first_card)
        same_color = [c for c in valid_indices if get_color(c) == first_color]
        trump_cards = [c for c in valid_indices if get_color(c) == trump]
        others = [c for c in valid_indices if c not in same_color + trump_cards]

        if same_color:
            # Try to win the trick if possible
            winning_card = self._get_winning_card(current_trick, trump)
            if winning_card is None or get_color(winning_card) != first_color:
                # Trick currently led by non-follow card, try to win
                return self._play_highest(same_color)
            else:
                # Trick currently led by same color, only play higher if possible
                higher = [c for c in same_color if get_rank(c) < get_rank(winning_card)]
                if higher:
                    return self._play_highest(higher)
                # Can't win, play lowest
                return self._play_lowest(same_color)

        # ---- Case 3: Cannot follow color ----
        # Check if partner is currently winning the trick
        partner_pos = (obs.player + 2) % 4
        partner_card = current_trick[partner_pos]
        winning_card = self._get_winning_card(current_trick, trump)
        partner_winning = (partner_card != -1 and winning_card == partner_card)

        if partner_winning:
            # Don't waste trump, discard lowest non-trump
            if others:
                return self._play_lowest(others)
            return self._play_lowest(trump_cards)

        # Partner not winning
        if trump_cards:
            # Avoid wasting the Puur if possible (Jack of trump)
            puur_index = self._get_puur_index(trump)
            non_puur_trumps = [c for c in trump_cards if c != puur_index]
            if non_puur_trumps:
                return self._play_lowest(non_puur_trumps)
            return self._play_lowest(trump_cards)

        # Otherwise, discard weakest card
        return self._play_lowest(others)

    # --- Helpers ---
    def _play_highest(self, cards):
        return min(cards, key=lambda c: offset_of_card[c])  # lower offset = higher rank

    def _play_lowest(self, cards):
        return max(cards, key=lambda c: offset_of_card[c])  # higher offset = weaker card

    def _get_puur_index(self, trump_color):
        """Return the integer card index of the Jack of trump."""
        return trump_color * 9 + 3  # because [A,K,Q,J,10,9,8,7,6] -> Jack=3

    def _get_winning_card(self, current_trick, trump):
        """Return the current winning card in the trick."""
        played = [c for c in current_trick if c != -1]
        if not played:
            return None
        first_color = color_of_card[played[0]]

        def rank_value(card):
            color = color_of_card[card]
            offset = offset_of_card[card]
            # Simple numeric order: trump beats others
            base = 100 - offset  # A small numeric advantage for high ranks
            if color == trump:
                base += 100
            elif color != first_color:
                base -= 100
            return base

        return max(played, key=rank_value)


In [102]:
arena = Arena(nr_games_to_play=100)
arena.set_players(MyAdvancedRuleBasedAgent(), MyRuleBasedAgent(),
                  MyAdvancedRuleBasedAgent(), MyRuleBasedAgent())
arena.play_all_games()

print("Team 0:", arena.points_team_0.sum(), "Team 1:", arena.points_team_1.sum())


[........................................]  100/ 100 games played
Team 0: 8198.0 Team 1: 7502.0


In [103]:
class MyMemoryAgent(MyAdvancedRuleBasedAgent):
    def __init__(self):
        super().__init__()
        self._rule = RuleSchieber()
        self._played_cards = set()  # store integer card IDs that were played

    def action_play_card(self, obs):
        # --- Update memory with all cards visible in this trick ---
        self._update_memory(obs)

        valid_cards = self._rule.get_valid_cards_from_obs(obs)
        valid_indices = np.flatnonzero(valid_cards)
        trump = obs.trump
        current_trick = obs.current_trick
        first_card = next((c for c in current_trick if c != -1), None)

        def get_color(card): return color_of_card[card]
        def get_rank(card): return offset_of_card[card]

        # --- If you are starting the trick ---
        if first_card is None:
            # Prefer to play a color where many of its cards are already gone
            remaining_by_color = self._count_remaining_by_color()
            # pick the color with fewest cards left in the deck
            color_to_play = min(remaining_by_color, key=remaining_by_color.get)
            color_cards = [c for c in valid_indices if get_color(c) == color_to_play]
            if color_cards:
                return self._play_highest(color_cards)
            return self._play_lowest(valid_indices)

        # --- Follow-color logic remains the same ---
        first_color = get_color(first_card)
        same_color = [c for c in valid_indices if get_color(c) == first_color]
        trump_cards = [c for c in valid_indices if get_color(c) == trump]
        others = [c for c in valid_indices if c not in same_color + trump_cards]

        # Follow color if possible
        if same_color:
            return self._play_best_in_color(same_color, current_trick, trump)

        # No same color: check partner winning
        partner_pos = (obs.player + 2) % 4
        partner_card = current_trick[partner_pos]
        winning_card = self._get_winning_card(current_trick, trump)
        partner_winning = (partner_card != -1 and winning_card == partner_card)

        if partner_winning:
            # discard lowest from a color already depleted
            safe_colors = self._get_depleted_colors()
            safe_discards = [c for c in others if get_color(c) in safe_colors]
            if safe_discards:
                return self._play_lowest(safe_discards)
            return self._play_lowest(others or trump_cards)

        # Play trump if not partner winning
        if trump_cards:
            puur_index = self._get_puur_index(trump)
            non_puur_trumps = [c for c in trump_cards if c != puur_index]
            if non_puur_trumps:
                return self._play_lowest(non_puur_trumps)
            return self._play_lowest(trump_cards)

        # Otherwise discard weakest
        return self._play_lowest(others)

    # --- MEMORY HELPERS ---
    def _update_memory(self, obs):
        """Add all visible cards from tricks and table to memory."""
        # Current trick cards
        for c in obs.current_trick:
            if c != -1:
                self._played_cards.add(c)
        # Previous tricks
        for t in obs.tricks:
            for c in t:
                if c != -1:
                    self._played_cards.add(c)

    def _count_remaining_by_color(self):
        """Count how many cards remain unseen per color."""
        counts = {0: 0, 1: 0, 2: 0, 3: 0}
        for card in range(36):
            if card not in self._played_cards:
                color = color_of_card[card]
                counts[color] += 1
        return counts

    def _get_depleted_colors(self):
        """Return colors with few cards remaining (likely void in others)."""
        counts = self._count_remaining_by_color()
        return [c for c, count in counts.items() if count <= 4]

    def _play_best_in_color(self, cards, current_trick, trump):
        """Decide whether to play high or low within the color."""
        winning_card = self._get_winning_card(current_trick, trump)
        first_color = color_of_card[current_trick[0]]
        # if no one played a higher card yet, play high
        if winning_card is None or color_of_card[winning_card] != first_color:
            return self._play_highest(cards)
        else:
            higher = [c for c in cards if offset_of_card[c] < offset_of_card[winning_card]]
            if higher:
                return self._play_highest(higher)
            return self._play_lowest(cards)


In [104]:
arena = Arena(nr_games_to_play=100)
arena.set_players(MyMemoryAgent(), AgentRandomSchieber(),
                  MyMemoryAgent(), AgentRandomSchieber())
arena.play_all_games()

print("Team 0:", arena.points_team_0.sum(), "Team 1:", arena.points_team_1.sum())


[........................................]  100/ 100 games played
Team 0: 9034.0 Team 1: 6666.0


In [105]:
import copy
import numpy as np

# --- Helpers you likely already have in your notebook ---
# team of a player: 0 for players {0,2}, 1 for players {1,3}
def team_of(player_id: int) -> int:
    return 0 if player_id % 2 == 0 else 1

# Get valid moves from full information.
# If your RuleSchieber exposes "get_valid_cards_from_state", use that.
# Otherwise, ask the GameSim directly for valid moves.
def get_valid_moves(rule, game) -> np.ndarray:
    valid_mask = rule.get_valid_cards_from_obs(game.get_observation())
    moves = np.flatnonzero(valid_mask)
    return moves[:2]  # try fewer for speed

# Play a card on a cloned game and return the clone
def play_on_clone(game, card):
    """
    Create a lightweight clone of GameSim by deep-copying it once,
    play 'card' on the copy, and return the new GameSim.
    """
    # Deepcopy the full object – slow but works for property-based design
    g2 = copy.deepcopy(game)
    # Execute the card on the cloned state
    g2.action_play_card(card)
    return g2

# Did the current trick finish? (4 cards played)
def trick_finished(game) -> bool:
    """
    Return True if 4 cards have been played in the current trick.
    Handles None-type current_trick for start of game/trick.
    """
    trick = game.state.current_trick
    if trick is None:
        return False  # no trick yet, so not finished
    return all(c != -1 for c in trick)

# Points of the *last completed* trick, split by teams.
# If RuleSchieber exposes a direct method, use it. Otherwise, use state fields.
def last_trick_points_by_team(game, rule):
    # In Schieber, points are accumulated in state.points during scoring, but we need just this trick.
    # Workaround approach:
    # - When trick finishes, RuleSchieber internally scores and adds to state.points.
    # - So: before simulating a playout branch, capture points; after trick completion, diff the points.
    # We implement this by taking the delta externally in minimax (see below).
    return tuple(game.state.points)  # return full cumulative points for delta computation

# Returns current trick leader’s winning card and winner player index.
# Most rule versions have something like rule.winning_card / rule.winner_of_trick.
def current_trick_winner(rule, game):
    trick = game.state.current_trick
    if any(c == -1 for c in trick):
        return None, None
    # If available:
    # winner = rule.winner_of_trick(trick, game.state.trump, game.state.first_player)
    # return trick[winner_index_in_trick], winner_global_player_id
    # Fallback: many rule APIs provide a direct winner; if not, you can compute, but that’s not needed for scoring here.
    return None, None

# Order moves (optional): try higher trump first, then high same-color, then others.
def move_order_heuristic(moves):
    # Simple: identity. You can add smarter ordering if you want pruning to work better.
    return moves


# -----------------------------
# Minimax for CURRENT TRICK ONLY
# -----------------------------
def choose_card_minimax_current_trick(game, rule):
    """
    Cheating: picks the card that maximizes points in THIS trick only.
    """
    root_player = game.state.player  # player to act now
    root_team = team_of(root_player)

    # Snapshot team points now – we’ll compute trick-only points as a delta after trick completes.
    base_points = tuple(game.state.points)

    best_value = -1e9
    best_card = None

    for card in move_order_heuristic(get_valid_moves(rule, game)):
        g2 = play_on_clone(game, card)
        value = _minimax_trick(g2, rule, root_team, base_points)
        if value > best_value:
            best_value = value
            best_card = card

    return best_card

MAX_DEPTH = 10  # just to be safe

def _minimax_trick(game, rule, root_team, base_points, depth=0):
    if depth > MAX_DEPTH:
        return 0  # stop evaluating further

    trick = game.state.current_trick
    if trick is None:
        return 0
    if all(c != -1 for c in trick):
        pts_after = tuple(game.state.points)
        trick_delta_team0 = pts_after[0] - base_points[0]
        trick_delta_team1 = pts_after[1] - base_points[1]
        return trick_delta_team0 - trick_delta_team1 if root_team == 0 else trick_delta_team1 - trick_delta_team0

    player_to_act = game.state.player
    maximizing = (team_of(player_to_act) == root_team)
    best = -1e9 if maximizing else 1e9

    for card in get_valid_moves(rule, game):
        g2 = play_on_clone(game, card)
        val = _minimax_trick(g2, rule, root_team, base_points, depth+1)
        if maximizing:
            best = max(best, val)
        else:
            best = min(best, val)
    return best


# -------------------------------------------
# Minimax over N COMPLETE TRICKS (with alpha-beta)
# -------------------------------------------
def choose_card_minimax_n_tricks(game, rule, n_tricks=2, use_alpha_beta=True):
    """
    Cheating: choose card maximizing points over the next n complete tricks.
    Uses alpha-beta pruning by default.
    """
    root_player = game.state.player
    root_team = team_of(root_player)

    best_value = -1e9
    best_card = None

    alpha, beta = -1e9, 1e9

    for card in move_order_heuristic(get_valid_moves(rule, game)):
        g2 = play_on_clone(game, card)
        val = _minimax_n_tricks(
            g2, rule, root_team, n_tricks=n_tricks,
            alpha=alpha, beta=beta, use_alpha_beta=use_alpha_beta
        )
        if val > best_value:
            best_value = val
            best_card = card
        if use_alpha_beta:
            alpha = max(alpha, val)

    return best_card


def _minimax_n_tricks(game, rule, root_team, n_tricks, alpha, beta, use_alpha_beta):
    # Base cases:
    if n_tricks <= 0:
        # Value is current total point difference
        pts0, pts1 = game.state.points
        return (pts0 - pts1) if root_team == 0 else (pts1 - pts0)

    # If current trick finished, just continue with one fewer trick to simulate
    if trick_finished(game):
        # Advance to next trick automatically handled by GameSim;
        # points already added into game.state.points.
        return _minimax_n_tricks(game, rule, root_team, n_tricks - 1, alpha, beta, use_alpha_beta)

    player_to_act = game.state.player
    maximizing = (team_of(player_to_act) == root_team)

    if maximizing:
        best = -1e9
        for card in move_order_heuristic(get_valid_moves(rule, game)):
            g2 = play_on_clone(game, card)
            val = _minimax_n_tricks(g2, rule, root_team, n_tricks, alpha, beta, use_alpha_beta)
            best = max(best, val)
            if use_alpha_beta:
                alpha = max(alpha, val)
                if beta <= alpha:
                    break  # beta cut
        return best
    else:
        best = 1e9
        for card in move_order_heuristic(get_valid_moves(rule, game)):
            g2 = play_on_clone(game, card)
            val = _minimax_n_tricks(g2, rule, root_team, n_tricks, alpha, beta, use_alpha_beta)
            best = min(best, val)
            if use_alpha_beta:
                beta = min(beta, val)
                if beta <= alpha:
                    break  # alpha cut
        return best


# --------------------------
# A cheating Minimax Agent
# --------------------------
# This agent expects to be given the FULL game (GameSim) at selection time,
# so you can use it in your own loop like:
# best = MinimaxTrickAgent(rule).select_card(game)  # cheating
class MinimaxTrickAgent:
    def __init__(self, rule, n_tricks=1, use_alpha_beta=True):
        self.rule = rule
        self.n_tricks = n_tricks
        self.use_alpha_beta = use_alpha_beta

    def select_card(self, game):
        if self.n_tricks == 1:
            return choose_card_minimax_current_trick(game, self.rule)
        else:
            return choose_card_minimax_n_tricks(game, self.rule, n_tricks=self.n_tricks, use_alpha_beta=self.use_alpha_beta)


In [106]:
# Example: play one trick (or the whole game loop) with cheating Minimax.
rule = RuleSchieber()
game = GameSim(rule=rule)
np.random.seed(1)
game.init_from_cards(hands=deal_random_hand(), dealer=NORTH)

agent = MinimaxTrickAgent(rule, n_tricks=2)  # current-trick greedy
# OR: agent = MinimaxTrickAgent(rule, n_tricks=2, use_alpha_beta=True)

# If you want to simulate a full game using this agent for your seat only:
while not game.is_done():
    if game.state.player == 0:  # say you're player 0
        card = agent.select_card(game)   # cheating: full info from 'game'
    else:
        # Opponents/partner can be random or your other agents
        valid = np.flatnonzero(rule.get_valid_cards_from_obs(game.get_observation()))
        card = np.random.choice(valid)
    game.action_play_card(card)

print("Final points:", game.state.points)


Final points: [97 60]


In [107]:
import time

rule = RuleSchieber()
game = GameSim(rule=rule)
game.init_from_cards(hands=deal_random_hand(), dealer=NORTH)

agent = MinimaxTrickAgent(rule, n_tricks=1)

start = time.time()
card = agent.select_card(game)
print("Chosen card:", card, "took", round(time.time() - start, 2), "seconds")


Chosen card: 0 took 0.11 seconds


In [108]:
import copy, math, numpy as np

# -------- Helpers (reuse your earlier ones) --------
def team_of(pid: int) -> int:
    return 0 if pid % 2 == 0 else 1

def get_valid_moves(rule, game) -> np.ndarray:
    mask = rule.get_valid_cards_from_obs(game.get_observation())
    return np.flatnonzero(mask)

def trick_finished(game) -> bool:
    trick = game.state.current_trick
    if trick is None:
        return False
    return all(c != -1 for c in trick)

def clone_and_play(game, card):
    g2 = copy.deepcopy(game)
    g2.action_play_card(card)
    return g2

def simulate_random_until(rule, game, n_tricks=1):
    """Simulate randomly until current trick ends (n_tricks=1) or for n_tricks completed."""
    tricks_left = n_tricks
    start_tricks = game.state.nr_tricks
    while True:
        if trick_finished(game):
            # one trick completed -> engine will start next trick on next play
            if game.state.nr_tricks >= start_tricks + tricks_left:
                break
        if game.is_done():
            break
        moves = get_valid_moves(rule, game)
        if len(moves) == 0:
            break
        card = np.random.choice(moves)
        game.action_play_card(card)

def value_from_points(points, root_team: int) -> int:
    t0, t1 = points
    return (t0 - t1) if root_team == 0 else (t1 - t0)

# ----------------- MCTS Node -----------------
class MCTSNode:
    __slots__ = ("parent", "children", "N", "W", "Q", "untried")

    def __init__(self, parent=None, untried=None):
        self.parent = parent
        self.children = {}      # action -> node
        self.N = 0              # visits
        self.W = 0.0            # total value
        self.Q = 0.0            # mean value
        self.untried = list(untried) if untried is not None else []

    def uct_select(self, c=1.414):
        """Select child with maximal UCT."""
        log_Np = math.log(self.N + 1e-9)
        def uct(a_node):
            a, node = a_node
            return node.Q + c * math.sqrt(log_Np / (node.N + 1e-9))
        return max(self.children.items(), key=uct)

    def expand(self, action, child_untried):
        child = MCTSNode(parent=self, untried=child_untried)
        self.children[action] = child
        if action in self.untried:
            self.untried.remove(action)
        return child

    def backprop(self, value):
        n = self
        while n is not None:
            n.N += 1
            n.W += value
            n.Q = n.W / n.N
            n = n.parent

# --------------- Cheating MCTS agent ---------------
class MCTSAgentCheating:
    def __init__(self, rule, iterations=1000, c=1.414, horizon_tricks=1):
        self.rule = rule
        self.iterations = iterations
        self.c = c
        self.horizon_tricks = horizon_tricks

    def select_card(self, game):
        root_team = team_of(game.state.player)
        root = MCTSNode(untried=get_valid_moves(self.rule, game))

        if len(root.untried) == 1:
            return root.untried[0]

        for _ in range(self.iterations):
            # Selection
            node = root
            g = copy.deepcopy(game)
            # traverse down using UCT while no untried and children exist
            while not node.untried and node.children:
                action, node = node.uct_select(self.c)
                g.action_play_card(action)

            # Expansion
            if node.untried:
                a = np.random.choice(node.untried)
                g.action_play_card(a)
                child_untried = get_valid_moves(self.rule, g)
                node = node.expand(a, child_untried)

            # Simulation
            simulate_random_until(self.rule, g, n_tricks=self.horizon_tricks)

            # Value (team diff from root perspective)
            val = value_from_points(g.state.points, root_team)

            # Backprop
            node.backprop(val)

        # Pick action with highest visit count (robust)
        best_a = max(root.children.items(), key=lambda kv: kv[1].N)[0]
        return best_a


In [109]:
rule = RuleSchieber()
game = GameSim(rule=rule)
np.random.seed(1)
game.init_from_cards(hands=deal_random_hand(), dealer=NORTH)

mcts = MCTSAgentCheating(rule, iterations=1500, c=1.2, horizon_tricks=1)  # trick MCTS
# or horizon_tricks=2..3 for deeper impact

while not game.is_done():
    if game.state.player == 0:
        card = mcts.select_card(game)
    else:
        valid = get_valid_moves(rule, game)
        card = np.random.choice(valid)
    game.action_play_card(card)

print("Final points:", game.state.points)


Final points: [112  45]


In [110]:
# ---- Utilities to reconstruct hidden hands consistent with observation ----
def observed_played_cards(obs):
    played = set()
    for trick in obs.tricks:
        for c in trick:
            if c != -1:
                played.add(c)
    for c in obs.current_trick:
        if c != -1:
            played.add(c)
    return played


def sample_determinization(obs, rng=np.random):
    """Return a fully specified GameSim (full-information state) consistent with a GameObservation."""
    my_id = obs.player
    my_hand = np.flatnonzero(obs.hand)  # cards in my hand

    # --- 1. Collect played cards ---
    played = set()
    for trick in obs.tricks:
        for c in trick:
            if c != -1:
                played.add(int(c))
    for c in obs.current_trick:
        if c != -1:
            played.add(int(c))

    all_cards = set(range(36))
    remaining = list(all_cards - set(my_hand) - played)
    rng.shuffle(remaining)

    # --- 2. Count cards already played per player ---
    cards_played_by = [0, 0, 0, 0]
    # figure out which trick first player index to use
    current_first_player = int(obs.trick_first_player[obs.nr_tricks])

    for t_index, t in enumerate(obs.tricks):
        first_p = int(obs.trick_first_player[t_index])
        for i, c in enumerate(t):
            if c != -1:
                pid = (first_p + i) % 4
                cards_played_by[pid] += 1

    for i, c in enumerate(obs.current_trick):
        if c != -1:
            pid = (current_first_player + i) % 4
            cards_played_by[pid] += 1

    # --- 3. Determine how many cards each still needs ---
    need = [9 - cards_played_by[p] for p in range(4)]
    need[my_id] = len(my_hand)

    # --- 4. Initialize hands (2D array) ---
    cursor = 0
    hands = np.zeros((4, 36), dtype=np.int8)
    hands[my_id, my_hand] = 1
    opp_ids = [p for p in range(4) if p != my_id]

    for pid in opp_ids:
        k = need[pid]
        assign = remaining[cursor:cursor + k]
        cursor += k
        hands[pid, assign] = 1

    # --- 5. Build GameSim with copied state ---
    g = GameSim(rule=RuleSchieber())
    s = g.state
    s.hands = hands
    s.player = obs.player
    s.trump = obs.trump
    s.points = np.array(obs.points, dtype=np.int16)
    s.dealer = obs.dealer
    s.nr_tricks = obs.nr_tricks
    s.nr_played_cards = obs.nr_played_cards
    s.nr_cards_in_trick = obs.nr_cards_in_trick
    s.trick_first_player = np.array(obs.trick_first_player, dtype=np.int16)
    s.trick_points = np.array(obs.trick_points, dtype=np.int16)
    s.trick_winner = np.array(obs.trick_winner, dtype=np.int16)
    s.current_trick = np.array(obs.current_trick, dtype=np.int16)
    s.tricks = np.array(obs.tricks, dtype=np.int16)
    s.forehand = obs.forehand
    s.declared_trump = obs.declared_trump

    return g

# ---- Determinization wrapper ----
class MCTSAgentDeterminized:
    def __init__(self, iterations_per_det=300, n_dets=20, c=1.2, horizon_tricks=1):
        self.iterations_per_det = iterations_per_det
        self.n_dets = n_dets
        self.c = c
        self.horizon_tricks = horizon_tricks

    def action_play_card(self, obs):
        # aggregate visits over determinization runs
        visit_counts = {}
        for _ in range(self.n_dets):
            g_det = sample_determinization(obs)
            mcts = MCTSAgentCheating(RuleSchieber(),
                                     iterations=self.iterations_per_det,
                                     c=self.c,
                                     horizon_tricks=self.horizon_tricks)
            # one “select_card” without advancing original obs
            a = mcts.select_card(g_det)
            visit_counts[a] = visit_counts.get(a, 0) + 1

        # choose action with most votes
        best = max(visit_counts.items(), key=lambda kv: kv[1])[0]
        return best


In [111]:
class ISMCTSAgent:
    def __init__(self, iterations=2000, c=1.2, horizon_tricks=1):
        self.iterations = iterations
        self.c = c
        self.horizon_tricks = horizon_tricks

    def action_play_card(self, obs):
        root_team = team_of(obs.player)
        # Root node keyed by "information set" = empty history
        root = MCTSNode(untried=self._valid_from_obs(obs))

        # If only one legal move, play it
        if len(root.untried) == 1:
            return root.untried[0]

        for _ in range(self.iterations):
            # 1) Sample one determinization consistent with obs
            g = sample_determinization(obs)

            # 2) Selection/Expansion on a single open-loop tree
            node = root
            while not node.untried and node.children:
                action, node = node.uct_select(self.c)
                g.action_play_card(action)

            if node.untried:
                a = np.random.choice(node.untried)
                g.action_play_card(a)
                node = node.expand(a, self._valid_from_state(g))

            # 3) Rollout
            simulate_random_until(RuleSchieber(), g, self.horizon_tricks)

            # 4) Backprop with value from this determinization
            val = value_from_points(g.state.points, root_team)
            node.backprop(val)

        # robust child – highest visit count
        return max(root.children.items(), key=lambda kv: kv[1].N)[0]

    def _valid_from_obs(self, obs):
        mask = RuleSchieber().get_valid_cards_from_obs(obs)
        return np.flatnonzero(mask)

    def _valid_from_state(self, game):
        mask = RuleSchieber().get_valid_cards_from_obs(game.get_observation())
        return np.flatnonzero(mask)


In [112]:
class AgentMCTSDeterminized(Agent):
    def __init__(self, iterations_per_det=300, n_dets=10, c=1.2, horizon_tricks=1):
        super().__init__()
        self._rule = RuleSchieber()
        self._core = MCTSAgentDeterminized(
            iterations_per_det=iterations_per_det,
            n_dets=n_dets,
            c=c,
            horizon_tricks=horizon_tricks
        )

    def action_trump(self, obs):
        # Simple: never schiebe for testing (or random trump)
        from jass.game.const import HEARTS
        return HEARTS

    def action_play_card(self, obs):
        return self._core.action_play_card(obs)


In [113]:
class AgentISMCTS(Agent):
    def __init__(self, iterations=2000, c=1.2, horizon_tricks=1):
        super().__init__()
        self._rule = RuleSchieber()
        self._core = ISMCTSAgent(iterations=iterations, c=c, horizon_tricks=horizon_tricks)

    def action_trump(self, obs):
        from jass.game.const import HEARTS
        return HEARTS

    def action_play_card(self, obs):
        return self._core.action_play_card(obs)

In [114]:
arena = Arena(nr_games_to_play=2)
arena.set_players(
    AgentMCTSDeterminized(), AgentISMCTS(),
    AgentMCTSDeterminized(), AgentISMCTS()
)
arena.play_all_games()

print("Team0:", arena.points_team_0.sum(), "Team1:", arena.points_team_1.sum())



Team0: 175.0 Team1: 139.0


In [127]:
import importlib
import jass.agents.agent_mlmcts
import jass.agents.agent_mcts_determinized
importlib.reload(jass.agents.agent_mlmcts)
importlib.reload(jass.agents.agent_mcts_determinized)

from jass.agents.agent_mlmcts import AgentMLMCTS, TrumpPredictor, MCTSAdapter
from jass.agents.agent_mcts_determinized import MCTSAgentDeterminized
from jass.arena.arena import Arena

mcts = MCTSAgentDeterminized(iterations_per_det=50, n_dets=5)
mcts_adapter = MCTSAdapter(mcts)
trump_predictor = TrumpPredictor("jass/agents/trump_model.joblib")
bot = AgentMLMCTS(trump_predictor, mcts_adapter)

arena = Arena(nr_games_to_play=5)
arena.set_players(bot, AgentISMCTS(), bot, AgentISMCTS())
arena.play_all_games()

print("Team0:", arena.points_team_0.sum(), "Team1:", arena.points_team_1.sum())

[........................................]    5/   5 games played
Team0: 464.0 Team1: 321.0


In [130]:
import requests
r = requests.post("http://10.155.121.147:5000/play", json={
    "trump": -1,
    "forehand": 1,
    "hand": [1,0,1,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
    "player": 0
})
print(r.status_code, r.text)


200 {"action":5}

