In [1]:
import libriichi
help(libriichi)

Help on module libriichi:

NAME
    libriichi

DESCRIPTION
    This module provides implementations of the riichi mahjong including the
    following features:

    - The core feature - player state maintenance driven by mjai events (via
      `state.PlayerState`).
    - Read mjai logs and produce a batch of instances for training (via
      `dataset`).
    - Self-play under standard Tenhou rules (via `arena`).
    - Definitions of observation and action space for Mortal (via `consts`).
    - Statistical works on mjai logs (via `stat.Stat`).
    - mjai interface (via `mjai.Bot`).

DATA
    __all__ = ['__profile__', '__version__', 'consts', 'state', 'dataset',...
    __profile__ = 'release'

VERSION
    0.1.0

FILE
    e:\code\mortal\mortal\libriichi.pyd




In [2]:
loader = libriichi.dataset.GameplayLoader(version = 4)

In [55]:
import libriichi  # 假设你的 Rust 模块名为 libriichi
import logging
import gzip

def load_and_process_gameplay(rust_loader, mjai_log_path):
    """
    加载并处理单个 mjai 日志文件的 Gameplay 数据。
    """
    try:
        with gzip.open(mjai_log_path, "rt") as f:
            log_content = f.read()
        gameplays = rust_loader.load_log(log_content)
        logging.info(f"Successfully loaded {len(gameplays)} Gameplay instances from {mjai_log_path}")
        return gameplays
    except Exception as e:
        logging.error(f"Error loading log {mjai_log_path}: {e}")
        return None


# 示例：初始化 Rust 的 GameplayLoader
loaders = []
for version in [1, 2, 3, 4]:
    loader = libriichi.dataset.GameplayLoader(version=version, augmented = False)
    loaders.append(loader)

# 示例：加载并处理一个转换后的 mjai 日志文件
mjai_log_path = "./dataset/train/20230101.html/2023010100gm-00a9-0000-7fcb3f13.json.gz"
gameplays_v = []
for loader in loaders:
    gameplays = load_and_process_gameplay(loader, mjai_log_path)
    if gameplays is not None:
        gameplays_v.append(gameplays)

gameplay = gameplays_v[0][0]
obs = gameplay.take_obs()
invisible_obs = gameplay.take_invisible_obs()
actions = gameplay.take_actions()
at_kyoku = gameplay.take_at_kyoku()
dones = gameplay.take_dones()
apply_gamma = gameplay.take_apply_gamma()
at_turns = gameplay.take_at_turns()
grp = gameplay.take_grp()
player_id = gameplay.take_player_id()
shantens = gameplay.take_shantens()
masks = gameplay.take_masks()

# grp
feature = grp.take_feature()
rank_by_player = grp.take_rank_by_player()
final_scores = grp.take_final_scores()


In [56]:
player_id = 0
move = 0
print("手牌")
print(invisible_obs[move][player_id*15:player_id*15+4])
print("aka:")
print(invisible_obs[move][player_id*15+4:player_id*15+7])
print("shanten:")
print(invisible_obs[move][player_id*15+7:player_id*15+13])
print("wait:")
print(invisible_obs[move][player_id*15+13])
print("furiten:")
print(invisible_obs[move][player_id*15+14])



手牌
[[0. 0. 1. 1. 1. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0.
  0. 1. 1. 0. 0. 0. 0. 1. 1. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
aka:
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
shanten:
[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1

In [140]:
obss = []
for gameplays in gameplays_v:
    for gameplay in gameplays:
        obss.append(gameplay.take_obs())
gameplays = gameplays_v[0]
actions = []
for gameplay in gameplays:
    actions.append(gameplay.take_actions())

In [None]:
version = 1
player_id = 1
decision_points_idx = 39
# print("obs len:", len(obss[(version-1)*4+player_id]))
# print("手牌编码:\n", obss[(version-1)*4+player_id][decision_points_idx][0:7])
# print("点数编码:\n", obss[(version-1)*4+player_id][decision_points_idx][7:11])
# print("排名编码:\n", obss[(version-1)*4+player_id][decision_points_idx][11:15])
# print("局数编码:\n", obss[(version-1)*4+player_id][decision_points_idx][15:19])
# print("本场和供托编码:\n", obss[(version-1)*4+player_id][decision_points_idx][19:39])
# print("场风与自风编码:\n", obss[(version-1)*4+player_id][decision_points_idx][39:41])
print("dora指示牌编码:\n", obss[(version-1)*4+player_id][decision_points_idx][41:48])
print("牌河的编码以四个通道一组，分别表示杠牌，舍牌，红宝牌和普通dora牌")
print("自家牌河编码(前6):\n")
for i in range(6):
    print(obss[(version-1)*4+player_id][decision_points_idx][48+i*4:48+i*4+4])
    print("--------------------------------------------------------------------")

dora指示牌编码:
 [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
牌河的编码以四个通道一组，分别表示杠牌，舍牌，红宝牌和普通dora牌
自家牌河编码(前6):

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0

In [97]:
print("actions length:", len(actions[0]))
print("player0:\n", actions[0])

actions length: 69
player0:
 [9, 29, 0, 8, 31, 15, 12, 45, 11, 20, 27, 45, 23, 17, 37, 3, 45, 28, 31, 27, 33, 2, 27, 19, 45, 20, 45, 3, 3, 18, 27, 30, 27, 33, 45, 10, 33, 32, 27, 26, 31, 28, 5, 29, 0, 45, 15, 20, 2, 18, 40, 3, 32, 15, 24, 0, 8, 31, 9, 18, 45, 20, 8, 45, 37, 24, 21, 24, 43]


### 生成用于训练grp模型的数据

In [5]:
from libriichi.dataset import Grp
import glob
import torch

buffer = []
file_list = glob.glob("dataset/train/**/*.json.gz", recursive=True)
print("file_list length:", len(file_list))
data = Grp.load_gz_log_files(file_list[0:10])
for game in data:
    feature = game.take_feature()
    rank_by_player = game.take_rank_by_player()

    for i in range(feature.shape[0]):
        inputs_seq = torch.as_tensor(feature[:i + 1], dtype=torch.float64)
        buffer.append((
            inputs_seq,
            rank_by_player,
        ))
    break

file_list length: 4142


In [8]:
print("buffer length:", len(buffer))
print("buffer[0]:", buffer[0])
print("buffer[1]:", buffer[1])

buffer length: 12
buffer[0]: (tensor([[0.0000, 0.0000, 0.0000, 2.5000, 2.5000, 2.5000, 2.5000]],
       dtype=torch.float64), [3, 0, 1, 2])
buffer[1]: (tensor([[0.0000, 0.0000, 0.0000, 2.5000, 2.5000, 2.5000, 2.5000],
        [1.0000, 0.0000, 0.0000, 2.5000, 2.6000, 2.5000, 2.4000]],
       dtype=torch.float64), [3, 0, 1, 2])


In [13]:
from libriichi.dataset import Grp
from train_grp import GrpFileDatasetsIter
import torch

file_list = glob.glob("dataset/train/**/*.json.gz", recursive=True)
dataloader = GrpFileDatasetsIter(file_list, file_batch_size=50, cycle=False)
for i, (inputs_seq, rank_by_player) in enumerate(dataloader):
    print("inputs_seq:", inputs_seq)
    print("rank_by_player:", rank_by_player)
    break

inputs_seq: tensor([[0.0000, 0.0000, 0.0000, 2.5000, 2.5000, 2.5000, 2.5000],
        [1.0000, 0.0000, 0.0000, 2.5000, 2.1100, 2.4000, 2.9900],
        [2.0000, 0.0000, 0.0000, 2.3000, 1.7200, 2.2000, 3.7800],
        [2.0000, 1.0000, 0.0000, 2.2500, 1.6700, 2.3500, 3.7300],
        [2.0000, 2.0000, 0.0000, 1.8500, 1.2700, 3.5500, 3.3300],
        [3.0000, 0.0000, 0.0000, 1.5300, 1.5900, 3.5500, 3.3300],
        [4.0000, 0.0000, 0.0000, 2.3300, 1.5900, 2.7500, 3.3300],
        [4.0000, 1.0000, 0.0000, 3.6000, 1.1000, 2.3600, 2.9400]],
       dtype=torch.float64)
rank_by_player: [1, 3, 2, 0]


## 验证train.py的代码

In [59]:
import prelude

import logging
import sys
import os
import gc
import gzip
import json
import shutil
import random
import torch
from os import path
from glob import glob
from datetime import datetime
from itertools import chain
from torch import optim, nn
from torch.amp import GradScaler
from torch.nn.utils import clip_grad_norm_
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from common import submit_param, parameter_count, drain, filtered_trimmed_lines, tqdm
from player import TestPlayer
from dataloader import FileDatasetsIter, worker_init_fn
from lr_scheduler import LinearWarmUpCosineAnnealingLR
from model import Brain, DQN, AuxNet
from libriichi.consts import obs_shape
from config import config

version = config['control']['version']

online = config['control']['online']
batch_size = config['control']['batch_size']
opt_step_every = config['control']['opt_step_every']
save_every = config['control']['save_every']
test_every = config['control']['test_every']
submit_every = config['control']['submit_every']
test_games = config['test_play']['games']
min_q_weight = config['cql']['min_q_weight']
next_rank_weight = config['aux']['next_rank_weight']
assert save_every % opt_step_every == 0
assert test_every % save_every == 0

device = torch.device(config['control']['device'])
torch.backends.cudnn.benchmark = config['control']['enable_cudnn_benchmark']
enable_amp = config['control']['enable_amp']
enable_compile = config['control']['enable_compile']

pts = config['env']['pts']
gamma = config['env']['gamma']
file_batch_size = config['dataset']['file_batch_size']
reserve_ratio = config['dataset']['reserve_ratio']
num_workers = config['dataset']['num_workers']
num_epochs = config['dataset']['num_epochs']
enable_augmentation = config['dataset']['enable_augmentation']
augmented_first = config['dataset']['augmented_first']
eps = config['optim']['eps']
betas = config['optim']['betas']
weight_decay = config['optim']['weight_decay']
max_grad_norm = config['optim']['max_grad_norm']

mortal = Brain(version=version, **config['resnet']).to(device)
dqn = DQN(version=version).to(device)
aux_net = AuxNet((4,)).to(device)
all_models = (mortal, dqn, aux_net)
if enable_compile:
    for m in all_models:
        m.compile()

logging.info(f'version: {version}')
logging.info(f'obs shape: {obs_shape(version)}')
logging.info(f'mortal params: {parameter_count(mortal):,}')
logging.info(f'dqn params: {parameter_count(dqn):,}')
logging.info(f'aux params: {parameter_count(aux_net):,}')

mortal.freeze_bn(config['freeze_bn']['mortal'])

decay_params = []
no_decay_params = []
for model in all_models:
    params_dict = {}
    to_decay = set()
    for mod_name, mod in model.named_modules():
        for name, param in mod.named_parameters(prefix=mod_name, recurse=False):
            params_dict[name] = param
            if isinstance(mod, (nn.Linear, nn.Conv1d)) and name.endswith('weight'):
                to_decay.add(name)
    decay_params.extend(params_dict[name] for name in sorted(to_decay))
    no_decay_params.extend(params_dict[name] for name in sorted(params_dict.keys() - to_decay))
param_groups = [
    {'params': decay_params, 'weight_decay': weight_decay},
    {'params': no_decay_params},
]
optimizer = optim.AdamW(param_groups, lr=1, weight_decay=0, betas=betas, eps=eps)
scheduler = LinearWarmUpCosineAnnealingLR(optimizer, **config['optim']['scheduler'])
scaler = GradScaler(device.type, enabled=enable_amp)
test_player = TestPlayer()
best_perf = {
    'avg_rank': 4.,
    'avg_pt': -135.,
}

steps = 0
state_file = config['control']['state_file']
best_state_file = config['control']['best_state_file']
if path.exists(state_file):
    state = torch.load(state_file, weights_only=True, map_location=device)
    timestamp = datetime.fromtimestamp(state['timestamp']).strftime('%Y-%m-%d %H:%M:%S')
    logging.info(f'loaded: {timestamp}')
    mortal.load_state_dict(state['mortal'])
    dqn.load_state_dict(state['current_dqn'])
    aux_net.load_state_dict(state['aux_net'])
    if not online or state['config']['control']['online']:
        optimizer.load_state_dict(state['optimizer'])
        scheduler.load_state_dict(state['scheduler'])
    scaler.load_state_dict(state['scaler'])
    best_perf = state['best_perf']
    steps = state['steps']

optimizer.zero_grad(set_to_none=True)
mse = nn.MSELoss()
ce = nn.CrossEntropyLoss()

if device.type == 'cuda':
    logging.info(f'device: {device} ({torch.cuda.get_device_name(device)})')
else:
    logging.info(f'device: {device}')

if online:
    submit_param(mortal, dqn, is_idle=True)
    logging.info('param has been submitted')

writer = SummaryWriter(config['control']['tensorboard_dir'])
stats = {
    'dqn_loss': 0,
    'cql_loss': 0,
    'next_rank_loss': 0,
}
all_q = torch.zeros((save_every, batch_size), device=device, dtype=torch.float32)
all_q_target = torch.zeros((save_every, batch_size), device=device, dtype=torch.float32)
idx = 0

AssertionError: Torch not compiled with CUDA enabled