In [111]:
import numpy as np
from collections import Counter

In [112]:
# Canonical ordering of the action names. An action's position in this list is
# its integer id, which is how perf_metrics maps `action_idx` values to names.
ACTION_HEAD_ORDER: list[str] = [
    'discard',  # uses tile head
    'riichi',   # uses tile head
    'tsumo',
    'ron',
    'pass',
    'kan',      # uses tile head (kakan/ankan pass tile; daiminkan uses no-op)
    # chi_{low,mid,high}_noaka followed by chi_{low,mid,high}_aka
    *(
        f'chi_{position}_{aka}'
        for aka in ('noaka', 'aka')
        for position in ('low', 'mid', 'high')
    ),
    'pon_noaka',
    'pon_aka',
]

# Reverse lookup: action name -> position in ACTION_HEAD_ORDER.
ACTION_HEAD_INDEX = dict(zip(ACTION_HEAD_ORDER, range(len(ACTION_HEAD_ORDER))))

In [113]:
def perf_metrics(data_file):
    """Print summary statistics for one saved training-data ``.npz`` archive.

    Reads the ``returns``, ``advantages`` and ``action_idx`` arrays from the
    archive and prints, in order:

    * ``Mean Return``: the mean of the *absolute* returns (a magnitude, not
      the signed mean, despite the label).
    * ``Return fraction``: the share of entries whose return exceeds 0.05.
    * One line per action that occurs, most frequent first, giving its
      percentage of all entries; names come from ``ACTION_HEAD_ORDER``.
    * ``Value-Return Correlation``: the Pearson correlation between
      ``returns - advantages`` and ``returns``.

    Args:
        data_file: Path (or file object) accepted by ``np.load``.

    Returns:
        None. All results are written to stdout.
    """
    # NOTE(review): allow_pickle=True lets np.load unpickle object arrays,
    # which can execute arbitrary code. Only point this at trusted files.
    # The context manager closes the archive even if a key is missing.
    with np.load(data_file, allow_pickle=True) as data_zip:
        # Each data_zip[key] access re-reads the member from the archive,
        # so pull every array out exactly once.
        returns = data_zip["returns"]
        advantages = data_zip["advantages"]
        action_idx = data_zip["action_idx"]

    print("Mean Return:", np.mean(np.abs(returns)))
    print("Return fraction:", np.sum(returns > 0.05) / len(returns))

    # Counting plain Python ints avoids hashing one numpy scalar per element.
    action_counter = Counter(action_idx.tolist())
    total_actions = len(action_idx)
    print("\n")
    # most_common() sorts by descending count and keeps first-seen order
    # among ties, matching a stable sort on the negated count.
    for idx, ct in action_counter.most_common():
        # No quote reuse inside the f-string, so this parses before Python 3.12.
        print(ACTION_HEAD_ORDER[idx], f"{ct / total_actions * 100}%")

    # Presumably the value estimate at collection time, assuming
    # advantage = return - value. TODO confirm against the data writer.
    old_values = returns - advantages
    # np.corrcoef yields nan when either input has zero variance.
    print("Value-Return Correlation", np.corrcoef(old_values, returns)[0, 1])

In [212]:
# Open one archive for interactive inspection in the cells that follow.
# NOTE(review): this handle is never closed in the visible cells; call
# new_zip.close() when done, or use `with np.load(...) as new_zip:`.
# NOTE(review): allow_pickle=True permits unpickling object arrays, which can
# run arbitrary code. Only load trusted files this way.
new_zip = np.load("../training_data/ac_parallel_20250829_014000.npz", allow_pickle=True)

In [213]:
# Names of the arrays stored in the archive.
list(new_zip.files)

['action_idx',
 'tile_idx',
 'returns',
 'advantages',
 'joint_log_probs',
 'game_ids',
 'step_ids',
 'actor_ids',
 'hand_idx',
 'disc_idx',
 'called_idx',
 'seat_winds',
 'riichi_declarations',
 'dora_indicator_tiles',
 'legal_action_mask',
 'called_discards',
 'round_wind',
 'remaining_tiles',
 'owner_of_reactable_tile',
 'reactable_tile',
 'newly_drawn_tile',
 'wall_count',
 'deal_in_tiles',
 'game_outcomes_obj']

In [208]:
# Largest actor id present in the archive. ndarray.max() reduces in numpy
# rather than iterating the array element by element with the builtin max().
new_zip['actor_ids'].max()

np.int8(5)

In [214]:
# Display the stored riichi_declarations array. The recorded output shows
# shape (392057, 4), dtype int8, with most entries -1 (presumably -1 means
# "no declaration". TODO confirm against the data writer).
new_zip['riichi_declarations']

array([[-1, -1, -1, -1],
       [-1, -1, -1, -1],
       [-1, -1, -1, -1],
       ...,
       [-1, -1, -1, -1],
       [-1, -1, -1, -1],
       [-1, -1, -1,  2]], shape=(392057, 4), dtype=int8)

In [210]:
# Print return, action-frequency and value-correlation statistics for this data file.
perf_metrics("../training_data/ac_parallel_20250829_014000.npz")

Mean Return: 0.090387695
Return fraction: 0.09839640664495214


discard 73.54083717418641%
pass 23.99064421755%
riichi 1.6921008934925277%
ron 0.41448054747141355%
tsumo 0.2744498886641483%
pon_noaka 0.042850911984736915%
chi_mid_noaka 0.017344416755726847%
chi_high_noaka 0.009947533139313926%
chi_high_aka 0.006121558854962416%
chi_low_noaka 0.004846234093511913%
chi_low_aka 0.0028057144751911073%
chi_mid_aka 0.0025506495229010067%
kan 0.0010202598091604027%
Value-Return Correlation 0.2748337094012235


In [211]:
# Print return, action-frequency and value-correlation statistics for this data file.
perf_metrics("../training_data/ac_parallel_20250829_011610.npz")

Mean Return: 0.10473628
Return fraction: 0.10724690888588774


discard 73.48945153211743%
pass 23.871399163465195%
riichi 1.7763908374526336%
ron 0.4513092163058728%
tsumo 0.3207153814887173%
pon_noaka 0.029108264386956354%
chi_high_noaka 0.01652090681421847%
chi_mid_noaka 0.01389854065323141%
chi_high_aka 0.009702754795652118%
chi_low_noaka 0.008391571715158587%
chi_low_aka 0.004982495705875412%
kan 0.004720259089776705%
chi_mid_aka 0.0028846027770857647%
pon_aka 0.0005244732321974117%
Value-Return Correlation 0.28296770719157865


In [160]:
# Print return, action-frequency and value-correlation statistics for this data file.
perf_metrics("../training_data/ac_parallel_20250828_150827.npz")

Mean Return: 0.14808007
Return fraction: 0.19899269579162335


discard 75.55583397020685%
pass 18.83107608654798%
pon_noaka 1.441046393625418%
riichi 1.14637232669732%
chi_high_noaka 0.9173484345440195%
chi_low_noaka 0.8060939398874926%
ron 0.4061791347707882%
chi_mid_noaka 0.3242417028953732%
tsumo 0.25032261297718533%
kan 0.10298557951313629%
chi_high_aka 0.09221093250811231%
pon_aka 0.06690304070561409%
chi_low_aka 0.04836062492952629%
chi_mid_aka 0.01102522019118734%
Value-Return Correlation 0.32586325583473924


In [161]:
# Print return, action-frequency and value-correlation statistics for this data file.
perf_metrics("../training_data/ac_parallel_20250828_154546.npz")

Mean Return: 0.16223681
Return fraction: 0.19623640883051133


discard 74.79425803270522%
pass 20.61206646774464%
riichi 1.3251866800185028%
pon_noaka 0.9930004524035847%
chi_high_noaka 0.6237069644736106%
chi_low_noaka 0.5418676643267065%
ron 0.4234294224991994%
tsumo 0.27245878806671175%
chi_mid_noaka 0.17740320342403432%
kan 0.09276815078763972%
chi_high_aka 0.053627615934772555%
pon_aka 0.04320708392848974%
chi_low_aka 0.037107260315055894%
chi_mid_aka 0.009912213371829997%
Value-Return Correlation 0.3559128841240819


In [162]:
# Print return, action-frequency and value-correlation statistics for this data file.
perf_metrics("../training_data/ac_parallel_20250828_160927.npz")

Mean Return: 0.17023166
Return fraction: 0.1946564688650734


discard 74.34475858441036%
pass 21.80270038886425%
riichi 1.4672662946497643%
pon_noaka 0.7202240353175381%
ron 0.4391484182405182%
chi_high_noaka 0.3906693209831973%
tsumo 0.29551619923877503%
chi_low_noaka 0.27798120661378667%
chi_mid_noaka 0.11397745206242459%
kan 0.05131564018195134%
chi_high_aka 0.037390793097401724%
pon_aka 0.035069985249976794%
chi_low_aka 0.018050727702193937%
chi_mid_aka 0.005930953387863722%
Value-Return Correlation 0.35946106778058423


In [163]:
# Print return, action-frequency and value-correlation statistics for this data file.
perf_metrics("../training_data/ac_parallel_20250828_164712.npz")

Mean Return: 0.16640237
Return fraction: 0.19260311191118804


discard 74.0608155477431%
pass 22.82650286204408%
riichi 1.523341901917531%
ron 0.4278300081919431%
pon_noaka 0.4160171748325017%
tsumo 0.28068319264933606%
chi_high_noaka 0.1535668336727383%
chi_low_noaka 0.14483560901575987%
chi_mid_noaka 0.07318820668349567%
kan 0.03980411240681344%
chi_high_aka 0.022341663092856574%
pon_aka 0.01540804351231488%
chi_low_aka 0.00898802538218368%
chi_mid_aka 0.006676818855336447%
Value-Return Correlation 0.36210705928687514


In [165]:
# Print return, action-frequency and value-correlation statistics for this data file.
perf_metrics("../training_data/ac_parallel_20250828_174046.npz")

Mean Return: 0.16283648
Return fraction: 0.18933959136042688


discard 74.20877169082398%
pass 22.762288361046352%
riichi 1.496991366081733%
pon_noaka 0.44184892827858846%
ron 0.41460286289317233%
tsumo 0.2732317689122394%
chi_low_noaka 0.13520217351631036%
chi_high_noaka 0.13391698175284733%
chi_mid_noaka 0.06246031970430308%
chi_high_aka 0.02236233668425666%
kan 0.017478607983097156%
chi_low_aka 0.015936377866941527%
pon_aka 0.00925338069693379%
chi_mid_aka 0.005654843759237316%
Value-Return Correlation 0.3643792518811623


In [166]:
# Print return, action-frequency and value-correlation statistics for this data file.
perf_metrics('../training_data/ac_parallel_20250828_175507.npz')

Mean Return: 0.122376665
Return fraction: 0.1479834369578706


discard 74.31690114092515%
pass 21.933621202719987%
riichi 1.4293854706157956%
pon_noaka 0.7025363741120159%
ron 0.4079978726367907%
chi_high_noaka 0.38140583252079874%
chi_low_noaka 0.2826353978042572%
tsumo 0.2719985817578605%
chi_mid_noaka 0.12156361195882033%
kan 0.054197110331640735%
chi_high_aka 0.037482113687302934%
pon_aka 0.031150675564447707%
chi_low_aka 0.02228666219245039%
chi_mid_aka 0.006837953172683643%
Value-Return Correlation 0.37866761152886436


In [181]:
# Print return, action-frequency and value-correlation statistics for this data file.
perf_metrics('../training_data/ac_parallel_20250828_203723.npz')

Mean Return: 0.1218145
Return fraction: 0.1475355691707397


discard 74.0901281949526%
pass 23.211044173107993%
riichi 1.4563069509981386%
ron 0.42091572212393463%
pon_noaka 0.2767700094987672%
tsumo 0.2601279645242807%
chi_high_noaka 0.08858688555649717%
chi_mid_noaka 0.07706546980492962%
chi_low_noaka 0.07220087204315666%
chi_high_aka 0.016642044974486465%
chi_low_aka 0.014337761824172955%
kan 0.007424912373232423%
chi_mid_aka 0.004864597761772967%
pon_aka 0.0035844404560432387%
Value-Return Correlation 0.3915873894115282


In [189]:
# Print return, action-frequency and value-correlation statistics for this data file.
perf_metrics('../training_data/ac_parallel_20250828_214828.npz')

Mean Return: 0.12902048
Return fraction: 0.14984930774936844


discard 73.8002977533627%
pass 23.570498555325585%
riichi 1.6347904573678396%
ron 0.43392105904750045%
tsumo 0.2847850106599854%
pon_noaka 0.15250781991627632%
chi_mid_noaka 0.03683011977569939%
chi_high_noaka 0.03501455049098181%
chi_low_noaka 0.028271007433459387%
chi_high_aka 0.01115278274897939%
chi_low_aka 0.005446707854152725%
kan 0.0031124044880872716%
chi_mid_aka 0.0020749363253915144%
pon_aka 0.0012968352033696966%
Value-Return Correlation 0.3809980603049238


In [190]:
# Print return, action-frequency and value-correlation statistics for this data file.
perf_metrics('../training_data/ac_parallel_20250828_220705.npz')

Mean Return: 0.1268816
Return fraction: 0.1476097505398841


discard 73.65313806014593%
pass 23.76422212670316%
riichi 1.6429566487309488%
ron 0.4357913380321807%
tsumo 0.2840541282360679%
pon_noaka 0.09347633572763948%
chi_high_noaka 0.032885026696427185%
chi_mid_noaka 0.029518842861359834%
chi_low_noaka 0.028224156770949315%
chi_low_aka 0.01294686090410519%
chi_high_aka 0.011911112031776775%
chi_mid_aka 0.006214493233970492%
kan 0.003625121053149454%
pon_aka 0.0010357488723284152%
Value-Return Correlation 0.3871214361964352


In [60]:
# Print return, action-frequency and value-correlation statistics for this data file.
# NOTE(review): the recorded output reports "Value-Return Correlation nan" for
# this file. np.corrcoef gives nan when an input has zero variance or contains
# NaN. TODO confirm which applies to gen0 (e.g. constant returns - advantages).
perf_metrics("../training_data/gen0.npz")

Mean Return: 0.16047949
Return fraction: 0.1500838595106551


discard 74.52890686661405%
pass 20.92911404893449%
chi_low_noaka 0.5882498026835044%
pon_noaka 0.9458859510655091%
chi_high_noaka 0.6499112075769534%
kan 0.12455603788476717%
ron 0.3810674822415154%
riichi 1.1900651144435674%
tsumo 0.2244475138121547%
chi_mid_noaka 0.26144435674822414%
chi_high_aka 0.05056235201262825%
pon_aka 0.06412786108918705%
chi_low_aka 0.044396211523283345%
chi_mid_aka 0.017265193370165743%
Value-Return Correlation nan
