In [1]:
import numpy as np
from collections import Counter

In [2]:
# Action names in head order; an action's position in this list is its index.
ACTION_HEAD_ORDER: list[str] = [
    # Core actions
    "discard",  # uses tile head
    "riichi",  # uses tile head
    "tsumo",
    "ron",
    "pass",
    "kan",  # uses tile head (kakan/ankan pass tile; daiminkan uses no-op)
    # Chi variants: the three no-aka entries first, then the three aka entries
    "chi_low_noaka",
    "chi_mid_noaka",
    "chi_high_noaka",
    "chi_low_aka",
    "chi_mid_aka",
    "chi_high_aka",
    # Pon variants
    "pon_noaka",
    "pon_aka",
]
# Reverse lookup: action name -> position in ACTION_HEAD_ORDER.
ACTION_HEAD_INDEX = dict(zip(ACTION_HEAD_ORDER, range(len(ACTION_HEAD_ORDER))))

In [6]:
def perf_metrics(data_file, return_threshold=0.05):
    """Print summary metrics for one saved ``.npz`` training-data archive.

    Prints, in this order:
      1. the mean absolute value of the ``returns`` array,
      2. the fraction of ``returns`` entries greater than ``return_threshold``
         (count divided by ``len(returns)``),
      3. a blank separator,
      4. one ``<action name> <percent>%`` line for each distinct value in
         ``action_idx``, in order of first appearance.

    Args:
        data_file: Archive location, passed straight to ``np.load``. The
            archive must contain ``returns`` and ``action_idx`` arrays.
        return_threshold: Returns strictly above this value are counted in the
            second printed metric. Defaults to 0.05.

    Returns:
        None. All results are written to stdout.

    Raises:
        KeyError: If the archive has no ``returns`` or ``action_idx`` entry.
        IndexError: If an ``action_idx`` value is not a valid position in
            ``ACTION_HEAD_ORDER``.
    """
    # NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which
    # can execute arbitrary code. Only point this at trusted files; drop the
    # flag if the archives never contain object-dtype arrays -- TODO confirm.
    # The context manager closes the archive even if a lookup below raises.
    with np.load(data_file, allow_pickle=True) as data_zip:
        # Every NpzFile lookup re-reads the member from the archive, so pull
        # each array out exactly once instead of indexing repeatedly.
        returns = data_zip["returns"]
        action_idx = data_zip["action_idx"]

    print(np.mean(np.abs(returns)))
    print(np.sum(returns > return_threshold) / len(returns))

    action_counter = Counter(action_idx)
    total_actions = len(action_idx)
    print("\n")
    for idx, ct in action_counter.items():
        print(ACTION_HEAD_ORDER[idx], f"{ct / total_actions * 100}%")

In [7]:
# Metrics for the ac_parallel_20250825_192500 training-data file.
perf_metrics(data_file="../training_data/ac_parallel_20250825_192500.npz")

0.18755879
0.15507616187314802


discard 73.34522617730373%
pass 24.41595849046189%
riichi 1.489616342875208%
chi_low_aka 0.0027599426935535267%
ron 0.3698323209361726%
tsumo 0.24939845794474594%
pon_aka 0.001254519406160694%
pon_noaka 0.09258353217465921%
chi_high_noaka 0.029104850222928098%
chi_low_noaka 0.0027599426935535267%
chi_mid_aka 0.0005018077624642775%
kan 0.00025090388123213877%
chi_high_aka 0.00025090388123213877%
chi_mid_noaka 0.0005018077624642775%


In [8]:
# Metrics for the ac_parallel_20250825_154932 training-data file.
perf_metrics(data_file="../training_data/ac_parallel_20250825_154932.npz")

0.18243663
0.15115449118533753


discard 73.22009703116578%
pass 24.690039331563078%
riichi 1.4866632060215226%
tsumo 0.24407530896240542%
ron 0.355381635953441%
chi_high_aka 0.0029947890670233793%
pon_noaka 0.0007486972667558448%


In [9]:
# Metrics for the gen0 training-data file.
perf_metrics(data_file="../training_data/gen0.npz")

0.16047949
0.1500838595106551


discard 74.52890686661405%
pass 20.92911404893449%
chi_low_noaka 0.5882498026835044%
pon_noaka 0.9458859510655091%
chi_high_noaka 0.6499112075769534%
kan 0.12455603788476717%
ron 0.3810674822415154%
riichi 1.1900651144435674%
tsumo 0.2244475138121547%
chi_mid_noaka 0.26144435674822414%
chi_high_aka 0.05056235201262825%
pon_aka 0.06412786108918705%
chi_low_aka 0.044396211523283345%
chi_mid_aka 0.017265193370165743%


In [10]:
# Metrics for the ac_parallel_20250825_213051 training-data file.
perf_metrics(data_file="../training_data/ac_parallel_20250825_213051.npz")

0.18317343
0.15287744988682708


discard 73.51753491065229%
pass 24.424798281475983%
riichi 1.4636029347256652%
tsumo 0.23737488939633583%
ron 0.35568634430137336%
chi_mid_noaka 0.0005013197241738877%
pon_noaka 0.00025065986208694385%
chi_low_aka 0.00025065986208694385%


In [11]:
# Metrics for the ac_parallel_20250825_222614 training-data file.
perf_metrics(data_file="../training_data/ac_parallel_20250825_222614.npz")

0.18238935
0.15058725249370977


discard 73.69223195727626%
pass 24.30484421151035%
riichi 1.4196493391544758%
tsumo 0.2426581007031118%
ron 0.33514663908585524%
chi_high_noaka 0.00024862510317941785%
chi_low_noaka 0.00348075144451185%
chi_mid_noaka 0.0009945004127176714%
chi_low_aka 0.0004972502063588357%
pon_noaka 0.00024862510317941785%


In [12]:
# Metrics for the ac_parallel_20250825_225310 training-data file.
perf_metrics(data_file="../training_data/ac_parallel_20250825_225310.npz")

0.19302396
0.15523899984211198


discard 73.24098973226705%
pass 24.502464807263852%
riichi 1.5510617342118256%
ron 0.35838072062092596%
tsumo 0.26540222597032204%
chi_mid_noaka 0.05413303192595804%
chi_low_noaka 0.016540648644042734%
kan 0.0007518476656383061%
pon_noaka 0.007267860767836959%
chi_high_noaka 0.0002506158885461021%
chi_high_aka 0.0025061588854610206%
chi_low_aka 0.0002506158885461021%


In [13]:
# Metrics for the ac_parallel_20250825_230502 training-data file.
perf_metrics(data_file="../training_data/ac_parallel_20250825_230502.npz")

0.17711326
0.14411690602166793


discard 73.35600907029477%
pass 24.422608549592674%
riichi 1.3815402704291593%
ron 0.3401360544217687%
tsumo 0.27294868564709834%
chi_high_noaka 0.11337868480725624%
chi_high_aka 0.012597631645250695%
pon_noaka 0.058788947677836566%
chi_mid_noaka 0.02519526329050139%
chi_low_noaka 0.016796842193667588%


In [14]:
# Metrics for the ac_parallel_20250825_231401 training-data file.
perf_metrics(data_file="../training_data/ac_parallel_20250825_231401.npz")

0.18434078
0.15460283304659106


discard 73.36544516116237%
pass 24.383338869855372%
riichi 1.4185065273115807%
tsumo 0.23873152495649286%
chi_high_noaka 0.15593159833606854%
ron 0.35078412026317485%
chi_mid_noaka 0.013386814483541655%
pon_noaka 0.05924904928826769%
chi_high_aka 0.00644550326985339%
pon_aka 0.000495807943834876%
chi_mid_aka 0.0007437119157523142%
chi_low_noaka 0.0052059834102661995%
chi_low_aka 0.000991615887669752%
kan 0.0007437119157523142%


In [15]:
# Metrics for the ac_parallel_20250825_232819 training-data file.
perf_metrics(data_file="../training_data/ac_parallel_20250825_232819.npz")

0.18148695
0.1511219442253925


discard 73.61579085717017%
pass 24.40318302387268%
riichi 1.3501565225703156%
tsumo 0.20312089277606518%
ron 0.30587616794513345%
chi_low_noaka 0.0693000693000693%
chi_mid_noaka 0.01911726049657084%
chi_high_noaka 0.01911726049657084%
pon_noaka 0.007168972686214065%
pon_aka 0.00477931512414271%
chi_low_aka 0.002389657562071355%
