In [111]:
import numpy as np
from collections import Counter

In [112]:
# Action-head names in index order: the name at position i is the label used
# for action index i.
ACTION_HEAD_ORDER: list[str] = [
    # Turn actions and the pass response.
    "discard",         # goes through the tile head
    "riichi",          # goes through the tile head
    "tsumo",
    "ron",
    "pass",
    "kan",             # goes through the tile head (kakan/ankan pass a tile; daiminkan uses a no-op)
    # Chi calls, first without and then with an aka tile.
    "chi_low_noaka",
    "chi_mid_noaka",
    "chi_high_noaka",
    "chi_low_aka",
    "chi_mid_aka",
    "chi_high_aka",
    # Pon calls, without / with an aka tile.
    "pon_noaka",
    "pon_aka",
]
# Reverse lookup: action-head name -> its position in ACTION_HEAD_ORDER.
ACTION_HEAD_INDEX = dict(zip(ACTION_HEAD_ORDER, range(len(ACTION_HEAD_ORDER))))

In [113]:
def perf_metrics(data_file, action_names=None):
    """Print summary statistics for one saved training-data ``.npz`` archive.

    Reads the ``returns``, ``action_idx`` and ``advantages`` arrays and prints,
    in this order:

    * ``Mean Return:`` -- the mean of the *absolute* values of ``returns``
      (the label is kept as-is so previously saved outputs stay comparable).
    * ``Return fraction:`` -- the share of entries whose return exceeds 0.05.
    * One ``<action name> <percent>%`` line per distinct value in
      ``action_idx``, most frequent first (ties keep first-seen order).
    * ``Value-Return Correlation`` -- the correlation coefficient between
      ``returns - advantages`` and ``returns``. This is ``nan`` when either
      series is constant.

    Args:
        data_file: Path or file object accepted by ``np.load``; must be an
            ``.npz`` archive containing the three arrays named above.
        action_names: Optional sequence mapping an action index to its display
            name. Defaults to the notebook-level ``ACTION_HEAD_ORDER``.

    Returns:
        None. All results are written to stdout.

    Raises:
        KeyError: If one of the required arrays is missing from the archive.
        IndexError: If ``action_idx`` holds an index outside ``action_names``.
    """
    if action_names is None:
        action_names = ACTION_HEAD_ORDER

    # NOTE(review): allow_pickle=True lets numpy unpickle object arrays, which
    # can execute arbitrary code -- only point this at archives you trust.
    # The ``with`` block closes the archive even if a key is missing or a
    # computation below raises.
    with np.load(data_file, allow_pickle=True) as data_zip:
        # Each ``data_zip[key]`` access re-reads the member from the archive,
        # so every array is read exactly once and reused.
        returns = data_zip["returns"]
        print("Mean Return:", np.mean(np.abs(returns)))
        print("Return fraction:", np.sum(returns > 0.05) / len(returns))

        action_idx = data_zip["action_idx"]
        n_actions = len(action_idx)
        print("\n")
        # most_common() is a stable sort by descending count, i.e. the same
        # ordering as sorting the items by negated count.
        for idx, ct in Counter(action_idx).most_common():
            print(action_names[idx], f"{ct / n_actions * 100}%")

        # The value estimate is recovered as return minus advantage.
        old_values = returns - data_zip["advantages"]
        print("Value-Return Correlation", np.corrcoef(old_values, returns)[0, 1])

In [220]:
# Open one archive for ad-hoc inspection in the following cells. np.load on an
# .npz returns a lazy NpzFile, so the underlying file stays open until
# new_zip.close() is called.
# NOTE(review): allow_pickle=True permits unpickling object arrays, which can
# run arbitrary code -- only use it on trusted files.
new_zip = np.load("../training_data/ac_parallel_20250829_140220.npz", allow_pickle=True)

In [221]:
# List the names of the arrays stored in the archive.
list(new_zip.keys())

['action_idx',
 'tile_idx',
 'returns',
 'advantages',
 'joint_log_probs',
 'game_ids',
 'step_ids',
 'hand_idx',
 'disc_idx',
 'called_idx',
 'seat_winds',
 'riichi_declarations',
 'dora_indicator_tiles',
 'legal_action_mask',
 'called_discards',
 'round_wind',
 'remaining_tiles',
 'owner_of_reactable_tile',
 'reactable_tile',
 'newly_drawn_tile',
 'wall_count',
 'deal_in_tiles',
 'last_discards',
 'actor_ids',
 'game_outcomes_obj']

In [217]:
# NOTE(review): the saved result is an np.str_, so the ids are strings and the
# built-in max compares them lexicographically (e.g. '9' > '10'), not numerically.
max(new_zip['actor_ids'])

np.str_('0')

In [219]:
# Display the `returns` array (numpy elides the middle of long arrays).
new_zip['returns']

array([0.     , 0.     , 0.     , ..., 0.29403, 0.297  , 0.3    ],
      shape=(4174,), dtype=float32)

In [224]:
perf_metrics("../training_data/ac_parallel_20250829_150902.npz")

Mean Return: 0.11422934
Return fraction: 0.19357291309752175


discard 74.54075265294281%
pass 23.031520186875216%
riichi 1.3600474190861822%
ron 0.34038700576982234%
tsumo 0.24009663889715613%
pon_noaka 0.17882197584527773%
chi_high_noaka 0.12304952743479251%
chi_low_noaka 0.07803140764157573%
kan 0.048769629775984834%
chi_mid_noaka 0.02826137520351942%
chi_low_aka 0.011504630613822063%
chi_high_aka 0.009503825289679095%
chi_mid_aka 0.006002415972428903%
pon_aka 0.0032513086517323227%
Value-Return Correlation 0.32851497494419357


In [225]:
perf_metrics("../training_data/ac_parallel_20250829_153655.npz")

Mean Return: 0.11453858
Return fraction: 0.19057834716945027


discard 74.45543108505875%
pass 23.406076812480958%
riichi 1.344079929698929%
ron 0.33438331709257735%
tsumo 0.252801845151316%
pon_noaka 0.0790635252764076%
chi_high_noaka 0.044064066634940544%
kan 0.029208181312303443%
chi_low_noaka 0.028452797312847316%
chi_mid_noaka 0.012589733324268725%
chi_low_aka 0.004532303996736741%
pon_aka 0.004028714663765992%
chi_high_aka 0.0035251253307952432%
chi_mid_aka 0.0017625626653976216%
Value-Return Correlation 0.34529959164505536


In [227]:
perf_metrics("../training_data/ac_parallel_20250829_160817.npz")

Mean Return: 0.13158001
Return fraction: 0.20487902645589656


discard 74.051585914856%
pass 23.63660657388945%
riichi 1.5764075893683065%
ron 0.38928402573642773%
tsumo 0.289586202643534%
chi_low_noaka 0.015160235985775072%
chi_mid_noaka 0.012590704462762349%
pon_noaka 0.010792032396653443%
chi_high_noaka 0.008736407178243264%
chi_high_aka 0.0033403909799165415%
chi_mid_aka 0.0020556252184101794%
chi_low_aka 0.0017986720661089068%
kan 0.0015417189138076347%
pon_aka 0.0005139063046025449%
Value-Return Correlation 0.32401918536812935


In [161]:
perf_metrics("../training_data/ac_parallel_20250828_154546.npz")

Mean Return: 0.16223681
Return fraction: 0.19623640883051133


discard 74.79425803270522%
pass 20.61206646774464%
riichi 1.3251866800185028%
pon_noaka 0.9930004524035847%
chi_high_noaka 0.6237069644736106%
chi_low_noaka 0.5418676643267065%
ron 0.4234294224991994%
tsumo 0.27245878806671175%
chi_mid_noaka 0.17740320342403432%
kan 0.09276815078763972%
chi_high_aka 0.053627615934772555%
pon_aka 0.04320708392848974%
chi_low_aka 0.037107260315055894%
chi_mid_aka 0.009912213371829997%
Value-Return Correlation 0.3559128841240819


In [162]:
perf_metrics("../training_data/ac_parallel_20250828_160927.npz")

Mean Return: 0.17023166
Return fraction: 0.1946564688650734


discard 74.34475858441036%
pass 21.80270038886425%
riichi 1.4672662946497643%
pon_noaka 0.7202240353175381%
ron 0.4391484182405182%
chi_high_noaka 0.3906693209831973%
tsumo 0.29551619923877503%
chi_low_noaka 0.27798120661378667%
chi_mid_noaka 0.11397745206242459%
kan 0.05131564018195134%
chi_high_aka 0.037390793097401724%
pon_aka 0.035069985249976794%
chi_low_aka 0.018050727702193937%
chi_mid_aka 0.005930953387863722%
Value-Return Correlation 0.35946106778058423


In [163]:
perf_metrics("../training_data/ac_parallel_20250828_164712.npz")

Mean Return: 0.16640237
Return fraction: 0.19260311191118804


discard 74.0608155477431%
pass 22.82650286204408%
riichi 1.523341901917531%
ron 0.4278300081919431%
pon_noaka 0.4160171748325017%
tsumo 0.28068319264933606%
chi_high_noaka 0.1535668336727383%
chi_low_noaka 0.14483560901575987%
chi_mid_noaka 0.07318820668349567%
kan 0.03980411240681344%
chi_high_aka 0.022341663092856574%
pon_aka 0.01540804351231488%
chi_low_aka 0.00898802538218368%
chi_mid_aka 0.006676818855336447%
Value-Return Correlation 0.36210705928687514


In [228]:
perf_metrics("../training_data/ac_parallel_20250829_182007.npz")

Mean Return: 0.14737715
Return fraction: 0.20625397640387985


discard 75.37950240537555%
pass 22.407105624448885%
riichi 1.416436918886942%
ron 0.4760522820372582%
tsumo 0.2835106204864328%
chi_high_noaka 0.011440881338527307%
chi_mid_noaka 0.007813284816555235%
chi_low_noaka 0.005301871839805337%
pon_noaka 0.004185688294583162%
kan 0.0039066424082776175%
chi_high_aka 0.003069504749360985%
pon_aka 0.0008371376589166324%
chi_mid_aka 0.0005580917726110882%
chi_low_aka 0.0002790458863055441%
Value-Return Correlation 0.3728334208201131


In [166]:
perf_metrics('../training_data/ac_parallel_20250828_175507.npz')

Mean Return: 0.122376665
Return fraction: 0.1479834369578706


discard 74.31690114092515%
pass 21.933621202719987%
riichi 1.4293854706157956%
pon_noaka 0.7025363741120159%
ron 0.4079978726367907%
chi_high_noaka 0.38140583252079874%
chi_low_noaka 0.2826353978042572%
tsumo 0.2719985817578605%
chi_mid_noaka 0.12156361195882033%
kan 0.054197110331640735%
chi_high_aka 0.037482113687302934%
pon_aka 0.031150675564447707%
chi_low_aka 0.02228666219245039%
chi_mid_aka 0.006837953172683643%
Value-Return Correlation 0.37866761152886436


In [181]:
perf_metrics('../training_data/ac_parallel_20250828_203723.npz')

Mean Return: 0.1218145
Return fraction: 0.1475355691707397


discard 74.0901281949526%
pass 23.211044173107993%
riichi 1.4563069509981386%
ron 0.42091572212393463%
pon_noaka 0.2767700094987672%
tsumo 0.2601279645242807%
chi_high_noaka 0.08858688555649717%
chi_mid_noaka 0.07706546980492962%
chi_low_noaka 0.07220087204315666%
chi_high_aka 0.016642044974486465%
chi_low_aka 0.014337761824172955%
kan 0.007424912373232423%
chi_mid_aka 0.004864597761772967%
pon_aka 0.0035844404560432387%
Value-Return Correlation 0.3915873894115282


In [231]:
perf_metrics('../training_data/ac_parallel_20250829_183434.npz')

Mean Return: 0.12923601
Return fraction: 0.20568522845524662


discard 75.42825920424374%
pass 22.41417655344351%
riichi 1.3827301507109968%
ron 0.44699854752928964%
tsumo 0.28143336069872354%
chi_mid_noaka 0.014826733149005922%
chi_high_noaka 0.012630180089893934%
chi_low_noaka 0.00576595178016897%
chi_high_aka 0.004118536985834979%
pon_noaka 0.0035693987210569816%
kan 0.0035693987210569816%
chi_mid_aka 0.0008237073971669956%
pon_aka 0.0005491382647779972%
chi_low_aka 0.0005491382647779972%
Value-Return Correlation 0.35960501876959894


In [230]:
perf_metrics('../training_data/ac_parallel_20250829_184816.npz')

Mean Return: 0.13090098
Return fraction: 0.20875337682018844


discard 75.34427093628517%
pass 22.4753464672421%
riichi 1.370494827699809%
ron 0.4623223737673233%
tsumo 0.2838725264105774%
chi_high_noaka 0.018943137642485338%
chi_mid_noaka 0.013177834881728932%
chi_high_aka 0.0098833761612967%
pon_noaka 0.008236146801080584%
chi_low_noaka 0.006588917440864466%
kan 0.003568996947134919%
chi_mid_aka 0.0016472293602161165%
pon_aka 0.0010981529068107444%
chi_low_aka 0.0005490764534053722%
Value-Return Correlation 0.37782385433399407


In [233]:
perf_metrics('../training_data/ac_parallel_20250829_192357.npz')

Mean Return: 0.07379851
Return fraction: 0.14204359738192487


discard 75.38073966203127%
pass 21.576000446443995%
riichi 1.1519849484596354%
ron 0.4310310464594068%
chi_high_noaka 0.33722465965288984%
pon_noaka 0.32712652169638085%
chi_low_noaka 0.31251079570669693%
tsumo 0.24979604418732576%
kan 0.07121844664064182%
chi_mid_noaka 0.05500827781571961%
chi_low_aka 0.04464439938667099%
chi_high_aka 0.029231451979367908%
pon_aka 0.02125923780317666%
chi_mid_aka 0.01222406173682658%
Value-Return Correlation 0.2525847708526395


In [60]:
# NOTE(review): the saved output of this cell lists the actions in no frequency
# order, whereas perf_metrics as defined in this notebook prints them most
# frequent first, so it appears to come from an earlier version of the
# function; re-run to refresh. The saved correlation is also nan, which
# np.corrcoef yields when one of its inputs has zero variance -- TODO confirm
# whether `advantages` or `returns - advantages` is constant in this file.
perf_metrics("../training_data/gen0.npz")

Mean Return: 0.16047949
Return fraction: 0.1500838595106551


discard 74.52890686661405%
pass 20.92911404893449%
chi_low_noaka 0.5882498026835044%
pon_noaka 0.9458859510655091%
chi_high_noaka 0.6499112075769534%
kan 0.12455603788476717%
ron 0.3810674822415154%
riichi 1.1900651144435674%
tsumo 0.2244475138121547%
chi_mid_noaka 0.26144435674822414%
chi_high_aka 0.05056235201262825%
pon_aka 0.06412786108918705%
chi_low_aka 0.044396211523283345%
chi_mid_aka 0.017265193370165743%
Value-Return Correlation nan
