In [111]:
import numpy as np
from collections import Counter

In [112]:
ACTION_HEAD_ORDER: list[str] = [
    'discard',       # uses tile head
    'riichi',        # uses tile head
    'tsumo',
    'ron',
    'pass',
    'kan',           # uses tile head (kakan/ankan pass tile; daiminkan uses no-op)
    'chi_low_noaka',
    'chi_mid_noaka',
    'chi_high_noaka',
    'chi_low_aka',
    'chi_mid_aka',
    'chi_high_aka',
    'pon_noaka',
    'pon_aka',
]
ACTION_HEAD_INDEX = {name: i for i, name in enumerate(ACTION_HEAD_ORDER)}

In [113]:
def perf_metrics(data_file):
    data_zip = np.load(data_file, allow_pickle=True)
    print("Mean Return:", np.mean(np.abs(data_zip["returns"])))
    print("Return fraction:", np.sum(data_zip["returns"] > 0.05) / len(data_zip["returns"]))
    action_counter = Counter(data_zip["action_idx"])
    print("\n")
    for idx, ct in sorted(list(action_counter.items()), key=lambda x: -x[1]):
        print(ACTION_HEAD_ORDER[idx], f"{ct/len(data_zip["action_idx"]) * 100}%")
    old_values = data_zip['returns'] - data_zip['advantages']
    print("Value-Return Correlation", np.corrcoef(old_values, data_zip["returns"])[0,1])
    data_zip.close()

In [123]:
new_zip = np.load("../training_data/ac_parallel_20250827_124424.npz")

In [124]:
list(new_zip.keys())

['action_idx',
 'tile_idx',
 'returns',
 'advantages',
 'joint_log_probs',
 'game_ids',
 'step_ids',
 'actor_ids',
 'hand_idx',
 'disc_idx',
 'called_idx',
 'seat_winds',
 'riichi_declarations',
 'dora_indicator_tiles',
 'legal_action_mask',
 'called_discards',
 'round_wind',
 'remaining_tiles',
 'owner_of_reactable_tile',
 'reactable_tile',
 'newly_drawn_tile',
 'game_outcomes_obj']

In [125]:
for key in new_zip:
    print(len(new_zip[key]))

407589
407589
407589
407589
407589
407589
407589
407589
18254
18254
18254
18254
18254
18254
18254
18254
18254
18254
18254
18254
18254


ValueError: Object arrays cannot be loaded when allow_pickle=False

In [77]:
perf_metrics("../training_data/ac_parallel_20250825_192500.npz")

Mean Return: 0.18755879
Return fraction: 0.15507616187314802


discard 73.34522617730373%
pass 24.41595849046189%
riichi 1.489616342875208%
ron 0.3698323209361726%
tsumo 0.24939845794474594%
pon_noaka 0.09258353217465921%
chi_high_noaka 0.029104850222928098%
chi_low_aka 0.0027599426935535267%
chi_low_noaka 0.0027599426935535267%
pon_aka 0.001254519406160694%
chi_mid_aka 0.0005018077624642775%
chi_mid_noaka 0.0005018077624642775%
kan 0.00025090388123213877%
chi_high_aka 0.00025090388123213877%
Value-Return Correlation 0.27894515399447906


In [59]:
perf_metrics("../training_data/ac_parallel_20250825_154932.npz")

Mean Return: 0.18243663
Return fraction: 0.15115449118533753


discard 73.22009703116578%
pass 24.690039331563078%
riichi 1.4866632060215226%
tsumo 0.24407530896240542%
ron 0.355381635953441%
chi_high_aka 0.0029947890670233793%
pon_noaka 0.0007486972667558448%
Value-Return Correlation 0.2528755998284951


In [60]:
perf_metrics("../training_data/gen0.npz")

Mean Return: 0.16047949
Return fraction: 0.1500838595106551


discard 74.52890686661405%
pass 20.92911404893449%
chi_low_noaka 0.5882498026835044%
pon_noaka 0.9458859510655091%
chi_high_noaka 0.6499112075769534%
kan 0.12455603788476717%
ron 0.3810674822415154%
riichi 1.1900651144435674%
tsumo 0.2244475138121547%
chi_mid_noaka 0.26144435674822414%
chi_high_aka 0.05056235201262825%
pon_aka 0.06412786108918705%
chi_low_aka 0.044396211523283345%
chi_mid_aka 0.017265193370165743%
Value-Return Correlation nan


In [61]:
perf_metrics("../training_data/ac_parallel_20250825_213051.npz")

Mean Return: 0.18317343
Return fraction: 0.15287744988682708


discard 73.51753491065229%
pass 24.424798281475983%
riichi 1.4636029347256652%
tsumo 0.23737488939633583%
ron 0.35568634430137336%
chi_mid_noaka 0.0005013197241738877%
pon_noaka 0.00025065986208694385%
chi_low_aka 0.00025065986208694385%
Value-Return Correlation 0.2685874739959118


In [38]:
perf_metrics("../training_data/ac_parallel_20250825_222614.npz")

Mean Return: 0.18238935
Return fraction: 0.15058725249370977


discard 73.69223195727626%
pass 24.30484421151035%
riichi 1.4196493391544758%
tsumo 0.2426581007031118%
ron 0.33514663908585524%
chi_high_noaka 0.00024862510317941785%
chi_low_noaka 0.00348075144451185%
chi_mid_noaka 0.0009945004127176714%
chi_low_aka 0.0004972502063588357%
pon_noaka 0.00024862510317941785%
Value-Return Correlation 0.2413707031637274


In [39]:
perf_metrics("../training_data/ac_parallel_20250825_225310.npz")

Mean Return: 0.19302396
Return fraction: 0.15523899984211198


discard 73.24098973226705%
pass 24.502464807263852%
riichi 1.5510617342118256%
ron 0.35838072062092596%
tsumo 0.26540222597032204%
chi_mid_noaka 0.05413303192595804%
chi_low_noaka 0.016540648644042734%
kan 0.0007518476656383061%
pon_noaka 0.007267860767836959%
chi_high_noaka 0.0002506158885461021%
chi_high_aka 0.0025061588854610206%
chi_low_aka 0.0002506158885461021%
Value-Return Correlation 0.3168012688201298


In [40]:
perf_metrics("../training_data/ac_parallel_20250825_230502.npz")

Mean Return: 0.17711326
Return fraction: 0.14411690602166793


discard 73.35600907029477%
pass 24.422608549592674%
riichi 1.3815402704291593%
ron 0.3401360544217687%
tsumo 0.27294868564709834%
chi_high_noaka 0.11337868480725624%
chi_high_aka 0.012597631645250695%
pon_noaka 0.058788947677836566%
chi_mid_noaka 0.02519526329050139%
chi_low_noaka 0.016796842193667588%
Value-Return Correlation 0.25108260852585823


In [41]:
perf_metrics("../training_data/ac_parallel_20250825_231401.npz")

Mean Return: 0.18434078
Return fraction: 0.15460283304659106


discard 73.36544516116237%
pass 24.383338869855372%
riichi 1.4185065273115807%
tsumo 0.23873152495649286%
chi_high_noaka 0.15593159833606854%
ron 0.35078412026317485%
chi_mid_noaka 0.013386814483541655%
pon_noaka 0.05924904928826769%
chi_high_aka 0.00644550326985339%
pon_aka 0.000495807943834876%
chi_mid_aka 0.0007437119157523142%
chi_low_noaka 0.0052059834102661995%
chi_low_aka 0.000991615887669752%
kan 0.0007437119157523142%
Value-Return Correlation 0.3317057318717842


In [42]:
perf_metrics("../training_data/ac_parallel_20250825_232819.npz")

Mean Return: 0.18148695
Return fraction: 0.1511219442253925


discard 73.61579085717017%
pass 24.40318302387268%
riichi 1.3501565225703156%
tsumo 0.20312089277606518%
ron 0.30587616794513345%
chi_low_noaka 0.0693000693000693%
chi_mid_noaka 0.01911726049657084%
chi_high_noaka 0.01911726049657084%
pon_noaka 0.007168972686214065%
pon_aka 0.00477931512414271%
chi_low_aka 0.002389657562071355%
Value-Return Correlation 0.37571817600339896


In [43]:
perf_metrics("../training_data/ac_parallel_20250825_234451.npz")

Mean Return: 0.1873601
Return fraction: 0.15359569330799788


discard 73.39104553637642%
pass 24.44986554072106%
riichi 1.4597009186178036%
tsumo 0.24295056343057184%
ron 0.36305253774722657%
chi_low_noaka 0.060924910048365394%
chi_high_noaka 0.020974149360912676%
pon_noaka 0.00474415283163501%
chi_high_aka 0.001498153525779477%
chi_mid_noaka 0.003745383814448692%
pon_aka 0.0012484612714828973%
chi_low_aka 0.00024969225429657943%
Value-Return Correlation 0.31402874293074096


In [44]:
perf_metrics("../training_data/ac_parallel_20250825_235718.npz")

Mean Return: 0.19038653
Return fraction: 0.15501103454825835


discard 73.24565678680953%
pass 24.604918184385678%
riichi 1.496349726448175%
ron 0.36040359203912953%
tsumo 0.25143274174158414%
chi_high_noaka 0.007747927429412632%
chi_mid_aka 0.004248863429032734%
chi_mid_noaka 0.018245119430552327%
pon_aka 0.0019994651430742276%
pon_noaka 0.0007497994286528353%
chi_low_noaka 0.005998395429222683%
chi_high_aka 0.002249398285958506%
Value-Return Correlation 0.3104719524236516


In [45]:
perf_metrics("../training_data/ac_parallel_20250826_001032.npz")

Mean Return: 0.19924365
Return fraction: 0.1566437980447704


discard 73.10870485450998%
pass 24.596918622271986%
riichi 1.5820176987603856%
ron 0.3714585571978549%
tsumo 0.2597454644734832%
pon_noaka 0.07013378018570422%
chi_mid_noaka 0.004508600154795272%
chi_high_aka 0.001753344504642606%
chi_high_noaka 0.0012523889318875755%
chi_low_noaka 0.001753344504642606%
pon_aka 0.0005009555727550302%
chi_mid_aka 0.0007514333591325453%
chi_low_aka 0.0005009555727550302%
Value-Return Correlation 0.31584313947134474


In [46]:
perf_metrics("../training_data/ac_parallel_20250826_002501.npz")

Mean Return: 0.19944732
Return fraction: 0.15807417784506156


discard 73.22934476153176%
pass 24.379570840017774%
riichi 1.484761983666112%
ron 0.3956687328809253%
chi_high_noaka 0.22218707398452975%
chi_mid_noaka 0.024101648703406617%
chi_mid_aka 0.004016941450567769%
tsumo 0.23474001601755404%
chi_low_aka 0.0032637649285863126%
pon_noaka 0.016318824642931565%
pon_aka 0.002510588406604856%
chi_low_noaka 0.0010042353626419423%
chi_high_aka 0.002510588406604856%
Value-Return Correlation 0.3071267691513172


In [47]:
perf_metrics("../training_data/ac_parallel_20250826_004449.npz")

Mean Return: 0.19397444
Return fraction: 0.15668588828718555


discard 73.05051883325851%
pass 24.760022373665294%
riichi 1.4886513846840723%
tsumo 0.2608588778553387%
ron 0.3644499514651992%
pon_aka 0.004514865193650092%
pon_noaka 0.010785511295941888%
chi_high_noaka 0.03636974739329241%
chi_low_aka 0.003009910129100062%
chi_high_aka 0.0032607359731917336%
chi_mid_aka 0.008277252855025171%
chi_mid_noaka 0.008778904543208515%
chi_low_noaka 0.0005016516881833436%
Value-Return Correlation 0.31558098373914634


In [48]:
perf_metrics("../training_data/ac_parallel_20250826_010200.npz")

Mean Return: 0.19227518
Return fraction: 0.1574944149160132


discard 73.0268926128286%
pass 24.823778835627635%
riichi 1.4511311343045519%
chi_low_noaka 0.02352917865829193%
ron 0.35789119117086143%
chi_mid_noaka 0.03863738811256359%
tsumo 0.23752086666633646%
chi_high_noaka 0.02600593430653319%
pon_noaka 0.005944213555779014%
chi_mid_aka 0.002972106777889507%
chi_high_aka 0.001981404518593005%
pon_aka 0.0014860533889447536%
chi_low_aka 0.00222908008341713%
Value-Return Correlation 0.32240225793851623


In [49]:
perf_metrics("../training_data/ac_parallel_20250826_011902.npz")

Mean Return: 0.18467629
Return fraction: 0.15137108100064686


discard 73.05750202298704%
pass 24.923446235276465%
riichi 1.373417581417542%
tsumo 0.22554153333808846%
ron 0.3487654245075349%
chi_mid_noaka 0.03222021904829835%
chi_high_noaka 0.0066408085061378285%
chi_high_aka 0.01205183765928717%
chi_mid_aka 0.00565698502374704%
chi_low_aka 0.005902940894344737%
chi_low_noaka 0.00565698502374704%
pon_noaka 0.001967646964781579%
pon_aka 0.0012297793529884868%
Value-Return Correlation 0.30998684550469696


In [50]:
perf_metrics("../training_data/ac_parallel_20250826_105221.npz")

Mean Return: 0.19284408
Return fraction: 0.15545409235377652


discard 73.10284545005999%
pass 24.719925664151237%
riichi 1.490223383232858%
tsumo 0.25145954231357803%
ron 0.366169174165788%
chi_high_noaka 0.0345631641825436%
pon_noaka 0.012021970150449948%
chi_high_aka 0.0037568656720156084%
chi_mid_aka 0.006010985075224974%
chi_mid_noaka 0.008515562189902046%
pon_aka 0.002504577114677072%
chi_low_aka 0.0020036616917416578%
Value-Return Correlation 0.3178879658722283


In [62]:
perf_metrics('../training_data/ac_parallel_20250826_113701.npz')

Mean Return: 0.13865383
Return fraction: 0.15846361947528834


discard 73.26160823896808%
riichi 1.5039267113885881%
pass 24.469875210337214%
pon_noaka 0.13226554120109113%
ron 0.3637927456476136%
tsumo 0.2475290846674484%
chi_high_aka 0.0030003525414236173%
chi_mid_noaka 0.008250969488914947%
chi_high_noaka 0.003250381919875585%
chi_mid_aka 0.002250264406067713%
chi_low_noaka 0.0027503231629716495%
pon_aka 0.0012501468922598406%
chi_low_aka 0.0002500293784519681%
Value-Return Correlation 0.36521066022664594


In [64]:
perf_metrics('../training_data/ac_parallel_20250826_124536.npz')

Mean Return: 0.18173212
Return fraction: 0.221778701105565


discard 73.08887476543482%
pass 24.601270608450125%
riichi 1.52709149234947%
tsumo 0.2708018800382841%
ron 0.36827046023759225%
pon_noaka 0.12258934829191337%
chi_high_aka 0.003014492171112624%
chi_mid_noaka 0.007033815065929456%
chi_high_noaka 0.004019322894816832%
chi_low_noaka 0.0037681152138907804%
chi_low_aka 0.000502415361852104%
chi_mid_aka 0.002009661447408416%
pon_aka 0.000753623042778156%
Value-Return Correlation 0.3358648702931774


In [65]:
perf_metrics('../training_data/ac_parallel_20250826_130523.npz')

Mean Return: 0.1826642
Return fraction: 0.2329447258269148


discard 73.24727025704038%
pass 24.474031228459257%
riichi 1.5257090338757855%
tsumo 0.2579894306750182%
ron 0.370625668075244%
chi_high_noaka 0.0811680202994975%
pon_noaka 0.032217460365031314%
chi_low_aka 0.0024974775476768463%
chi_mid_aka 0.0032467208119799002%
chi_mid_noaka 0.0032467208119799002%
chi_high_aka 0.0017482342833737923%
pon_aka 0.00024974775476768464%
Value-Return Correlation 0.3327708154335926


In [67]:
perf_metrics('../training_data/ac_parallel_20250826_133454.npz')

Mean Return: 0.18563361
Return fraction: 0.24785654793983636


discard 72.77938366567047%
pass 24.72441330655039%
riichi 1.5996276517564059%
pon_noaka 0.24986526872764683%
tsumo 0.24251629023565724%
ron 0.36744892459948064%
chi_low_noaka 0.014697956983979226%
chi_mid_noaka 0.012248297486649356%
chi_high_aka 0.007348978491989613%
chi_low_aka 0.002449659497329871%
Value-Return Correlation 0.3301529637584454


In [69]:
perf_metrics('../training_data/ac_parallel_20250826_134926.npz')

Mean Return: 0.19261858
Return fraction: 0.2380489994804461


discard 72.76604437898177%
riichi 1.6413867549067578%
ron 0.41362542686648474%
pass 24.87679510913154%
tsumo 0.25826368116541487%
chi_high_aka 0.006305265653452512%
chi_mid_noaka 0.02244674572629094%
chi_low_aka 0.004287580644347708%
pon_noaka 0.0025221062613810046%
chi_low_noaka 0.0015132637568286028%
chi_mid_aka 0.00580084440117631%
pon_aka 0.0007566318784143014%
chi_high_noaka 0.00025221062613810046%
Value-Return Correlation 0.3341974929486128


In [79]:
perf_metrics('../training_data/ac_parallel_20250826_140339.npz')

Mean Return: 0.19342147
Return fraction: 0.24034447621865038


discard 72.79866700042226%
pass 24.643298718833677%
riichi 1.665996627045833%
ron 0.42907820247333117%
tsumo 0.2700386094528684%
chi_high_noaka 0.1734517659968799%
chi_high_aka 0.006321128498428567%
chi_mid_noaka 0.005056902798742854%
chi_mid_aka 0.004045522238994283%
chi_low_aka 0.0022756062594342843%
pon_aka 0.0010113805597485707%
chi_low_noaka 0.0005056902798742854%
pon_noaka 0.0002528451399371427%
Value-Return Correlation 0.3210917004429643


In [78]:
perf_metrics('../training_data/ac_parallel_20250826_141956.npz')

Mean Return: 0.19052091
Return fraction: 0.20355767374272155


discard 72.71713394372351%
pass 24.822421169225766%
riichi 1.6637535947646003%
ron 0.416891074821913%
tsumo 0.27183025597772514%
chi_low_noaka 0.06376578901907384%
chi_high_noaka 0.017783287774243702%
chi_mid_noaka 0.016513052933226295%
chi_high_aka 0.005589033300476593%
chi_mid_aka 0.0025404696820348145%
chi_low_aka 0.0012702348410174073%
pon_aka 0.000508093936406963%
Value-Return Correlation 0.33228519527177874


In [80]:
perf_metrics('../training_data/ac_parallel_20250826_143735.npz')

Mean Return: 0.19145013
Return fraction: 0.20592708032224413


discard 72.69923460731208%
pass 24.755237724182138%
riichi 1.6994482637274246%
ron 0.4014227003657295%
tsumo 0.2775019794638481%
pon_noaka 0.12140711601543352%
chi_low_noaka 0.021617002023451935%
chi_high_noaka 0.007038093682054118%
chi_high_aka 0.005781291238830168%
chi_mid_noaka 0.005529930750185378%
chi_mid_aka 0.002764965375092689%
chi_low_aka 0.002262244397803109%
pon_aka 0.0007540814659343697%
Value-Return Correlation 0.33641485741515686


In [82]:
perf_metrics('../training_data/ac_parallel_20250826_154426.npz')

Mean Return: 0.16855012
Return fraction: 0.1811697321593904


discard 72.81426690713727%
pass 24.68679212737839%
riichi 1.6949625469589855%
ron 0.41410152019014423%
tsumo 0.27902705801479855%
chi_low_noaka 0.06645917199988839%
chi_high_noaka 0.016234301557224644%
chi_mid_noaka 0.014965996748066469%
chi_high_aka 0.007990320297696505%
chi_mid_aka 0.002282948656484716%
chi_low_aka 0.002282948656484716%
pon_aka 0.0005073219236632701%
pon_noaka 0.00012683048091581754%
Value-Return Correlation 0.3475717378280132


In [84]:
perf_metrics('../training_data/ac_parallel_20250826_160937.npz')

Mean Return: 0.16279772
Return fraction: 0.1782004147277576


discard 72.87622408424804%
pass 24.621273792260016%
riichi 1.6110772446224855%
ron 0.40765996867454973%
tsumo 0.2668090321294112%
pon_noaka 0.16243293488673238%
chi_low_noaka 0.027766313655851686%
chi_mid_noaka 0.00921336771307806%
chi_mid_aka 0.005174631181317815%
chi_high_aka 0.0040387365317602454%
chi_low_aka 0.00378631549852523%
chi_high_noaka 0.002902841882202676%
pon_aka 0.0016407367160275995%
Value-Return Correlation 0.35535795483480354
