In [1]:
# =====================================================
# One-runtime AlphaZero 立体四目  学習ループ
#   ・Google Drive にモデル／データ／state を直保存
#   ・途中サイクルから再開
# =====================================================
import os, json, time, shutil
from pathlib import Path
from google.colab import drive
from tensorflow.keras import backend as K

# ---------- 0) Google Drive ----------
# Mount Drive first: models, data and the resume state are all stored on it.
drive.mount('/content/drive', force_remount=False)

# Every persistent artifact of this run lives below ROOT.
ROOT = Path('/content/drive/MyDrive/azero_3d')
MODEL_DIR, DATA_DIR = ROOT / 'model', ROOT / 'data'
MODEL_DIR.mkdir(parents=True, exist_ok=True)   # parents=True also creates ROOT
DATA_DIR.mkdir(exist_ok=True)                  # ROOT already exists at this point

# ---------- 1) コード置き場に移動 ----------
%cd /content/drive/MyDrive/sample/3dyonnmoku/train_code_new
import sys; sys.path.append(os.getcwd())

# ---------- 2) ユーザモジュール ----------
from dual_network      import dual_network
from self_play         import self_play   #テスト
from train_network     import train_network
from evaluate_network  import evaluate_network
from evaluate_best_player import evaluate_best_player

BEST_PATH  = MODEL_DIR / 'best.h5'   # backup copy of the best model under ROOT
STATE_JSON = ROOT / 'state.json'     # resume marker (last finished cycle)

# ---------- 3) Initial best.h5 ----------
# The training loop works on ./model/best.h5 and only copies it to BEST_PATH
# after an update, so the existence check must look at the working copy first.
# Checking only the backup could report "reuse" while ./model/best.h5 is
# missing, or re-run dual_network() although a working model already exists.
LOCAL_BEST = Path('model') / 'best.h5'
if LOCAL_BEST.exists():
    print("🔄  best.h5 を再利用")
elif BEST_PATH.exists():
    # Working copy is gone but a backup exists: restore it instead of
    # starting over from a fresh network.
    LOCAL_BEST.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy(BEST_PATH, LOCAL_BEST)
    print("🔄  Drive のバックアップから best.h5 を復元")
else:
    # NOTE(review): dual_network() is assumed to create ./model/best.h5 —
    # confirm against dual_network.py.
    dual_network()

# ---------- 4) Resume point ----------
# state.json records the index of the last cycle that ran to completion,
# so training continues with the cycle after it.
start_cycle = 0
if STATE_JSON.exists():
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with STATE_JSON.open() as state_file:
        start_cycle = json.load(state_file)['cycle'] + 1
    print(f"▶ 再開: cycle {start_cycle}")

TOTAL_CYCLES = 1          # test setting: reduced from 10 to 1

# ---------- 5) Main loop ----------
# Each cycle: self-play -> retrain -> evaluate, then back up the models and
# record the finished cycle so an interrupted runtime can resume from it.
for cycle in range(start_cycle, TOTAL_CYCLES):
    print(f"\n===== CYCLE {cycle+1}/{TOTAL_CYCLES} =====")
    self_play()                   # (1) self-play
    train_network()               # (2) retrain
    updated = evaluate_network()  # (3) decide whether best is replaced
    if updated:
        evaluate_best_player()    # (4) optional test of the new best

    # (5) back up latest (one file per cycle) and, if it changed, best
    shutil.copy('./model/latest.h5', MODEL_DIR / f'latest_{cycle:02d}.h5')
    if updated:
        shutil.copy('./model/best.h5', BEST_PATH)

    # (6) update state
    # Write to a temporary file and then swap it in: a runtime that dies in
    # the middle of the write leaves the previous state.json untouched rather
    # than a truncated file that would break the resume step. The `with`
    # block also closes (and flushes) the handle before the swap.
    state = {'cycle': cycle,
             'timestamp': time.strftime('%Y-%m-%d %H:%M:%S')}
    state_tmp = STATE_JSON.with_name(STATE_JSON.name + '.tmp')
    with state_tmp.open('w') as state_file:
        json.dump(state, state_file)
    state_tmp.replace(STATE_JSON)

    # (7) free memory held by the Keras session
    K.clear_session()

print("\n🎉 すべて完了  •  best =", BEST_PATH)


Mounted at /content/drive
/content/drive/MyDrive/sample/3dyonnmoku/train_code_new
🔄  best.h5 を再利用

===== CYCLE 1/1 =====




Benchmark: 14.49s / game  •  RAM 14.4%
SelfPlay 300/300
Train 100/100








Evaluate 100/100
Average Point: 0.13

🎉 すべて完了  •  best = /content/drive/MyDrive/azero_3d/model/best.h5


In [4]:
import pickle
from pathlib import Path
import numpy as np

# 1. Inspect the data
# Pick the last *.history file in lexicographic order (presumably the newest
# one if file names are timestamps — TODO confirm). 'data' is relative to the
# current working directory.
history_files = sorted(Path('data').glob('*.history'))
if not history_files:
    # Fail with a clear message instead of an opaque IndexError on [-1].
    raise FileNotFoundError("no *.history files found under ./data")

# NOTE: pickle can execute arbitrary code while loading; only open history
# files produced by this project.
with history_files[-1].open('rb') as history_file:
    hist = pickle.load(history_file)
print(len(hist), 'samples')

# Unpack the first sample by position; the three slots are presumably
# (board state, policy, value) — verify against self_play.
p0, pol, v0 = hist[0][0], hist[0][1], hist[0][2]

print(hex(p0[0]), pol[:8], v0)  # first element of p0, first 8 policy entries, value

# 2. Check the tensor conversion (currently disabled)
# Assuming bitboards_to_tensor_batch is defined elsewhere and takes two numpy arrays
# x0 = bitboards_to_tensor_batch(np.array([p0], 'uint64'),
#                                np.array([hist[0][0][1]], 'uint64'))
# print('tensor non-zero=', x0.sum())

10858 samples
Structure of hist[0]: [[0, 0], [np.float32(0.6530612), np.float32(0.0), np.float32(0.0), np.float32(0.0), np.float32(0.3469388), np.float32(0.0), np.float32(0.0), np.float32(0.0), np.float32(0.0), np.float32(0.0), np.float32(0.0), np.float32(0.0), np.float32(0.0), np.float32(0.0), np.float32(0.0), np.float32(0.0)], -1]
Type of hist[0]: <class 'list'>


In [None]:
# best.h5のアップロード
from google.colab import files
uploaded = files.upload()
# modelフォルダに移動
!mkdir model
!mv best.h5 model