In [1]:
import sys
import os
# Configure and build the project with CMake into ../build (relative to the
# current working directory), then import the resulting `pypumila` module.
build_path = os.path.join(os.getcwd(), "..", "build")
# The kernel's own interpreter is passed as PYTHON_EXECUTABLE so the build
# targets the Python that will import the module.
# NOTE(review): the interpolated paths are not quoted, so a path containing
# spaces would presumably be split by the shell; confirm if that matters here.
!cmake -B {build_path} -DPYTHON_EXECUTABLE={sys.executable} -Wno-dev ..
!cmake --build {build_path}
# Make the build directory importable; the membership test avoids appending a
# duplicate entry when this cell is executed again.
if build_path not in sys.path:
    sys.path.append(build_path)
import pypumila
# to reload pypumila, restart python kernel.
# importlib.reload(pypumila) <- does not work

-- Eigen3 found
-- sdl2 and SDL2_ttf found
-- Accelerate found
-- Fetching pybind11 source...
-- pybind11 v2.11.1 
-- Fetching BS::thread_pool source...
-- Fetching cli11 source...
-- Fetching roboto font...
-- Configuring done (1.5s)
-- Generating done (0.0s)
-- Build files have been written to: /Users/kou/projects/pumila/build
[ 57%] Built target pumila
[ 92%] Built target pypumila
[ 92%] Built target CLI11
[100%] Built target pumila-sim


In [2]:
# Switch the process working directory to the build directory.
# NOTE(review): later cells save/load model files by bare name (e.g.
# "pumila8_0"), presumably relative to this directory; confirm.
os.chdir(build_path)
# Initial simulator instance; the training cell rebinds `sim` for every episode.
sim = pypumila.GameSim()

In [3]:
import numpy as np

In [4]:
from tqdm.notebook import tqdm

In [5]:
import random

In [6]:
# Per-sweep accumulators, one entry per trained model:
#   models        - the model objects being trained
#   models_first  - copies taken right after construction
#   rewards       - list of per-move reward lists
#   scores        - list of per-episode final-score lists
models, models_first, rewards, scores = ([] for _ in range(4))

In [None]:
# Hidden-size sweep: train one Pumila8 model per hidden size and record its
# per-move rewards and per-episode final scores.
STEPS_PER_MODEL = 100000  # progress-bar ticks (one per move) spent on each model
MOVES_PER_EPISODE = 50    # moves played on one GameSim before a fresh one is created

for i, hidden in enumerate(range(200, 1200, 200)):
    print(f"hidden = {hidden}")
    # Bind this sweep entry to local names instead of indexing the shared lists
    # with `i`. The lists are created in a different cell, so when this cell is
    # executed again (or after a partial run) they are not empty and
    # `models[i]` / `rewards[i]` / `scores[i]` would address stale entries.
    model = pypumila.Pumila8.make_shared(hidden)
    models.append(model)
    models_first.append(model.copy())  # copy made before any learn_step call
    model_rewards = []
    model_scores = []
    rewards.append(model_rewards)
    scores.append(model_scores)
    with tqdm(total=STEPS_PER_MODEL) as pbar:
        while pbar.n < STEPS_PER_MODEL:
            sim = pypumila.GameSim()
            # Drawn once per episode and passed to every get_action_rnd call in it.
            rnd_p = random.random()
            for _ in range(MOVES_PER_EPISODE):
                sim.step()
                pbar.update()
                a = model.get_action_rnd(sim.field, rnd_p)
                model.learn_step(sim.field)
                sim.put(pypumila.actions[a])
                # Keep stepping until the simulator reports the free phase again.
                while not sim.is_free_phase():
                    sim.step()
                model_rewards.append(model.calc_reward(sim.field))
                # A `mean_diff` value was once logged here as well; it was dropped
                # because it came from a much earlier learning result and was
                # therefore not informative.
            # Episode finished: record the final score of this simulator.
            model_scores.append(sim.field.total_score)
    model.save_file(f"pumila8_{i}")

hidden = 200


  0%|          | 0/100000 [00:00<?, ?it/s]

In [None]:
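# Manual cleanup, kept commented out: uncomment to drop the most recent entry
# from every sweep list (e.g. after an interrupted training run).
# del models[-1]
# del models_first[-1]
# del rewards[-1]
# del scores[-1]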

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Raw per-move rewards, one line per trained model.
# The labels used to come from np.geomspace(1, 1000, 7): seven values that match
# neither the count nor the sizes of the sweep trained in this notebook
# (range(200, 1200, 200), five models). Each run is now plotted on its own, so
# the number of labels always equals the number of runs and runs of different
# lengths (e.g. an interrupted one) no longer break np.array().
sweep = range(200, 1200, 200)  # hidden sizes used by the training cell
for j, run in enumerate(rewards):
    plt.plot(run, label=str(sweep[j]) if j < len(sweep) else f"run {j}")
plt.legend()

In [None]:
# Moving average (window = 100 moves) of the per-move rewards, one line per model.
# Fixes relative to the previous version:
#   * iterates over the runs that exist instead of a hard-coded range(7), which
#     indexed past the five models trained by the sweep;
#   * labels follow the sweep's hidden sizes instead of np.geomspace(1, 1000, 7);
#   * the average is computed with one vectorised convolution per run instead of
#     one np.average call per sample, and includes the final full window.
window = 100
sweep = range(200, 1200, 200)  # hidden sizes used by the training cell
for j, run in enumerate(rewards):
    values = np.asarray(run, dtype=float)
    if len(values) < window:
        continue  # not enough samples for a single full window
    smoothed = np.convolve(values, np.ones(window) / window, mode="valid")
    plt.plot(smoothed, label=str(sweep[j]) if j < len(sweep) else f"run {j}")
plt.legend()

In [None]:
# Moving average (window = 1000 moves) of the per-move rewards, one line per model.
# Fixes relative to the previous version:
#   * iterates over the runs that exist instead of a hard-coded range(7), which
#     indexed past the five models trained by the sweep;
#   * labels follow the sweep's hidden sizes instead of np.geomspace(1, 1000, 7);
#   * the average is computed with one vectorised convolution per run instead of
#     one np.average call per sample, and includes the final full window.
window = 1000
sweep = range(200, 1200, 200)  # hidden sizes used by the training cell
for j, run in enumerate(rewards):
    values = np.asarray(run, dtype=float)
    if len(values) < window:
        continue  # not enough samples for a single full window
    smoothed = np.convolve(values, np.ones(window) / window, mode="valid")
    plt.plot(smoothed, label=str(sweep[j]) if j < len(sweep) else f"run {j}")
plt.legend()

In [None]:
# Final score of every episode, one line per trained model.
# Labels follow the sweep's hidden sizes (range(200, 1200, 200)) rather than the
# seven np.geomspace(1, 1000, 7) values used before, which did not match the
# five trained models. Plotting run by run keeps labels and data in step and
# tolerates runs of different lengths.
sweep = range(200, 1200, 200)  # hidden sizes used by the training cell
for j, run in enumerate(scores):
    plt.plot(run, label=str(sweep[j]) if j < len(sweep) else f"run {j}")
plt.legend()

In [None]:
# Moving average (window = 100 episodes) of the final scores, one line per model.
# Fixes relative to the previous version:
#   * iterates over the runs that exist instead of a hard-coded range(7), which
#     indexed past the five models trained by the sweep;
#   * labels follow the sweep's hidden sizes instead of np.geomspace(1, 1000, 7);
#   * the average is computed with one vectorised convolution per run instead of
#     one np.average call per sample, and includes the final full window.
window = 100
sweep = range(200, 1200, 200)  # hidden sizes used by the training cell
for j, run in enumerate(scores):
    values = np.asarray(run, dtype=float)
    if len(values) < window:
        continue  # not enough episodes for a single full window
    smoothed = np.convolve(values, np.ones(window) / window, mode="valid")
    plt.plot(smoothed, label=str(sweep[j]) if j < len(sweep) else f"run {j}")
plt.legend()

In [None]:
# Rebuild `models` from files: seven Pumila7 instances, each created with
# make_shared(1) and then populated via load_file("pumila7_<index>").
# NOTE(review): the training cell in this notebook saves Pumila8 models as
# "pumila8_<index>" (five of them); confirm that loading the Pumila7 files is
# what is intended here.
models = []
for i in range(7):
    loaded = pypumila.Pumila7.make_shared(1)
    models.append(loaded)
    loaded.load_file(f"pumila7_{i}")

In [None]:
# Start a fresh simulator and attach a pypumila window to it.
sim = pypumila.GameSim()
win = pypumila.Window(sim)
# Step the window once.
# NOTE(review): the meaning of the boolean argument to step() is not visible
# here; this is the only call in the notebook that passes False. Confirm
# against the pypumila bindings.
win.step(False)

In [None]:
# Let the loaded model at index 2 play on `sim`, stepping the window between
# moves. The loop has no exit condition; interrupt the kernel to stop it.
player = models[2]
while True:
    a = player.get_action(sim)
    sim.put(pypumila.actions[a])
    # Alternative kept for reference: sim.soft_put(pypumila.actions[a])

    # Step the window for as long as the simulator stays in the free phase ...
    while sim.is_free_phase():
        win.step(True)
    # ... and then until it is back in the free phase.
    while not sim.is_free_phase():
        win.step(True)
    # Alternative pacing kept for reference:
    # for i in range(15):
    #     win.step(True)

In [None]:
# Shut down the window created above (presumably closes it; confirm against
# the pypumila bindings). Run this after interrupting the play loop.
win.quit()

In [None]:
# Reward monitor: open a fresh simulator/window pair and print the value of
# Pumila7.calc_reward for the current field every time it changes.
# The loop has no exit condition; interrupt the kernel to stop it.
# NOTE(review): calc_reward is called on the Pumila7 class here, whereas the
# training cell calls it on Pumila8 instances; confirm this is intended.
sim = pypumila.GameSim()
win = pypumila.Window(sim)
prev_r = 0
while True:
    r = pypumila.Pumila7.calc_reward(sim.field)
    changed = r != prev_r
    if changed:
        print(r)
    prev_r = r
    win.step(True)