In [1]:
import os
os.environ["MKL_NUM_THREADS"] = "1" 
os.environ["NUMEXPR_NUM_THREADS"] = "1" 
os.environ["OMP_NUM_THREADS"] = "1" 

from train import MCTS_HYPERPARAMETERS, load_from_checkpoint, collect_episode, train, rotate_training_examples
import torch
from resnet import ResNet2Heads
from utils import input_to_tensor_3d, input_to_tensor, input_to_tensor_scalar
import torch.multiprocessing as mp
from train import save_checkpoint, load_from_checkpoint, MetricsHistory, test_network, GameReplayMemory
import warnings
import matplotlib.pyplot as plt

In [2]:
MODEL_TO_LOAD = "" # place filename of checkpoint here, otherwise leave empty
LOAD_REPLAY_MEMORY = True
PLOT_EVERY = 25
WARMUP_AMOUNT = 10

# HYPERPARAMETERS (will be ignored if loading from a checkpoint)
hyperparameters = MCTS_HYPERPARAMETERS() # use kwargs to specify non-default values


In [3]:
if MODEL_TO_LOAD:
    _, model, optimizer, hyperparameters, metrics_history, replay_memory, run_tag = load_from_checkpoint(MODEL_TO_LOAD, ResNet2Heads, load_replay_memory=LOAD_REPLAY_MEMORY)
    metrics_history.figs = [plt.figure() for _ in range(6)]
else:
    hyperparameters.replay_memory_size = 1000
    hyperparameters.minibatch_size = 128
    hyperparameters.weight_decay = 0.01
    hyperparameters.num_mcts_train_evals = 50
    hyperparameters.num_episodes = 100000
    hyperparameters.mcts_c_puct = 1
    hyperparameters.minibatches_per_episode = 16
    hyperparameters.lr = 5e-5
    hyperparameters.mcts_tau = 0.95
    hyperparameters.checkpoint_every = 250
    hyperparameters.c_prob = 50
    model = ResNet2Heads()
    replay_memory = GameReplayMemory(hyperparameters.replay_memory_size)
    optimizer = torch.optim.AdamW(model.parameters(), lr=hyperparameters.lr, weight_decay=hyperparameters.weight_decay)
    metrics_history = MetricsHistory()
    run_tag = 'mcresnet1283'     
    episode = 0
    
    
NUM_PROCS = 7

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

In [4]:
collect_episode(model, hyperparameters, input_to_tensor_scalar)

In [5]:
def enque_and_train(results):
    torch.set_num_threads(4)
    
    training_examples, reward, moves, high_square, pid = results
    training_examples = rotate_training_examples(training_examples)
    replay_memory.insert(list(training_examples))
    
    if replay_memory.size() >= WARMUP_AMOUNT:
        cum_vl, cum_pl, cum_tl = 0.0, 0.0, 0.0
        for i in range(hyperparameters.minibatches_per_episode):
            value_loss, prob_loss, total_loss = train(replay_memory.sample(hyperparameters.minibatch_size), model, optimizer, tensor_conversion_fn=input_to_tensor_scalar, c_prob=hyperparameters.c_prob)
            cum_vl += value_loss
            cum_pl += prob_loss
            cum_tl += total_loss
        cum_vl /= hyperparameters.minibatches_per_episode
        cum_pl /= hyperparameters.minibatches_per_episode
        cum_tl /= hyperparameters.minibatches_per_episode
        new_best = metrics_history.add_history({
            'reward': reward,
            'game_moves': moves,
            'prob_loss': cum_pl,
            'value_loss': cum_vl,
            'total_loss': cum_tl,
            'high_square': high_square
        })
        if metrics_history.episodes % PLOT_EVERY == 0:
            metrics_history.plot_history(window_size=100)
        
        if new_best:
            print(f'*** NEW BEST REWARD: {metrics_history.best_result} ***')
        
        print(f'[EPISODE {metrics_history.episodes}] Total Loss: {cum_tl}, Prob Loss {cum_pl}, Value Loss {cum_vl}, Reward {reward}, Moves: {moves}, Highest Square: {high_square}, PID: {pid}')
        if metrics_history.episodes % hyperparameters.checkpoint_every == 0:
            print('Saving model checkpoint...')
            save_checkpoint(metrics_history.episodes, model, optimizer, hyperparameters, metrics_history, replay_memory, run_tag=run_tag, save_replay_memory=True)
            print('Saved model checkpoint!')
    else:
        print(f'Replay memory size not large enough, {replay_memory.size()} < {WARMUP_AMOUNT}')
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    with mp.Pool(NUM_PROCS) as p:
        results = []
        for n in range(metrics_history.episodes, hyperparameters.num_episodes):
            results.append(p.apply_async(collect_episode, (model, hyperparameters, input_to_tensor_scalar, ), callback=enque_and_train, error_callback=print))
        for r in results:
            r.wait()

[W NNPACK.cpp:53] Could not initialize NNPACK! Reason: Unsupported hardware.
  return F.conv2d(input, weight, bias, self.stride,
[W NNPACK.cpp:53] Could not initialize NNPACK! Reason: Unsupported hardware.
  return F.conv2d(input, weight, bias, self.stride,
[W NNPACK.cpp:53] Could not initialize NNPACK! Reason: Unsupported hardware.
  return F.conv2d(input, weight, bias, self.stride,
[W NNPACK.cpp:53] Could not initialize NNPACK! Reason: Unsupported hardware.
  return F.conv2d(input, weight, bias, self.stride,
[W NNPACK.cpp:53] Could not initialize NNPACK! Reason: Unsupported hardware.
  return F.conv2d(input, weight, bias, self.stride,
[W NNPACK.cpp:53] Could not initialize NNPACK! Reason: Unsupported hardware.
  return F.conv2d(input, weight, bias, self.stride,
[W NNPACK.cpp:53] Could not initialize NNPACK! Reason: Unsupported hardware.
  return F.conv2d(input, weight, bias, self.stride,


Replay memory size not large enough, 1 < 10
Replay memory size not large enough, 2 < 10
Replay memory size not large enough, 3 < 10
Replay memory size not large enough, 4 < 10
Replay memory size not large enough, 5 < 10
Replay memory size not large enough, 6 < 10
Replay memory size not large enough, 7 < 10
Replay memory size not large enough, 8 < 10
Replay memory size not large enough, 9 < 10


Exception in thread Thread-7:
Traceback (most recent call last):
  File "/Users/marshingjay/opt/anaconda3/envs/python38/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/Users/marshingjay/opt/anaconda3/envs/python38/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/marshingjay/opt/anaconda3/envs/python38/lib/python3.8/multiprocessing/pool.py", line 592, in _handle_results
    cache[job]._set(i, obj)
  File "/Users/marshingjay/opt/anaconda3/envs/python38/lib/python3.8/multiprocessing/pool.py", line 776, in _set
    self._callback(self._value)
  File "/var/folders/bd/sfd5qw2j27v1b3ht5zx176l00000gn/T/ipykernel_37854/531245689.py", line 11, in enque_and_train
  File "/Users/marshingjay/Repos/2048_dqn/train.py", line 165, in train
    obs = tensor_conversion_fn(obs)
  File "/Users/marshingjay/Repos/2048_dqn/utils.py", line 28, in input_to_tensor_scalar
    torch.tensor(board, dtype=torch.float)
ValueEr

AssertionError: Cannot have cache with result_hander not alive

In [None]:
# test_network(model, hyperparameters, input_to_tensor, debug_print=True)