docker run -p 9000:8080 -p 6006:6006 asia-docker.pkg.dev/colab-images/public/runtime

In [1]:
# Initialize
!git clone --recursive https://github.com/mitosagi/puzzdra-nnsolver
# %cd /kaggle/working/puzzdra-nnsolver
%cd /content/puzzdra-nnsolver
!pip install --log=pip_log -e .
!pip install torchsummary

Cloning into 'puzzdra-nnsolver'...
remote: Enumerating objects: 299, done.[K
remote: Counting objects: 100% (54/54), done.[K
remote: Compressing objects: 100% (54/54), done.[K
remote: Total 299 (delta 31), reused 0 (delta 0), pack-reused 245[K
Receiving objects: 100% (299/299), 8.31 MiB | 24.74 MiB/s, done.
Resolving deltas: 100% (173/173), done.
Submodule 'extern/pybind11' (https://github.com/pybind/pybind11) registered for path 'extern/pybind11'
Cloning into '/content/puzzdra-nnsolver/extern/pybind11'...
remote: Enumerating objects: 27236, done.        
remote: Counting objects: 100% (8/8), done.        
remote: Compressing objects: 100% (6/6), done.        
remote: Total 27236 (delta 1), reused 4 (delta 1), pack-reused 27228        
Receiving objects: 100% (27236/27236), 10.39 MiB | 18.15 MiB/s, done.
Resolving deltas: 100% (19162/19162), done.
Submodule path 'extern/pybind11': checked out '8de7772cc72daca8e947b79b83fea46214931604'
/content/puzzdra-nnsolver
Obtaining file:///con

In [2]:
import random
import numpy as np
from puzzpy import PuzzTable

# Number of distinct drop colors drawn from when a random board is generated.
drop_color = 3
# Board dimensions; a board holds board_width * board_height drops.
board_width = 6
board_height = 5

class bcolors:
    """ANSI escape sequences used to colorize terminal output."""

    # Bright foreground colors, named after their usual role.
    HEADER = '\x1b[95m'
    OKBLUE = '\x1b[94m'
    OKCYAN = '\x1b[96m'
    OKGREEN = '\x1b[92m'
    WARNING = '\x1b[93m'
    FAIL = '\x1b[91m'

    # Text attributes; ENDC resets everything.
    ENDC = '\x1b[0m'
    BOLD = '\x1b[1m'
    UNDERLINE = '\x1b[4m'

    # Standard foreground colors.
    BLACK = '\x1b[30m'
    RED = '\x1b[31m'
    GREEN = '\x1b[32m'
    YELLOW = '\x1b[33m'
    BLUE = '\x1b[34m'
    MAGENTA = '\x1b[35m'
    CYAN = '\x1b[36m'

def npUint8(array):
    """Return a new ``numpy`` array holding ``array`` converted to ``uint8``."""
    as_bytes = np.array(array, dtype=np.uint8)
    return as_bytes

class PuzzBoard():
    """Random-walk environment built on top of ``PuzzTable``.

    ``reset`` draws a fresh random board, ``step`` applies one randomly
    chosen permitted move, and ``render`` prints the board in ANSI colors.
    """

    def reset(self):
        """Draw a new random table and return its contents as a uint8 array.

        Tables are redrawn until ``eval_otoshi()`` returns 0 for the new one.
        """
        # Sentinel far away from any small action index, so the first step()
        # rejects nothing on account of a "previous" move.
        self.prev_action = 255

        accepted = False
        while not accepted:
            cells = "".join(str(random.randrange(drop_color)) for _ in range(board_width*board_height))
            start_x = random.randrange(board_width)
            start_y = random.randrange(board_height)
            # Original note: n-color board, m seconds of operation time.
            self.table = PuzzTable(cells, start_x, start_y, 50)
            accepted = self.table.eval_otoshi() == 0

        return npUint8(self.table.get_table())

    def step(self):
        """Apply one random permitted move.

        A move is permitted when its resulting table does not start with the
        value 127 and its index does not differ from the previous action's
        index by exactly 2.

        Returns:
            ``(action, boards)``: the chosen action index and a uint8 array
            with one board per candidate move, zero-filled for the moves
            that were not permitted.
        """
        candidates = self.table.next_tables()
        previous = self.prev_action

        allowed = []
        for action, table in enumerate(candidates):
            if table.get_table()[0][0] == 127:
                continue
            if abs(action - previous) == 2:
                continue
            allowed.append(action)

        chosen = random.choice(allowed)
        self.prev_action = chosen
        self.table = candidates[chosen]

        boards = []
        for action, table in enumerate(candidates):
            board = npUint8(table.get_table())
            boards.append(board if action in allowed else np.zeros_like(board))

        return chosen, npUint8(boards)

    def render(self):
        """Print the board row by row, underlining cells flagged with 1 by ``get_XY_as_table()``."""
        palette = [bcolors.RED, bcolors.BLUE, bcolors.GREEN, bcolors.MAGENTA, bcolors.YELLOW, bcolors.BLACK]
        marker = self.table.get_XY_as_table()
        cells = self.table.get_table()
        for row in range(board_height):
            line = ''
            for col in range(board_width):
                # Table values are shifted down by one to index the palette.
                color = palette[cells[row][col]-1]
                underline = bcolors.UNDERLINE if marker[row][col] == 1 else ''
                line += color + underline + "●" + bcolors.ENDC
            print(line)

In [3]:
# Smoke test: draw a random board, then take a few random moves and show each result.
env = PuzzBoard()
obs = env.reset()
env.render()

n_steps = 3

for step in range(n_steps):
    print(f"Step {step + 1}")
    action, obs = env.step()
    # threshold=np.inf only matters when the full `obs` dump below is enabled.
    with np.printoptions(threshold=np.inf):
        print("action: ", action)
        # print(obs)
        env.render()

[32m●[0m[31m●[0m[31m●[0m[34m●[0m[31m●[0m[32m●[0m
[34m●[0m[34m●[0m[31m●[0m[31m●[0m[34m●[0m[34m●[0m
[31m●[0m[34m●[0m[32m●[0m[32m●[0m[34m●[0m[31m●[0m
[31m●[0m[31m●[0m[32m●[0m[34m●[0m[32m●[0m[32m[4m●[0m
[34m●[0m[32m●[0m[31m●[0m[34m●[0m[32m●[0m[32m●[0m
Step 1
action:  3
[32m●[0m[31m●[0m[31m●[0m[34m●[0m[31m●[0m[32m●[0m
[34m●[0m[34m●[0m[31m●[0m[31m●[0m[34m●[0m[34m●[0m
[31m●[0m[34m●[0m[32m●[0m[32m●[0m[34m●[0m[32m[4m●[0m
[31m●[0m[31m●[0m[32m●[0m[34m●[0m[32m●[0m[31m●[0m
[34m●[0m[32m●[0m[31m●[0m[34m●[0m[32m●[0m[32m●[0m
Step 2
action:  3
[32m●[0m[31m●[0m[31m●[0m[34m●[0m[31m●[0m[32m●[0m
[34m●[0m[34m●[0m[31m●[0m[31m●[0m[34m●[0m[32m[4m●[0m
[31m●[0m[34m●[0m[32m●[0m[32m●[0m[34m●[0m[34m●[0m
[31m●[0m[31m●[0m[32m●[0m[34m●[0m[32m●[0m[31m●[0m
[34m●[0m[32m●[0m[31m●[0m[34m●[0m[32m●[0m[32m●[0m
Step 3
action:  3
[32m●[0m[31m●[0

In [11]:
def make_data(num_step, env):
    """Generate one training sample from a random walk of ``num_step`` moves.

    The environment is reset, walked for ``num_step`` random moves, and one
    of those moves (chosen uniformly) is recorded as the sample.

    Args:
        num_step: total number of moves in the walk; must be at least 1.
        env: object exposing ``reset()`` (returns the start board) and
            ``step()`` (returns ``(action, candidate_boards)``).

    Returns:
        ``(boards, label)`` where ``boards`` is the uint8 concatenation
        ``[start, *candidates at the sampled step, end]`` and ``label`` is
        the action that was taken at the sampled step.

    Raises:
        ValueError: if ``num_step`` is smaller than 1.
    """
    if num_step < 1:
        raise ValueError("num_step must be at least 1")

    start = env.reset()

    sample_step = random.randrange(1, num_step + 1)

    for _ in range(sample_step):
        sample_answer, sample_obs = env.step()

    # Walk on to the end of the episode.  The sampled label is kept in its own
    # variable so these later moves cannot overwrite it; previously the label
    # returned was the action of the final move, which does not correspond to
    # the candidate boards stored in the sample.
    answer, obs = sample_answer, sample_obs
    for _ in range(num_step - sample_step):
        answer, obs = env.step()
    # The final board is the candidate selected by the last move.
    end = obs[answer]

    boards = np.concatenate([
        np.array([start], dtype=np.uint8),
        sample_obs,
        np.array([end], dtype=np.uint8),
    ])
    return boards, sample_answer
def make_data_fast(length, num_step=50):
    """Generate ``length`` samples, reusing a single environment for all of them.

    Args:
        length: number of samples to generate.
        num_step: number of moves per random walk; defaults to 50, the value
            that used to be hard-coded here.

    Returns:
        A list of ``(boards, label)`` tuples as produced by ``make_data``.
    """
    env = PuzzBoard()
    return [make_data(num_step, env) for _ in range(length)]

from multiprocessing import Pool
process = 10
p = Pool(process)
%time result = p.map(make_data_fast, [1_000_000 // process  for i in range(process)]) # 100万データ生成に5分22秒かかる

import itertools
sample_data = npUint8([sample[0] for sample in itertools.chain(*result)])
sample_labels = npUint8([sample[1] for sample in itertools.chain(*result)])
np.save('sample_data_50', sample_data)
np.save('sample_labels_50', sample_labels)

CPU times: user 3.03 s, sys: 627 ms, total: 3.65 s
Wall time: 5min 22s


In [None]:
# Load the previously generated samples and their labels from /kaggle/input.
dataset_dir = '/kaggle/input/puzz-dataset'
sample_data = np.load(dataset_dir + '/sample_data.npy')
sample_labels = np.load(dataset_dir + '/sample_labels.npy')
from torch.utils.data import Dataset
class PuzzDataSet(Dataset):
    """In-memory dataset pairing float32 inputs with one-hot float32 labels."""

    def __init__(self, x, y):
        """Store ``x`` as float32 and turn the action indices ``y`` into one-hot rows."""
        # One row of the 4x4 identity matrix per action (4 actions).
        one_hot_rows = np.eye(4, dtype=np.float32)
        self.data = np.array(x, dtype=np.float32)
        self.labels = one_hot_rows[y]

    def __len__(self):
        """Number of samples, taken from the label array."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(input, one_hot_label)`` pair stored at ``idx``."""
        sample = self.data[idx]
        label = self.labels[idx]
        return sample, label

# Hold out the last 1,000 samples for evaluation; everything before them is training data.
holdout = 1_000
data_split = len(sample_data) - holdout
label_split = len(sample_labels) - holdout

dataset = PuzzDataSet(sample_data[:data_split], sample_labels[:label_split])
print("dataset: ", len(dataset))
print("dataset: ", dataset[1])
testset = PuzzDataSet(sample_data[data_split:], sample_labels[label_split:])
print("testset: ", len(testset))

dataset:  999000
dataset:  (array([[[1., 3., 1., 3., 2., 1.],
        [1., 3., 3., 1., 3., 2.],
        [2., 1., 1., 3., 1., 3.],
        [1., 1., 2., 3., 2., 2.],
        [3., 2., 3., 1., 2., 3.]],

       [[0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.]],

       [[1., 3., 1., 3., 2., 1.],
        [1., 3., 3., 1., 3., 2.],
        [2., 1., 1., 3., 1., 3.],
        [1., 1., 2., 3., 2., 2.],
        [3., 2., 3., 1., 3., 2.]],

       [[1., 3., 1., 3., 2., 1.],
        [1., 3., 3., 1., 3., 2.],
        [2., 1., 1., 3., 1., 3.],
        [1., 1., 2., 3., 2., 3.],
        [3., 2., 3., 1., 2., 2.]],

       [[1., 3., 1., 3., 2., 1.],
        [1., 3., 3., 1., 3., 2.],
        [1., 2., 1., 3., 1., 3.],
        [1

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchsummary import summary
from torchvision.models import resnet18, ResNet18_Weights

class Net(nn.Module):
    """ResNet-18 with a 6-channel input convolution and a 4-way output layer."""

    def __init__(self):
        super().__init__()
        in_channels = 6
        out_features = 4
        backbone = resnet18(weights=ResNet18_Weights.DEFAULT)
        # Replace the first convolution so it accepts `in_channels` planes, and
        # the final fully connected layer so it emits `out_features` values.
        backbone.conv1 = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        backbone.fc = nn.Linear(512, out_features)
        self.resnet = backbone

    def forward(self, x):
        """Double the spatial size by nearest-neighbour upsampling, then run the backbone."""
        upsampled = F.interpolate(x, scale_factor=2, mode='nearest')
        return self.resnet(upsampled)

import datetime

from torch.utils.tensorboard import SummaryWriter

model = Net()
# Freeze every parameter tensor before index 60; per the printed listing only the
# last five tensors (ending with the new fc layer's weight and bias) stay trainable.
# NOTE(review): index 0 is the replacement conv1, which is freshly initialised yet
# stays frozen here - confirm that this is intended.
first_trainable = 45 + 15
for i, param in enumerate(model.parameters()):
    param.requires_grad = i >= first_trainable
    print(len(param), param.requires_grad)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print("device: ", device)
model = model.to(device)
print(summary(model, (6, 5, 6)))

train_loader = torch.utils.data.DataLoader(dataset, batch_size=100, shuffle=True)
test_loader = torch.utils.data.DataLoader(testset, batch_size=200, shuffle=True)

criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.0005)

# Write events under ./logs, the directory the `%tensorboard --logdir logs` cell
# reads.  With no log_dir the writer uses ./runs/<timestamp>, which that cell never
# looks at.  Each run gets its own timestamped sub-directory so curves from
# different runs are not merged.
writer = SummaryWriter(log_dir='logs/' + datetime.datetime.now().strftime('%Y%m%d-%H%M%S'))

def train(epoch):
    """Run one pass over ``train_loader``, reporting progress every 100 batches.

    Uses the module-level ``model``, ``optimizer``, ``criterion``, ``device``,
    ``writer`` and ``train_loader``.  Every 100 batches the running average
    loss is printed and logged to TensorBoard, and ``test(epoch)`` is called.

    Args:
        epoch: epoch number (the driver loop counts from 1); used in the
            progress message and to compute the TensorBoard step.
    """
    total_loss = 0
    total_size = 0
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        # The criterion yields a mean over the batch; weight it by the batch
        # size so that total_loss / total_size is a real running average
        # instead of being scaled down by the batch size.
        total_loss += loss.item() * data.size(0)
        total_size += data.size(0)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            now = datetime.datetime.now()
            average_loss = total_loss / total_size
            print('[{}] Train Epoch: {} [{}/{} ({:.0f}%)]\tAverage loss: {:.6f}'.format(
                now,
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), average_loss))
            # Use a step that advances within the epoch; logging every point at
            # `epoch` would stack them all on the same x position.
            global_step = (epoch - 1) * len(train_loader) + batch_idx
            writer.add_scalar('Loss/train', average_loss, global_step)
            test(epoch)
            # test() puts the model into eval mode; switch back so the
            # remaining batches are trained in training mode.
            model.train()
def test(epoch):
    """Evaluate the model on the whole of ``test_loader`` and report the average loss.

    Uses the module-level ``model``, ``criterion``, ``device``, ``writer`` and
    ``test_loader``.  The loss is averaged over every test sample, printed,
    and logged to TensorBoard under ``Loss/test`` at step ``epoch``.

    Args:
        epoch: epoch number shown in the message and used as the logging step.
    """
    total_loss = 0
    total_size = 0
    # Remember the current mode so a caller in the middle of training gets the
    # model back in the state it had before the evaluation.
    was_training = model.training
    model.eval()
    try:
        # No gradients are needed for evaluation; skipping the autograd graph
        # saves memory and time.
        with torch.no_grad():
            # Evaluate every batch: the loop used to break after the first
            # one, so the reported value covered a single random batch.
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                loss = criterion(output, target)
                # Weight the batch mean by the batch size (see the division below).
                total_loss += loss.item() * data.size(0)
                total_size += data.size(0)
    finally:
        model.train(was_training)

    if total_size == 0:
        # Empty loader: nothing to report, and avoids dividing by zero.
        return

    now = datetime.datetime.now()
    average_loss = total_loss / total_size
    print('[{}] Test Epoch: {} [{}/{} ({:.0f}%)]\tAverage loss: {:.6f}'.format(
        now,
        epoch, total_size, len(test_loader.dataset),
        100. * total_size / len(test_loader.dataset), average_loss))
    writer.add_scalar('Loss/test', average_loss, epoch)

# Number of passes over the training set.
num_epochs = 1
try:
    for epoch in range(1, num_epochs + 1):
        train(epoch)
        test(epoch)
finally:
    # Push buffered TensorBoard events to disk even if training is interrupted.
    writer.flush()

64 False
64 False
64 False
64 False
64 False
64 False
64 False
64 False
64 False
64 False
64 False
64 False
64 False
64 False
64 False
128 False
128 False
128 False
128 False
128 False
128 False
128 False
128 False
128 False
128 False
128 False
128 False
128 False
128 False
128 False
256 False
256 False
256 False
256 False
256 False
256 False
256 False
256 False
256 False
256 False
256 False
256 False
256 False
256 False
256 False
512 False
512 False
512 False
512 False
512 False
512 False
512 False
512 False
512 False
512 False
512 False
512 False
512 True
512 True
512 True
4 True
4 True
device:  cuda
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1             [-1, 64, 5, 6]          18,816
       BatchNorm2d-2             [-1, 64, 5, 6]             128
              ReLU-3             [-1, 64, 5, 6]               0
         MaxPool2d-4             [-1, 64, 3, 3]               0
     

KeyboardInterrupt: 

In [None]:
# Launch TensorBoard inside the notebook.
# NOTE(review): --logdir has to be the directory the training cell's SummaryWriter
# writes to; verify that the two match.
%load_ext tensorboard
%tensorboard --logdir logs