In [None]:
from main import *

# Silence every warning category for the remainder of the kernel session.
warnings.simplefilter('ignore')

# --- Configuration ----------------------------------------------------------
# Destination of the trained checkpoint; set to None to skip saving.
model_path: Optional[Path] = Path('../models/latest.ckpt')
onnx_model_path: Path = Path('../models/model.onnx')
data_path: Path = Path('../data')
# The next three settings are not read anywhere in this cell.
num_simulation: int = 500
num_iter: int = 100
num_worker = os.cpu_count()
# Single seed shared by the train/validation split and the global RNGs.
seed: int = 42
batch_size: int = 256
print(data_path)

# Seed the global RNGs before the module is built so that weight
# initialisation and batch shuffling repeat from run to run.
pl.seed_everything(seed)
module = LightingModule()

# --- Data -------------------------------------------------------------------
# Three arrays loaded from `data_path`; states and values are cast to float32.
policy = np.load(data_path / 'policy.npy')
states = np.load(data_path / 'states.npy').astype(np.float32)
values = np.load(data_path / 'values.npy').astype(np.float32)

# 80/20 shuffled split; train_test_split returns a (train, test) pair per
# input array, in the order the arrays were passed.
train_p, val_p, train_s, val_s, train_v, val_v = train_test_split(
    policy, states, values, test_size=0.2, shuffle=True, random_state=seed
)
train_dataset = Dataset(train_s, train_p, train_v)
val_dataset = Dataset(val_s, val_p, val_v)
train_dataloder = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloder = DataLoader(val_dataset, batch_size=batch_size)

# --- Training ---------------------------------------------------------------
# NOTE(review): `checkpoint_callback` was renamed `enable_checkpointing` in
# newer PyTorch Lightning releases - confirm against the installed version.
trainer = pl.Trainer(
    min_epochs=10,
    max_epochs=100,
    log_every_n_steps=10,
    logger=[],
    callbacks=[
        EarlyStopping(monitor='val_loss'),
    ],
    checkpoint_callback=False,
)
trainer.fit(module, train_dataloder, val_dataloder)

# `model_path` is declared Optional, so honour None, and make sure the target
# directory exists before writing the checkpoint.
if model_path is not None:
    model_path.parent.mkdir(parents=True, exist_ok=True)
    trainer.save_checkpoint(model_path)

In [None]:
from main import *
# Build a LightingModule with its constructor defaults.
# NOTE(review): nothing in this cell loads the checkpoint written by the
# training cell; presumably the weights are whatever `LightingModule()`
# initialises - confirm that this is intended before exporting.
lightningmodule = LightingModule()
# Random tensor of shape (1, 2, 8, 8); passed to `to_onnx` further down as the
# sample input.
dummy_input = torch.randn(1, 2, 8, 8)

In [None]:
class Model(torch.nn.Module):
    """Small convolutional policy/value network for an 8x8 board.

    The input is a ``(batch, 2, 8, 8)`` tensor. Four 3x3 convolutions (the
    first two padded, the last two unpadded) reduce the spatial size from
    8x8 to 4x4 with 16 channels; the flattened features pass through two
    fully connected layers with batch norm, ReLU and dropout, and then into
    two heads.

    ``forward`` returns a tuple ``(policy, value)``:
        policy: ``(batch, 64)`` softmax distribution over the 64 squares.
        value:  ``(batch, 1)`` tanh-squashed scalar in ``[-1, 1]``.
    """

    def __init__(self):
        super().__init__()
        self.height = self.width = 8
        # The misspelled attribute names (`ouput_size`, `dropout_late`) are
        # kept as-is so any code that already reads them keeps working.
        self.ouput_size = 8 * 8
        self.dropout_late = 0.5
        in_channels = 2
        channels = 16

        # One stateless ReLU instance is shared by every position below.
        self.relu = nn.ReLU()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels, channels, 3, padding=1),
            nn.BatchNorm2d(channels),
            self.relu,
            nn.Conv2d(channels, channels, 3, padding=1),
            nn.BatchNorm2d(channels),
            self.relu,
            # No padding from here on: each conv trims 2 from height/width.
            nn.Conv2d(channels, channels, 3),
            nn.BatchNorm2d(channels),
            self.relu,
            nn.Conv2d(channels, channels, 3),
            nn.BatchNorm2d(channels),
            self.relu,
        )

        # Two unpadded 3x3 convs shrink each spatial dimension by 4 in total.
        self.fc_input = channels * (self.width - 4) * (self.height - 4)
        # Deliberately NOT in-place: the dropout sits directly after the
        # (non-in-place) ReLU, and an in-place dropout would overwrite the
        # ReLU output that autograd keeps for the backward pass.
        self.dropout = nn.Dropout(self.dropout_late)
        self.layer2 = nn.Sequential(
            nn.Linear(self.fc_input, 512),
            nn.BatchNorm1d(512),
            self.relu,
            self.dropout,
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            self.relu,
            self.dropout,
        )

        self.fc3 = nn.Linear(256, self.ouput_size)  # policy head (64 logits)
        self.fc4 = nn.Linear(256, 1)                # value head
        self.softmax = nn.Softmax(dim=1)
        self.tanh = nn.Tanh()

    def forward(self, x):
        """Return ``(policy, value)`` for a ``(batch, 2, 8, 8)`` input."""
        x = self.layer1(x)
        # Flatten the conv features to (batch, fc_input) for the dense stack.
        x = x.view(-1, self.fc_input)
        x = self.layer2(x)
        policy = self.fc3(x)
        value = self.fc4(x)
        return self.softmax(policy), self.tanh(value)

In [None]:
# Display the quantized backends this PyTorch build reports as supported.
torch.backends.quantized.supported_engines

In [None]:
# Apply dynamic quantization (library defaults) to a newly constructed `Model`.
# NOTE(review): the instance is created inline, so no trained weights are
# loaded into it here - confirm this is only meant as a smoke test.
model_i8 = torch.quantization.quantize_dynamic(Model())

In [None]:
# Export `lightningmodule` to ONNX with its parameters embedded, using
# `dummy_input` as the sample input.
# NOTE(review): the file is named `model_i8.onnx`, but the object exported is
# `lightningmodule`, not the quantized `model_i8` - confirm which was intended.
lightningmodule.to_onnx('../models/model_i8.onnx', dummy_input, export_params=True)

In [None]:
from main import *

In [None]:

# Second training run over the arrays in `data_path`.
# NOTE(review): `data_path` and `module` are not defined in this cell; they
# are presumably left over from an earlier cell (or come from `main`) - confirm.
policy, states, values = (
    np.load(data_path / 'policy.npy'),
    np.load(data_path / 'states.npy').astype(np.float32),
    np.load(data_path / 'values.npy').astype(np.float32),
)

# 80/20 shuffled split with a fixed random_state; the result holds one
# (train, validation) pair per input array, in input order.
split = train_test_split(
    policy,
    states,
    values,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)
train_p, val_p, train_s, val_s, train_v, val_v = split

# Dataset takes (states, policy, values); only the training loader shuffles.
train_dataset = Dataset(train_s, train_p, train_v)
val_dataset = Dataset(val_s, val_p, val_v)
train_dataloder = DataLoader(train_dataset, shuffle=True, batch_size=256)
val_dataloder = DataLoader(val_dataset, batch_size=256)

# Between 10 and 100 epochs, early stopping on `val_loss`, no logger and no
# checkpoint callback.
trainer_options = dict(
    min_epochs=10,
    max_epochs=100,
    log_every_n_steps=10,
    logger=[],
    callbacks=[EarlyStopping(monitor='val_loss')],
    checkpoint_callback=False,
)
trainer = pl.Trainer(**trainer_options)
trainer.fit(module, train_dataloder, val_dataloder)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class OthelloNNet(nn.Module):
    """Policy/value network for an 8x8 board with 65 actions.

    ``forward`` accepts any tensor that can be viewed as
    ``(batch, 1, 8, 8)`` and returns ``(pi, v)``: a ``(batch, 65)`` softmax
    over actions and a ``(batch, 1)`` tanh value.
    """

    def __init__(self):
        super().__init__()

        # game params
        self.board_x = self.board_y = 8
        self.action_size = 65
        self.num_channels = 512
        self.dropout = 0.3

        width = self.num_channels

        # Convolutional trunk: conv1/conv2 are padded (size preserved),
        # conv3/conv4 are unpadded (each trims 2 from both board dimensions).
        self.conv1 = nn.Conv2d(1, width, 3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(width, width, 3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(width, width, 3, stride=1)
        self.conv4 = nn.Conv2d(width, width, 3, stride=1)

        self.bn1 = nn.BatchNorm2d(width)
        self.bn2 = nn.BatchNorm2d(width)
        self.bn3 = nn.BatchNorm2d(width)
        self.bn4 = nn.BatchNorm2d(width)

        # Dense trunk.
        self.fc1 = nn.Linear(self._flat_features(), 1024)
        self.fc_bn1 = nn.BatchNorm1d(1024)

        self.fc2 = nn.Linear(1024, 512)
        self.fc_bn2 = nn.BatchNorm1d(512)

        # Heads: action logits and scalar value.
        self.fc3 = nn.Linear(512, self.action_size)
        self.fc4 = nn.Linear(512, 1)

    def _flat_features(self):
        """Number of features left after the four convolutions."""
        return self.num_channels * (self.board_x - 4) * (self.board_y - 4)

    def forward(self, s):
        """Return ``(softmax policy, tanh value)`` for a batch of boards."""
        # batch_size x board_x x board_y  ->  batch_size x 1 x board_x x board_y
        hidden = s.view(-1, 1, self.board_x, self.board_y)

        # conv -> batch norm -> ReLU, four times; the spatial size goes
        # board -> board -> board-2 -> board-4.
        conv_stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
            (self.conv4, self.bn4),
        )
        for conv, norm in conv_stages:
            hidden = F.relu(norm(conv(hidden)))

        hidden = hidden.view(-1, self._flat_features())

        # linear -> batch norm -> ReLU -> dropout: 1024 then 512 features.
        dense_stages = (
            (self.fc1, self.fc_bn1),
            (self.fc2, self.fc_bn2),
        )
        for dense, norm in dense_stages:
            hidden = F.dropout(
                F.relu(norm(dense(hidden))),
                p=self.dropout,
                training=self.training,
            )

        pi = self.fc3(hidden)  # batch_size x action_size
        v = self.fc4(hidden)   # batch_size x 1

        return F.softmax(pi, dim=1), torch.tanh(v)

In [None]:
import os

# Checkpoint location. The default is the path the notebook was written
# against; set the OTHELLO_CHECKPOINT environment variable to load the file
# from somewhere else without editing the notebook.
othello_checkpoint_path = os.environ.get(
    'OTHELLO_CHECKPOINT',
    '/Users/near129/Downloads/8x8_100checkpoints_best.pth.tar',
)

othellonet = OthelloNNet()
# NOTE(security): torch.load unpickles the file, so only load checkpoints
# from a trusted source.
othello_checkpoint = torch.load(
    othello_checkpoint_path, map_location=torch.device('cpu')
)
# The weights are stored under the 'state_dict' key of the checkpoint.
othellonet.load_state_dict(othello_checkpoint['state_dict'])

In [None]:
# Export `othellonet` to 'othellonet.onnx', using one random 1x8x8 tensor as
# the example input.
sample_board = torch.randn((1, 8, 8))
torch.onnx.export(othellonet, sample_board, 'othellonet.onnx')

In [None]:
# Put the network in evaluation mode.
othellonet.eval()
# Run one random 1x8x8 input through the network and display the shape of the
# first returned tensor (the softmax policy output).
othellonet(torch.randn((1, 8, 8)))[0].shape

In [None]:
import onnx

In [None]:
# Read the exported 'othellonet.onnx' file back in and run the ONNX model
# checker on it.
model = onnx.load('othellonet.onnx')
onnx.checker.check_model(model)

In [None]:
from pathlib import Path
import numpy as np
import pandas as pd

def augment_with_symmetries(policy, states, values, board_size=8):
    """Expand a dataset with the 8 symmetries of a square board.

    The 4 rotations come first, followed by the left-right mirror image of
    each rotation. Policy and states receive the *same* transform at each
    position in the output, so sample ``i`` of every chunk still pairs up.

    Args:
        policy: array reshapeable to ``(N, board_size, board_size)``.
        states: array whose last two axes are the board; rotated on axes
            ``(2, 3)`` as in the original cell, i.e. rank 4.
        values: per-sample array; repeated unchanged for every symmetry.
        board_size: side length of the board (default 8).

    Returns:
        ``(policy, states, values)`` with 8x the samples; policy is
        flattened back to ``(8N, board_size * board_size)``.
    """
    boards = policy.reshape(-1, board_size, board_size)
    policy_rot = [np.rot90(boards, k, (1, 2)) for k in range(4)]
    states_rot = [np.rot90(states, k, (2, 3)) for k in range(4)]

    # Mirror along the board's column axis for BOTH arrays. `np.fliplr`
    # always reverses axis 1, which is the flat index for an (N, 64) policy
    # (a 180-degree rotation) and the channel axis for (N, C, 8, 8) states,
    # so it must not be used here.
    policy_all = policy_rot + [np.flip(p, axis=2) for p in policy_rot]
    states_all = states_rot + [np.flip(s, axis=3) for s in states_rot]

    return (
        np.concatenate(policy_all).reshape(-1, board_size * board_size),
        np.concatenate(states_all),
        np.concatenate([values] * len(states_all)),
    )


data_path = Path('../data')
policy = np.load(data_path / 'policy.npy')
states = np.load(data_path / 'states.npy').astype(np.float32)
values = np.load(data_path / 'values.npy').astype(np.float32)
# Number of distinct states before augmentation.
print(len(np.unique(states, axis=0)))
policy, states, values = augment_with_symmetries(policy, states, values)
# Number of distinct states after augmentation.
print(len(np.unique(states, axis=0)))

In [None]:
# Display the shapes of the three arrays side by side.
states.shape, policy.shape, values.shape

In [None]:
# Collect the arrays into one frame, one row per sample: `values` is assigned
# directly, while `states` and `policy` are split into per-sample entries.
df = pd.DataFrame().assign(
    values=values,
    states=list(states),
    policy=list(policy),
)
df.shape, df.head()

In [None]:
# Toy 4x2 array holding 0..7, used to try out the NumPy rotate/flip helpers.
x = np.arange(8).reshape(4, 2)
x

In [None]:
# Rotate `x` by 90 degrees with np.rot90's defaults (one turn in the plane of
# the first two axes) and display the result.
x1 = np.rot90(x)
x1

In [None]:
# Mirror `x1` left-to-right (np.fliplr reverses axis 1); the result is only
# displayed, not stored.
np.fliplr(x1)

In [None]:
# Display `x1` again; the np.fliplr call above was not assigned back to it.
x1