In [54]:
import scipy

In [4]:
# import numpy as np
#
# file_path = "offline_data/20M/mahjong-offline-data-batch-0.mat"
# raw_data = scipy.io.loadmat(file_path)
# action_array = np.reshape(raw_data['A'], -1)
# action_data = raw_data['A'].T
# obs_data = raw_data['X']
#
# mask = np.bitwise_and(action_array >= 34, action_array <= 45)
# mask = np.bitwise_and(mask, action_array != 41)
# mask = np.bitwise_and(mask, action_array != 42)
# mask = np.bitwise_and(mask, action_array != 43)
# obs_data_masked = obs_data[mask]  # 93 * 34
# action_data_masked = action_data[mask]  # cate: 9
#
# import torch
# action_data_masked_t = torch.tensor(action_data_masked)

TypeError: scatter() received an invalid combination of arguments - got (), but expected one of:
 * (int dim, Tensor index, Tensor src)
 * (int dim, Tensor index, Tensor src, *, str reduce)
 * (name dim, Tensor index, Tensor src)
 * (int dim, Tensor index, Number value)
 * (int dim, Tensor index, Number value, *, str reduce)
 * (name dim, Tensor index, Number value)


In [25]:
# print(action_data_masked_t.shape)
# class_labels = [34, 35, 36, 37, 38, 39, 40, 44, 45]
# new_action_data_masked_t = torch.tensor([class_labels.index(class_id.item()) for class_id in action_data_masked]).view(-1, 1)
#
# # new_action_data_masked_t.shape
# torch.zeros((95616, 9)).scatter_(1, new_action_data_masked_t.to(torch.int64), 1)

95616

In [55]:
from torch.utils.data import Dataset, DataLoader
import numpy as np
from torch import tensor
import torch


class MeldDataset(Dataset):
    def __init__(self, root_path='offline_data/20M', index=0, oracle=False):
        file_path = f"{root_path}/mahjong-offline-data-batch-{index}.mat"
        raw_data = scipy.io.loadmat(file_path)
        action_array = np.reshape(raw_data['A'], -1)
        action_data = raw_data['A'].T
        if oracle:
            obs_data = np.concatenate([raw_data['X'], raw_data['O']], axis=1)
        else:
            obs_data = raw_data['X']

        mask = np.bitwise_and(action_array >= 34, action_array <= 45)
        mask = np.bitwise_and(mask, action_array != 41)
        mask = np.bitwise_and(mask, action_array != 42)
        mask = np.bitwise_and(mask, action_array != 43)
        self.obs_data_masked = obs_data[mask]  # 93 * 34
        self.action_data_masked = action_data[mask]  # cate: 9
        action_category = [34, 35, 36, 37, 38, 39, 40, 44, 45]
        self.action_data_masked = tensor([action_category.index(class_id.item()) for class_id in self.action_data_masked]).view(-1, 1)

        # noinspection PyTypeChecker
        self.action_data_masked_onehot = torch.zeros(self.action_data_masked.shape[0], 9).scatter_(1, self.action_data_masked, 1)

    def __len__(self):
        return len(self.action_data_masked)

    def __getitem__(self, idx):
        return tensor(self.obs_data_masked[idx], dtype=torch.float), self.action_data_masked_onehot[idx]

In [56]:
meld_dataset_valid = MeldDataset(index=39)

In [58]:
from torch import nn


class MeldModel(nn.Module):
    def __init__(self, in_channels=93):
        super().__init__()
        self.input_layer = nn.Sequential(nn.Conv1d(in_channels, 256, 3, padding=1),
                                         nn.ReLU())

        self.hidden_layer = nn.Sequential(nn.Conv1d(256, 256, 3, padding=1),
                                          nn.ReLU(),
                                          nn.Conv1d(256, 32, 3, padding=1),
                                          nn.ReLU(),
                                          nn.Flatten(),
                                          nn.Linear(32 * 34, 1024),
                                          nn.ReLU(),
                                          nn.Linear(1024, 256),
                                          nn.ReLU()
                                          )

        self.output_layer = nn.Sequential(nn.Linear(256, 9),
                                          nn.Softmax(dim=1))

    def forward(self, obs):
        return self.output_layer(self.hidden_layer(self.input_layer(obs)))

In [59]:
meld_model = MeldModel()

In [60]:
def train(model: nn.Module, train_dataset: Dataset, test_dataset: Dataset, loss_fn: nn.Module,
          optimizer: torch.optim.Optimizer,
          epoch: int = 1, train_batch_size=10, eval_interval=1000):
    for e in range(epoch):
        dataloader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True)
        for iter_time, (obs, target_action) in enumerate(dataloader):
            model.train()
            action = model(obs)
            loss = loss_fn(action, target_action)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if iter_time % eval_interval == 0:
                evaluate(model, train_dataset, test_dataset, loss_fn, f"# epoch_{e}_iter_{iter_time} #")

In [73]:
def evaluate(model: nn.Module, train_dataset: Dataset, test_dataset: Dataset, loss_fn: nn.Module, log_title="",
             test_batch_size=10):
    model.eval()
    train_dataloader = DataLoader(train_dataset, batch_size=test_batch_size, shuffle=True)
    test_dataloader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=True)
    train_total_loss = []
    test_total_loss = []

    train_accuracy_total = 0
    train_accuracy = 0
    train_response_accuracy_total = 0
    train_response_accuracy = 0

    test_accuracy_total = 0
    test_accuracy = 0
    test_response_accuracy_total = 0
    test_response_accuracy = 0

    for obs, target_action in train_dataloader:
        action = model(obs)
        loss = loss_fn(action, target_action)
        train_total_loss.append(loss.item())

        action_argmax = action.argmax(axis=1)
        target_action_argmax = target_action.argmax(axis=1)

        train_accuracy_total += len(action_argmax)
        # noinspection PyTypeChecker
        train_accuracy += torch.sum(action_argmax == target_action_argmax).item()

        response_mask = np.where(target_action_argmax != 8, True, False)
        train_response_accuracy_total += len(action_argmax[response_mask])
        # noinspection PyTypeChecker
        train_response_accuracy += torch.sum(action_argmax[response_mask] == target_action_argmax[response_mask]).item()

        print(action_argmax[response_mask], target_action_argmax[response_mask])

    for obs, target_action in test_dataloader:
        action = model(obs)
        loss = loss_fn(action, target_action)
        test_total_loss.append(loss.item())

        action_argmax = action.argmax(axis=1)
        target_action_argmax = target_action.argmax(axis=1)

        test_accuracy_total += len(action_argmax)
        # noinspection PyTypeChecker
        test_accuracy += torch.sum(action_argmax == target_action_argmax).item()

        response_mask = np.where(target_action_argmax != 8, True, False)
        test_response_accuracy_total += len(action_argmax[response_mask])
        # noinspection PyTypeChecker
        test_response_accuracy += torch.sum(action_argmax[response_mask] == target_action_argmax[response_mask]).item()

    print(f"{log_title}\n"
          f"Train Loss: {np.mean(train_total_loss)} | Test Loss: {np.mean(test_total_loss)}\n"
          f"Train Accuracy: {train_accuracy / train_accuracy_total} | Test Accuracy: {test_accuracy / test_accuracy_total}\n"
          f"Train response accuracy: {train_response_accuracy / train_response_accuracy_total} | Test response accuracy: {test_response_accuracy / test_response_accuracy_total}"
          )

In [70]:
from torch.optim import Adam

adam_optimizer = Adam(meld_model.parameters(), lr=0.0005)

for i in range(39):
    train(meld_model, MeldDataset(index=i), meld_dataset_valid, nn.CrossEntropyLoss(), adam_optimizer, epoch=1)

# epoch_0_iter_0 #
Train Loss: 2.180276962983212 | Test Loss: 2.1802498449563705
Train Accuracy: 0.8343687248995983 | Test Accuracy: 0.8351464874514334

# epoch_0_iter_1000 #
Train Loss: 1.5375960581074746 | Test Loss: 1.5368043082258374
Train Accuracy: 0.8343687248995983 | Test Accuracy: 0.8351464874514334

# epoch_0_iter_2000 #
Train Loss: 1.5375751412655365 | Test Loss: 1.5368043089268477
Train Accuracy: 0.8343687248995983 | Test Accuracy: 0.8351464874514334

# epoch_0_iter_3000 #
Train Loss: 1.5375821132511305 | Test Loss: 1.5368043076124536
Train Accuracy: 0.8343687248995983 | Test Accuracy: 0.8351464874514334

# epoch_0_iter_4000 #
Train Loss: 1.5375751413154044 | Test Loss: 1.5368043088392214
Train Accuracy: 0.8343687248995983 | Test Accuracy: 0.8351464874514334

# epoch_0_iter_5000 #
Train Loss: 1.5375821130267249 | Test Loss: 1.5368043081882834
Train Accuracy: 0.8343687248995983 | Test Accuracy: 0.8351464874514334

# epoch_0_iter_6000 #
Train Loss: 1.5375821132012626 | Test Lo

KeyboardInterrupt: 

In [74]:
evaluate(meld_model, MeldDataset(), meld_dataset_valid, nn.CrossEntropyLoss())

tensor([], dtype=torch.int64) tensor([], dtype=torch.int64)
tensor([8, 8]) tensor([3, 3])
tensor([8]) tensor([3])
tensor([8, 8]) tensor([3, 2])
tensor([8, 8]) tensor([3, 3])
tensor([8, 8]) tensor([3, 3])
tensor([], dtype=torch.int64) tensor([], dtype=torch.int64)
tensor([8]) tensor([3])
tensor([], dtype=torch.int64) tensor([], dtype=torch.int64)
tensor([8]) tensor([3])
tensor([8]) tensor([3])
tensor([8]) tensor([3])
tensor([], dtype=torch.int64) tensor([], dtype=torch.int64)
tensor([8]) tensor([4])
tensor([], dtype=torch.int64) tensor([], dtype=torch.int64)
tensor([8]) tensor([1])
tensor([8, 8]) tensor([3, 0])
tensor([8, 8, 8]) tensor([6, 6, 3])
tensor([8]) tensor([3])
tensor([8]) tensor([4])
tensor([8, 8]) tensor([1, 3])
tensor([8, 8, 8]) tensor([0, 3, 3])
tensor([8]) tensor([3])
tensor([8]) tensor([3])
tensor([8, 8, 8]) tensor([0, 2, 0])
tensor([8, 8]) tensor([3, 0])
tensor([8]) tensor([0])
tensor([8, 8, 8]) tensor([0, 3, 1])
tensor([8]) tensor([3])
tensor([], dtype=torch.int64) tens

KeyboardInterrupt: 

In [75]:
torch.save(meld_model.state_dict(), "weights/meld_model_fail(overfit).pt")