In [None]:
!pip install torch

In [3]:
from google.colab import files

# Open Colab's upload dialog; the result is indexed by uploaded file name.
uploaded = files.upload()

# Echo every received file with the length of its content as a sanity check.
for fn, content in uploaded.items():
    print('User uploaded file "{name}" with length {length} bytes'.format(
        name=fn, length=len(content)))

Saving X_test.pt to X_test.pt
Saving X_train.pt to X_train.pt
Saving X_valid.pt to X_valid.pt
Saving y_test.pt to y_test.pt
Saving y_train.pt to y_train.pt
Saving y_valid.pt to y_valid.pt
User uploaded file "X_test.pt" with length 1600727 bytes
User uploaded file "X_train.pt" with length 12806364 bytes
User uploaded file "X_valid.pt" with length 1600732 bytes
User uploaded file "y_test.pt" with length 11799 bytes
User uploaded file "y_train.pt" with length 86492 bytes
User uploaded file "y_valid.pt" with length 11804 bytes


In [6]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): the cells visible here read the tensors from the working
# directory rather than from the mount -- confirm whether later cells need it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
from torch import nn
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from matplotlib import pyplot as plt
import numpy as np
import time


class SingleLayerPerceptronNetwork(nn.Module):
    """A single bias-free linear layer mapping input features to output scores.

    The weight matrix is re-initialised from a normal distribution with
    mean 0 and standard deviation 1. ``forward`` returns the raw linear
    outputs; no softmax is applied inside the module.
    """

    def __init__(self, input_size, output_size):
        """Build the layer.

        Args:
            input_size: Number of features in each input row.
            output_size: Number of scores produced for each row.
        """
        super().__init__()
        layer = nn.Linear(input_size, output_size, bias=False)
        nn.init.normal_(layer.weight, mean=0.0, std=1.0)
        self.fc = layer

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.fc(x)


# Sanity-check an untrained model: softmax over its outputs for the first
# training row, then for the first four rows.
X_train = torch.load("X_train.pt")
model = SingleLayerPerceptronNetwork(300, 4)
y_hat_1 = torch.softmax(model(X_train[:1]), dim=-1)
# Invoke the module itself rather than .forward() so that any registered
# hooks run and both calls follow the same code path.
Y_hat = torch.softmax(model(X_train[:4]), dim=-1)


class NewsDataset(Dataset):
    """Pairs a feature container with a label container for ``DataLoader``.

    Item ``idx`` is the two-element list ``[x[idx], y[idx]]``; the dataset
    length is the length of ``y``.
    """

    def __init__(self, x, y):
        """Store the features ``x`` and labels ``y`` without copying them."""
        self.x, self.y = x, y

    def __len__(self):
        """Return the number of labels."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return ``[features, label]`` for position ``idx``."""
        features = self.x[idx]
        label = self.y[idx]
        return [features, label]


def calc_acc(model, loader, device=None):
    """Return the classification accuracy of ``model`` over ``loader``.

    The model is switched to evaluation mode (and left in that mode) and the
    forward passes run with gradient tracking disabled.

    Args:
        model: Callable module mapping a batch of inputs to per-class scores.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        device: Optional device to move every batch to before the forward
            pass. With the default ``None`` the batches are used where they
            are, which is the original behaviour.

    Returns:
        The fraction of samples whose highest-scoring class equals the label.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.eval()
    total = 0
    correct = 0
    with torch.no_grad():
        for inputs, labels in loader:
            if device is not None:
                # Keep inputs and labels on the same device as the model so
                # the forward pass and the comparison below do not fail.
                inputs = inputs.to(device)
                labels = labels.to(device)
            outputs = model(inputs)
            pred = torch.argmax(outputs, dim=-1)
            total += len(inputs)
            correct += (pred == labels).sum().item()

    return correct / total


def calc_loss_acc(model, criterion, loader, device):  # device was added as an argument
    """Evaluate ``model`` on ``loader`` and return ``(loss, accuracy)``.

    The model is put into evaluation mode (and left in it) and the forward
    passes run without gradient tracking.

    Args:
        model: Callable module mapping a batch of inputs to per-class scores.
        criterion: Callable ``criterion(outputs, labels)`` whose result
            supports ``.item()``.
        loader: Sized iterable yielding ``(inputs, labels)`` batches.
        device: Device every batch is moved to before the forward pass.

    Returns:
        A tuple ``(loss, accuracy)``. ``loss`` is the sum of the per-batch
        criterion values divided by the number of batches (it is not weighted
        by batch size); ``accuracy`` is the fraction of samples whose
        highest-scoring class equals the label.
    """
    model.eval()
    running_loss = 0.0
    n_seen = 0
    n_hit = 0
    with torch.no_grad():
        for batch_x, batch_y in loader:
            # Move the batch to the requested device.
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            logits = model(batch_x)
            running_loss += criterion(logits, batch_y).item()
            n_seen += len(batch_x)
            n_hit += (logits.argmax(dim=-1) == batch_y).sum().item()

    mean_batch_loss = running_loss / len(loader)
    accuracy = n_hit / n_seen
    return mean_batch_loss, accuracy


# Load the saved feature and label tensors for every split.
X_train, y_train = torch.load("X_train.pt"), torch.load("y_train.pt")
X_valid, y_valid = torch.load("X_valid.pt"), torch.load("y_valid.pt")
X_test, y_test = torch.load("X_test.pt"), torch.load("y_test.pt")

# Wrap each split so a DataLoader can index [features, label] pairs.
dataset_train, dataset_valid, dataset_test = (
    NewsDataset(features, targets)
    for features, targets in (
        (X_train, y_train), (X_valid, y_valid), (X_test, y_test))
)

# Training data is shuffled and served one sample at a time; validation and
# test data are each served as one full-size batch in their stored order.
dataloader_train = DataLoader(dataset_train, shuffle=True, batch_size=1)
dataloader_valid, dataloader_test = (
    DataLoader(split, batch_size=len(split), shuffle=False)
    for split in (dataset_valid, dataset_test)
)

# Cross-entropy loss and its gradient for the first training sample ...
criterion = nn.CrossEntropyLoss()
l_1 = criterion(model(X_train[:1]), y_train[:1])
model.zero_grad()
l_1.backward()

# ... and for the first four training samples (gradients are cleared first).
l = criterion(model(X_train[:4]), y_train[:4])
model.zero_grad()
l.backward()


def train_model(dataset_train, dataset_valid, batch_size, model, criterion, num_epochs, device=None, optimizer=None):
    """Train ``model`` for ``num_epochs`` epochs, logging metrics each epoch.

    After every epoch the function evaluates the model on the training and
    validation data with ``calc_loss_acc``, writes a checkpoint named
    ``checkpoint{epoch + 1}.pt`` to the working directory and prints one
    summary line that includes the wall-clock time of the training pass.

    Args:
        dataset_train: Dataset used for the (shuffled) training batches.
        dataset_valid: Dataset evaluated as one full-size batch.
        batch_size: Number of samples per training batch.
        model: Module to train; it is moved to ``device`` in place.
        criterion: Loss callable ``criterion(outputs, labels)``.
        num_epochs: Number of passes over ``dataset_train``.
        device: Target passed to ``.to(...)`` for the model and every batch.
        optimizer: Optimizer that updates ``model``'s parameters. When omitted,
            the module-level ``optimizer`` variable is used, which is what
            earlier versions of this function always did.

    Returns:
        A tuple ``(log_train, log_valid)``; each is a list with one
        ``[loss, accuracy]`` entry per epoch.

    Raises:
        NameError: If ``optimizer`` is omitted and no module-level
            ``optimizer`` exists.
    """
    if optimizer is None:
        # Backward compatibility: existing call sites depend on a global
        # ``optimizer`` instead of passing one in.
        try:
            optimizer = globals()["optimizer"]
        except KeyError:
            raise NameError(
                "train_model needs an optimizer: pass optimizer=... or define "
                "a module-level 'optimizer'") from None

    model.to(device)  # added

    dataloader_train = DataLoader(
        dataset_train, batch_size=batch_size, shuffle=True)
    dataloader_valid = DataLoader(
        dataset_valid, batch_size=len(dataset_valid), shuffle=False)

    log_train = []
    log_valid = []

    for epoch in range(num_epochs):
        start_time = time.time()

        model.train()
        for inputs, labels in dataloader_train:
            optimizer.zero_grad()

            inputs = inputs.to(device)  # move to device
            labels = labels.to(device)  # move to device
            # Call the module rather than .forward() so hooks are honoured,
            # matching how calc_loss_acc invokes the model.
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        # Only the optimisation pass is timed; evaluation below is excluded.
        end_time = time.time()

        loss_train, acc_train = calc_loss_acc(
            model, criterion, dataloader_train, device)
        loss_valid, acc_valid = calc_loss_acc(
            model, criterion, dataloader_valid, device)
        log_train.append([loss_train, acc_train])
        log_valid.append([loss_valid, acc_valid])

        torch.save({'epoch': epoch, 'model_state_dict': model.state_dict(
        ), 'optimizer_state_dict': optimizer.state_dict()}, f'checkpoint{epoch + 1}.pt')

        print(f'epoch: {epoch + 1}, loss_train: {loss_train:.4f}, accuracy_train: {acc_train:.4f}, loss_valid: {loss_valid:.4f}, accuracy_valid: {acc_valid:.4f}, train_time: {(end_time - start_time):.4f}sec')

    # Hand the collected metrics back instead of discarding them.
    return log_train, log_valid


model = SingleLayerPerceptronNetwork(300, 4)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-1)
num_epochs = 1
# Select the device: use the GPU when one is available, otherwise fall back
# to the CPU so the cell still runs on a machine without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Train one epoch for each batch size 1, 2, 4, ..., 2048 and print the timing.
# NOTE: the same model and optimizer are reused for every batch size, so each
# run continues from the weights left by the previous one, and every run
# overwrites checkpoint1.pt.
for batch_size in [2 ** i for i in range(12)]:
    print(f"batch_size : {batch_size}")
    train_model(dataset_train, dataset_valid,
                batch_size, model, criterion, num_epochs, device)

batch_size : 1
epoch: 1, loss_train: 0.3309, accuracy_train: 0.8870, loss_valid: 0.3785, accuracy_valid: 0.8642, train_time: 10.2675sec
batch_size : 2
epoch: 1, loss_train: 0.2964, accuracy_train: 0.9000, loss_valid: 0.3470, accuracy_valid: 0.8800, train_time: 4.9481sec
batch_size : 4
epoch: 1, loss_train: 0.2870, accuracy_train: 0.9034, loss_valid: 0.3416, accuracy_valid: 0.8807, train_time: 2.4265sec
batch_size : 8
epoch: 1, loss_train: 0.2832, accuracy_train: 0.9034, loss_valid: 0.3410, accuracy_valid: 0.8830, train_time: 1.2610sec
batch_size : 16
epoch: 1, loss_train: 0.2811, accuracy_train: 0.9039, loss_valid: 0.3382, accuracy_valid: 0.8815, train_time: 0.6547sec
batch_size : 32
epoch: 1, loss_train: 0.2800, accuracy_train: 0.9047, loss_valid: 0.3377, accuracy_valid: 0.8822, train_time: 0.3897sec
batch_size : 64
epoch: 1, loss_train: 0.2796, accuracy_train: 0.9048, loss_valid: 0.3374, accuracy_valid: 0.8822, train_time: 0.2733sec
batch_size : 128
epoch: 1, loss_train: 0.2801, accu