In [2]:
# Load the tensors created earlier (train / test / validation splits of X and Y).
import torch

# The unpacking consumes map() left to right, so the files are read in the
# order they are listed here.
X_train, X_test, X_valid, Y_train, Y_test, Y_valid = map(torch.load, (
    '/content/drive/MyDrive/Colab Notebooks/chapter08/X_train.pt',
    '/content/drive/MyDrive/Colab Notebooks/chapter08/X_test.pt',
    '/content/drive/MyDrive/Colab Notebooks/chapter08/X_valid.pt',
    '/content/drive/MyDrive/Colab Notebooks/chapter08/Y_train.pt',
    '/content/drive/MyDrive/Colab Notebooks/chapter08/Y_test.pt',
    '/content/drive/MyDrive/Colab Notebooks/chapter08/Y_valid.pt',
))

In [3]:
# モデルの構築(nnはニューラルネットワーク)
from torch import nn

class SLNet(nn.Module):
    """Single-layer network: one fully connected layer and nothing else.

    Parameters
    ----------
    input_size : int
        Number of input features handed to the linear layer.
    output_size : int
        Number of output units produced by the linear layer.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        # The only layer of the model; no activation is applied after it.
        self.fc = nn.Linear(in_features=input_size, out_features=output_size)

    def forward(self, x):
        """Forward pass: return the output of the linear layer for ``x``."""
        return self.fc(x)

# Create an instance with 300 inputs and 4 outputs, then print its layout.
model = SLNet(input_size=300, output_size=4)
print(model)

SLNet(
  (fc): Linear(in_features=300, out_features=4, bias=True)
)


In [4]:
# データセットを作成する
import torch.utils.data as data

#データセットのクラスを作成
class NewsDataset(data.Dataset):
    """
    Dataset class for the news data.

    Attributes
    ----------------------------
    X : tensor
        Tensor holding the averaged word vectors.
    y : tensor
        Tensor holding the categories encoded as labels.
    phase : 'train' or 'val'
        Marks the data as training or validation data. The value is only
        stored; none of the methods of this class read it.
    """

    def __init__(self, X, y, phase='train'):
        self.X, self.y, self.phase = X, y, phase

    def __len__(self):
        """Return the total data size, taken from the label container."""
        n_samples = len(self.y)
        return n_samples

    def __getitem__(self, idx):
        """Return the ``(data, label)`` pair stored at position ``idx``."""
        features = self.X[idx]
        label = self.y[idx]
        return features, label

# Wrap every split in a NewsDataset.
train_dataset = NewsDataset(X=X_train, y=Y_train, phase='train')
valid_dataset = NewsDataset(X=X_valid, y=Y_valid, phase='val')
test_dataset = NewsDataset(X=X_test, y=Y_test, phase='val')

# Sanity check: print the data size and the label of one item from each split.
idx = 0
for split_dataset in (train_dataset, valid_dataset, test_dataset):
    first_features, first_label = split_dataset[idx]
    print(first_features.size())
    print(first_label)

torch.Size([300])
tensor(2)
torch.Size([300])
tensor(3)
torch.Size([300])
tensor(2)


In [5]:
# Create the DataLoaders (a DataLoader hands out a dataset one batch at a time).
batch_size = 1

# Training data: shuffled; with batch_size = 1 every batch holds a single example.
train_dataloader = data.DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True)
# Validation and test data: the whole split as one unshuffled batch.
valid_dataloader = data.DataLoader(
    dataset=valid_dataset, batch_size=len(valid_dataset), shuffle=False)
test_dataloader = data.DataLoader(
    dataset=test_dataset, batch_size=len(test_dataset), shuffle=False)

dataloaders_dict = dict(
    train=train_dataloader,
    val=valid_dataloader,
    test=test_dataloader,
)

# Sanity check: take the first batch from the training loader and inspect it.
train_loader = dataloaders_dict['train']
batch_iter = iter(train_loader)
inputs, labels = next(batch_iter)
print(inputs.shape)
print(labels)

torch.Size([1, 300])
tensor([2])


In [6]:
from tqdm import tqdm
# 学習

# Model definition
net = SLNet(input_size=300, output_size=4)
net.train()

# Loss function
criterion = nn.CrossEntropyLoss()

# Optimizer: stochastic gradient descent with learning rate 0.01 and momentum 0.9
optimizer = torch.optim.SGD(params=net.parameters(), lr=0.01, momentum=0.9)

# 学習用の関数を定義
def train_model(net, dataloaders_dict, criterion, optimizer, num_epochs):
    """Train ``net`` and evaluate it on the validation data once per epoch.

    Every epoch runs a 'train' phase followed by a 'val' phase and prints the
    sample-averaged loss and the accuracy of each phase.

    Parameters
    ----------
    net : torch.nn.Module
        Model to optimise; it is switched between train and eval mode for the
        respective phase and called as ``net(inputs)``.
    dataloaders_dict : dict
        Must provide loaders under the keys 'train' and 'val'. Each loader
        yields ``(inputs, labels)`` batches and exposes a sized ``dataset``
        attribute used to normalise the metrics.
    criterion : callable
        Loss, called as ``criterion(outputs, labels)``.
        NOTE(review): the batch loss is multiplied by the batch size before
        summing, which assumes a mean-reduced loss -- confirm when swapping
        the criterion.
    optimizer : torch.optim.Optimizer
        Optimizer stepped once per training batch.
    num_epochs : int
        Number of epochs to run.

    Returns
    -------
    None
        The metrics are printed, not returned.

    Notes
    -----
    A phase that produces no batches reports 0.0000 for both metrics, and a
    phase whose dataset is empty reports ``nan``; neither case raises.
    """
    for epoch in range(num_epochs):
        print('Epoch {} / {}'.format(epoch + 1, num_epochs))
        print('--------------------------------------------')

        # One training pass followed by one validation pass per epoch.
        for phase in ['train', 'val']:
            is_training = phase == 'train'
            if is_training:
                net.train()  # training mode
            else:
                net.eval()   # evaluation mode

            dataloader = dataloaders_dict[phase]
            epoch_loss = 0.0    # sum of per-sample losses in this phase
            epoch_corrects = 0  # number of correct predictions (plain int)

            for inputs, labels in tqdm(dataloader):
                optimizer.zero_grad()  # clear gradients left from the previous step

                # Gradients are only tracked while training.
                with torch.set_grad_enabled(is_training):
                    outputs = net(inputs)
                    loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)  # index of the largest output

                    if is_training:
                        loss.backward()   # back-propagation
                        optimizer.step()  # parameter update

                # Accumulate plain Python numbers so the totals stay valid
                # even when the loop body never runs.
                epoch_loss += loss.item() * inputs.size(0)
                epoch_corrects += (preds == labels).sum().item()

            num_samples = len(dataloader.dataset)
            if num_samples > 0:
                epoch_loss = epoch_loss / num_samples
                epoch_acc = epoch_corrects / num_samples
            else:
                # Nothing to average over: report nan instead of dividing by zero.
                epoch_loss = epoch_acc = float('nan')

            print('{} Loss: {:.4f}, Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))


# Run the training.
num_epochs = 10
train_model(net, dataloaders_dict, criterion, optimizer, num_epochs)

Epoch 1 / 10
--------------------------------------------


100%|██████████| 10672/10672 [00:10<00:00, 991.85it/s] 


train Loss: 0.4149, Acc: 0.8586


100%|██████████| 1/1 [00:00<00:00, 55.56it/s]


val Loss: 0.3056, Acc: 0.9040
Epoch 2 / 10
--------------------------------------------


100%|██████████| 10672/10672 [00:09<00:00, 1099.53it/s]


train Loss: 0.3099, Acc: 0.8919


100%|██████████| 1/1 [00:00<00:00, 73.04it/s]


val Loss: 0.2796, Acc: 0.9115
Epoch 3 / 10
--------------------------------------------


100%|██████████| 10672/10672 [00:08<00:00, 1186.78it/s]


train Loss: 0.2904, Acc: 0.9002


100%|██████████| 1/1 [00:00<00:00, 48.88it/s]


val Loss: 0.2715, Acc: 0.9108
Epoch 4 / 10
--------------------------------------------


100%|██████████| 10672/10672 [00:08<00:00, 1223.89it/s]


train Loss: 0.2788, Acc: 0.9040


100%|██████████| 1/1 [00:00<00:00, 48.89it/s]


val Loss: 0.2714, Acc: 0.9138
Epoch 5 / 10
--------------------------------------------


100%|██████████| 10672/10672 [00:09<00:00, 1130.83it/s]


train Loss: 0.2692, Acc: 0.9057


100%|██████████| 1/1 [00:00<00:00, 67.37it/s]


val Loss: 0.2689, Acc: 0.9123
Epoch 6 / 10
--------------------------------------------


100%|██████████| 10672/10672 [00:08<00:00, 1275.26it/s]


train Loss: 0.2651, Acc: 0.9094


100%|██████████| 1/1 [00:00<00:00, 44.99it/s]


val Loss: 0.2778, Acc: 0.9055
Epoch 7 / 10
--------------------------------------------


100%|██████████| 10672/10672 [00:09<00:00, 1149.31it/s]


train Loss: 0.2609, Acc: 0.9098


100%|██████████| 1/1 [00:00<00:00, 64.11it/s]


val Loss: 0.2715, Acc: 0.9085
Epoch 8 / 10
--------------------------------------------


100%|██████████| 10672/10672 [00:09<00:00, 1146.75it/s]


train Loss: 0.2577, Acc: 0.9132


100%|██████████| 1/1 [00:00<00:00, 44.50it/s]


val Loss: 0.2741, Acc: 0.9123
Epoch 9 / 10
--------------------------------------------


100%|██████████| 10672/10672 [00:08<00:00, 1284.77it/s]


train Loss: 0.2557, Acc: 0.9133


100%|██████████| 1/1 [00:00<00:00, 64.22it/s]


val Loss: 0.2762, Acc: 0.9040
Epoch 10 / 10
--------------------------------------------


100%|██████████| 10672/10672 [00:09<00:00, 1137.14it/s]


train Loss: 0.2532, Acc: 0.9127


100%|██████████| 1/1 [00:00<00:00, 69.68it/s]

val Loss: 0.2780, Acc: 0.9153



