In [1]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import numpy as np


class TextDataset(Dataset):
    def __init__(self, texts, labels):
        self.texts = texts
        self.labels = labels

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        return text, label

# trainデータローダーの作成
X_train = np.load("./matrix/x_train.npy")
Y_train = np.load("./matrix/y_train.npy")
X_train_tensor = torch.from_numpy(X_train)
Y_train_tensor =torch.from_numpy(Y_train)
datasets = TextDataset(X_train, Y_train)
train_dataloader = DataLoader(datasets, shuffle=True, batch_size=64)

class TextClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim1, hidden_dim2, output_dim):
        super(TextClassifier, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim1 , bias = True)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim1, hidden_dim2, bias = True)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_dim2, output_dim, bias = True)
        self.softmax = nn.Softmax(dim=1)
    
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.fc2(out)
        out = self.relu2(out)
        out = self.fc3(out)
        out = self.softmax(out)
        return out

In [3]:
from tqdm import tqdm

# GPUが利用可能か確認
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# ハイパーパラメータの設定
input_dim = 300  # 文章ベクトルの次元数
hidden_dim1 = 128  # 最初の隠れ層のノード数
hidden_dim2 = 64   # 二番目の隠れ層のノード数
output_dim = 4     # クラス数

model = TextClassifier(input_dim, hidden_dim1, hidden_dim2, output_dim).to(device)
epochs = 500
learning_rate = 1e-3
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for t in tqdm(range(epochs)):
    size = len(datasets)
    epoch_loss = 0
    epoch_correct = 0
    total_samples = 0

    for batch, (X, y) in enumerate(train_dataloader):
        # 予測と損失の計算
        X = X.float().to(device)
        pred = model(X)
        y = y.to(device)
        loss = loss_fn(pred, y.to(torch.float64))

        # バックプロパゲーション
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # 損失の累計
        epoch_loss += loss.item()

        # 正答数の累計
        predicted = pred.argmax(dim=1)
        correct = (predicted == y.argmax(dim=1)).sum().item()
        epoch_correct += correct
        total_samples += len(y)

    # エポックごとの平均損失と正答率を計算
    avg_loss = epoch_loss / len(train_dataloader)
    avg_accuracy = epoch_correct / total_samples
    if (t+1)%100 == 0:
        print(f"{t+1}epoch: loss = {avg_loss}")

 20%|██        | 100/500 [01:14<05:08,  1.30it/s]

100epoch: loss = 0.7867962757626458


 40%|████      | 200/500 [02:26<03:30,  1.42it/s]

200epoch: loss = 0.7839490504449373


 60%|██████    | 300/500 [03:39<02:18,  1.44it/s]

300epoch: loss = 0.7817140751125442


 80%|████████  | 400/500 [04:52<01:11,  1.40it/s]

400epoch: loss = 0.7804216727182974


100%|██████████| 500/500 [06:05<00:00,  1.37it/s]

500epoch: loss = 0.7782946278822024





In [5]:
# testデータローダーの作成
X_test = np.load("./matrix/x_test.npy")
Y_test = np.load("./matrix/y_test.npy")
X_test_tensor = torch.from_numpy(X_test)
Y_test_tensor =torch.from_numpy(Y_test)
test_datasets = TextDataset(X_test, Y_test)
test_dataloader = DataLoader(test_datasets, shuffle=True, batch_size=64)

size = len(datasets)
epoch_correct = 0
total_samples = 0

for batch, (X, y) in enumerate(test_dataloader):
    # 予測と損失の計算
    X = X.float().to(device)
    pred = model(X)
    y = y.to(device)

    # 正答数の累計
    predicted = pred.argmax(dim=1)
    correct = (predicted == y.argmax(dim=1)).sum().item()
    epoch_correct += correct
    total_samples += len(y)
accuracy = epoch_correct / total_samples
print(f"accuracy: {accuracy}")

accuracy: 0.9056179775280899
