In [1]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim

In [2]:
df = pd.read_csv("/home/ryoyanagimoto/Hazumi1902/dumpfiles/1902F2001.csv")

In [3]:
data = df.drop('TS_ternary', axis=1).values
target = df['TS_ternary'].values

In [4]:
x_train, x_valid, y_train, y_valid = train_test_split(data, target, shuffle=True)

In [5]:
x_train = torch.from_numpy(x_train).to(torch.float)
y_train = torch.from_numpy(y_train).to(torch.long)
x_valid = torch.from_numpy(x_valid).to(torch.float)
y_valid = torch.from_numpy(y_valid).to(torch.long)

In [6]:
train_dataset = TensorDataset(x_train, y_train)
valid_dataset = TensorDataset(x_valid, y_valid)

# 動作確認
# indexを指定すればデータを取り出すことができます。
index = 0
print(train_dataset.__getitem__(index)[0])
print(train_dataset.__getitem__(index)[1])

tensor([7.7553e+05, 7.8958e+05, 7.9592e+05,  ..., 5.3906e-03, 1.5420e-02,
        2.0395e+04])
tensor(0)


In [7]:
batch_size = 16
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

# 動作確認
# こんな感じでバッチ単位で取り出す子ができます。
# イテレータに変換
batch_iterator = iter(train_dataloader)
# 1番目の要素を取り出す
inputs, labels = next(batch_iterator)
print(inputs.size())
print(labels.size())

torch.Size([16, 1482])
torch.Size([16])


In [8]:
class Net(nn.Module):    
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(1482, 50)
        self.fc2 = nn.Linear(50, 3)
    
    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.softmax(x, dim=1)
        return x

net = Net()
print(net)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01)


Net(
  (fc1): Linear(in_features=1482, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=4, bias=True)
)


In [9]:
# エポック数
num_epochs = 50

# 学習時と検証時で分けるためディクショナリを用意
dataloaders_dict = {
    'train': train_dataloader,
    'val': valid_dataloader
}

for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch+1, num_epochs))
    print('-------------')
    
    for phase in ['train', 'val']:
        
        if phase == 'train':
            # モデルを訓練モードに設定
            net.train()
        else:
            # モデルを推論モードに設定
            net.eval()
        
        # 損失和
        epoch_loss = 0.0
        # 正解数
        epoch_corrects = 0
        
        # DataLoaderからデータをバッチごとに取り出す
        for inputs, labels in dataloaders_dict[phase]:
            
            # optimizerの初期化
            optimizer.zero_grad()
            
            # 学習時のみ勾配を計算させる設定にする
            with torch.set_grad_enabled(phase == 'train'):
                outputs = net(inputs)
                
                # 損失を計算
                loss = criterion(outputs, labels)
                
                # ラベルを予測
                _, preds = torch.max(outputs, 1)
                
                # 訓練時はバックプロパゲーション
                if phase == 'train':
                    # 逆伝搬の計算
                    loss.backward()
                    # パラメータの更新
                    optimizer.step()
                
                # イテレーション結果の計算
                # lossの合計を更新
                # PyTorchの仕様上各バッチ内での平均のlossが計算される。
                # データ数を掛けることで平均から合計に変換をしている。
                # 損失和は「全データの損失/データ数」で計算されるため、
                # 平均のままだと損失和を求めることができないため。
                epoch_loss += loss.item() * inputs.size(0)
                
                # 正解数の合計を更新
                epoch_corrects += torch.sum(preds == labels.data)

        # epochごとのlossと正解率を表示
        epoch_loss = epoch_loss / len(dataloaders_dict[phase].dataset)
        epoch_acc = epoch_corrects.double() / len(dataloaders_dict[phase].dataset)

        print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

Epoch 1/50
-------------
train Loss: 1.3702 Acc: 0.3735
val Loss: 1.2437 Acc: 0.5000
Epoch 2/50
-------------
train Loss: 1.3220 Acc: 0.4217
val Loss: 1.2437 Acc: 0.5000
Epoch 3/50
-------------
train Loss: 1.3220 Acc: 0.4217
val Loss: 1.2437 Acc: 0.5000
Epoch 4/50
-------------
train Loss: 1.3220 Acc: 0.4217
val Loss: 1.2437 Acc: 0.5000
Epoch 5/50
-------------
train Loss: 1.3220 Acc: 0.4217
val Loss: 1.2437 Acc: 0.5000
Epoch 6/50
-------------
train Loss: 1.3220 Acc: 0.4217
val Loss: 1.2437 Acc: 0.5000
Epoch 7/50
-------------
train Loss: 1.3220 Acc: 0.4217
val Loss: 1.2437 Acc: 0.5000
Epoch 8/50
-------------
train Loss: 1.3220 Acc: 0.4217
val Loss: 1.2437 Acc: 0.5000
Epoch 9/50
-------------
train Loss: 1.3220 Acc: 0.4217
val Loss: 1.2437 Acc: 0.5000
Epoch 10/50
-------------
train Loss: 1.3220 Acc: 0.4217
val Loss: 1.2437 Acc: 0.5000
Epoch 11/50
-------------
train Loss: 1.3220 Acc: 0.4217
val Loss: 1.2437 Acc: 0.5000
Epoch 12/50
-------------
train Loss: 1.3220 Acc: 0.4217
val Lo

In [10]:
x = torch.randn([3, 1482])
preds = net(x)