<a href="https://colab.research.google.com/github/AtaruOhto/pytorch_learning/blob/master/002_pytorch_wine_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
"""
PyTorch でワインの種類を分類
"""

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from torch.autograd import Variable
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_wine

# sklearnからワインのデータをロードする
wine = load_wine()
X = wine.data
Y = wine.target

feature_num = X.shape[1]
classification_num = len(np.unique(Y))

# データ情報を表示
pd.DataFrame(wine.data, columns=wine.feature_names)

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.20,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.50,16.8,113.0,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,13.71,5.65,2.45,20.5,95.0,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740.0
174,13.40,3.91,2.48,23.0,102.0,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750.0
175,13.27,4.28,2.26,20.0,120.0,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835.0
176,13.17,2.59,2.37,20.0,120.0,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840.0


In [27]:
# 25%を検証用データとして用いる
train_X, test_X, train_Y, test_Y = train_test_split(X, Y, test_size=0.25)

train_X = torch.from_numpy(train_X).float()
train_Y = torch.from_numpy(train_Y).long()
test_X = torch.from_numpy(test_X).float()
test_Y = torch.from_numpy(test_Y).long()

# 訓練データをTensorDatasetで一組にする。
train = TensorDataset(train_X, train_Y)

# ミニバッチ学習させるために、DataLoader形式に変換する。
train_loader = DataLoader(train, batch_size=15, shuffle=True)

# ニューラルネットワークの定義
class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()
    unit_num = 128
    self.fc1 = nn.Linear(feature_num, unit_num)
    self.fc2 = nn.Linear(unit_num, unit_num)
    self.fc3 = nn.Linear(unit_num, unit_num)
    self.fc4 = nn.Linear(unit_num, unit_num)
    self.fc5 = nn.Linear(unit_num, unit_num)
    self.fc6 = nn.Linear(unit_num, classification_num)
    
  def forward(self, x):    
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = F.relu(self.fc3(x))
    x = F.relu(self.fc4(x))
    x = F.relu(self.fc5(x))
    x = self.fc6(x)
    return F.log_softmax(x)

model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

for epoch in range(1500):
  total_loss = 0

  for train_x, train_y in train_loader:
    train_x, train_y = Variable(train_x), Variable(train_y)
    optimizer.zero_grad()
    output = model(train_x)
    loss = criterion(output, train_y)
    loss.backward()
    optimizer.step()
    total_loss += loss.data.item()

  if(epoch + 1) % 100 == 0:
    print(f" {epoch + 1}回目の誤差: ", total_loss / epoch)

test_x, test_y = Variable(test_X), Variable(test_Y)
result = torch.max(model(test_x).data, 1)[1]


accuracy_score = sum(test_y.data.numpy() == result.numpy()) / len(test_y.data.numpy())
print("正解率: ", accuracy_score)



 100回目の誤差:  0.0619730521934201
 200回目の誤差:  0.028378667244360077
 300回目の誤差:  0.016684813244286986
 400回目の誤差:  0.011972468598444658
 500回目の誤差:  0.011224347986056952
 600回目の誤差:  0.008968536563230078
 700回目の誤差:  0.006783580106385958
 800回目の誤差:  0.0058534816373722424
 900回目の誤差:  0.005532102205596856
 1000回目の誤差:  0.004385185671282244
 1100回目の誤差:  0.004699669012272759
 1200回目の誤差:  0.004127086401979957
 1300回目の誤差:  0.0037213277367099235
 1400回目の誤差:  0.00301238208519211
 1500回目の誤差:  0.003506938742350705
正解率:  0.6
