# Part 2. 딥러닝 몸풀기
## Chapter 3. 붓꽃 종 예측하기

### 3.1 데이터 살펴보기

In [1]:
from sklearn.datasets import load_iris

# Load the bundled Iris dataset. The returned object is used both through
# attributes (e.g. `.DESCR`) and like a dict (`.keys()`).
dataset = load_iris()
# Show which fields the loaded object exposes.
dataset.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [2]:
# Print the description text that ships with the dataset.
print(dataset.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

:Number of Instances: 150 (50 in each of three classes)
:Number of Attributes: 4 numeric, predictive attributes and the class
:Attribute Information:
    - sepal length in cm
    - sepal width in cm
    - petal length in cm
    - petal width in cm
    - class:
            - Iris-Setosa
            - Iris-Versicolour
            - Iris-Virginica

:Summary Statistics:

                Min  Max   Mean    SD   Class Correlation
sepal length:   4.3  7.9   5.84   0.83    0.7826
sepal width:    2.0  4.4   3.05   0.43   -0.4194
petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

:Missing Attribute Values: None
:Class Distribution: 33.3% for each of 3 classes.
:Creator: R.A. Fisher
:Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
:Date: July, 1988

The famous Iris database, first used by Sir R.A. Fisher. The dataset is taken
from Fisher's paper. ...

In [3]:
import pandas as pd

# Tabular view of the measurements: one column per feature name, followed by
# a trailing `target` column that holds each row's class label.
data = pd.DataFrame(
    dataset.data, columns=dataset.feature_names
).assign(target=dataset.target)

# Preview the first five rows.
print(data.head())

   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   target  
0       0  
1       0  
2       0  
3       0  
4       0  


### 3.2 파이토치 데이터 유틸 사용하기

In [4]:
import torch
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split

class IrisDataset(Dataset):
  """Train or test portion of the scikit-learn Iris data as a PyTorch Dataset.

  The full dataset is loaded with ``load_iris()`` and divided with
  ``train_test_split``. Indexing yields ``(features, label)`` pairs taken from
  ``self.data`` (float32 tensor) and ``self.target`` (int64 tensor).

  Args:
    train: If True, keep the training portion of the split; otherwise keep
      the held-out test portion.
    test_size: Forwarded to ``train_test_split``. Defaults to 0.3.
    random_state: Seed forwarded to ``train_test_split``. The train and test
      instances must be created with the same ``test_size`` and
      ``random_state`` so that the two portions do not overlap.
  """

  def __init__(self, train=True, test_size=0.3, random_state=827):
    dataset = load_iris()
    X_train, X_test, y_train, y_test = train_test_split(
        dataset.data, dataset.target,
        test_size=test_size, random_state=random_state,
    )
    features, labels = (X_train, y_train) if train else (X_test, y_test)

    # Explicit dtypes replace the legacy FloatTensor/LongTensor constructors;
    # the resulting tensors are float32 (inputs) and int64 (class indices).
    self.data = torch.tensor(features, dtype=torch.float32)
    self.target = torch.tensor(labels, dtype=torch.long)

  def __getitem__(self, i):
    """Return the ``(features, label)`` pair stored at index ``i``."""
    return self.data[i], self.target[i]

  def __len__(self):
    """Return the number of samples in this portion of the split."""
    return len(self.data)

### 3.3 모델 구현 및 학습

이어서 학습 코드를 살펴보겠습니다.

In [5]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

# Hyperparameters
batch_size = 64
learning_rate = 1e-3
epochs = 2000

# Model: 4 input features -> 128 -> 64 -> 3 output scores (one per class)
model = nn.Sequential(
    nn.Linear(4, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 3)
)

# Dataset and data loader for the training split (reshuffled every epoch)
train_dataset = IrisDataset(train=True)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Optimizer
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

# Loss
criterion = nn.CrossEntropyLoss()

for epoch in range(epochs):

  # Mini-batches. The loop variables are intentionally not called
  # `data`/`target`: `data` is the DataFrame built earlier in this notebook
  # and would otherwise be overwritten by the last batch tensor.
  for features, labels in train_dataloader:
    optimizer.zero_grad()

    logits = model(features)

    loss = criterion(logits, labels)
    loss.backward()
    optimizer.step()

  # Training log, once every 100 epochs. `loss` is the loss of the final
  # mini-batch of the epoch only (not an epoch average), so the printed
  # values fluctuate from one report to the next.
  if (epoch + 1) % 100 == 0:
    print("epoch:", epoch+1, "loss:", loss.item())

epoch: 100 loss: 0.1403876096010208
epoch: 200 loss: 0.07050369679927826
epoch: 300 loss: 0.029931476339697838
epoch: 400 loss: 0.05055944621562958
epoch: 500 loss: 0.018516073003411293
epoch: 600 loss: 0.11159246414899826
epoch: 700 loss: 0.06506195664405823
epoch: 800 loss: 0.01800067350268364
epoch: 900 loss: 0.07500328123569489
epoch: 1000 loss: 0.03301728144288063
epoch: 1100 loss: 0.018364904448390007
epoch: 1200 loss: 0.06862649321556091
epoch: 1300 loss: 0.07898513227701187
epoch: 1400 loss: 0.05183592811226845
epoch: 1500 loss: 0.05753491073846817
epoch: 1600 loss: 0.08133945614099503
epoch: 1700 loss: 0.05385161191225052
epoch: 1800 loss: 0.07937278598546982
epoch: 1900 loss: 0.034015215933322906
epoch: 2000 loss: 0.07192873954772949


### 3.4 모델 성능 평가

In [6]:
# Held-out test split; order does not matter for accuracy, so no shuffling.
test_dataset = IrisDataset(train=False)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

num_correct = 0

# Put the model in evaluation mode before inference. This has no effect on
# the Linear/ReLU model defined above, but it keeps the cell correct if
# dropout or batch-norm layers are added later.
model.eval()

# Gradients are not needed while measuring accuracy.
with torch.no_grad():
  # Loop variables are not named `data`/`target` so the notebook-level
  # `data` DataFrame is not overwritten.
  for features, labels in test_dataloader:
    output = model(features)
    # Predicted class = index of the largest output score in each row.
    pred = output.argmax(dim=1)

    num_correct += pred.eq(labels).sum().item()

# Accuracy as a percentage of all test samples.
print("Accuracy:", (num_correct/len(test_dataset))*100, "%")

Accuracy: 93.33333333333333 %
