<a href="https://colab.research.google.com/github/kgpark88/nlp/blob/main/pytorch/SimpleNeuralNetwork.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset 
import torch.nn.functional as F

In [2]:
iris = load_iris()
X = iris['data']
y = iris['target']
names = iris['target_names']
feature_names = iris['feature_names']

In [3]:
import pandas as pd
# data, feature name 등 다양한 정보를 포함하고 있다.
df = pd.DataFrame(iris.data) 
df.columns = iris.feature_names 
df['label'] = iris.target 

df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),label
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [4]:
# 데이터를 numpy 배열로 만들기
X = df.drop('label', axis=1).to_numpy() 
Y = df['label'].to_numpy().reshape((-1,1))

# 데이터 스케일링
scaler = StandardScaler() 
scaler.fit(X) 
X = scaler.transform(X)

scaler.fit(Y)
Y = scaler.transform(Y)

In [5]:
# 텐서 데이터로 변환하는 클래스
class TensorData(Dataset):
    def __init__(self, x_data, y_data):
        self.x_data = torch.FloatTensor(x_data)
        self.y_data = torch.LongTensor(y_data)
        self.len = self.y_data.shape[0]

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index] 

    def __len__(self):
        return self.len

In [6]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data set into training and testing
X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=2)

# 학습 데이터, 시험 데이터 배치 형태로 구축하기
trainsets = TensorData(X_train, Y_train)
trainloader = torch.utils.data.DataLoader(trainsets, batch_size=16, shuffle=True)

testsets = TensorData(X_test, Y_test)
testloader = torch.utils.data.DataLoader(testsets, batch_size=16, shuffle=False)

In [7]:
testsets[0]

(tensor([-1.5065,  0.7888, -1.3402, -1.1838]), tensor(0))

In [8]:
class SimpleClassifier(nn.Module):
    def __init__(self):
        super(SimpleClassifier, self).__init__()
        self.fc1 = nn.Linear(4, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 3)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, X):
        X = F.relu(self.fc1(X))
        X = self.fc2(X)
        X = self.fc3(X)
        X = self.softmax(X)

        return X

In [9]:
model = SimpleClassifier()
criterion = nn.CrossEntropyLoss() # cross entropy loss
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-7)

In [10]:
num_epoch = 10
n = len(trainloader)

for epoch in range(num_epoch):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0): # 무작위로 섞인 16개 데이터가 있는 배치가 하나 씩 들어온다.

        inputs, values = data # data에는 X, Y가 들어있다.
    
        optimizer.zero_grad() # 최적화 초기화
        outputs = model(inputs) # 모델에 입력값 대입 후 예측값 산출
        loss = criterion(outputs, values) # 손실 함수 계산
        loss.backward() # 손실 함수 기준으로 역전파 설정 
        optimizer.step() # 역전파를 진행하고 가중치 업데이트
        
print('Finished Training')

Finished Training


In [11]:
correct = 0
with torch.no_grad():
  for i, data in enumerate(testsets):
    label = data[1].numpy()
    predicted = model.forward(data[0].reshape(1,-1))
    predicted_label = predicted.argmax().item()
    
    if label == predicted_label:
      correct += 1

print(correct)
print(correct / len(testsets))

28
0.9333333333333333
