In [234]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader 
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

irisDF = pd.read_csv("./iris.csv")
irisNP = np.loadtxt("./iris.csv",dtype=float, delimiter=',', usecols=[0, 1, 2, 3])

In [235]:
class DLDataset(TensorDataset):
    def __init__(self, x_data, y_data):
        super().__init__()
        x_data = x_data.values if isinstance(x_data, pd.DataFrame) else x_data
        y_data = y_data.values if isinstance(y_data, pd.DataFrame) else y_data
        
        self.feature=torch.FloatTensor(x_data)
        self.target=torch.LongTensor(y_data)
    def __len__(self):
        return self.target.shape[0]
    def __getitem__(self, index):
        return self.feature[index], self.target[index]

In [236]:
feature = irisDF[irisDF.columns[:-1]]
target = irisDF[irisDF.columns[-1]]

from sklearn.preprocessing import LabelEncoder
target = LabelEncoder().fit_transform(target)
target=target.reshape(-1,1)
my_dataset=DLDataset(feature, target)

In [237]:
from torch.utils.data import random_split

seed=torch.Generator().manual_seed(42)
trainDS, validDS, testDS = random_split(my_dataset, [0.7,0.1,0.2], generator=seed)

In [238]:
batchsize=5
trainDF=DataLoader(trainDS, batch_size=batchsize, shuffle=True)
validDF=DataLoader(validDS, batch_size=batchsize, shuffle=True)
testDF=DataLoader(testDS, batch_size=batchsize, shuffle=True)

In [239]:
class model(nn.Module) :
    def __init__(self):
        super(model, self).__init__()
        self.fc1 = nn.Linear(4, 3)
    def forward(self, x):
        x = self.fc1(x)
        return x
    
model=model()

In [240]:
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
EPOCHS = 500
LOSS_FN=nn.CrossEntropyLoss().to(DEVICE)

In [241]:
optimizer=optim.Adam(model.parameters(), lr=0.001)
import torchmetrics.functional as metrics

def training():
    loss_list=[]
    model.train()
    for cnt, (feature, target) in enumerate(trainDF):
        feature, target = feature.to(DEVICE), target.to(DEVICE)
        pre_target = model(feature.float())
        loss=LOSS_FN(pre_target, target.squeeze())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_list.append(loss.item())
        #print(metrics.accuracy( pre_target, target.squeeze(), task="multiclass", num_classes=3 ))
    return sum(loss_list)/len(loss_list)


In [242]:
training()

0.8207704084260123

In [243]:
def testing():
    loss_list=[]
    with torch.no_grad():
        model.eval()
        val_loss=[]
        for cnt, (feature, target) in enumerate(validDF):
            feature, target = feature.to(DEVICE), target.to(DEVICE)
            pre_target = model(feature.float())

            loss=LOSS_FN(pre_target, target.squeeze())
            #print(metrics.accuracy( pre_target, target.squeeze(), task="multiclass", num_classes=3 ))
            loss_list.append(loss.item())
    return sum(loss_list)/len(loss_list)

In [244]:
testing()

0.8215197324752808

In [245]:
#metrics.accuracy( pre_target, target.squeeze(), task="multiclass", num_classes=3\
def accuracy():
    with torch.no_grad():
            model.eval()
            alist=[]
            for cnt, (feature, target) in enumerate(testDF):
                feature, target = feature.to(DEVICE), target.to(DEVICE)
                pre_target = model(feature.float())
                alist.append(metrics.accuracy( pre_target, target.squeeze(), task="multiclass", num_classes=3))
    return sum(alist)/len(alist)

In [246]:
from torch.optim.lr_scheduler import ReduceLROnPlateau
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=10, verbose=True)


df_list=[]
for eps in range(EPOCHS): # 50

    train_loss=training()
    valid_loss=testing()
    accuracy1 = accuracy()
    if eps %10 ==0 :
        print(f'{eps}번째 반복 ==> train_loss : {train_loss:.2f}, valid_loss : {valid_loss:.2f}, 정확도 : {accuracy1:.2f}')
        df_list.append([eps, train_loss, valid_loss, accuracy1])
        scheduler.step(valid_loss)
        if scheduler.num_bad_epochs >= scheduler.patience:
            print(f"Early stopping at epoch {eps}")
            break



0번째 반복 ==> train_loss : 0.80, valid_loss : 0.81, 정확도 : 0.47
10번째 반복 ==> train_loss : 0.67, valid_loss : 0.68, 정확도 : 0.48


20번째 반복 ==> train_loss : 0.59, valid_loss : 0.60, 정확도 : 0.48
30번째 반복 ==> train_loss : 0.54, valid_loss : 0.54, 정확도 : 0.53
40번째 반복 ==> train_loss : 0.50, valid_loss : 0.50, 정확도 : 0.55
50번째 반복 ==> train_loss : 0.47, valid_loss : 0.47, 정확도 : 0.63
60번째 반복 ==> train_loss : 0.45, valid_loss : 0.45, 정확도 : 0.73
70번째 반복 ==> train_loss : 0.43, valid_loss : 0.43, 정확도 : 0.72
80번째 반복 ==> train_loss : 0.41, valid_loss : 0.41, 정확도 : 0.75
90번째 반복 ==> train_loss : 0.40, valid_loss : 0.40, 정확도 : 0.76
100번째 반복 ==> train_loss : 0.39, valid_loss : 0.38, 정확도 : 0.82
110번째 반복 ==> train_loss : 0.38, valid_loss : 0.37, 정확도 : 0.82
120번째 반복 ==> train_loss : 0.37, valid_loss : 0.36, 정확도 : 0.82
130번째 반복 ==> train_loss : 0.36, valid_loss : 0.35, 정확도 : 0.90
140번째 반복 ==> train_loss : 0.35, valid_loss : 0.34, 정확도 : 0.90
150번째 반복 ==> train_loss : 0.34, valid_loss : 0.33, 정확도 : 0.93
160번째 반복 ==> train_loss : 0.33, valid_loss : 0.32, 정확도 : 0.93
170번째 반복 ==> train_loss : 0.32, valid_loss : 0.31, 정확도 : 0.93
180번째 반복 ==> tra

In [247]:
pd.DataFrame(df_list, columns=['epoch', 'train_loss', 'valid_loss', 'accuracy'])

Unnamed: 0,epoch,train_loss,valid_loss,accuracy
0,0,0.801953,0.809118,tensor(0.4750)
1,10,0.672806,0.680043,tensor(0.4833)
2,20,0.592195,0.596992,tensor(0.4750)
3,30,0.540072,0.542747,tensor(0.5333)
4,40,0.500255,0.500795,tensor(0.5500)
5,50,0.47149,0.471149,tensor(0.6250)
6,60,0.449247,0.447107,tensor(0.7250)
7,70,0.430289,0.42616,tensor(0.7167)
8,80,0.41419,0.409646,tensor(0.7500)
9,90,0.40032,0.396723,tensor(0.7583)
