In [3]:
import torch
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
from torch.utils.data import Dataset, DataLoader,random_split
from torch.optim.lr_scheduler import StepLR,ReduceLROnPlateau
import torchmetrics.functional as metrics
import os
import shutil
from torchvision import transforms
from PIL import Image

  Referenced from: <ABE0EE74-6D97-3B8C-B690-C44754774FBC> /Users/sml/anaconda3/envs/Torch_PY38/lib/python3.8/site-packages/torchvision/image.so
  warn(


In [4]:
# 학습 검증용 데이터
folder_path = '../train/train'
target_data = []
img_data = []
for encoding_label,label in enumerate(os.listdir(folder_path)):
    label_path = os.path.join(folder_path, label)
    if os.path.isdir(label_path):  # 디렉토리인 경우에만 진입
        for img in os.listdir(folder_path+'/'+label):
            image_path = os.path.join(folder_path,label,img)
            if os.path.isfile(image_path):  # 파일인 경우에만 진입
                with open(image_path, 'rb') as file:
                    image = Image.open(file)
                    width, height = image.size
                    if width == 48 and height == 48:
                        image_array = np.array(image)
                        target_data.append(encoding_label)
                        img_data.append(image_array)

FileNotFoundError: [Errno 2] No such file or directory: '../train/train'

In [None]:
# 테스트용 데이터
folder_path = '../test/test'
target_test = []
img_test = []
for encoding_label,label in enumerate(os.listdir(folder_path)):
    label_path = os.path.join(folder_path, label)
    if os.path.isdir(label_path):  # 디렉토리인 경우에만 진입
        for img in os.listdir(folder_path+'/'+label):
            image_path = os.path.join(folder_path,label,img)
            if os.path.isfile(image_path):  # 파일인 경우에만 진입
                with open(image_path, 'rb') as file:
                    image = Image.open(file)
                    width, height = image.size
                    if width == 48 and height == 48:
                        image_array = np.array(image)
                        target_test.append(encoding_label)
                        img_test.append(image_array)

In [None]:
# 이미지 데이터 정규화
x_data = np.array(img_data)/255.
x_data = x_data.reshape((-1,48*48))
print(x_data.shape)

(28709, 2304)


In [None]:
x_data_test = np.array(img_test)/255.
x_data_test = x_data_test.reshape((-1,48*48))
print(x_data_test.shape)

(7178, 2304)


In [None]:
target_data = pd.Series(target_data).replace({0:3, 5:4, 2:5, 3:2, 6:0, 4:6, 7:1})
target_data.value_counts()

3    7215
4    4965
5    4830
2    4097
0    3995
6    3171
1     436
Name: count, dtype: int64

In [None]:
target_test = pd.Series(target_test).replace({0:3, 5:4, 2:5, 3:2, 6:0, 4:6, 7:1})
target_test.value_counts()

3    1774
1    1247
6    1233
5    1024
4     958
2     831
0     111
Name: count, dtype: int64

In [None]:
# 데이터 클래스 생성
class DLdataset(Dataset):
    
    def __init__(self,x_data,y_data):
        super().__init__()
        self.feature = torch.FloatTensor(x_data)
        self.target = torch.LongTensor(y_data)
        
    def __len__(self):
        return self.target.shape[0]
    
    def __getitem__(self,idx):
        return self.feature[idx], self.target[idx]

In [None]:
# 데이터셋 생성
dataset = DLdataset(x_data,target_data)
dataset_test = DLdataset(x_data_test, target_test)

In [None]:
# 학습용, 검증용 데이터 준비
seed = torch.Generator().manual_seed(42)
trainDS, validDS = random_split(dataset, [0.8,0.2], generator=seed)


In [None]:
# 배치사이즈 32
BATCH = 32
trainDL = DataLoader(trainDS, batch_size=BATCH)
validDL = DataLoader(validDS, batch_size=BATCH)
testDL = DataLoader(dataset_test, batch_size=BATCH)


In [None]:
# 모델 클래스 정의
class Model(nn.Module):
    
    def __init__(self, IN, OUT):
        super().__init__()
        self.input = nn.Linear(IN, 128) 
        self.af = nn.ReLU()
        self.hidden = nn.Linear(128, 32)
        self.output = nn.Linear(32, OUT)
        
    def forward(self, x):
        y = self.input(x)
        y = self.af(y)
        y = self.hidden(y)
        y = self.af(y)
        y = self.output(y)
        
        return y

In [None]:
# 학습 준비

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

EPOCHS = 100

IN = dataset.feature.shape[1]
OUT = pd.Series(target_data).nunique()

# 모델 생성
model  = Model(IN,OUT)

# 손실함수
LF = nn.CrossEntropyLoss().to(DEVICE)

# 옵티마이저
OPTIMIZER = torch.optim.Adam(model.parameters())

# 스케줄러
SCHEDULER = ReduceLROnPlateau(OPTIMIZER, mode = 'min', patience = 3)

In [None]:
def training(dataLoader):
    
    model.train()
    train_report=[[], []]
    for (feature, target) in dataLoader:

        feature, target = feature.to(DEVICE), target.to(DEVICE)
        
        # 학습
        pre_target = model(feature)
        
        # 손실계산
        loss = LF(pre_target, target)
        train_report[0].append(loss)
  
        # 성능 평가
        acc = metrics.accuracy(pre_target.argmax(dim=1), target, task = 'multiclass',num_classes=OUT)
        train_report[1].append(acc)
        
        # W,b업데이트
        OPTIMIZER.zero_grad()
        loss.backward()
        OPTIMIZER.step()

    loss_score = sum(train_report[0])/len(train_report[0])
    acc_score = sum(train_report[1])/len(train_report[1])
    print(f'[Train loss] ==> {loss_score}    [Train Accuracy] ==> {acc_score}')
    return loss_score, acc_score

In [None]:
def testing(dataLoader):
    
    model.eval()
    
    with torch.no_grad():
        test_report=[[], []]
        for (feature, target)  in dataLoader:
            # 배치크기만큼의 학습 데이터 준비
            feature, target = feature.to(DEVICE), target.to(DEVICE)
            
            # 학습
            pre_target = model(feature)
            
            # 손실계산
            loss = LF(pre_target, target)
            test_report[0].append(loss)
      
            # 성능 평가
            acc = metrics.accuracy(pre_target.argmax(dim=1), target, task = 'multiclass',num_classes=OUT)
            test_report[1].append(acc)
    
    loss_score = sum(test_report[0])/len(test_report[0])
    acc_score = sum(test_report[1])/len(test_report[1])

    print(f'[Test loss] ==> {loss_score}    [Test Accuracy] ==> {acc_score}')
    return loss_score, acc_score

In [None]:
min_loss = 100.0  # 초기 최소 손실 설정
cnt = 0
for eps in range(EPOCHS):
    print(f'[{eps+1}/{EPOCHS}]')
    # 학습
    train_loss, train_acc = training(trainDL)

    # 검증
    val_loss, val_acc = testing(validDL)
    
    # 최소 손실 업데이트
    if val_loss < min_loss:
        min_loss = val_loss
        cnt = 0
        torch.save(model.state_dict(), "my_trained_model.pth")

    else:
        cnt+=1

    # 조기 종료 기능 => 조건 : val_loss가 지정된 횟수 이상 개선이 안되면 학습 종료
    if SCHEDULER.num_bad_epochs >= SCHEDULER.patience or cnt >= 5:
        print(f"Early stopping at epoch {eps}")
        break

[1/100]
[Train loss] ==> 1.742274522781372    [Train Accuracy] ==> 0.29475685954093933
[Test loss] ==> 1.7246193885803223    [Test Accuracy] ==> 0.3150908052921295
[2/100]
[Train loss] ==> 1.697144627571106    [Train Accuracy] ==> 0.3259778320789337
[Test loss] ==> 1.681361436843872    [Test Accuracy] ==> 0.3409455120563507
[3/100]
[Train loss] ==> 1.670581579208374    [Train Accuracy] ==> 0.33845463395118713
[Test loss] ==> 1.6806451082229614    [Test Accuracy] ==> 0.33721956610679626
[4/100]
[Train loss] ==> 1.6505004167556763    [Train Accuracy] ==> 0.35094591975212097
[Test loss] ==> 1.6668769121170044    [Test Accuracy] ==> 0.3443376123905182
[5/100]
[Train loss] ==> 1.6349468231201172    [Train Accuracy] ==> 0.3579532206058502
[Test loss] ==> 1.666142463684082    [Test Accuracy] ==> 0.34493857622146606
[6/100]
[Train loss] ==> 1.6225495338439941    [Train Accuracy] ==> 0.36012938618659973
[Test loss] ==> 1.673123836517334    [Test Accuracy] ==> 0.3357371985912323
[7/100]
[Train l

In [None]:
training(trainDL)

[Train loss] ==> 1.5807474851608276    [Train Accuracy] ==> 0.38126739859580994


(tensor(1.5807, grad_fn=<DivBackward0>), tensor(0.3813))

In [None]:
testing(validDL)

[Test loss] ==> 1.6190330982208252    [Test Accuracy] ==> 0.36282050609588623


(tensor(1.6190), tensor(0.3628))

In [None]:
# predicting(testDL, 5)

In [None]:
# 배치사이즈 32

BATCHLIST = [12, 32, 52, 102, 502]

for BATCH in BATCHLIST:
    trainDL = DataLoader(trainDS, batch_size=BATCH)
    validDL = DataLoader(validDS, batch_size=BATCH)
    testDL = DataLoader(dataset_test, batch_size=BATCH)


    min_loss = 100.0  # 초기 최소 손실 설정
    cnt = 0
    for eps in range(EPOCHS):
        print(f'[{eps+1}/{EPOCHS}]')
        # 학습
        train_loss, train_acc = training(trainDL)

        # 검증
        val_loss, val_acc = testing(validDL)
        
        # 최소 손실 업데이트
        if val_loss < min_loss:
            min_loss = val_loss
            cnt = 0
            torch.save(model.state_dict(), "my_trained_model.pth")

        else:
            cnt+=1

        # 조기 종료 기능 => 조건 : val_loss가 지정된 횟수 이상 개선이 안되면 학습 종료
        if SCHEDULER.num_bad_epochs >= SCHEDULER.patience or cnt >= 5:
            print(f"Early stopping at epoch {eps}")
            break

NameError: name 'DataLoader' is not defined