In [29]:
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import transforms
import numpy as np
from matplotlib import pyplot as plt

import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim

from glob import glob
from PIL import Image
from sklearn.model_selection import train_test_split

In [2]:
import timm

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Hyperparameters read by the data-loader, optimizer and training cells below.
BATCH_SIZE = 16  # mini-batch size used for the train and hold-out loaders
L_RATE = 1e-3    # learning rate passed to the Adam optimizer
EPOCH = 3        # number of passes over the training split

# 1) 데이터셋 정의

In [42]:
# Maps the name of a class directory to the integer label fed to the loss.
class_encoder = {
    'dog': 0,
    'elephant': 1,
    'giraffe': 2,
    'guitar': 3,
    'horse': 4,
    'house': 5,
    'person': 6
}


class ArtDataset(Dataset):
    """Image-classification dataset backed by a list of image file paths.

    Unless ``mode == 'test'``, the label of a sample is taken from the name of
    the directory that directly contains the image (``.../<class>/<file>``)
    and encoded through ``class_encoder``. In ``'test'`` mode a placeholder
    label of ``0`` is returned instead.

    Args:
        file_list: Sequence of image file paths.
        transforms: Callable applied to the loaded RGB ``PIL.Image``.
        mode: ``'test'`` disables the label lookup; any other value enables it.
    """

    def __init__(self, file_list, transforms, mode='train'):
        self.file_list = file_list
        self.transforms = transforms
        self.mode = mode

    @staticmethod
    def _encode_label(img_path):
        """Return the integer label for the class directory of ``img_path``.

        Raises:
            IndexError: If the path has no parent directory component.
            KeyError: If the parent directory is not a key of ``class_encoder``.
        """
        # Take the parent directory instead of a fixed position in the path so
        # the lookup does not depend on how deep the dataset root is, and
        # normalise backslashes so Windows-style paths work as well.
        class_name = img_path.replace('\\', '/').split('/')[-2]
        try:
            return class_encoder[class_name]
        except KeyError:
            raise KeyError(
                "unknown class directory {!r} in path {!r}".format(class_name, img_path)
            ) from None

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for the sample at ``idx``."""
        img_path = self.file_list[idx]

        label = 0 if self.mode == 'test' else self._encode_label(img_path)

        # Image.open is lazy and keeps the file open; the context manager
        # closes it once the pixels have been copied by convert().
        # Forcing RGB guarantees three channels for the 3-channel Normalize
        # even when a file is stored as grayscale, palette or RGBA.
        with Image.open(img_path) as raw:
            img = raw.convert('RGB')

        return self.transforms(img), label

    def __len__(self):
        """Return the number of file paths in the dataset."""
        return len(self.file_list)
            

In [31]:
# Training pipeline: random augmentation on the PIL image, then conversion to
# a tensor and per-channel normalisation with mean 0.5 / std 0.5.
# NOTE(review): the pretrained backbone presumably expects ImageNet mean/std
# rather than 0.5/0.5 -- confirm against the timm model configuration.
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    # pad_if_needed keeps the crop from raising on images smaller than
    # 224 px on either side; larger images are cropped exactly as before.
    transforms.RandomCrop((224, 224), pad_if_needed=True),
    transforms.RandomGrayscale(),
    transforms.ToTensor(),
    # The crop above already yields 224x224, so this resize keeps the size.
    transforms.Resize((224, 224)),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Evaluation pipeline: no augmentation, only resize and normalisation.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

In [32]:
root_dir = './train'
# glob() returns paths in an arbitrary, filesystem-dependent order. Sorting
# first makes the seeded split below select the same files on every machine
# and every run, and matches the sorted listing used for inference.
img_list = sorted(glob(root_dir+'/*/*.jpg'))

# Hold out 20% of the labelled images for evaluation after each epoch.
train_list, test_list = train_test_split(img_list, test_size=0.2, shuffle=True, random_state=42)
train_set = ArtDataset(train_list, train_transform)
test_set = ArtDataset(test_list, test_transform)

In [34]:
# Shuffle only the training batches. The hold-out loader keeps a fixed order
# so the batch composition, and therefore the reported loss, is repeatable.
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)

# 2) 모델 정의

In [23]:
# Inspect the backbone's parameter list: print the position of the first
# parameter whose name contains 'blocks.5', then the total parameter count.
model = timm.create_model('efficientnet_b0', pretrained=True, num_classes=7)
name_list = [param_name for param_name, _ in model.named_parameters()]
first_match = next(
    (position for position, param_name in enumerate(name_list) if 'blocks.5' in param_name),
    None,
)
if first_match is not None:
    print(first_match)
print(len(name_list))

143
213


In [35]:
class EffNet(nn.Module):
    """EfficientNet-B0 classifier (built with timm) whose leading parameters are frozen.

    Args:
        num_classes: Number of output classes passed to ``timm.create_model``.
        freeze_until: Where the frozen prefix of the parameter list ends.
            A string freezes every parameter that precedes the first parameter
            whose name contains it; an int freezes that many leading
            parameters. The default ``'blocks.5'`` reproduces the previously
            hard-coded cut-off of 143, which is the index the inspection cell
            printed for the first ``'blocks.5'`` parameter.
    """

    def __init__(self, num_classes=7, freeze_until='blocks.5'):
        super().__init__()
        self.model = timm.create_model('efficientnet_b0', pretrained=True, num_classes=num_classes)
        self._freeze_leading_params(self.model, freeze_until)

    @staticmethod
    def _freeze_leading_params(module, freeze_until):
        """Disable gradients for the leading parameters of ``module``.

        Returns:
            The number of leading parameters selected for freezing.

        Raises:
            ValueError: If ``freeze_until`` is a string that matches no
                parameter name of ``module``.
        """
        named = list(module.named_parameters())
        if isinstance(freeze_until, str):
            # Derive the cut-off from the parameter names so it stays correct
            # if the parameter ordering or count of the backbone changes.
            cutoff = next(
                (i for i, (name, _) in enumerate(named) if freeze_until in name),
                None,
            )
            if cutoff is None:
                raise ValueError(
                    "no parameter name contains {!r}".format(freeze_until)
                )
        else:
            cutoff = int(freeze_until)

        for _, param in named[:cutoff]:
            param.requires_grad = False
        return cutoff

    def forward(self, x):
        """Run ``x`` through the wrapped timm model and return its output."""
        return self.model(x)

# 3) 모델, 손실함수, Optimizer 선언

In [36]:
model = EffNet()
criterion = nn.CrossEntropyLoss()
# Hand only the trainable parameters to Adam: the frozen backbone layers have
# requires_grad=False and never receive gradients, so there is nothing for
# the optimizer to track for them.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=L_RATE)

# 4) 모델 학습

In [37]:
for epoch in range(EPOCH):
    # ---- training pass over the training loader ----
    model.train()
    for batch_idx, (img, target) in enumerate(train_loader):
        optimizer.zero_grad()

        out = model(img)
        loss = criterion(out, target)
        loss.backward()
        optimizer.step()

        # Report the loss of every 10th batch.
        if batch_idx % 10 == 0:
            print("epoch {} Train batch {} loss {:.4f}".format(epoch+1, batch_idx, loss.item()))

    # ---- evaluation pass over the hold-out loader ----
    model.eval()
    test_loss = 0.0
    test_acc = 0
    num_samples = 0
    with torch.no_grad():
        for img, target in test_loader:
            out = model(img)
            batch_size = target.size(0)

            # criterion returns the mean over the batch; weight it by the
            # batch size so a shorter final batch does not skew the average.
            loss = criterion(out, target)
            test_loss += loss.item() * batch_size

            pred = torch.max(out, 1)[1]
            test_acc += (pred==target).sum().item()
            num_samples += batch_size

    # Guard against an empty hold-out loader instead of dividing by zero.
    denom = max(num_samples, 1)
    test_loss /= denom
    test_acc = test_acc / denom * 100

    print("epoch {} Validate loss {:.4f} Accuracy: {:.2f}%".format(epoch+1, test_loss, test_acc))

epoch 1 Train batch 0 loss 3.3141
epoch 1 Train batch 10 loss 2.7804
epoch 1 Train batch 20 loss 0.4221
epoch 1 Train batch 30 loss 0.9344
epoch 1 Train batch 40 loss 0.8975
epoch 1 Train batch 50 loss 0.4674
epoch 1 Train batch 60 loss 1.0677
epoch 1 Train batch 70 loss 0.7592
epoch 1 Train batch 80 loss 1.3181
epoch 1 Validate loss 0.4257 Accuracy: 85.59%
epoch 2 Train batch 0 loss 0.2879
epoch 2 Train batch 10 loss 0.6150
epoch 2 Train batch 20 loss 0.1908
epoch 2 Train batch 30 loss 0.0809
epoch 2 Train batch 40 loss 0.1267
epoch 2 Train batch 50 loss 1.4203
epoch 2 Train batch 60 loss 0.2151
epoch 2 Train batch 70 loss 0.2238
epoch 2 Train batch 80 loss 0.0466
epoch 2 Validate loss 0.3537 Accuracy: 89.12%
epoch 3 Train batch 0 loss 0.1101
epoch 3 Train batch 10 loss 0.0291
epoch 3 Train batch 20 loss 0.4081
epoch 3 Train batch 30 loss 0.1322
epoch 3 Train batch 40 loss 0.3336
epoch 3 Train batch 50 loss 0.5837
epoch 3 Train batch 60 loss 0.1289
epoch 3 Train batch 70 loss 0.1244
e

In [38]:
# Serialise the whole model object (not only its state_dict) to ./model.pt.
torch.save(obj=model, f='./model.pt')

# 5) 추론

In [69]:
# Build the unlabeled inference set from the JPEGs under <test_dir>/0, in
# sorted order so the predictions line up with a stable file listing.
test_dir = './test'
test_list = glob(test_dir+'/0/*.jpg')
test_list.sort()
TestSet = ArtDataset(file_list=test_list, transforms=test_transform, mode='test')
Test_loader = DataLoader(dataset=TestSet, batch_size=8, shuffle=False)

In [None]:
model.eval()
answers = []

# Inference only: with autograd disabled no computation graph is built for
# the forward passes, which saves memory and time.
with torch.no_grad():
    for image, _ in Test_loader:
        output = model(image)

        # Index of the highest-scoring class for every sample in the batch.
        pred = torch.max(output, 1)[1]
        answers.extend(pred.tolist())
    

In [78]:
import pandas as pd

# Store the predicted class indices in a single 'answer_value' column and
# write them to CSV. to_csv is called without index=False, so the row index
# is written as the first column.
submission_path = './submission.csv'
submission_columns = {'answer_value': answers}
submission_df = pd.DataFrame(data=submission_columns)
submission_df.to_csv(path_or_buf=submission_path)