0. 경로 설정

In [25]:
import os

os.getcwd()
base_dir = 'D:/Dacon_Art_Classification/Art_Classification'
data_dir = "D:/Dacon_Art_Classification/data"

1. Dataset 만들기

In [28]:
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import pandas as pd
import cv2

class CustomDataset(Dataset):
    
    def __init__(self, state,img_dir, label:pd.DataFrame=None, transform=None, target_transform=None):
        self.state = state
        self.label = label
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform
    def __len__(self):
        return len(self.label)
    def __getitem__(self, idx):
        # linux일때 '/' 바꿔서 돌리기
        if self.state =='train':
            img_path = os.path.join(self.img_dir,"train",self.label['img_path'][idx].split('/')[-1])
            img_label = self.label['artist'][idx]
            img = cv2.imread(img_path)
            if self.transform:
                img = self.transform(img)
            if self.target_transform:
                label = self.target_transform(label)
            return img,img_label
        else:
            img_path = os.path.join(self.img_dir,"test",self.label['img_path'][idx].split('/')[-1])
            img = cv2.imread(img_path)
            if self.transform:
                img = self.transform(img)
            return img
train_label = pd.read_csv(f'{data_dir}/train.csv')
test_label = pd.read_csv(f'{data_dir}/test.csv')

label_dict = dict(zip(train_label['artist'].unique(),range(50)))
print(label_dict)

train_label['artist'] = [label_dict[i] for i in train_label['artist']]

train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((300, 300)), 
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
])

test_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((300, 300)), 
    transforms.ToTensor(),
])

#PIL 파일로 바꿔주기
#colorJitter은 고민해보기 transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
training_data = CustomDataset('train',data_dir,train_label,train_transform)
train_dataloader = DataLoader(training_data, batch_size=16, shuffle=True)

test_data = CustomDataset('test',data_dir,test_label,test_transform)
test_dataloader = DataLoader(test_data, batch_size=16, shuffle=True)
        

{'Diego Velazquez': 0, 'Vincent van Gogh': 1, 'Claude Monet': 2, 'Edgar Degas': 3, 'Hieronymus Bosch': 4, 'Pierre-Auguste Renoir': 5, 'Rene Magritte': 6, 'Michelangelo': 7, 'Peter Paul Rubens': 8, 'Caravaggio': 9, 'Alfred Sisley': 10, 'Edouard Manet': 11, 'Rembrandt': 12, 'Francisco Goya': 13, 'Pablo Picasso': 14, 'Titian': 15, 'Mikhail Vrubel': 16, 'Leonardo da Vinci': 17, 'Kazimir Malevich': 18, 'Andy Warhol': 19, 'Vasiliy Kandinskiy': 20, 'Gustav Klimt': 21, 'Amedeo Modigliani': 22, 'Henri Rousseau': 23, 'Salvador Dali': 24, 'Pieter Bruegel': 25, 'Albrecht Du rer': 26, 'Paul Gauguin': 27, 'Sandro Botticelli': 28, 'Piet Mondrian': 29, 'Eugene Delacroix': 30, 'Paul Klee': 31, 'William Turner': 32, 'Marc Chagall': 33, 'Jan van Eyck': 34, 'Henri Matisse': 35, 'El Greco': 36, 'Gustave Courbet': 37, 'Andrei Rublev': 38, 'Jackson Pollock': 39, 'Edvard Munch': 40, 'Camille Pissarro': 41, 'Raphael': 42, 'Henri de Toulouse-Lautrec': 43, 'Joan Miro': 44, 'Giotto di Bondone': 45, 'Diego Rivera'

2. 모델 만들기

In [29]:
from torchvision.models import resnet50, ResNet50_Weights

model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)
#이거 쓰는 법도 공부해보기

In [30]:
from torch import nn
class BasicBlock(nn.Module):
    expansion = 1
    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        # BatchNorm에 bias가 포함되어 있으므로, conv2d는 bias=False로 설정합니다.
        self.residual_function = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels * BasicBlock.expansion, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(out_channels * BasicBlock.expansion),
        )

        # identity mapping, input과 output의 feature map size, filter 수가 동일한 경우 사용.
        self.shortcut = nn.Sequential()

        self.relu = nn.ReLU()

        # projection mapping using 1x1conv
        if stride != 1 or in_channels != BasicBlock.expansion * out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels * BasicBlock.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * BasicBlock.expansion)
            )

    def forward(self, x):
        x = self.residual_function(x) + self.shortcut(x)
        x = self.relu(x)
        return x


class BottleNeck(nn.Module):
    expansion = 4
    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        self.residual_function = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels * BottleNeck.expansion, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_channels * BottleNeck.expansion),
        )

        self.shortcut = nn.Sequential()

        self.relu = nn.ReLU()

        if stride != 1 or in_channels != out_channels * BottleNeck.expansion:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels*BottleNeck.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels*BottleNeck.expansion)
            )
            
    def forward(self, x):
        x = self.residual_function(x) + self.shortcut(x)
        x = self.relu(x)
        return x
    
class ResNet(nn.Module):
    def __init__(self, block, num_block, num_classes=10, init_weights=True):
        super().__init__()

        self.in_channels=64

        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )

        self.conv2_x = self._make_layer(block, 64, num_block[0], 1)
        self.conv3_x = self._make_layer(block, 128, num_block[1], 2)
        self.conv4_x = self._make_layer(block, 256, num_block[2], 2)
        self.conv5_x = self._make_layer(block, 512, num_block[3], 2)

        self.avg_pool = nn.AdaptiveAvgPool2d((1,1))
        self.fc1 = nn.Linear(512 * block.expansion, num_classes)
        self.fc2 = nn.Linear(512 * block.expansion, num_classes)

        # weights inittialization
        if init_weights:
            self._initialize_weights()

    def _make_layer(self, block, out_channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels * block.expansion

        return nn.Sequential(*layers)

    def forward(self,x):
        output = self.conv1(x)
        output = self.conv2_x(output)
        x = self.conv3_x(output)
        x = self.conv4_x(x)
        x = self.conv5_x(x)
        x = self.avg_pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = torch.nn.functional.softmax(x,dim=0)
        return x

    # define weight initialization function
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

def resnet18():
    return ResNet(BasicBlock, [2,2,2,2])

def resnet34():
    return ResNet(BasicBlock, [3, 4, 6, 3])

def resnet50():
    return ResNet(BottleNeck, [3,4,6,3],50)

def resnet101():
    return ResNet(BottleNeck, [3, 4, 23, 3])

def resnet152():
    return ResNet(BottleNeck, [3, 8, 36, 3])

In [31]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = resnet50().to(device)
x = torch.randn(3, 3, 224, 224).to(device)
output = model(x)
print(output.size())

torch.Size([3, 50])


In [32]:
!pip install torchsummary



In [33]:
from torchsummary import summary
summary(model, (3, 224, 224), device=device.type)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,096
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          16,384
      BatchNorm2d-12          [-1, 256, 56, 56]             512
           Conv2d-13          [-1, 256, 56, 56]          16,384
      BatchNorm2d-14          [-1, 256,

In [58]:
!pip install tqdm



In [77]:
import torch.optim as optim
from tqdm import tqdm
print("start train")
#criterion = nn.MultiLabelSoftMarginLoss()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

num_epochs = 10
model.train()
# train_dataloader에 문제 발생한듯
for epoch in range(num_epochs):
    print("epoch is :",epoch)
    for i, (images,labels) in enumerate(tqdm(train_dataloader)):
        optimizer.zero_grad()

        images = images.to(device)
        labels = torch.tensor(labels).to(device)
        outputs = model(images)
        loss = criterion(outputs,labels)
        
        loss.backward()
        optimizer.step() 

start train
epoch is : 0


  labels = torch.tensor(labels).to(device)
100%|████████████████████████████████████████████████████████████████████████████████| 370/370 [03:00<00:00,  2.05it/s]


epoch is : 1


100%|████████████████████████████████████████████████████████████████████████████████| 370/370 [01:49<00:00,  3.39it/s]


epoch is : 2


100%|████████████████████████████████████████████████████████████████████████████████| 370/370 [01:50<00:00,  3.36it/s]


epoch is : 3


100%|████████████████████████████████████████████████████████████████████████████████| 370/370 [01:51<00:00,  3.32it/s]


epoch is : 4


100%|████████████████████████████████████████████████████████████████████████████████| 370/370 [01:52<00:00,  3.29it/s]


epoch is : 5


100%|████████████████████████████████████████████████████████████████████████████████| 370/370 [01:52<00:00,  3.28it/s]


epoch is : 6


100%|████████████████████████████████████████████████████████████████████████████████| 370/370 [01:52<00:00,  3.28it/s]


epoch is : 7


100%|████████████████████████████████████████████████████████████████████████████████| 370/370 [01:52<00:00,  3.27it/s]


epoch is : 8


100%|████████████████████████████████████████████████████████████████████████████████| 370/370 [01:53<00:00,  3.26it/s]


epoch is : 9


100%|████████████████████████████████████████████████████████████████████████████████| 370/370 [01:53<00:00,  3.26it/s]


In [104]:
print(outputs)

tensor([[1.9121e-01, 1.1226e-01, 1.5298e-01, 1.3262e-01, 8.3595e-02, 1.4613e-01,
         1.7315e-01, 1.3923e-01, 1.5433e-01, 1.3227e-01, 8.0192e-02, 1.3948e-01,
         1.5907e-01, 1.5155e-01, 1.8513e-02, 2.0386e-01, 1.1178e-01, 1.4939e-01,
         7.3985e-02, 1.2125e-02, 4.3116e-02, 1.4439e-01, 2.7588e-02, 1.4246e-01,
         1.4523e-01, 1.5557e-01, 7.6032e-02, 5.1397e-02, 1.4698e-01, 3.7534e-02,
         1.3945e-01, 3.2391e-02, 1.6148e-01, 5.6489e-02, 1.6680e-01, 2.6285e-02,
         1.2918e-01, 1.8341e-01, 1.6123e-01, 7.9654e-02, 1.1344e-01, 1.5334e-01,
         1.0177e-01, 1.2579e-01, 2.3241e-02, 1.6382e-01, 4.8491e-02, 1.5511e-02,
         1.4891e-01, 1.0182e-01],
        [1.0969e-01, 2.3600e-01, 9.0985e-02, 3.8032e-02, 1.5153e-01, 5.6017e-02,
         1.5249e-01, 1.4058e-01, 1.2285e-01, 1.3196e-01, 1.3631e-02, 1.3826e-01,
         1.1317e-02, 1.3214e-01, 5.6361e-02, 3.8889e-02, 2.4891e-02, 1.1136e-01,
         1.4321e-01, 3.7121e-02, 8.8894e-02, 5.4387e-02, 4.8129e-02, 1.5818

In [78]:
torch.save(model,"model.pt")

In [87]:
test_image = cv2.imread(f'{data_dir}/test/TEST_00000.jpg')
test_image = transforms.ToTensor(test_image,)
print(type(test_image))
out = model(test_image)

TypeError: __init__() takes 1 positional argument but 2 were given

In [65]:
import torch
from tqdm import tqdm
from torchvision import transforms
test_data = CustomDataset('test',data_dir,test_label,test_transform)
test_dataloader = DataLoader(test_data, batch_size=16, shuffle=True)
test_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((300, 300)), 
    transforms.ToTensor(),
])
model = torch.load('model.pt')
model.eval()
with torch.no_grad():
    for i,image in enumerate(tqdm(test_dataloader)):
        image = image.to(device)
        out = model(image)

100%|████████████████████████████████████████████████████████████████████████████████| 792/792 [01:17<00:00, 10.26it/s]


In [66]:
print(out.argmax(axis=1))

tensor([ 1, 27,  1, 19, 26, 12, 12, 26,  3, 36,  1, 10, 33,  3],
       device='cuda:0')


In [67]:
print(image[0].shape)

torch.Size([3, 300, 300])


In [68]:
from PIL import Image
from torchvision import transforms as T
transformer = T.ToPILImage()
img = transformer(image[0])


In [70]:
img.show()

error: OpenCV(4.6.0) :-1: error: (-5:Bad argument) in function 'cvtColor'
> Overload resolution failed:
>  - src is not a numpy array, neither a scalar
>  - Expected Ptr<cv::UMat> for argument 'src'
