In [1]:
import os
from os.path import join
import cv2
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision import models
from glob import glob 
from PIL import Image


batch_size = 32
total_epoch = 5
method = 'GoogLeNet'

In [2]:
#GPU
if torch.cuda.is_available() : 
    device = 'cuda'
else :
    device = 'cpu'
    
torch.manual_seed(777)
if device == 'cuda' :
    torch.cuda.manual_seed_all(777)

In [3]:
#dataloader 짜기

# /../input/2021-ai-w10-p2/images/images/0/aeroplane_s_000004.png

class W10_dataloader(torch.utils.data.Dataset):
    def __init__(self, data_path, split, transform = None):
        self.split = split.upper()
        assert self.split in {'TRAIN', 'TEST'}
        self.transform = transform
        self.data = data_path
        if self.split == "TRAIN" :
            self.label = [int(p.split('/')[-2]) for p in data_path]
        self.data_len = len(self.data)
    
    def __len__(self):
        return self.data_len
    
    def __getitem__(self, index):
        image = Image.open(self.data[index], mode = 'r')
        image = image.convert('RGB')
        
        if self.transform :
            image = self.transform(image)
        
        if self.split == 'TEST':
            return image
        elif self.split == 'TRAIN':
            self.label[index] = np.array(self.label[index])
            return image , torch.from_numpy(self.label[index])
        
transform = transforms.Compose(
    [transforms.Resize((224,224)),
     transforms.ToTensor(),
     transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))])

In [4]:
train_mother_path = '../input/2021-ai-w10-p2/images/images'
test_mother_path = '../input/2021-ai-w10-p2/test_data/test_data'
train_image_path = glob(join(train_mother_path, '*', '*'))
test_image_path = glob(join(test_mother_path, '*'))


In [5]:
train_data = W10_dataloader(train_image_path, 'train', transform = transform)
test_data = W10_dataloader(test_image_path, 'test', transform = transform)

train_loader = torch.utils.data.DataLoader(train_data, batch_size = batch_size, shuffle = True, num_workers =4)
test_loader = torch.utils.data.DataLoader(test_data, batch_size = batch_size, shuffle = False, num_workers = 4)

  cpuset_checked))


In [6]:
# model = models.vgg16(pretrained = True)
# model

In [7]:
# 사전 학습 모델 사용
# ResNet , VGG , GoogLenet

if method is "ResNet":
    model = models.resnet18(pretrained = True)
    model.to(device)
    
    #output layer 출력 형태 변환
    model.fc.out_features = 10

if method is 'VGG':
    model = models.vgg16(pretrained = True)
    model.to(device)
    
    model.classifier[6].out_features = 10

if method is "GoogLeNet":
    model = models.googlenet(pretrained = True)
    model.to(device)
    
    model.fc.out_features = 10
    

Downloading: "https://download.pytorch.org/models/googlenet-1378be20.pth" to /root/.cache/torch/hub/checkpoints/googlenet-1378be20.pth


  0%|          | 0.00/49.7M [00:00<?, ?B/s]

In [8]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import time
import torch.nn.functional as F
import torch.nn as nn
import matplotlib.pyplot as plt
from torchvision import models
import numpy as np
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.001 , momentum = 0.9)

In [9]:
def train(model, train_dataloader):
    model.train()
    train_running_loss = 0.0
    train_running_correct = 0
    
    for i, data in enumerate(train_dataloader):
        data, target = data[0].to(device) , data[1].to(device)
        
        optimizer.zero_grad()
        
        hypothesis = model(data)
        
        loss = criterion(hypothesis, target)
        
        train_running_loss += loss.item()
        _, prdt = torch.max(hypothesis.data, 1)
        train_running_correct += (prdt == target).sum().item()
        
        loss.backward()
        optimizer.step()
    train_loss = train_running_loss / len(train_dataloader.dataset)
    train_accuracy = 100. * train_running_correct / len(train_dataloader.dataset)
    
    print(f'Train Loss : {train_loss :.4f}, Train Acc : {train_accuracy :.2f}')
          
    return train_loss, train_accuracy

In [10]:
train_loss , train_accuracy = [] , []
val_loss , val_accuracy = [], []

start = time.time()

for e in range(total_epoch):
    train_epoch_loss , train_epoch_accuracy = train(model, train_loader)
    train_loss.append(train_epoch_loss)
    train_accuracy.append(train_epoch_accuracy)
    print(total_epoch ,train_accuracy,val_accuracy )
    
end = time.time()

Train Loss : 0.0184, Train Acc : 83.21
5 [83.212] []
Train Loss : 0.0062, Train Acc : 93.44
5 [83.212, 93.444] []
Train Loss : 0.0038, Train Acc : 96.00
5 [83.212, 93.444, 96.002] []
Train Loss : 0.0024, Train Acc : 97.55
5 [83.212, 93.444, 96.002, 97.552] []
Train Loss : 0.0016, Train Acc : 98.35
5 [83.212, 93.444, 96.002, 97.552, 98.352] []


In [11]:
def test(model, test_loader):
    model.eval()
    prdts = []
    for i, data in enumerate(test_loader):
        data = data.to(device)
        hypothesis = model(data)
        _, pred = torch.max(hypothesis.data, 1)
        prdts.extend(pred.detach().cpu().tolist())
    return prdts

In [12]:
import pandas as pd
submit = pd.read_csv("../input/2021-ai-w10-p2/format.csv")
prdts = test(model, test_loader)

In [13]:
submit['label'] = prdts
submit.to_csv("submit.csv", index = False)

In [14]:
submit

Unnamed: 0,id,label
0,1,0
1,2,7
2,3,6
3,4,8
4,5,8
...,...,...
9995,9996,7
9996,9997,6
9997,9998,4
9998,9999,1
