In [127]:
import warnings
warnings.filterwarnings(action='ignore')

import os
import pandas as pd
import numpy as np

from PIL import Image
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
import IPython.display as ipd
import time

In [128]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import torch
import torchvision
import torch.nn.functional as F
from torchvision.transforms import functional
from torch import Tensor, nn, optim
from torch.utils.data import Dataset, DataLoader
from torchvision.utils import make_grid
import torchvision.transforms as transforms
import albumentations as A
from albumentations.pytorch import ToTensor

from efficientnet_pytorch import EfficientNet

In [129]:
import torch
import torchvision
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models

## 데이터 불러오기

In [130]:
train = pd.read_csv('./train/train_data.csv')
test = pd.read_csv('./test/test/test_data.csv')

In [131]:
train.columns = ['file_name','label']

In [132]:
test.head()

Unnamed: 0,file_name
0,idx0001.png
1,idx0002.png
2,idx0003.png
3,idx0004.png
4,idx0005.png


In [167]:
class CustomDataset(torch.utils.data.Dataset): 
    def __init__(self,df,path,option,augmentation=None):
        self.df = df
        self.option = option
        self.augmentation = augmentation
        self.path = path

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx): 
        file_path  = self.df.iloc[idx,0]
        image=  Image.open(os.path.join(self.path,file_path)).convert('RGB')
        image = np.array(image)
        
        #image = transforms.ToTensor()(image=image)['image']
        
        
        if self.augmentation is not None:
            image = self.augmentation(image=image)['image']
        
        
        if self.option =='train':
            label = self.df.iloc[idx,1]
            label = torch.tensor(label, dtype=torch.int64)
            return image, label
        
        return image
    

In [241]:
class MnistModel(nn.Module):
    def __init__(self,):
        super().__init__()
        self.model = models.resnet50(pretrained=False)
        
        self.dropout = nn.Dropout(0.5)
        
        self.classifier = nn.Linear(1000, 10)

    def forward(self, images):
        outputs = self.model(images)
        outputs = self.dropout(outputs)
        outputs = self.classifier(outputs)
        return outputs

In [242]:
model = MnistModel().to('cuda')

In [243]:
optimizer = optim.Adam(model.parameters(), lr=1e-4, )
criterion = nn.CrossEntropyLoss()

In [244]:
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits = 5)

for trn_idx,val_idx in skf.split(train['file_name'],train['label']):
    train_data = train.loc[trn_idx]
    val_data = train.loc[val_idx]

In [245]:
train_transforms = A.Compose([
    A.Normalize(),

    A.HorizontalFlip(),
    A.RandomRotate90(p=1),
    A.VerticalFlip(p=1),
    ToTensor()
])

test_transforms = A.Compose([
    A.Normalize(),
    ToTensor()
])

train_dataset = CustomDataset(train_data,'train/','train',train_transforms)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=0)

valid_dataset = CustomDataset(val_data,'train/','train',train_transforms)
valid_loader = DataLoader(valid_dataset, batch_size=64, shuffle=True, num_workers=0)


In [246]:
num_epochs = 100

for epoch in range(num_epochs):
    
    for i, (images, targets) in enumerate(train_loader):
        model.train()
        images, targets = images.to('cuda'), targets.to('cuda')

        optimizer.zero_grad()
        outputs = model(images)

        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()  
        
        if (i+1) % 20 == 0:
            print(f'Epoch: {epoch} - Loss: {loss:.6f}')
            
    
    val_loss = []
    val_acc = []
    for i, (images, targets) in enumerate(valid_loader):
        model.eval()
        images, targets = images.to('cuda'), targets.to('cuda')
        
        with torch.no_grad():
            outputs = model(images)
            valid_loss = criterion(outputs,targets).cpu().detach().numpy()
            
            preds = torch.argmax(outputs,axis = 1)
            preds = preds.cpu().detach().numpy()

            targets = targets.cpu().detach().numpy()
            batch_acc = (preds==targets).mean()
            
            
        
            val_loss.append(valid_loss)
            val_acc.append(batch_acc)
            
    val_loss = np.mean(val_loss)
    val_acc = np.mean(val_acc)
        
    print(f'Epoch: {epoch} - valid Loss: {val_loss:.6f} - valid_acc : {val_acc:.6f}')

        

Epoch: 0 - Loss: 2.285427
Epoch: 0 - Loss: 2.082988
Epoch: 0 - Loss: 2.422536
Epoch: 0 - valid Loss: 2.239116 - valid_acc : 0.185547
Epoch: 1 - Loss: 2.344063
Epoch: 1 - Loss: 2.283278
Epoch: 1 - Loss: 2.265645
Epoch: 1 - valid Loss: 2.050220 - valid_acc : 0.276758
Epoch: 2 - Loss: 2.090169
Epoch: 2 - Loss: 2.103786
Epoch: 2 - Loss: 1.975704
Epoch: 2 - valid Loss: 1.908581 - valid_acc : 0.307227
Epoch: 3 - Loss: 1.928484
Epoch: 3 - Loss: 2.061066
Epoch: 3 - Loss: 1.668773
Epoch: 3 - valid Loss: 1.853724 - valid_acc : 0.367773
Epoch: 4 - Loss: 1.828675
Epoch: 4 - Loss: 2.056792
Epoch: 4 - Loss: 1.801811
Epoch: 4 - valid Loss: 1.750041 - valid_acc : 0.356250
Epoch: 5 - Loss: 1.858526
Epoch: 5 - Loss: 1.747890
Epoch: 5 - Loss: 1.553946
Epoch: 5 - valid Loss: 1.615960 - valid_acc : 0.420703
Epoch: 6 - Loss: 1.415671
Epoch: 6 - Loss: 1.866298
Epoch: 6 - Loss: 1.762425
Epoch: 6 - valid Loss: 1.560263 - valid_acc : 0.448437
Epoch: 7 - Loss: 1.602701
Epoch: 7 - Loss: 1.956170
Epoch: 7 - Loss: 

In [247]:
test_transforms = A.Compose([
    A.Normalize(),
    ToTensor()
])



test_dataset = CustomDataset(test,'test/test','test',test_transforms)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True, num_workers=0)

In [248]:
model.eval()
y_pred = list()
with torch.no_grad():
    for i, d in enumerate(test_loader):
        inputs = d.to('cuda')
        outputs = model(inputs)
        y_pred.append(torch.argmax(outputs, -1).cpu().numpy())
y_pred = np.concatenate(y_pred, 0)

In [249]:
submission = pd.read_csv('sample_submission.csv')
submission['label'] = y_pred

In [250]:
submission.to_csv('sub_211118.csv',index = False)