In [1]:
import warnings
warnings.filterwarnings(action='ignore')

import os
import pandas as pd
import numpy as np

from PIL import Image
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
import IPython.display as ipd
import time

In [2]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import torch
import torchvision
import torch.nn.functional as F
from torchvision.transforms import functional
from torch import Tensor, nn, optim
from torch.utils.data import Dataset, DataLoader
from torchvision.utils import make_grid
import torchvision.transforms as transforms
import albumentations as A
from albumentations.pytorch import ToTensor

from efficientnet_pytorch import EfficientNet

In [3]:
import torch
import torchvision
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
from adamp import AdamP
from torchsummary import summary

## 시드 고정

In [4]:
import random
def seed_everything(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  
    torch.backends.cudnn.deterministic = True  
    torch.backends.cudnn.benchmark = True  
seed_everything()

## 데이터 불러오기

In [5]:
train = pd.read_csv('./train/train_data.csv')
test = pd.read_csv('./test/test/test_data.csv')

In [6]:
train.columns = ['file_name','label']

In [7]:
test.head()

Unnamed: 0,file_name
0,idx0001.png
1,idx0002.png
2,idx0003.png
3,idx0004.png
4,idx0005.png


In [8]:
class CustomDataset(torch.utils.data.Dataset): 
    def __init__(self,df,path,option,augmentation=None):
        self.df = df
        self.option = option
        self.augmentation = augmentation
        self.path = path

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx): 
        file_path  = self.df.iloc[idx,0]
        image=  Image.open(os.path.join(self.path,file_path)).convert('RGB')
        image = image.resize((100,100))
        image = np.array(image)
        
        #image = transforms.ToTensor()(image=image)['image']
        
        
        if self.augmentation is not None:
            image = self.augmentation(image=image)['image']
        
        
        if self.option =='train':
            label = self.df.iloc[idx,1]
            label = torch.tensor(label, dtype=torch.int64)
            return image, label
        
        return image
    

In [9]:
class MnistModel(nn.Module):
    def __init__(self,):
        super().__init__()
        #self.conv2d = nn.Conv2d(3, 3, 3, stride=1)
        self.model = torchvision.models.resnet34(pretrained = False)
        #self.model = EfficientNet.from_name('efficientnet-b1')
        self.dropout = nn.Dropout(0.5)
        
        self.classifier = nn.Linear(1000, 10)

    def forward(self, images):
        #outputs = self.conv2d(images)
        outputs = self.model(images)
        outputs = self.dropout(outputs)
        outputs = self.classifier(outputs)
        return outputs

In [10]:
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits = 5)
folds=[]
for trn_idx,val_idx in skf.split(train['file_name'],train['label']):
    folds.append((trn_idx,val_idx))

In [None]:
num_epochs = 16
best_models = [] # 폴드별로 가장 validation acc가 높은 모델 저장

for i,fold in enumerate(range(5)):
    print('===============',i+1,'fold start===============')
    
    model = MnistModel().to('cuda')
    optimizer = AdamP(model.parameters(), lr=1e-3, )
    criterion = nn.CrossEntropyLoss()
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size = 5,
                                                gamma = 0.9)
        
    
    train_idx = folds[fold][0]
    valid_idx = folds[fold][1]
    
    train_data = train.loc[trn_idx]
    val_data = train.loc[valid_idx]
    
    train_transforms = A.Compose([
    A.Normalize(),
    A.HorizontalFlip(),
    A.RandomRotate90(p=0.5),
    A.VerticalFlip(p=0.5),
    ToTensor()
    ])

    test_transforms = A.Compose([
        A.Normalize(),
        ToTensor()
        ])

    train_dataset = CustomDataset(train_data,'train/','train',train_transforms)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=0)

    valid_dataset = CustomDataset(val_data,'train/','train',test_transforms)
    valid_loader = DataLoader(valid_dataset, batch_size=64, shuffle=False, num_workers=0)
    
    
    valid_acc_max = 0.85
    valid_loss_min = 0.4
    for epoch in range(num_epochs):

        for i, (images, targets) in enumerate(train_loader):
            model.train()
            images, targets = images.to('cuda'), targets.to('cuda')

            optimizer.zero_grad()
            outputs = model(images)

            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()  

            if (i+1) % 20 == 0:
                print(f'Epoch: {epoch} - Loss: {loss:.6f}')


        val_loss = []
        val_acc = []
        for i, (images, targets) in enumerate(valid_loader):
            model.eval()
            images, targets = images.to('cuda'), targets.to('cuda')

            with torch.no_grad():
                outputs = model(images)
                valid_loss = criterion(outputs,targets).cpu().detach().numpy()

                preds = torch.argmax(outputs,axis = 1)
                preds = preds.cpu().detach().numpy()

                targets = targets.cpu().detach().numpy()
                batch_acc = (preds==targets).mean()



                val_loss.append(valid_loss)
                val_acc.append(batch_acc)

        val_loss = np.mean(val_loss)
        val_acc = np.mean(val_acc)

        print(f'Epoch: {epoch} - valid Loss: {val_loss:.6f} - valid_acc : {val_acc:.6f}')
        
        '''
         if valid_acc_max < val_acc:
            valid_acc_max = val_acc
            best_models.append(model)
            print('model save, model val acc : ',val_acc)
            print('best_models size : ',len(best_models))
            '''


        if valid_loss_min > val_loss:
            valid_loss_min = val_loss
            best_models.append(model)
            
    # Learning rate 조절
    lr_scheduler.step()
    
  
    
        
        

Epoch: 0 - Loss: 2.027545
Epoch: 0 - Loss: 1.299201
Epoch: 0 - Loss: 1.288974
Epoch: 0 - valid Loss: 1.580606 - valid_acc : 0.540820
Epoch: 1 - Loss: 1.208633
Epoch: 1 - Loss: 0.998218
Epoch: 1 - Loss: 1.078507
Epoch: 1 - valid Loss: 0.956541 - valid_acc : 0.649609
Epoch: 2 - Loss: 0.763615
Epoch: 2 - Loss: 0.757862
Epoch: 2 - Loss: 0.787007
Epoch: 2 - valid Loss: 0.842701 - valid_acc : 0.709375
Epoch: 3 - Loss: 0.722738
Epoch: 3 - Loss: 0.806212
Epoch: 3 - Loss: 0.758249
Epoch: 3 - valid Loss: 0.418378 - valid_acc : 0.872070
Epoch: 4 - Loss: 1.025948
Epoch: 4 - Loss: 0.667193
Epoch: 4 - Loss: 0.514454
Epoch: 4 - valid Loss: 0.415807 - valid_acc : 0.851953
Epoch: 5 - Loss: 0.468797
Epoch: 5 - Loss: 0.390045
Epoch: 5 - Loss: 0.435537
Epoch: 5 - valid Loss: 0.634373 - valid_acc : 0.768164
Epoch: 6 - Loss: 0.312579
Epoch: 6 - Loss: 0.278234
Epoch: 6 - Loss: 0.337076
Epoch: 6 - valid Loss: 0.354196 - valid_acc : 0.897461
Epoch: 7 - Loss: 0.384503
Epoch: 7 - Loss: 0.713508
Epoch: 7 - Loss: 

In [49]:
test_transforms = A.Compose([
    A.Normalize(),
    ToTensor()
])

test_dataset = CustomDataset(test,'test/test','test',test_transforms)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=0)

In [50]:
len(best_models)

24

In [51]:
preds = []
for idx,model in enumerate(best_models): 
    print(idx+1, '번째 모델 예측 진행중')
    model = model
    model.eval()
    y_pred = []
    with torch.no_grad():
        for i, d in enumerate(test_loader):
            inputs = d.to('cuda')
            outputs = model(inputs).detach().cpu().numpy()
            y_pred.extend(outputs.argmax(axis=1).astype(int))
            
    preds.append(y_pred)




1 번째 모델 예측 진행중
2 번째 모델 예측 진행중
3 번째 모델 예측 진행중
4 번째 모델 예측 진행중
5 번째 모델 예측 진행중
6 번째 모델 예측 진행중
7 번째 모델 예측 진행중
8 번째 모델 예측 진행중
9 번째 모델 예측 진행중
10 번째 모델 예측 진행중
11 번째 모델 예측 진행중
12 번째 모델 예측 진행중
13 번째 모델 예측 진행중
14 번째 모델 예측 진행중
15 번째 모델 예측 진행중
16 번째 모델 예측 진행중
17 번째 모델 예측 진행중
18 번째 모델 예측 진행중
19 번째 모델 예측 진행중
20 번째 모델 예측 진행중
21 번째 모델 예측 진행중
22 번째 모델 예측 진행중
23 번째 모델 예측 진행중
24 번째 모델 예측 진행중


In [52]:
from collections import Counter
np_pred = np.array(preds).T

pred = []
for i in range(5000):
    cnt = Counter(np_pred[i])
    pred.append(cnt.most_common()[0][0])
pred

[1,
 0,
 8,
 3,
 5,
 9,
 2,
 4,
 5,
 4,
 8,
 7,
 2,
 4,
 3,
 6,
 6,
 5,
 6,
 6,
 4,
 0,
 4,
 4,
 2,
 7,
 2,
 0,
 1,
 4,
 7,
 3,
 8,
 2,
 9,
 3,
 5,
 6,
 1,
 9,
 1,
 6,
 7,
 8,
 6,
 3,
 3,
 3,
 2,
 8,
 0,
 1,
 0,
 5,
 6,
 6,
 0,
 3,
 6,
 0,
 5,
 9,
 8,
 8,
 6,
 2,
 5,
 7,
 8,
 2,
 1,
 1,
 3,
 8,
 1,
 2,
 9,
 8,
 8,
 2,
 5,
 5,
 3,
 8,
 9,
 9,
 2,
 4,
 6,
 6,
 4,
 7,
 7,
 3,
 3,
 4,
 5,
 5,
 0,
 6,
 8,
 7,
 1,
 5,
 0,
 6,
 7,
 9,
 5,
 0,
 9,
 9,
 4,
 0,
 7,
 3,
 1,
 0,
 4,
 7,
 2,
 6,
 7,
 9,
 6,
 1,
 9,
 7,
 3,
 2,
 2,
 4,
 7,
 3,
 6,
 5,
 8,
 3,
 1,
 0,
 4,
 5,
 5,
 1,
 7,
 4,
 1,
 5,
 2,
 0,
 9,
 9,
 2,
 0,
 7,
 5,
 1,
 4,
 3,
 9,
 4,
 8,
 6,
 6,
 6,
 6,
 3,
 3,
 7,
 1,
 4,
 8,
 5,
 7,
 3,
 3,
 2,
 7,
 6,
 8,
 8,
 6,
 7,
 8,
 8,
 4,
 3,
 1,
 5,
 7,
 2,
 4,
 5,
 4,
 6,
 4,
 1,
 2,
 2,
 4,
 1,
 4,
 1,
 3,
 6,
 8,
 3,
 3,
 1,
 8,
 3,
 6,
 5,
 0,
 2,
 8,
 6,
 4,
 1,
 0,
 2,
 0,
 5,
 9,
 1,
 7,
 9,
 8,
 9,
 1,
 1,
 2,
 3,
 3,
 5,
 0,
 7,
 7,
 1,
 5,
 2,
 6,
 9,
 3,
 1,
 1,
 3,
 7,
 7,
 6,


In [53]:
submission = pd.read_csv('sample_submission.csv')
submission['label'] = pred

In [54]:
submission.to_csv('sub_211123.csv',index = False)

In [43]:
submission['label'].value_counts()

6    592
7    569
5    557
3    498
2    491
8    484
0    478
4    467
1    461
9    403
Name: label, dtype: int64