In [632]:
import random
import pandas as pd
import numpy as np
import os
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

#from torchvision.models import resnet18
from torchvision import transforms

from tqdm.auto import tqdm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings(action='ignore') 
import cv2
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from torchvision import models

In [633]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [634]:
# Hyper-parameters shared by the data pipeline and the training loop.
CFG = dict(
    IMG_HEIGHT_SIZE=64,  #64
    IMG_WIDTH_SIZE=224,  #56, #224
    EPOCHS=15,
    LEARNING_RATE=1e-3,
    BATCH_SIZE=64,
    NUM_WORKERS=4,  # set this to match your own GPU / CPU environment
    SEED=41,
)

In [635]:
def seed_everything(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's `random`, NumPy, and PyTorch (CPU and all CUDA devices),
    exports PYTHONHASHSEED, and configures cuDNN for repeatable runs.

    Args:
        seed: integer seed applied to every generator.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Also seed any additional GPUs; this is a no-op when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick different convolution
    # algorithms from run to run, which defeats the deterministic flag above.
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # Seed 고정

In [636]:
os.getcwd()

'/home/gsc/dacon_KYOWON/Dacon_KYOWON'

In [637]:
#window
# base_dir = 'D:/Dacon_KYOWON/Dacon_KYOWON'
# data_dir = "D:/Dacon_KYOWON/open"
#ubuntu
# The defaults are specific to the original author's machine. Set the
# KYOWON_BASE_DIR / KYOWON_DATA_DIR environment variables to point the
# notebook elsewhere without editing this cell.
base_dir = os.environ.get('KYOWON_BASE_DIR', '/home/gsc/dacon_KYOWON/Dacon_KYOWON')
data_dir = os.environ.get('KYOWON_DATA_DIR', '/home/gsc/dacon_KYOWON/open')


In [638]:
#df = pd.read_csv(f'{data_dir}/train.csv')

In [639]:
# df_len_count = pd.DataFrame(df['len'].value_counts())
# df_len_count.reset_index(inplace=True)
# df_len_count.columns = ['len', 'len_count']

# display(df_len_count)

# len2count = {k:v for k,v in zip(df_len_count['len'], df_len_count['len_count'])}

# idx = 76888 
# for i in tqdm(range(200000)):
#     select1, select2 = random.randint(0, 76887), random.randint(0, 76887)
#     imgFile1 = f'../open/train/TRAIN_{select1:05d}.png'
#     imgFile2 = f'../open/train/TRAIN_{select2:05d}.png'
    
#     if df['len'][select1] + df['len'][select2] > 6:
#         continue
        
#     if len2count[df['len'][select1] + df['len'][select2]] > 30000:
#         continue
    
#     # 이미지 읽기
#     img1 = cv2.imread(imgFile1, 1);
#     img2 = cv2.imread(imgFile2, 1);
    
#     img1 = cv2.resize(img1,(100,125))
#     img2 = cv2.resize(img2,(100,125))
    
#     addh = cv2.hconcat([img1, img2])
    
#     new_data = {
#         'id' : f"TRAIN_{idx:06d}",
#         'img_path' : f"./Cut_mix/TRAIN_{idx:06d}.png",
#         'label' : df['label'][select1] + df['label'][select2],
#         'len' : df['len'][select1] + df['len'][select2]
#     }
    
#     df = df.append(new_data, ignore_index=True)
#     cv2.imwrite(f"./Cut_mix/TRAIN_{idx:06d}.png", addh)
#     idx += 1
#     len2count[df['len'][select1] + df['len'][select2]] += 1
    
# df.to_csv('./train_cutmix.csv', index=False)

In [640]:
df = pd.read_csv('train_cutmix.csv')

In [641]:
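# The single-character samples in the provided training data already cover every character that appears in the train/test data, so they are all placed in the training set first.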
df['len'] = df['label'].str.len()
train_v1 = df[df['len']==1]

In [642]:
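# Split the remaining samples (two or more characters) into train (80%) / validation (20%).
# NOTE: df['len'] is only passed to train_test_split as a second array to split; it is not given as `stratify=`, so the split is random rather than stratified by word length.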
df = df[df['len']>1]
train_v2, val, _, _ = train_test_split(df, df['len'], test_size=0.2, random_state=CFG['SEED'])

In [643]:
# Concatenate the single-character samples set aside earlier with the train portion of the longer samples to form the final training set.
train = pd.concat([train_v1, train_v2])
print(len(train), len(val))

143707 30001


In [644]:
# Build the vocabulary from the training labels: every distinct character,
# in sorted order.
train_gt = "".join(train['label'])
letters = sorted(set(train_gt))
print(len(letters))

2349


In [645]:
# "-" is placed at index 0, ahead of the sorted training characters.
vocabulary = ["-", *letters]
print(len(vocabulary))
# Two-way lookup tables between class index and character.
idx2char = dict(enumerate(vocabulary))
char2idx = {char: idx for idx, char in idx2char.items()}

2350


In [646]:
print(data_dir)
print(base_dir)

/home/gsc/dacon_KYOWON/open
/home/gsc/dacon_KYOWON/Dacon_KYOWON


In [647]:
class CustomDataset(Dataset):
    """Dataset that loads word images from disk, optionally with their labels.

    Args:
        img_path_list: sequence of image paths; only the trailing path
            components are used (see `__getitem__`).
        label_list: sequence of label strings aligned with `img_path_list`,
            or None for unlabeled data.
        transforms: optional callable invoked as `transforms(image=img)` whose
            result is indexed with `['image']`.
        train_mode: when truthy, images are read from
            `{data_dir}/<parent dir>/<file name>`; otherwise from
            `{base_dir}/results1/<file name>`.
    """

    def __init__(self, img_path_list, label_list,transforms=None, train_mode=True):
        self.img_path_list = img_path_list
        self.label_list = label_list
        self.transforms = transforms
        self.train_mode = train_mode

    def __len__(self):
        """Return the number of samples."""
        return len(self.img_path_list)

    def __getitem__(self, index):
        """Load sample `index`.

        Returns:
            `(image, text)` when labels were supplied, otherwise just `image`.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        img_path = self.img_path_list[index]
        path_parts = img_path.split('/')
        if self.train_mode:
            full_path = f"{data_dir}/{path_parts[-2]}/{path_parts[-1]}"
        else:
            full_path = f"{base_dir}/results1/{path_parts[-1]}"

        image = cv2.imread(full_path)
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None;
            # fail here with the path instead of an opaque cvtColor error.
            raise FileNotFoundError(f"Could not read image: {full_path}")

        # OpenCV loads images as BGR; convert to RGB channel order.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transforms is not None:
            image = self.transforms(image=image)['image']

        if self.label_list is not None:
            text = self.label_list[index]
            return image, text
        else:
            return image
            
  

In [648]:
def resize_transform(height,width, state='train'):
    """Build the albumentations pipeline for one phase.

    Args:
        height: target image height passed to `A.Resize`.
        width: target image width passed to `A.Resize`.
        state: 'train' prepends a rotation in [-10, 10] that is always
            applied (p=1); any other value yields resize + tensor conversion
            only.

    Returns:
        An `A.Compose` pipeline ending in `ToTensorV2`.
    """
    steps = [A.Resize(height,width), ToTensorV2()]
    if state == 'train':
        # Rotation comes before the resize in the training pipeline.
        steps.insert(0, A.Rotate(limit=[-10,10], p=1))
    return A.Compose(steps)

In [649]:
train_transform = resize_transform(CFG['IMG_HEIGHT_SIZE'],CFG['IMG_WIDTH_SIZE'])
test_transform = resize_transform(CFG['IMG_HEIGHT_SIZE'],CFG['IMG_WIDTH_SIZE'],'test')

In [650]:
img_path = train['label'].values
print(img_path)
#print(f"{data_dir}/{img_path.split('/')[-2]}/{img_path.split('/')[-1]}")

['머' '써' '빈' ... '행동하다익' '옳싫어하다' '손수']


In [651]:
# Training batches are reshuffled every epoch.
train_dataset = CustomDataset(train['img_path'].values, train['label'].values, train_transform)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=CFG['NUM_WORKERS'])

# Validation only aggregates the loss, so shuffling adds nothing; a fixed
# order keeps the per-batch losses comparable between epochs and runs.
val_dataset = CustomDataset(val['img_path'].values, val['label'].values,test_transform)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=CFG['NUM_WORKERS'])

In [652]:
#image_batch, text_batch = iter(train_loader).next()
#print(image_batch.size(), text_batch)

In [653]:
import functools
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init


def initialize_weights(net_l, scale=1):
    """Re-initialise Conv2d / Linear / BatchNorm2d layers in place.

    Conv2d and Linear weights get Kaiming-normal (fan_in) values multiplied
    by `scale`, and their biases (if any) are zeroed. BatchNorm2d weights are
    set to 1 and biases to 0.

    Args:
        net_l: a module or a list of modules; every sub-module is visited.
        scale: multiplier applied to the freshly initialised weights.
    """
    nets = net_l if isinstance(net_l, list) else [net_l]
    for net in nets:
        for module in net.modules():
            if isinstance(module, (nn.Conv2d, nn.Linear)):
                init.kaiming_normal_(module.weight, a=0, mode='fan_in')
                module.weight.data *= scale  # shrink, e.g. for residual blocks
                if module.bias is not None:
                    module.bias.data.zero_()
            elif isinstance(module, nn.BatchNorm2d):
                init.constant_(module.weight, 1)
                init.constant_(module.bias.data, 0.0)


def make_layer(block, n_layers):
    """Stack `n_layers` modules produced by calling `block()` into an nn.Sequential.

    Args:
        block: zero-argument callable returning a new module on each call.
        n_layers: number of modules to create.
    """
    return nn.Sequential(*[block() for _ in range(n_layers)])


class ResidualDenseBlock_5C(nn.Module):
    """Five-convolution densely connected block with a scaled residual.

    Each convolution receives the block input concatenated (along channels)
    with the outputs of all earlier convolutions. The last convolution maps
    back to `nf` channels; its output is scaled by 0.2 and added to the input.

    Args:
        nf: number of input/output feature channels.
        gc: growth channels, i.e. channels produced by each intermediate conv.
        bias: whether the convolutions carry a bias term.
    """

    def __init__(self, nf=64, gc=32, bias=True):
        super().__init__()
        conv_kwargs = dict(kernel_size=3, stride=1, padding=1, bias=bias)
        self.conv1 = nn.Conv2d(nf, gc, **conv_kwargs)
        self.conv2 = nn.Conv2d(nf + gc, gc, **conv_kwargs)
        self.conv3 = nn.Conv2d(nf + 2 * gc, gc, **conv_kwargs)
        self.conv4 = nn.Conv2d(nf + 3 * gc, gc, **conv_kwargs)
        self.conv5 = nn.Conv2d(nf + 4 * gc, nf, **conv_kwargs)
        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)

        # Re-initialise all five convolutions with weights scaled by 0.1.
        initialize_weights([self.conv1, self.conv2, self.conv3, self.conv4, self.conv5], 0.1)

    def forward(self, x):
        # `features` grows by one tensor per intermediate convolution.
        features = [x]
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            features.append(self.lrelu(conv(torch.cat(features, 1))))
        fused = self.conv5(torch.cat(features, 1))
        return fused * 0.2 + x


class RRDB(nn.Module):
    '''Residual in Residual Dense Block

    Chains three ResidualDenseBlock_5C modules and adds their output, scaled
    by 0.2, back onto the block input.
    '''

    def __init__(self, nf, gc=32):
        super().__init__()
        self.RDB1 = ResidualDenseBlock_5C(nf, gc)
        self.RDB2 = ResidualDenseBlock_5C(nf, gc)
        self.RDB3 = ResidualDenseBlock_5C(nf, gc)

    def forward(self, x):
        out = x
        for dense_block in (self.RDB1, self.RDB2, self.RDB3):
            out = dense_block(out)
        return out * 0.2 + x


class RRDBNet(nn.Module):
    """RRDB trunk followed by nearest-neighbour upsampling stages.

    Args:
        in_nc: input channels.
        out_nc: output channels.
        nf: feature channels used throughout the network.
        nb: number of RRDB blocks in the trunk.
        gc: growth channels inside each dense block.
        sf: scale factor; a second x2 upsampling stage is created and applied
            only when `sf == 4`, otherwise a single x2 stage is used.
    """

    def __init__(self, in_nc=3, out_nc=3, nf=64, nb=23, gc=32, sf=4):
        super().__init__()
        RRDB_block_f = functools.partial(RRDB, nf=nf, gc=gc)
        self.sf = sf
        print([in_nc, out_nc, nf, nb, gc, sf])

        def conv3x3(c_in, c_out):
            return nn.Conv2d(c_in, c_out, 3, 1, 1, bias=True)

        self.conv_first = conv3x3(in_nc, nf)
        self.RRDB_trunk = make_layer(RRDB_block_f, nb)
        self.trunk_conv = conv3x3(nf, nf)
        #### upsampling
        self.upconv1 = conv3x3(nf, nf)
        if self.sf==4:
            self.upconv2 = conv3x3(nf, nf)
        self.HRconv = conv3x3(nf, nf)
        self.conv_last = conv3x3(nf, out_nc)

        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)

    def forward(self, x):
        shallow = self.conv_first(x)
        # Long skip connection around the whole RRDB trunk.
        fea = shallow + self.trunk_conv(self.RRDB_trunk(shallow))

        up_convs = [self.upconv1]
        if self.sf==4:
            up_convs.append(self.upconv2)
        for up_conv in up_convs:
            fea = self.lrelu(up_conv(F.interpolate(fea, scale_factor=2, mode='nearest')))

        return self.conv_last(self.lrelu(self.HRconv(fea)))

In [654]:
from torchsummary import summary as summary_

In [655]:
# BSRNet = RRDBNet(in_nc=3, out_nc=3, nf=64, nb=23, gc=32, sf=4)  # define network
# model_path = 'BSRGAN.pth'
# BSRNet.load_state_dict(torch.load(model_path), strict=True)
# BSRNet.cuda()
# summary_(BSRNet,(3,64,64))

In [656]:
# effnet_b6_test = models.efficientnet_b6(pretrained=True)
# #dfs_freeze(effnet_b6_test)
# effnet_b6_test.cuda()

# summary_(effnet_b6_test,(3,64,224))

In [657]:
def model_freeze(model):
    """Disable gradient tracking for every parameter of `model`."""
    for param in model.parameters():
        param.requires_grad_(False)

In [658]:
def dfs_freeze(model):
    """Freeze `model` and all of its descendants.

    The previous implementation only touched parameters reached through
    `named_children()`, so parameters owned directly by `model` itself
    (for example when `model` is a leaf such as `nn.Linear`) stayed trainable.
    `model.parameters()` already yields the parameters of every descendant,
    so a single pass covers the root and the whole sub-tree.

    Args:
        model: module whose parameters get `requires_grad = False`.
    """
    for param in model.parameters():
        param.requires_grad = False

In [669]:
import torchvision.models as models

class RecognitionModel_eff(nn.Module):
    """Text recogniser: frozen BSRGAN x2 -> EfficientNet-B0 features -> BiLSTM -> per-step logits.

    Args:
        num_chars: number of output classes (vocabulary size, blank included).
        rnn_hidden_size: hidden size of each recurrent direction and output
            size of the projection feeding the recurrent layer.

    The `rnn` and `gru` layers are constructed but `forward` only uses `lstm`.
    They are kept so the parameter set / state_dict layout stays unchanged.
    """

    def __init__(self, num_chars=len(char2idx), rnn_hidden_size=256):
        super(RecognitionModel_eff, self).__init__()
        self.num_chars = num_chars
        self.rnn_hidden_size = rnn_hidden_size

        #BSRGAN
        self.bsrgan = RRDBNet(in_nc=3, out_nc=3, nf=64, nb=23, gc=32, sf=2)
        model_path = 'BSRGANx2.pth'
        # Load onto the CPU first so the checkpoint can be read on hosts
        # without CUDA; the whole model is moved to its device later.
        self.bsrgan.load_state_dict(torch.load(model_path, map_location='cpu'), strict=True)
        model_freeze(self.bsrgan)
        #dfs_freeze(self.bsrgan)

        #efficientNet : backbone
        effnet = models.efficientnet_b0(pretrained=True)
        #effnet = models.efficientnet_b7(pretrained=True)
        #effnet = models.efficientnet_b6(pretrained=True)

        # CNN Feature Extract: every EfficientNet feature stage except the
        # last three, followed by a (3, 6) convolution up to 256 channels.
        effnet_modules = list(effnet.features)[:-3]
        self.feature_extract = nn.Sequential(
            *effnet_modules,
            nn.Conv2d(112, 256, kernel_size=(3,6), stride=1, padding=1), #b0
            #nn.Conv2d(176, 256, kernel_size=(3,6), stride=1, padding=1), #b5
            #nn.Conv2d(200, 256, kernel_size=(3,6), stride=1, padding=1), #b6
            #nn.Conv2d(224, 256, kernel_size=(3,6), stride=1, padding=1), #b7
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True)
        )

        # 1024 = channels * height of the feature map
        # (presumably 256 * 4 for 64-pixel-high inputs — TODO confirm).
        self.linear1 = nn.Linear(1024, rnn_hidden_size)

        # RNN
        self.rnn = nn.RNN(input_size=rnn_hidden_size,
                            hidden_size=rnn_hidden_size,
                            bidirectional=True,
                            batch_first=True)
        self.linear2 = nn.Linear(self.rnn_hidden_size*2, num_chars)

        #LSTM
        self.lstm = nn.LSTM(input_size=rnn_hidden_size,
                           hidden_size= rnn_hidden_size,
                           bidirectional = True,
                           batch_first=True)

        #GRU
        self.gru = nn.GRU(input_size=rnn_hidden_size,
                           hidden_size= rnn_hidden_size,
                           bidirectional = True,
                           batch_first=True)

    def forward(self, x,cb=None,cr=None,state='train'):
        """Return logits of shape [T, batch_size, num_chars].

        Args:
            x: image batch laid out as (batch, channel, height, width); it is
                multiplied by 1/255 here, so pixel values are presumably in
                0..255 — TODO confirm against the data pipeline.
            cb, cr, state: accepted for call compatibility; unused.
        """
        mean=(0.485, 0.456, 0.406)
        std=(0.229, 0.224, 0.225)
        # bsrgan
        x = x * (1/255)
        x = self.bsrgan(x) # (b,c,h,w)

        # resize: undo the x2 super-resolution so the backbone sees the
        # original spatial size
        x = F.interpolate(x,scale_factor=0.5,mode='nearest')

        # normalize: per-channel (x - mean) / std, broadcast over the batch
        # instead of writing into x channel by channel
        channel_mean = x.new_tensor(mean).view(1, 3, 1, 1)
        channel_std = x.new_tensor(std).view(1, 3, 1, 1)
        x = (x - channel_mean) / channel_std

        # CNN
        x = self.feature_extract(x) # [batch_size, channels, height, width]
        x = x.permute(0, 3, 1, 2) # [batch_size, width, channels, height]

        batch_size = x.size(0)
        T = x.size(1)
        x = x.view(batch_size, T, -1) # [batch_size, T==width, num_features==channels*height]
        x = self.linear1(x)

        # RNN
        #x, hidden = self.rnn(x)

        #LSTM (final hidden/cell states are not needed)
        x, _ = self.lstm(x)
        #GRU
        #x, hidden = self.gru(x)

        output = self.linear2(x)
        # T is the feature-map width, not a fixed constant.
        output = output.permute(1, 0, 2) # [T, batch_size, num_classes]

        return output

In [670]:
#RecognitionModel_test = RecognitionModel_eff()
#RecognitionModel_test.cuda()
#summary_(RecognitionModel_test,(3,64,224))

In [671]:
# class RecognitionModel(nn.Module):
#     def __init__(self, num_chars=len(char2idx), rnn_hidden_size=256):
#         super(RecognitionModel, self).__init__()
#         self.num_chars = num_chars
#         self.rnn_hidden_size = rnn_hidden_size
        
#         # CNN Backbone = 사전학습된 resnet18 활용
#         # https://arxiv.org/abs/1512.03385
#         resnet = resnet18(pretrained=True)
#         # CNN Feature Extract
#         resnet_modules = list(resnet.children())[:-3]
#         self.feature_extract = nn.Sequential(
#             *resnet_modules,
#             nn.Conv2d(256, 256, kernel_size=(3,6), stride=1, padding=1),
#             nn.BatchNorm2d(256),
#             nn.ReLU(inplace=True)
#         )

#         self.linear1 = nn.Linear(1024, rnn_hidden_size)
        
#         # RNN
#         self.rnn = nn.RNN(input_size=rnn_hidden_size, 
#                             hidden_size=rnn_hidden_size,
#                             bidirectional=True, 
#                             batch_first=True)
#         self.linear2 = nn.Linear(self.rnn_hidden_size*2, num_chars)
        
        
#     def forward(self, x):
#         # CNN
#         x = self.feature_extract(x) # [batch_size, channels, height, width]
#         x = x.permute(0, 3, 1, 2) # [batch_size, width, channels, height]
         
#         batch_size = x.size(0)
#         T = x.size(1)
#         x = x.view(batch_size, T, -1) # [batch_size, T==width, num_features==channels*height]
#         x = self.linear1(x)
        
#         # RNN
#         x, hidden = self.rnn(x)
        
#         output = self.linear2(x)
#         output = output.permute(1, 0, 2) # [T==10, batch_size, num_classes==num_features]
        
#         return output

In [672]:
criterion = nn.CTCLoss(blank=0) # idx 0 : '-'

In [673]:
def encode_text_batch(text_batch):
    """Encode a batch of label strings as flat CTC targets.

    Args:
        text_batch: iterable of label strings.

    Returns:
        `(targets, target_lengths)`: an IntTensor with the class indices of
        all labels concatenated in batch order, and an IntTensor holding the
        length of each label.
    """
    # Length of every label in the batch.
    target_lengths = torch.IntTensor([len(text) for text in text_batch])
    # Class index of every character, labels concatenated back to back.
    targets = torch.IntTensor([char2idx[char] for text in text_batch for char in text])
    return targets, target_lengths

In [674]:
def compute_loss(text_batch, text_batch_logits):
    """
    Compute the CTC loss of a batch.

    text_batch: list of strings of length equal to batch size
    text_batch_logits: Tensor of size([T, batch_size, num_classes])
    """
    time_steps = text_batch_logits.size(0)
    batch_size = text_batch_logits.size(1)

    log_probs = F.log_softmax(text_batch_logits, 2) # [T, batch_size, num_classes]
    # Every sample uses the full sequence length T as its input length.
    input_lengths = torch.full(size=(batch_size,),
                               fill_value=time_steps,
                               dtype=torch.int32).to(device) # [batch_size]

    targets, target_lengths = encode_text_batch(text_batch)
    return criterion(log_probs, targets, input_lengths, target_lengths)

In [675]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    """Train `model` for CFG['EPOCHS'] epochs and return it with its best weights.

    After each epoch the validation CTC loss is computed. Whenever it
    improves, a copy of the model's state_dict is kept. Previously
    `best_model = model` only stored a reference, so the returned object
    always carried the weights of the last epoch. The best snapshot is now
    loaded back into `model` before returning.

    Args:
        model: network to train; moved to `device`.
        optimizer: optimizer stepping the model's parameters.
        train_loader: yields `(image_batch, text_batch)` pairs.
        val_loader: loader passed to `validation`.
        scheduler: optional scheduler stepped with the validation loss.
        device: device on which batches and the model are placed.

    Returns:
        `model` carrying the weights of the best-validation epoch, or None if
        no epoch produced a validation loss below the initial threshold.
    """
    import copy  # local import: only needed to snapshot the best weights

    model.to(device)

    best_loss = 999999
    best_state = None
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for image_batch, text_batch in tqdm(iter(train_loader)):
            image_batch = image_batch.to(device).float()

            optimizer.zero_grad()

            text_batch_logits = model(image_batch)
            loss = compute_loss(text_batch, text_batch_logits)
            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        _train_loss = np.mean(train_loss)

        _val_loss = validation(model, val_loader, device)
        print(f'Epoch : [{epoch}] Train CTC Loss : [{_train_loss:.5f}] Val CTC Loss : [{_val_loss:.5f}]')

        if scheduler is not None:
            scheduler.step(_val_loss)

        if best_loss > _val_loss:
            best_loss = _val_loss
            # Deep copy: state_dict() returns references to the live tensors,
            # which later optimizer steps would keep overwriting.
            best_state = copy.deepcopy(model.state_dict())

    if best_state is None:
        return None

    model.load_state_dict(best_state)
    return model

In [676]:
def validation(model, val_loader, device):
    """Return the mean per-batch CTC loss of `model` over `val_loader`.

    The model is switched to eval mode and gradients are disabled.
    """
    model.eval()
    batch_losses = []
    with torch.no_grad():
        for images, texts in tqdm(iter(val_loader)):
            logits = model(images.to(device).float())
            batch_losses.append(compute_loss(texts, logits).item())

    return np.mean(batch_losses)

In [677]:
import gc
gc.collect()
torch.cuda.empty_cache()

In [678]:
# Build the recogniser, its optimizer and LR scheduler, then start training.
model = RecognitionModel_eff()
# NOTE(review): train() calls model.train() at the start of every epoch, so
# this eval() call looks like it has no lasting effect — confirm and drop.
model.eval()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
# Halve the learning rate when the monitored value (train() passes the
# validation loss) has not improved for more than `patience` epochs.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2,threshold_mode='abs',min_lr=1e-8, verbose=True)

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

[3, 3, 64, 23, 32, 2]


  0%|          | 0/2246 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [201]:
torch.save(infer_model,'./weight_file/bsrgan_50_cutmix.pt')

In [None]:
# torch.save(infer_model,'./weight_file/b7_40_lstm_cutmix.pt')

In [None]:
# model = RecognitionModel()
# model.eval()
# optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2,threshold_mode='abs',min_lr=1e-8, verbose=True)

# infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

In [548]:
test = pd.read_csv('../open/test.csv')

In [553]:
test_dataset = CustomDataset(test['img_path'].values, None,test_transform)#,train_mode =False)
test_loader = DataLoader(test_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=CFG['NUM_WORKERS'])

In [554]:
def decode_predictions(text_batch_logits):
    """Greedy-decode logits into raw per-step character strings.

    Args:
        text_batch_logits: tensor indexed as [T, batch_size, num_classes];
            `.numpy()` is called on the result, so it must live on the CPU.

    Returns:
        One string per sample containing the arg-max character of every time
        step (blanks and repeats are not removed here).
    """
    token_ids = F.softmax(text_batch_logits, 2).argmax(2) # [T, batch_size]
    per_sample_ids = token_ids.numpy().T # [batch_size, T]
    return ["".join(idx2char[idx] for idx in sample_ids) for sample_ids in per_sample_ids]

def inference(model, test_loader, device):
    """Run `model` over `test_loader` and return the raw decoded strings.

    The model is put in eval mode and run without gradient tracking; each
    batch of logits is moved to the CPU and passed to `decode_predictions`.
    """
    model.eval()
    decoded = []
    with torch.no_grad():
        for images in tqdm(iter(test_loader)):
            logits = model(images.to(device).float())
            decoded.extend(decode_predictions(logits.cpu()))
    return decoded


In [220]:
test_model = torch.load('./weight_file/bsrgan_50_cutmix.pt').cuda()

In [221]:
predictions = inference(test_model, test_loader, device)

  0%|          | 0/2317 [00:00<?, ?it/s]

In [222]:
print(predictions)

['남---------말', '상---------향', '알아-----올-이다', '바구--------서', '살----------', '빼놓--------다', '인식-------하다', '데---------티', '소---------풍', '광---------주', '나---------나', '위---------정', '도---------도', '손---------류', '괴로-----워-하다', '카---------드', '합치--------다', '다양-------하다', '흔---------자', '가능-------하다', '호---------주', '실전-------되다', '피우--------다', '스웨--------터', '시작-------되다', '거---------울', '예---------선', '한국--------말', '세워-------지다', '비---------밥', '좋---------다', '남대-----문-시장', '보수--------적', '사진--------기', '내리--------다', '평---------통', '맛설--------다', '특별--------히', '우---------선', '예---------답', '학---------생', '여---------덮', '본---------질', '현---------대', '속하--------다', '지켜-----준-하다', '불---------다', '아---------래', '걸어-------오다', '선---------원', '호---------주', '약---------속', '공프--------장', '가---------위', '기---------자', '저---------전', '기---------침', '뒤---------쪽', '불---------안', '바람-----직-하다', '작아-------지다', '학---------비', '양---------말', '피---------곤', '말---------씀', '아무--------튼', '경-------

In [223]:
# Post-process each sample's prediction independently.
def remove_duplicates(text):
    """Collapse every run of consecutive identical characters into one.

    Returns an empty string for empty input.
    """
    collapsed = []
    for position, letter in enumerate(text):
        # Keep the first character and any character differing from its predecessor.
        if position == 0 or letter != text[position - 1]:
            collapsed.append(letter)
    return "".join(collapsed)

def correct_prediction(word):
    """Turn a raw decoded string into the final text.

    The string is split on "-", repeated characters are collapsed within
    each piece, and the pieces are joined back together without the "-".
    """
    return "".join(remove_duplicates(piece) for piece in word.split("-"))

In [224]:
submit = pd.read_csv('../open/sample_submission.csv')
submit['label'] = predictions
submit['label'] = submit['label'].apply(correct_prediction)
submit.head(30)

Unnamed: 0,id,label
0,TEST_00000,남말
1,TEST_00001,상향
2,TEST_00002,알아올이다
3,TEST_00003,바구서
4,TEST_00004,살
5,TEST_00005,빼놓다
6,TEST_00006,인식하다
7,TEST_00007,데티
8,TEST_00008,소풍
9,TEST_00009,광주


In [225]:
submit.to_csv('./submission/submission_50_bsrgan_32_cutmix.csv', index=False)

In [None]:
# submit.to_csv('./submission/submission_30_eff_b6_lstm_cutmix.csv', index=False)

In [226]:
sub_text = pd.read_csv('./submission/submission_50_bsrgan_32_cutmix.csv')
sub_text.head(30)

Unnamed: 0,id,label
0,TEST_00000,남말
1,TEST_00001,상향
2,TEST_00002,알아올이다
3,TEST_00003,바구서
4,TEST_00004,살
5,TEST_00005,빼놓다
6,TEST_00006,인식하다
7,TEST_00007,데티
8,TEST_00008,소풍
9,TEST_00009,광주


In [None]:
# Use CutMix, or use an attention-based architecture

In [None]:
#################################################################################

In [429]:
def decode_predictions(text_batch_logits):
    """Greedy-decode logits into raw per-step character strings.

    Args:
        text_batch_logits: tensor indexed as [T, batch_size, num_classes];
            `.numpy()` is called on the result, so it must live on the CPU.

    Returns:
        One string per sample containing the arg-max character of every time
        step (blanks and repeats are not removed here).
    """
    token_ids = F.softmax(text_batch_logits, 2).argmax(2) # [T, batch_size]
    per_sample_ids = token_ids.numpy().T # [batch_size, T]
    return ["".join(idx2char[idx] for idx in sample_ids) for sample_ids in per_sample_ids]

def inference_ensemble(model_list, test_loader, device):
    """Decode `test_loader` with a weighted sum of several models' logits.

    Model `i` (0-based) contributes with weight `(num - i) / num`, so earlier
    entries of `model_list` count more.

    The accumulator used to be `torch.empty(11, batch, 2350).cuda()`, i.e.
    uninitialised memory that was then added to, with a hard-coded sequence
    length, class count and device. The sum is now seeded by the first
    model's output, so shape, dtype and device follow the models themselves.

    Args:
        model_list: non-empty list of models sharing one output shape.
        test_loader: yields image batches.
        device: device on which the batches are placed.

    Returns:
        List of raw decoded strings, one per sample.

    Raises:
        ValueError: if `model_list` is empty.
    """
    num = len(model_list)
    print(num)
    if num == 0:
        raise ValueError("model_list must contain at least one model")
    for member in model_list:
        member.eval()

    preds = []
    with torch.no_grad():
        for image_batch in tqdm(iter(test_loader)):
            # Cast to float, as `inference` does.
            image_batch = image_batch.to(device).float()

            text_batch_logits = None
            for i, member in enumerate(model_list):
                weighted = member(image_batch) * (num - i) / num
                if text_batch_logits is None:
                    text_batch_logits = weighted
                else:
                    text_batch_logits = text_batch_logits + weighted

            text_batch_pred = decode_predictions(text_batch_logits.cpu())

            preds.extend(text_batch_pred)
    return preds


In [430]:
model_test = torch.load('/home/gsc/dacon_KYOWON/Dacon_KYOWON/weight_file/b7_40_lstm_cutmix.pt')

In [None]:
from bsrgan import BSRGAN

model = BSRGAN('/home/gsc/다운로드/BSRGAN.pth', device='cuda:0')
bsrgan_output = model.predict('/home/gsc/사진/100000001020.png')

In [None]:
print(bsrgan_output)

In [None]:
class SRCNN(nn.Module):
    """Three-layer convolutional network that preserves spatial size.

    Layers: 9x9 conv to 64 channels, 5x5 conv to 32 channels, 5x5 conv back
    to `num_channels`; ReLU follows the first two. Each convolution is padded
    by half its kernel size.

    Args:
        num_channels: number of input and output channels.
    """

    def __init__(self, num_channels=1):
        super().__init__()
        self.conv1 = nn.Conv2d(num_channels, 64, kernel_size=9, padding=4)
        self.conv2 = nn.Conv2d(64, 32, kernel_size=5, padding=2)
        self.conv3 = nn.Conv2d(32, num_channels, kernel_size=5, padding=2)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        hidden = self.relu(self.conv1(x))
        hidden = self.relu(self.conv2(hidden))
        return self.conv3(hidden)

In [None]:
srcnn = SRCNN().cuda()
summary_(srcnn,(1,224,224))


In [None]:
image.shape

In [None]:
test = extransform(image=image)['image']
test.shape

In [None]:
test_2 = convert_rgb_to_ycbcr(test)
test_2.shape
test_2[0]

In [None]:
mean=(0.485, 0.456, 0.406)
std=(0.229, 0.224, 0.225)

In [None]:
a = torch.empty(3,64,224)
print(a.shape)
b = convert_rgb_to_ycbcr(a)
print(b.shape)
y = b[0].unsqueeze(0)
cb = b[1].unsqueeze(0)
cr = b[2].unsqueeze(0)
print(y.shape)
print(cb.shape)
print(cr.shape)
a_ = torch.empty(128,1,64,224)
b_ = torch.empty(128,1,64,224)
c_ = torch.empty(128,1,64,224)
print(a_.shape)
print(a_[0,0,:,:])
k = torch.cat([a_,b_,c_],dim=1)
print(k.shape)
k_ = convert_ycbcr_to_rgb(k)
print(k_.shape)
print(k_[0,:,:,0])
for i in range(3):
    k_[:,:,:,i] = (k_[:,:,:,i]-mean[i])/std[i]
print(k_.shape)
print(k_[0,:,:,0])

In [None]:
image_ycbcr = convert_rgb_to_ycbcr(image)
print(image_ycbcr.shape)

y = image_ycbcr[...,0]
print(y.shape)

y /=255.
print(y.shape)

y = torch.from_numpy(y).to(device)
y = y.unsqueeze(0)
#k = torch.Tensor(y)
print(y.shape)

z = convert_ycbcr_to_rgb(y)
print(z.shape)

In [None]:
# 1. Freeze the SRCNN
# 2. Find out how to normalize at the test stage

In [None]:
transform = A.Compose([
                            A.Resize(64,224),
                            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
                            ToTensorV2()
                            ])

In [None]:
def convert_rgb_to_ycbcr(img):
    """Convert an RGB image to YCbCr, returning channels-last (H, W, 3).

    Args:
        img: a NumPy array indexed as [h, w, channel], or a torch.Tensor
            indexed as [channel, h, w] (a 4-D tensor is first passed through
            `squeeze(0)`).

    Returns:
        An array/tensor of the same library with Y, Cb, Cr on the last axis.

    Raises:
        Exception: if `img` is neither exactly np.ndarray nor torch.Tensor.
    """
    if type(img) == np.ndarray:
        red, green, blue = img[:, :, 0], img[:, :, 1], img[:, :, 2]

        def to_channels_last(planes):
            return np.array(planes).transpose([1, 2, 0])
    elif type(img) == torch.Tensor:
        if len(img.shape) == 4:
            img = img.squeeze(0)
        red, green, blue = img[0, :, :], img[1, :, :], img[2, :, :]

        def to_channels_last(planes):
            return torch.stack(planes, dim=0).permute(1, 2, 0)
    else:
        raise Exception('Unknown Type', type(img))

    luma = 16. + (64.738 * red + 129.057 * green + 25.064 * blue) / 256.
    chroma_b = 128. + (-37.945 * red - 74.494 * green + 112.439 * blue) / 256.
    chroma_r = 128. + (112.439 * red - 94.154 * green - 18.285 * blue) / 256.
    return to_channels_last([luma, chroma_b, chroma_r])
def convert_ycbcr_to_rgb(img):
    """Convert a YCbCr image to RGB, returning channels-last output.

    Args:
        img: a NumPy array indexed as [h, w, channel], or a batched
            torch.Tensor indexed as [n, channel, h, w].

    Returns:
        For NumPy input an (H, W, 3) array; for tensor input an
        (N, H, W, 3) tensor, with R, G, B on the last axis.

    Raises:
        Exception: if `img` is neither exactly np.ndarray nor torch.Tensor.
    """
    if type(img) == np.ndarray:
        luma, chroma_b, chroma_r = img[:, :, 0], img[:, :, 1], img[:, :, 2]

        def to_channels_last(planes):
            return np.array(planes).transpose([1, 2, 0])
    elif type(img) == torch.Tensor:
        luma, chroma_b, chroma_r = img[:,0, :, :], img[:,1, :, :], img[:,2, :, :]

        def to_channels_last(planes):
            return torch.stack(planes, dim=1).permute(0,2, 3, 1)
    else:
        raise Exception('Unknown Type', type(img))

    red = 298.082 * luma / 256. + 408.583 * chroma_r / 256. - 222.921
    green = 298.082 * luma / 256. - 100.291 * chroma_b / 256. - 208.120 * chroma_r / 256. + 135.576
    blue = 298.082 * luma / 256. + 516.412 * chroma_b / 256. - 276.836
    return to_channels_last([red, green, blue])

In [None]:
import PIL.Image as pil_image

In [None]:
##########################################################################

In [488]:
import functools
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init


def initialize_weights(net_l, scale=1):
    """Re-initialise Conv2d / Linear / BatchNorm2d layers in place.

    Conv2d and Linear weights get Kaiming-normal (fan_in) values multiplied
    by `scale`, and their biases (if any) are zeroed. BatchNorm2d weights are
    set to 1 and biases to 0.

    Args:
        net_l: a module or a list of modules; every sub-module is visited.
        scale: multiplier applied to the freshly initialised weights.
    """
    nets = net_l if isinstance(net_l, list) else [net_l]
    for net in nets:
        for module in net.modules():
            if isinstance(module, (nn.Conv2d, nn.Linear)):
                init.kaiming_normal_(module.weight, a=0, mode='fan_in')
                module.weight.data *= scale  # shrink, e.g. for residual blocks
                if module.bias is not None:
                    module.bias.data.zero_()
            elif isinstance(module, nn.BatchNorm2d):
                init.constant_(module.weight, 1)
                init.constant_(module.bias.data, 0.0)


def make_layer(block, n_layers):
    """Stack `n_layers` modules produced by calling `block()` into an nn.Sequential.

    Args:
        block: zero-argument callable returning a new module on each call.
        n_layers: number of modules to create.
    """
    return nn.Sequential(*[block() for _ in range(n_layers)])


class ResidualDenseBlock_5C(nn.Module):
    """Five-convolution densely connected block with a scaled residual.

    Each convolution receives the block input concatenated (along channels)
    with the outputs of all earlier convolutions. The last convolution maps
    back to `nf` channels; its output is scaled by 0.2 and added to the input.

    Args:
        nf: number of input/output feature channels.
        gc: growth channels, i.e. channels produced by each intermediate conv.
        bias: whether the convolutions carry a bias term.
    """

    def __init__(self, nf=64, gc=32, bias=True):
        super().__init__()
        conv_kwargs = dict(kernel_size=3, stride=1, padding=1, bias=bias)
        self.conv1 = nn.Conv2d(nf, gc, **conv_kwargs)
        self.conv2 = nn.Conv2d(nf + gc, gc, **conv_kwargs)
        self.conv3 = nn.Conv2d(nf + 2 * gc, gc, **conv_kwargs)
        self.conv4 = nn.Conv2d(nf + 3 * gc, gc, **conv_kwargs)
        self.conv5 = nn.Conv2d(nf + 4 * gc, nf, **conv_kwargs)
        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)

        # Re-initialise all five convolutions with weights scaled by 0.1.
        initialize_weights([self.conv1, self.conv2, self.conv3, self.conv4, self.conv5], 0.1)

    def forward(self, x):
        # `features` grows by one tensor per intermediate convolution.
        features = [x]
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            features.append(self.lrelu(conv(torch.cat(features, 1))))
        fused = self.conv5(torch.cat(features, 1))
        return fused * 0.2 + x


class RRDB(nn.Module):
    """Residual in Residual Dense Block.

    Chains three ``ResidualDenseBlock_5C`` modules and adds the result,
    scaled by 0.2, back onto the input.

    Args:
        nf: channel count passed to every dense block.
        gc: growth channels passed to every dense block.
    """

    def __init__(self, nf, gc=32):
        super().__init__()
        self.RDB1 = ResidualDenseBlock_5C(nf, gc)
        self.RDB2 = ResidualDenseBlock_5C(nf, gc)
        self.RDB3 = ResidualDenseBlock_5C(nf, gc)

    def forward(self, x):
        """Run the three dense blocks in sequence and apply the outer skip."""
        refined = self.RDB3(self.RDB2(self.RDB1(x)))
        return refined * 0.2 + x


class RRDBNet(nn.Module):
    """Super-resolution network built from a trunk of ``RRDB`` blocks.

    A first 3x3 conv lifts the input to ``nf`` channels, ``nb`` RRDB blocks
    plus a trunk conv refine it (with a skip connection around the trunk),
    and one or two nearest-neighbour x2 upsampling stages followed by two
    convolutions produce the output.

    Args:
        in_nc: input channels.
        out_nc: output channels.
        nf: feature channels used throughout the network.
        nb: number of RRDB blocks in the trunk.
        gc: growth channels inside each dense block.
        sf: scale factor. A second x2 upsampling stage is created and applied
            only when ``sf == 4``; any other value yields a single x2 stage.
    """

    def __init__(self, in_nc=3, out_nc=3, nf=64, nb=23, gc=32, sf=4):
        super().__init__()
        self.sf = sf
        print([in_nc, out_nc, nf, nb, gc, sf])

        def conv3x3(c_in, c_out):
            # 3x3 convolution, stride 1, padding 1 (spatial size preserved).
            return nn.Conv2d(c_in, c_out, 3, 1, 1, bias=True)

        self.conv_first = conv3x3(in_nc, nf)
        self.RRDB_trunk = make_layer(lambda: RRDB(nf=nf, gc=gc), nb)
        self.trunk_conv = conv3x3(nf, nf)
        #### upsampling
        self.upconv1 = conv3x3(nf, nf)
        if self.sf == 4:
            self.upconv2 = conv3x3(nf, nf)
        self.HRconv = conv3x3(nf, nf)
        self.conv_last = conv3x3(nf, out_nc)

        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)

    def forward(self, x):
        """Return the upscaled image tensor for input ``x``."""
        shallow = self.conv_first(x)
        fea = shallow + self.trunk_conv(self.RRDB_trunk(shallow))

        # One x2 stage always; a second one only for the x4 configuration.
        up_stages = [self.upconv1]
        if self.sf == 4:
            up_stages.append(self.upconv2)
        for up_conv in up_stages:
            enlarged = F.interpolate(fea, scale_factor=2, mode='nearest')
            fea = self.lrelu(up_conv(enlarged))

        return self.conv_last(self.lrelu(self.HRconv(fea)))

In [489]:
import torchvision.models as models

class RecognitionModel_eff(nn.Module):
    """Text recogniser: truncated EfficientNet-B7 features -> linear -> BiLSTM -> class scores.

    Args:
        num_chars: number of output classes per time step (defaults to the
            size of the notebook-level ``char2idx`` mapping at definition time).
        rnn_hidden_size: hidden size of the recurrent layers and output size
            of ``linear1``.

    Note:
        ``rnn`` and ``gru`` are created alongside ``lstm`` but ``forward``
        only uses ``lstm``.
    """

    def __init__(self, num_chars=len(char2idx), rnn_hidden_size=256):
        super().__init__()
        self.num_chars = num_chars
        self.rnn_hidden_size = rnn_hidden_size

        # CNN feature extractor: EfficientNet-B7 without its last three
        # feature stages, followed by a conv/BN/ReLU head.
        backbone_stages = list(models.efficientnet_b7(pretrained=True).features)[:-3]
        # Head input channels per backbone variant: b5 -> 176, b6 -> 200, b7 -> 224.
        head = [
            nn.Conv2d(224, 256, kernel_size=(3, 6), stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
        ]
        self.feature_extract = nn.Sequential(*backbone_stages, *head)

        # 1024 in-features == channels * height of the feature map after flattening.
        self.linear1 = nn.Linear(1024, rnn_hidden_size)

        recurrent_opts = dict(
            input_size=rnn_hidden_size,
            hidden_size=rnn_hidden_size,
            bidirectional=True,
            batch_first=True,
        )
        # RNN
        self.rnn = nn.RNN(**recurrent_opts)
        # Bidirectional output -> 2 * hidden features per step.
        self.linear2 = nn.Linear(self.rnn_hidden_size * 2, num_chars)
        # LSTM
        self.lstm = nn.LSTM(**recurrent_opts)
        # GRU
        self.gru = nn.GRU(**recurrent_opts)

    def forward(self, x, cb=None, cr=None, state='train'):
        """Return per-step class scores shaped ``[T, batch_size, num_chars]``.

        ``cb``, ``cr`` and ``state`` are accepted but not used.
        """
        # CNN
        feats = self.feature_extract(x)        # [batch_size, channels, height, width]
        feats = feats.permute(0, 3, 1, 2)      # [batch_size, width, channels, height]

        n_batch, n_steps = feats.size(0), feats.size(1)
        # [batch_size, T==width, channels*height]
        seq = self.linear1(feats.view(n_batch, n_steps, -1))

        # LSTM (final hidden/cell states are discarded)
        seq, _ = self.lstm(seq)

        # [batch_size, T, num_chars] -> [T, batch_size, num_chars]
        return self.linear2(seq).permute(1, 0, 2)

In [499]:
import torchvision.models as models

class finalModel(nn.Module):
    """BSRGAN x4 super-resolution followed by the saved text recogniser.

    Pipeline in ``forward``: scale the input by 1/255, super-resolve with
    BSRGAN (x4), shrink back by 0.25 with nearest-neighbour interpolation,
    normalise each channel with fixed mean/std, then run the recogniser.

    Args:
        bsrgan_path: path to the BSRGAN state dict.
        effnet_path: path to the pickled recogniser model (a whole model
            object, not a state dict).
    """

    def __init__(self,
                 bsrgan_path='BSRGAN.pth',
                 effnet_path='/home/gsc/dacon_KYOWON/Dacon_KYOWON/weight_file/b7_40_lstm_cutmix.pt'):
        # Zero-argument super() keeps working even if the name `finalModel`
        # is later rebound to an instance in the notebook.
        super().__init__()

        # BSRGAN
        self.bsrgan = RRDBNet(in_nc=3, out_nc=3, nf=64, nb=23, gc=32, sf=4)
        # Load on CPU so both sub-models start on the same device; the caller
        # moves the whole module with .to(device) / .cuda().
        self.bsrgan.load_state_dict(torch.load(bsrgan_path, map_location='cpu'), strict=True)

        # The recogniser is restored as a complete pickled model, so there is
        # no need to build (and fetch pretrained weights for) a throwaway
        # RecognitionModel_eff first.
        # NOTE(security): torch.load unpickles arbitrary objects; only load
        # checkpoints from a trusted source.
        self.effnet = torch.load(effnet_path, map_location='cpu')

    def forward(self, x):
        """Return the recogniser output for a batch of images ``x``.

        ``x`` is expected as (batch, 3, height, width); it is multiplied by
        1/255 first, so presumably it holds 0-255 pixel values -- confirm
        against the data loader.
        """
        # bsrgan
        x = x * (1 / 255)
        x = self.bsrgan(x)  # (b,c,h,w)

        # resize back to the input resolution (x4 up, then x0.25 down)
        x = F.interpolate(x, scale_factor=0.25, mode='nearest')

        # normalize: per-channel mean/std broadcast over (b, 3, h, w)
        mean = x.new_tensor((0.485, 0.456, 0.406)).view(1, 3, 1, 1)
        std = x.new_tensor((0.229, 0.224, 0.225)).view(1, 3, 1, 1)
        x = (x - mean) / std

        return self.effnet(x)

In [500]:
# Bind the instance to its own name: assigning it to `finalModel` would
# overwrite the class and make this cell fail when it is run a second time.
# `.to(device)` follows the device selected in the configuration cell.
final_model = finalModel().to(device)
predictions = inference(final_model, test_loader, device)

[3, 3, 64, 23, 32, 4]


  0%|          | 0/2317 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [442]:
# Put the raw predictions into the sample-submission frame, then
# post-process every label with correct_prediction.
submit = pd.read_csv('../open/sample_submission.csv').assign(label=predictions)
submit = submit.assign(label=submit['label'].apply(correct_prediction))
submit.head(30)

Unnamed: 0,id,label
0,TEST_00000,남량
1,TEST_00001,상향
2,TEST_00002,받아들이다
3,TEST_00003,바구니
4,TEST_00004,살
5,TEST_00005,빼놓다
6,TEST_00006,인식하다
7,TEST_00007,센터
8,TEST_00008,소풍
9,TEST_00009,광주


In [None]:
# DataFrame.to_csv does not create missing parent folders, so make sure the
# target directory exists before writing.
os.makedirs('./submission', exist_ok=True)
submit.to_csv('./submission/test.csv', index=False)

In [507]:
# exist_ok=True keeps the cell re-runnable; os.mkdir raises FileExistsError
# once the folder is already there.
os.makedirs('results1', exist_ok=True)

In [524]:
import glob
import os.path as osp

# Super-resolve every image matched by `test_img_folder` with BSRGAN x2 and
# save the result as PNG under `output_dir`.
test_img_folder = '/home/gsc/dacon_KYOWON/Dacon_KYOWON/results/*'
output_dir = 'results2'
# cv2.imwrite returns False instead of raising when the folder is missing,
# so create it up front.
os.makedirs(output_dir, exist_ok=True)

bsrgan = RRDBNet(in_nc=3, out_nc=3, nf=64, nb=23, gc=32, sf=2).to(device)
model_path = 'BSRGANx2.pth'
bsrgan.load_state_dict(torch.load(model_path, map_location=device), strict=True)
bsrgan.eval()

for idx, path in enumerate(glob.glob(test_img_folder), 1):
    base = osp.splitext(osp.basename(path))[0]
    print(idx, base)
    # read images
    img = cv2.imread(path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread yields None for files it cannot decode.
        print('skipped (not a readable image):', path)
        continue
    img = img * 1.0 / 255
    # BGR (OpenCV order) -> RGB, HWC -> CHW
    img = torch.from_numpy(np.transpose(img[:, :, [2, 1, 0]], (2, 0, 1))).float()
    img_LR = img.unsqueeze(0).to(device)

    with torch.no_grad():
        # squeeze(0) removes only the batch axis; a bare squeeze() would also
        # drop a height or width of 1.
        output = bsrgan(img_LR).squeeze(0).float().cpu().clamp_(0, 1).numpy()
    # RGB -> BGR, CHW -> HWC for OpenCV
    output = np.transpose(output[[2, 1, 0], :, :], (1, 2, 0))
    output = (output * 255.0).round().astype(np.uint8)
    out_path = osp.join(output_dir, '{:s}.png'.format(base))
    if not cv2.imwrite(out_path, output):
        print('failed to write:', out_path)

[3, 3, 64, 23, 32, 2]
1 TEST_30910
2 TEST_14144
3 TEST_14297
4 TEST_30229
5 TEST_31552
6 TEST_13569
7 TEST_46639
8 TEST_04483
9 TEST_20935
10 TEST_06204
11 TEST_73159
12 TEST_63186
13 TEST_52597
14 TEST_41786
15 TEST_47214
16 TEST_57100
17 TEST_70831
18 TEST_60033
19 TEST_15480
20 TEST_18544
21 TEST_15714
22 TEST_66858
23 TEST_48395
24 TEST_54834
25 TEST_52604
26 TEST_38532
27 TEST_70523
28 TEST_06315
29 TEST_03944
30 TEST_56286
31 TEST_30403
32 TEST_32008
33 TEST_33285
34 TEST_25338
35 TEST_65107
36 TEST_42479
37 TEST_68252
38 TEST_70265
39 TEST_66030
40 TEST_25797
41 TEST_26048
42 TEST_12704
43 TEST_35228
44 TEST_14228
45 TEST_57184
46 TEST_29680
47 TEST_66553
48 TEST_59579
49 TEST_26929
50 TEST_52682
51 TEST_52104
52 TEST_44912
53 TEST_02157
54 TEST_34435
55 TEST_30624
56 TEST_00685
57 TEST_24576
58 TEST_20767
59 TEST_07183
60 TEST_01230
61 TEST_28966
62 TEST_62155
63 TEST_17109
64 TEST_31923
65 TEST_69748
66 TEST_73511
67 TEST_03683
68 TEST_30294
69 TEST_63699
70 TEST_45743
71 TEST

KeyboardInterrupt: 

In [555]:
# Restore the pickled recogniser and run it over the test loader.
# The path is built from the configured `base_dir` rather than repeating the
# absolute location; map_location places the model on the selected `device`
# so the load also works when CUDA is unavailable.
# NOTE(security): torch.load unpickles arbitrary objects; only load
# checkpoints from a trusted source.
effnet = torch.load(
    os.path.join(base_dir, 'weight_file', 'b7_40_lstm_cutmix.pt'),
    map_location=device,
)
predictions = inference(effnet, test_loader, device)


  0%|          | 0/2317 [00:00<?, ?it/s]

In [556]:
# Put the raw predictions into the sample-submission frame, then
# post-process every label with correct_prediction.
submit = pd.read_csv('../open/sample_submission.csv').assign(label=predictions)
submit = submit.assign(label=submit['label'].apply(correct_prediction))
submit.head(30)

Unnamed: 0,id,label
0,TEST_00000,남망
1,TEST_00001,상활
2,TEST_00002,받아이다
3,TEST_00003,바구니
4,TEST_00004,살
5,TEST_00005,빼놓
6,TEST_00006,인식하다
7,TEST_00007,센터
8,TEST_00008,소풍
9,TEST_00009,광주
