In [7]:
import random
import pandas as pd
import numpy as np
import os
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from torchvision.models import resnet18
from torchvision import transforms

from tqdm.auto import tqdm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings(action='ignore') 
import cv2
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2


In [8]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cuda


In [9]:
CFG = {
    'IMG_HEIGHT_SIZE':64, #64
    'IMG_WIDTH_SIZE':224, #224
    'EPOCHS':80,
    'LEARNING_RATE':1e-3,
    'BATCH_SIZE':256,
    'NUM_WORKERS':4, # 본인의 GPU, CPU 환경에 맞게 설정
    'SEED':41
}

In [10]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

In [11]:
os.getcwd()

'/home/gsc/dacon_KYOWON/Dacon_KYOWON'

In [12]:
#window
# base_dir = 'D:/Dacon_KYOWON/Dacon_KYOWON'
# data_dir = "D:/Dacon_KYOWON/open"
#ubuntu
base_dir = '/home/gsc/dacon_KYOWON/Dacon_KYOWON'
data_dir = '/home/gsc/dacon_KYOWON/open'


In [13]:
#df = pd.read_csv(f'{data_dir}/train.csv')

In [14]:
# df_len_count = pd.DataFrame(df['len'].value_counts())
# df_len_count.reset_index(inplace=True)
# df_len_count.columns = ['len', 'len_count']

# display(df_len_count)

# len2count = {k:v for k,v in zip(df_len_count['len'], df_len_count['len_count'])}

# idx = 76888 
# for i in tqdm(range(200000)):
#     select1, select2 = random.randint(0, 76887), random.randint(0, 76887)
#     imgFile1 = f'../open/train/TRAIN_{select1:05d}.png'
#     imgFile2 = f'../open/train/TRAIN_{select2:05d}.png'
    
#     if df['len'][select1] + df['len'][select2] > 6:
#         continue
        
#     if len2count[df['len'][select1] + df['len'][select2]] > 30000:
#         continue
    
#     # 이미지 읽기
#     img1 = cv2.imread(imgFile1, 1);
#     img2 = cv2.imread(imgFile2, 1);
    
#     img1 = cv2.resize(img1,(100,125))
#     img2 = cv2.resize(img2,(100,125))
    
#     addh = cv2.hconcat([img1, img2])
    
#     new_data = {
#         'id' : f"TRAIN_{idx:06d}",
#         'img_path' : f"./Cut_mix/TRAIN_{idx:06d}.png",
#         'label' : df['label'][select1] + df['label'][select2],
#         'len' : df['len'][select1] + df['len'][select2]
#     }
    
#     df = df.append(new_data, ignore_index=True)
#     cv2.imwrite(f"./Cut_mix/TRAIN_{idx:06d}.png", addh)
#     idx += 1
#     len2count[df['len'][select1] + df['len'][select2]] += 1
    
# df.to_csv('./train_cutmix.csv', index=False)

In [15]:
df = pd.read_csv('train_cutmix.csv')

In [16]:
# 제공된 학습데이터 중 1글자 샘플들의 단어사전이 학습/테스트 데이터의 모든 글자를 담고 있으므로 학습 데이터로 우선 배치
df['len'] = df['label'].str.len()
train_v1 = df[df['len']==1]

In [17]:
# 제공된 학습데이터 중 2글자 이상의 샘플들에 대해서 단어길이를 고려하여 Train (80%) / Validation (20%) 분할
df = df[df['len']>1]
train_v2, val, _, _ = train_test_split(df, df['len'], test_size=0.2, random_state=CFG['SEED'])

In [18]:
# 학습 데이터로 우선 배치한 1글자 샘플들과 분할된 2글자 이상의 학습 샘플을 concat하여 최종 학습 데이터로 사용
train = pd.concat([train_v1, train_v2])
print(len(train), len(val))

143707 30001


In [19]:
# 학습 데이터로부터 단어 사전(Vocabulary) 구축
train_gt = [gt for gt in train['label']]
train_gt = "".join(train_gt)
letters = sorted(list(set(list(train_gt))))
print(len(letters))

2349


In [20]:
vocabulary = ["-"] + letters
print(len(vocabulary))
idx2char = {k:v for k,v in enumerate(vocabulary, start=0)}
char2idx = {v:k for k,v in idx2char.items()}

2350


In [21]:
class CustomDataset(Dataset):
    def __init__(self, img_path_list, label_list,transforms=None, train_mode=True):
        self.img_path_list = img_path_list
        self.label_list = label_list
        self.transforms = transforms
        self.train_mode = train_mode
        
    def __len__(self):
        return len(self.img_path_list)
    
    def __getitem__(self, index):
        img_path = self.img_path_list[index]
        image = cv2.imread(f"{data_dir}/{img_path.split('/')[-2]}/{img_path.split('/')[-1]}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transforms is not None:
            image = self.transforms(image=image)['image']
           
        if self.label_list is not None:
            text = self.label_list[index]
            return image, text
        else:
            return image
  

In [22]:
def resize_transform(height,width, state='train'):
    if state == 'train':
        transform = A.Compose([
                                #A.HorizontalFlip(p=0.2),
                                #A.VerticalFlip(p=0.2),
                                A.Rotate(limit=[-10,10], p=1),
                                #A.RandomRotate90(p=0.2),
                                A.Resize(height,width),
                                #A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
                                #A.RandomResizedCrop(height=height, width=width, scale=(0.3, 1.0)),
                                #A.ToGray(),
                                A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
                                ToTensorV2(),
                                ])
    else:
        transform = A.Compose([
                            A.Resize(height,width),
                            #A.ToGray(),
                            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
                            #A.RandomResizedCrop(height=CFG['IMG_SIZE'], width=CFG['IMG_SIZE'], scale=(0.3, 1.0)),               
                            ToTensorV2()
                            ])

    return transform

In [23]:
train_transform = resize_transform(CFG['IMG_HEIGHT_SIZE'],CFG['IMG_WIDTH_SIZE'])
test_transform = resize_transform(CFG['IMG_HEIGHT_SIZE'],CFG['IMG_WIDTH_SIZE'],'test')

In [24]:
img_path = train['label'].values
print(img_path)
#print(f"{data_dir}/{img_path.split('/')[-2]}/{img_path.split('/')[-1]}")

['머' '써' '빈' ... '행동하다익' '옳싫어하다' '손수']


In [25]:
train_dataset = CustomDataset(train['img_path'].values, train['label'].values, train_transform)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=CFG['NUM_WORKERS'])

val_dataset = CustomDataset(val['img_path'].values, val['label'].values,test_transform)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=CFG['NUM_WORKERS'])

In [26]:
image_batch, text_batch = iter(train_loader).next()
print(image_batch.size(), text_batch)

torch.Size([256, 3, 64, 224]) ('팬티적어지다', '글쎄요위협', '동그랗다', '항', '생찾아다니다', '야추가', '버려지다', '되게깸', '빚', '걱정되다긴급', '투표전체', '뺏', '통증옥', '될조심스럽다', '휜', '예의', '그리', '퓌', '메일휴', '역사학수만', '변명', '주무시다새끼', '즐거움죔', '푸르다', '참기름곳곳', '공업', '싸조사하다', '에', '쬡', '안기다총', '생신실현되다', '일본고치다', '참석자', '근거다름없다', '손길', '몽', '운동화', '쫄신랑', '녹화코', '정직하다외침', '쥐참가', '나빠지다성적', '석표시하다', '세기', '보내다확실히', '텍스트', '발소주', '흘러나오다믿', '강요하다', '구분지붕', '꿍', '덤', '젬칼국수', '계산하다퓟', '화분', '깩좌우', '던지다출판', '동화책', '넷째', '수집하다', '입원', '불법', '건전하다년생', '몹시위', '창조하다숟', '자율머물다', '밤새다퓜', '시각주저앉다', '검다', '통과하다', '서부', '손님', '금지되다', '홈페이지요금', '있다감상', '술병', '경영하다녕', '눋', '등록하다깜짝', '며', '뗏할아버지', '앙', '쿤끊어지다', '점차다섯째', '유명하다위', '곡', '쉬다내', '주장하다', '평가되다', '가능해지다', '수', '선자연스럽다', '췻비교하다', '첫유의하다', '양식', '빛', '뵤중요', '들이마시다', '포함접시', '상당수음주', '불법츳', '릉결석하다', '형수', '나가다', '관련', '졌갈증', '재능', '들어오다', '오피스텔았', '어쩐지이리', '정', '깻', '회복되다', '늙', '구분되다청년', '그럼애초', '자차이', '수동적현장', '요구하다폣', '편견아가씨', '금액', '뒵양복', '두다예식장', '땠', '도망가다', '실례의미하다', '밝다경향', '채우다', '신체

In [27]:
from torchsummary import summary as summary_

In [28]:
# effnet_b6 = models.efficientnet_b6(pretrained=True)
# effnet_b6.cuda()
# summary_(effnet_b6,(3,64,224))

In [29]:
cnt = 0
class BidirectionalLSTM(nn.Module):

    def __init__(self, nIn, nHidden, nOut):
        super(BidirectionalLSTM, self).__init__()

        self.rnn = nn.LSTM(nIn, nHidden, bidirectional=True)
        self.embedding = nn.Linear(nHidden * 2, nOut)

    def forward(self, input):
        recurrent, _ = self.rnn(input)
        T, b, h = recurrent.size()
        t_rec = recurrent.view(T * b, h)

        output = self.embedding(t_rec)  # [T * b, nOut]
        output = output.view(T, b, -1)

        return output


class CRNN(nn.Module):

    def __init__(self, imgH, nc, nclass, nh, n_rnn=2, leakyRelu=False):
        super(CRNN, self).__init__()
        assert imgH % 16 == 0, 'imgH has to be a multiple of 16'

        ks = [3, 3, 3, 3, 3, 3, 2,2]
        ps = [1, 1, 1, 1, 1, 1, 0,0]
        ss = [1, 1, 1, 1, 1, 1, 1,1]
        nm = [64, 128, 256, 256, 512, 512, 512,512]

        cnn = nn.Sequential()

        def convRelu(i, batchNormalization=False):
            nIn = nc if i == 0 else nm[i - 1]
            nOut = nm[i]
            cnn.add_module('conv{0}'.format(i),
                           nn.Conv2d(nIn, nOut, ks[i], ss[i], ps[i]))
            if batchNormalization:
                cnn.add_module('batchnorm{0}'.format(i), nn.BatchNorm2d(nOut))
            if leakyRelu:
                cnn.add_module('relu{0}'.format(i),
                               nn.LeakyReLU(0.2, inplace=True))
            else:
                cnn.add_module('relu{0}'.format(i), nn.ReLU(True))

        convRelu(0)
        cnn.add_module('pooling{0}'.format(0), nn.MaxPool2d(2, 2))  # 64x16x64
        convRelu(1)
        cnn.add_module('pooling{0}'.format(1), nn.MaxPool2d(2, 2))  # 128x8x32
        convRelu(2, True)
        convRelu(3)
        cnn.add_module('pooling{0}'.format(2),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 256x4x16
        convRelu(4, True)
        convRelu(5)
        cnn.add_module('pooling{0}'.format(3),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 512x2x16
        convRelu(6, True)  # 512x1x16
        convRelu(7, True)  # 
        cnn.add_module('pooling{0}'.format(4),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 
        self.cnn = cnn
        self.rnn = nn.Sequential(
            BidirectionalLSTM(512, nh, nh),
            BidirectionalLSTM(nh, nh, nclass))

    def forward(self, input):
        conv = self.cnn(input)
        b, c, h, w = conv.size()
        #print(b,c,h,w)

        assert h == 1, "the height of conv must be 1"
        conv = conv.squeeze(2)
        conv = conv.permute(2, 0, 1)  # [w, b, c]

        # rnn features
        output = self.rnn(conv)

        return output

In [30]:
crnn = CRNN(64,3,2350,256).cuda()
input = torch.Tensor(64,3,64,224).cuda()
output = crnn(input)
print(output.shape)

torch.Size([57, 64, 2350])


In [31]:
# import torchvision.models as models

# class RecognitionModel_eff(nn.Module):
#     def __init__(self, num_chars=len(char2idx), rnn_hidden_size=256):
#         super(RecognitionModel_eff, self).__init__()
#         self.num_chars = num_chars
#         self.rnn_hidden_size = rnn_hidden_size
        
#         # CNN Backbone = 사전학습된 resnet18 활용
#         # https://arxiv.org/abs/1512.03385
#         #effnet = models.efficientnet_b5(pretrained=True)
#         #effnet = models.efficientnet_b7(pretrained=True)
#         effnet = models.efficientnet_b6(pretrained=True)
#         # CNN Feature Extract
#         effnet_modules = list(effnet.features)[:-3]
#         self.feature_extract = nn.Sequential(
#             *effnet_modules,
#             #nn.Conv2d(176, 256, kernel_size=(3,6), stride=1, padding=1), #b5
#             nn.Conv2d(200, 256, kernel_size=(3,6), stride=1, padding=1), #b6
#             #nn.Conv2d(224, 256, kernel_size=(3,6), stride=1, padding=1), #b7
#             nn.BatchNorm2d(256),
#             nn.ReLU(inplace=True)
#         )

#         self.linear1 = nn.Linear(1024, rnn_hidden_size)
        
#         # RNN
#         self.rnn = nn.RNN(input_size=rnn_hidden_size, 
#                             hidden_size=rnn_hidden_size,
#                             bidirectional=True, 
#                             batch_first=True)
#         self.linear2 = nn.Linear(self.rnn_hidden_size*2, num_chars)
        
#         #LSTM
#         self.lstm = nn.LSTM(input_size=rnn_hidden_size,
#                            hidden_size= rnn_hidden_size,
#                            bidirectional = True,
#                            batch_first=True)
        
#         #GRU
#         self.gru = nn.GRU(input_size=rnn_hidden_size,
#                            hidden_size= rnn_hidden_size,
#                            bidirectional = True,
#                            batch_first=True)
        
#     def forward(self, x):
#         # CNN
#         x = self.feature_extract(x) # [batch_size, channels, height, width]
#         x = x.permute(0, 3, 1, 2) # [batch_size, width, channels, height]
         
#         batch_size = x.size(0)
#         T = x.size(1)
#         x = x.view(batch_size, T, -1) # [batch_size, T==width, num_features==channels*height]
#         x = self.linear1(x)
#         #print(x.shape)
#         # RNN
#         #x, hidden = self.rnn(x)
        
#         #LSTM
#         x, (hidden,_) = self.lstm(x)
#         #GRU
#         #x, (hidden,_) = self.gru(x)
        
#         output = self.linear2(x)
#         output = output.permute(1, 0, 2) # [T==10, batch_size, num_classes==num_features]
        
#         return output

In [32]:
# RecognitionModel_test = RecognitionModel()
# RecognitionModel_test.cuda()
# summary_(RecognitionModel_test,(3,64,224))

In [33]:
# class RecognitionModel(nn.Module):
#     def __init__(self, num_chars=len(char2idx), rnn_hidden_size=256):
#         super(RecognitionModel, self).__init__()
#         self.num_chars = num_chars
#         self.rnn_hidden_size = rnn_hidden_size
        
#         # CNN Backbone = 사전학습된 resnet18 활용
#         # https://arxiv.org/abs/1512.03385
#         resnet = resnet18(pretrained=True)
#         # CNN Feature Extract
#         resnet_modules = list(resnet.children())[:-3]
#         self.feature_extract = nn.Sequential(
#             *resnet_modules,
#             nn.Conv2d(256, 256, kernel_size=(3,6), stride=1, padding=1),
#             nn.BatchNorm2d(256),
#             nn.ReLU(inplace=True)
#         )

#         self.linear1 = nn.Linear(1024, rnn_hidden_size)
        
#         # RNN
#         self.rnn = nn.RNN(input_size=rnn_hidden_size, 
#                             hidden_size=rnn_hidden_size,
#                             bidirectional=True, 
#                             batch_first=True)
#         self.linear2 = nn.Linear(self.rnn_hidden_size*2, num_chars)
        
        
#     def forward(self, x):
#         # CNN
#         x = self.feature_extract(x) # [batch_size, channels, height, width]
#         x = x.permute(0, 3, 1, 2) # [batch_size, width, channels, height]
         
#         batch_size = x.size(0)
#         T = x.size(1)
#         x = x.view(batch_size, T, -1) # [batch_size, T==width, num_features==channels*height]
#         x = self.linear1(x)
        
#         # RNN
#         x, hidden = self.rnn(x)
        
#         output = self.linear2(x)
#         output = output.permute(1, 0, 2) # [T==10, batch_size, num_classes==num_features]
        
#         return output

In [34]:
criterion = nn.CTCLoss(blank=0) # idx 0 : '-'

In [35]:
def encode_text_batch(text_batch):
    # batch 길이 구하기
    text_batch_targets_lens = [len(text) for text in text_batch]
    text_batch_targets_lens = torch.IntTensor(text_batch_targets_lens)
    #batch 합치기
    text_batch_concat = "".join(text_batch)
    #batch 글자 숫자로 인코딩
    text_batch_targets = [char2idx[c] for c in text_batch_concat]
    text_batch_targets = torch.IntTensor(text_batch_targets)
    
    return text_batch_targets, text_batch_targets_lens

In [36]:
def compute_loss(text_batch, text_batch_logits):
    """
    text_batch: list of strings of length equal to batch size
    text_batch_logits: Tensor of size([T, batch_size, num_classes])
    """
    text_batch_logps = F.log_softmax(text_batch_logits, 2) # [T, batch_size, num_classes]  
    text_batch_logps_lens = torch.full(size=(text_batch_logps.size(1),), 
                                       fill_value=text_batch_logps.size(0), 
                                       dtype=torch.int32).to(device) # [batch_size] 

    text_batch_targets, text_batch_targets_lens = encode_text_batch(text_batch)
    loss = criterion(text_batch_logps, text_batch_targets, text_batch_logps_lens, text_batch_targets_lens)

    return loss

In [37]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)
    
    best_loss = 999999
    best_model = None
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for image_batch, text_batch in tqdm(iter(train_loader)):
            image_batch = image_batch.to(device)
            
            optimizer.zero_grad()
            
            text_batch_logits = model(image_batch)
            loss = compute_loss(text_batch, text_batch_logits)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
        
        _train_loss = np.mean(train_loss)
        
        _val_loss = validation(model, val_loader, device)
        print(f'Epoch : [{epoch}] Train CTC Loss : [{_train_loss:.5f}] Val CTC Loss : [{_val_loss:.5f}]')
        
        if scheduler is not None:
            scheduler.step(_val_loss)
        
        if best_loss > _val_loss:
            best_loss = _val_loss
            best_model = model
    
    return best_model

In [38]:
def validation(model, val_loader, device):
    model.eval()
    val_loss = []
    with torch.no_grad():
        for image_batch, text_batch in tqdm(iter(val_loader)):
            image_batch = image_batch.to(device)
            
            text_batch_logits = model(image_batch)
            loss = compute_loss(text_batch, text_batch_logits)
            
            val_loss.append(loss.item())
    
    _val_loss = np.mean(val_loss)
    return _val_loss

In [39]:
import gc
gc.collect()
torch.cuda.empty_cache()

In [None]:
# model = RecognitionModel_eff()
# model.eval()
# optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2,threshold_mode='abs',min_lr=1e-8, verbose=True)

# infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

In [None]:
torch.save(infer_model,'./weight_file/TransOCR_64_80_cutmix.pt')

In [None]:
# torch.save(infer_model,'./weight_file/b7_40_lstm_cutmix.pt')

In [None]:
# model = RecognitionModel()
# model.eval()
# optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2,threshold_mode='abs',min_lr=1e-8, verbose=True)

# infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

In [None]:
test = pd.read_csv('../open/test.csv')

In [None]:
test_dataset = CustomDataset(test['img_path'].values, None,test_transform)
test_loader = DataLoader(test_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=CFG['NUM_WORKERS'])

In [None]:
def decode_predictions(text_batch_logits):
    text_batch_tokens = F.softmax(text_batch_logits, 2).argmax(2) # [T, batch_size]
    text_batch_tokens = text_batch_tokens.numpy().T # [batch_size, T]

    text_batch_tokens_new = []
    for text_tokens in text_batch_tokens:
        text = [idx2char[idx] for idx in text_tokens]
        text = "".join(text)
        text_batch_tokens_new.append(text)

    return text_batch_tokens_new

def inference(model, test_loader, device):
    model.eval()
    preds = []
    with torch.no_grad():
        for image_batch in tqdm(iter(test_loader)):
            image_batch = image_batch.to(device)
            
            text_batch_logits = model(image_batch)
            
            text_batch_pred = decode_predictions(text_batch_logits.cpu())
            
            preds.extend(text_batch_pred)
    return preds

In [None]:
predictions = inference(infer_model, test_loader, device)

In [None]:
print(predictions)

In [None]:
# 샘플 별 추론결과를 독립적으로 후처리
def remove_duplicates(text):
    if len(text) > 1:
        letters = [text[0]] + [letter for idx, letter in enumerate(text[1:], start=1) if text[idx] != text[idx-1]]
    elif len(text) == 1:
        letters = [text[0]]
    else:
        return ""
    return "".join(letters)

def correct_prediction(word):
    parts = word.split("-")
    parts = [remove_duplicates(part) for part in parts]
    corrected_word = "".join(parts)
    return corrected_word

In [None]:
submit = pd.read_csv('../open/sample_submission.csv')
submit['label'] = predictions
submit['label'] = submit['label'].apply(correct_prediction)

In [None]:
submit.to_csv('./submission/submission_80_TransOCR_64_cutmix.csv', index=False)

In [None]:
# submit.to_csv('./submission/submission_30_eff_b6_lstm_cutmix.csv', index=False)

In [None]:
sub_text = pd.read_csv('./submission/submission_80_TransOCR_64_cutmix.csv')
sub_text.head(30)

In [None]:
# cutmix사용 혹은 ATTENTION 구조 사용

In [None]:
#################################################################################

In [None]:
def decode_predictions(text_batch_logits):
    text_batch_tokens = F.softmax(text_batch_logits, 2).argmax(2) # [T, batch_size]
    text_batch_tokens = text_batch_tokens.numpy().T # [batch_size, T]

    text_batch_tokens_new = []
    for text_tokens in text_batch_tokens:
        text = [idx2char[idx] for idx in text_tokens]
        text = "".join(text)
        text_batch_tokens_new.append(text)

    return text_batch_tokens_new

def inference_ensemble(model_list, test_loader, device):

    text_batch_logits = torch.empty(11,256,2350).cuda()

    num = len(model_list)
    for i in range(num):
        model_list[i].eval()
    
    preds = []
    with torch.no_grad():
        for image_batch in tqdm(iter(test_loader)):
            image_batch = image_batch.to(device)
            
            for i in range(num):
                text_batch_logits +=model_list[i](image_batch)
            
            text_batch_logits = text_batch_logits/num
            text_batch_pred = decode_predictions(text_batch_logits.cpu())
            
            preds.extend(text_batch_pred)
    return preds

In [41]:
#infer_model1 = torch.load('./weight_file/b7_40_lstm_cutmix.pt').cuda()
infer_model2 = torch.load('./weight_file/b6_30_lstm_cutmix.pt').cuda()
infer_model3 = torch.load('./weight_file/b5_30_lstm_cutmix.pt').cuda()
model_list = [infer_model1,infer_model2,infer_model3]

AttributeError: Can't get attribute 'RecognitionModel_eff' on <module '__main__'>

In [None]:
input = torch.Tensor(64,3,64,224).cuda()
output1 = infer_model1(input)
print(output1.shape)


In [None]:
output2 = infer_model2(input)
print(output2.shape)


In [None]:
output3 = infer_model3(input)
print(output3.shape)

In [None]:
predictions_test = inference_ensemble(model_list, test_loader, device)

In [None]:
print(predictions_test)

In [None]:
submit_test = pd.read_csv('../open/sample_submission.csv')
submit_test['label'] = predictions_test
submit_test['label'] = submit['label'].apply(correct_prediction)
submit_test.to_csv('./submission/submission_ensemble.csv', index=False)

In [None]:
sub_text = pd.read_csv('./submission/submission_ensemble.csv')
sub_text.head(20)

In [None]:
a = torch.empty()
b = torch.Tensor([1])

a += b
print(a)