# Mount Google Drive

In [1]:
# Mount Google Drive at /content/drive (Colab only); the data files and
# checkpoints referenced later in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# 시드 고정, device 설정

In [2]:
# settings
import pandas as pd
import numpy as np
import torch
import os 
import glob
import random

# Compute device: the first CUDA GPU when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("# device  : ", device)
# Seed value handed to seed_everything().
seed = 77

def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and every CUDA
    device), exports ``PYTHONHASHSEED`` and configures cuDNN for
    deterministic execution.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Exported for any interpreter started after this point; the hash seed of
    # the already-running kernel cannot be changed retroactively.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed all GPUs, not only the current one (safe no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick different convolution
    # algorithms from run to run, which breaks reproducibility. It must be
    # disabled for the seeding above to be meaningful.
    torch.backends.cudnn.benchmark = False


# Apply the global seed once up front, before any data loading or model creation.
seed_everything(seed)

# device  :  cuda:0


# 데이터 불러오기

In [3]:

# Load the training inputs and their encoded labels from Google Drive.
# NOTE(review): `train` appears to hold image file paths (its items are later
# passed to cv2.imread by the Dataset class) - confirm.
# NOTE(security): allow_pickle=True unpickles object arrays and can execute
# arbitrary code; only load .npy files that you created yourself.
train = np.load("/content/drive/MyDrive/vision/OCR이미지분류/train_np.npy", allow_pickle = True)
target = np.load("/content/drive/MyDrive/vision/OCR이미지분류/target_encoded_np.npy", allow_pickle = True)
# Sanity check: inputs and labels should contain the same number of samples.
len(train), len(target)

(33983, 33983)

# google ViT 

In [4]:
!pip install timm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting timm
  Downloading timm-0.6.12-py3-none-any.whl (549 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m549.1/549.1 KB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub
  Downloading huggingface_hub-0.13.3-py3-none-any.whl (199 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.8/199.8 KB[0m [31m25.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: huggingface-hub, timm
Successfully installed huggingface-hub-0.13.3 timm-0.6.12


In [5]:
import cv2
import torch
import albumentations
import albumentations.pytorch  # torch tensor로 변환할 때 필요 

In [6]:
class Dataset(torch.utils.data.Dataset) :
  """Map-style dataset that reads images from disk with OpenCV.

  Args:
    transform: Callable invoked as ``transform(image=img)`` that returns a
      mapping with an ``'image'`` entry (the albumentations convention).
    img_path_np: Indexable collection of image file paths.
    target: Optional indexable collection of labels aligned with
      ``img_path_np``. Leave as ``None`` for unlabeled (inference) data.
  """

  def __init__(self, transform  , img_path_np, target=None ):
    self.imgs = img_path_np 
    self.target = target
    self.transform = transform
  
  def __len__(self):
    """Return the number of images in the dataset."""
    return len(self.imgs)

  def __getitem__(self, idx):
    """Load, convert and transform the image at position ``idx``.

    Returns:
      dict with key ``'img'`` (the transformed image) and, only when labels
      were supplied, key ``'y'`` (the label at ``idx``).

    Raises:
      FileNotFoundError: If OpenCV cannot read the image file.
    """
    file_path = self.imgs[idx] 
    img = cv2.imread(file_path)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than with an opaque cvtColor error.
    if img is None:
      raise FileNotFoundError(f"Could not read image file: {file_path}")
    # OpenCV decodes to BGR; convert to the RGB channel order.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    item = {'img': self.transform(image = img)['image']}
    # Labels are optional (target defaults to None), so only attach 'y' when
    # they exist; consumers can detect unlabeled data via batch.get("y").
    if self.target is not None:
      item['y'] = self.target[idx]
    return item

# Deterministic preprocessing pipeline (no random augmentation):
# normalize -> resize to 224x224 -> convert to a torch tensor.
transformer = albumentations.Compose(
    [
        albumentations.Normalize(),
        albumentations.Resize(224, 224),
        albumentations.pytorch.ToTensorV2(),
    ]
)


# Training-time augmentation pipeline built from albumentations transforms.
# Order matters: the pixel-level augmentations (blur, noise, colour jitter)
# operate on raw pixel values, so they run BEFORE Normalize. Applying them to
# an already standardised float image (as the previous ordering did) distorts
# or clips the data. Normalize and tensor conversion therefore come last.
transform_oneof = albumentations.Compose([
    albumentations.Resize(224, 224),
    # Exactly one of these three is applied to every image.
    albumentations.OneOf([
        albumentations.MotionBlur(p=1),
        albumentations.OpticalDistortion(p=1),
        albumentations.GaussNoise(p=1),
    ], p=1),
    # A second, independent draw; ColorJitter keeps its default p, which
    # inside OneOf only acts as a relative selection weight.
    albumentations.OneOf([
        albumentations.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
        albumentations.OpticalDistortion(p=1),
        albumentations.GaussNoise(p=1),
    ], p=1),
    albumentations.Normalize(),
    albumentations.pytorch.transforms.ToTensorV2(),
])

In [14]:
from tqdm import tqdm
import timm

# Re-seed immediately before building the model so that any randomly
# initialised weights are created from the same RNG state on every run.
seed_everything(seed)
# Number of output classes passed to the model constructor.
num_classes = 159
# ViT-Base (16x16 patches, 224x224 input) from timm with pretrained weights
# and `num_classes` outputs.
VIT = timm.create_model('vit_base_patch16_224', pretrained=True, num_classes=num_classes)

# 학습

In [18]:
from sklearn.metrics import f1_score


def train_loop(dataloader,model,loss_fn,optimizer,device):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        dataloader: Sized iterable of batches; each batch is a mapping with
            tensors under ``"img"`` (inputs) and ``"y"`` (labels).
        model: Module to optimise; switched to training mode.
        loss_fn: Callable ``loss_fn(predictions, labels)`` returning a scalar tensor.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device the batch tensors are moved to.

    Returns:
        The sum of the per-batch losses divided by ``len(dataloader)``.
    """
    model.train()
    batch_losses = []

    for batch in tqdm(dataloader):
        inputs = batch["img"].to(device)
        logits = model(inputs)
        labels = batch["y"].to(device)
        loss = loss_fn(logits, labels)

        # Standard update: clear stale gradients, backpropagate, step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    return sum(batch_losses) / len(dataloader)


@torch.no_grad() 
def test_loop(dataloader,model,loss_fn,device): 
    epoch_loss = 0
    model.eval() 
    
    pred_list = []
    true_list = []
    softmax = torch.nn.Softmax(dim=1) 

    for batch in tqdm(dataloader):   
        pred = model(batch["img"].to(device))
        
        if batch.get("y") is not None: 
            loss = loss_fn(pred, batch["y"].to(device))
            epoch_loss += loss.item()
        
        pred = softmax(pred)
        pred = pred.to("cpu").numpy() 
        true = batch['y'].to('cpu').numpy()

        pred_list.append(pred)
        true_list.append(true)

    epoch_loss /= len(dataloader)

    pred = np.concatenate(pred_list) 
    true = np.concatenate(true_list)
    return epoch_loss , pred , true


In [None]:
from sklearn.model_selection import train_test_split

# Fine-tune the ViT with early stopping on the validation weighted F1 score.
seed_everything(77)
model = VIT.to(device)
optimizer = torch.optim.RAdam(model.parameters(), lr = 0.00001)
loss_fn = torch.nn.CrossEntropyLoss() 

# Hold out 20% of the samples for validation (fixed random_state for a stable split).
train_x, valid_x, train_y, valid_y = train_test_split(train , target, test_size=0.2, random_state=77)

# Random augmentation is applied to the training split only. The validation
# split uses the deterministic `transformer` pipeline so the F1 score that
# drives checkpointing and early stopping is comparable between epochs
# (previously validation images were randomly augmented as well).
data_train = Dataset(transform_oneof, train_x, train_y)
data_test = Dataset(transformer, valid_x, valid_y)
train_dl = torch.utils.data.DataLoader(data_train, batch_size = 32, shuffle = True)
test_dl = torch.utils.data.DataLoader(data_test, batch_size = 32, shuffle = False)

best_score = 0
patience = 0        # consecutive epochs without an F1 improvement
max_patience = 3    # stop after this many epochs without improvement
num_epochs = 20

for epoch in range(num_epochs):
    train_loss = train_loop(train_dl, model , loss_fn,optimizer,device)
    valid_loss , pred , true = test_loop(test_dl, model , loss_fn,device  )      
    # Convert class probabilities to predicted class indices.
    pred = np.argmax(pred, axis=1) 
    score = f1_score(true, pred , average="weighted")
    print(f"train loss {train_loss},  valid loss : {valid_loss} ,  f1-score : {score}")

    if best_score < score:
        patience = 0
        best_score = score
        # Checkpoint every new best model; adjust this path for your own Drive layout.
        torch.save(model.state_dict(), f"/content/drive/MyDrive/vision/OCR이미지분류/google_base_vit_net_{epoch}.pth")
    else:
        patience += 1

    # Report before the early-stopping check so the final epoch is logged too.
    print(f" Epoch ({epoch}), BEST F1: {best_score}")

    if patience >= max_patience:
        break

    torch.cuda.empty_cache()
