In [48]:
import cv2
import os
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from glob import glob
from tqdm import tqdm
from easydict import EasyDict
import timm
from sklearn.model_selection import train_test_split, StratifiedKFold

## Add labels for augmented images

In [2]:
# Extend the base training table with one row per augmented image found under
# ../data/train/aug/<class>-<state>/ (the folder name is used as the label).
train_df = pd.read_csv('../data/train_df.csv')

aug_frames = []
next_index = len(train_df)  # augmented rows continue the running 'index' numbering

# sorted(): glob/listdir order is filesystem-dependent; sorting makes the
# assigned 'index' values reproducible across machines.
for aug_path in sorted(glob("../data/train/aug/*")):
    if not os.path.isdir(aug_path):
        # Ignore stray files sitting next to the label folders.
        continue

    # basename() works with both "/" and "\\" separators, unlike split("\\"),
    # which only isolates the folder name on Windows.
    label = os.path.basename(os.path.normpath(aug_path))
    class_name = label.split("-")[0]
    state = label.split("-")[-1]

    # NOTE: 'file_name' holds bare names as returned by os.listdir, not full paths.
    file_list = sorted(os.listdir(aug_path))
    if not file_list:
        continue

    aug_frames.append(pd.DataFrame({
        'index': list(range(next_index, next_index + len(file_list))),
        'file_name': file_list,
        'class': class_name,
        'state': state,
        'label': label,
    }))
    next_index += len(file_list)

# Concatenate once instead of growing the frame inside the loop.
train_df = pd.concat([train_df, *aug_frames], ignore_index=True)

In [31]:
# Reload the base table and prefix every file name with a directory component.
train_df = pd.read_csv('../data/train_df.csv')

# NOTE(review): "abcd" looks like a placeholder directory — confirm the real root.
train_df['file_name'] = [
    os.path.join("abcd", name) for name in train_df['file_name']
]
train_df

Unnamed: 0,index,file_name,class,state,label
0,0,abcd\10000.png,transistor,good,transistor-good
1,1,abcd\10001.png,capsule,good,capsule-good
2,2,abcd\10002.png,transistor,good,transistor-good
3,3,abcd\10003.png,wood,good,wood-good
4,4,abcd\10004.png,bottle,good,bottle-good
...,...,...,...,...,...
4272,4272,abcd\14272.png,transistor,good,transistor-good
4273,4273,abcd\14273.png,transistor,good,transistor-good
4274,4274,abcd\14274.png,grid,good,grid-good
4275,4275,abcd\14275.png,zipper,good,zipper-good


In [10]:
# Map each distinct label string to an integer id, numbered in sorted order.
sorted_labels = sorted(train_df['label'].unique())
label_enc = dict(zip(sorted_labels, range(len(sorted_labels))))
label_enc

{'bottle-broken_large': 0,
 'bottle-broken_small': 1,
 'bottle-contamination': 2,
 'bottle-good': 3,
 'cable-bent_wire': 4,
 'cable-cable_swap': 5,
 'cable-combined': 6,
 'cable-cut_inner_insulation': 7,
 'cable-cut_outer_insulation': 8,
 'cable-good': 9,
 'cable-missing_cable': 10,
 'cable-missing_wire': 11,
 'cable-poke_insulation': 12,
 'capsule-crack': 13,
 'capsule-faulty_imprint': 14,
 'capsule-good': 15,
 'capsule-poke': 16,
 'capsule-scratch': 17,
 'capsule-squeeze': 18,
 'carpet-color': 19,
 'carpet-cut': 20,
 'carpet-good': 21,
 'carpet-hole': 22,
 'carpet-metal_contamination': 23,
 'carpet-thread': 24,
 'grid-bent': 25,
 'grid-broken': 26,
 'grid-glue': 27,
 'grid-good': 28,
 'grid-metal_contamination': 29,
 'grid-thread': 30,
 'hazelnut-crack': 31,
 'hazelnut-cut': 32,
 'hazelnut-good': 33,
 'hazelnut-hole': 34,
 'hazelnut-print': 35,
 'leather-color': 36,
 'leather-cut': 37,
 'leather-fold': 38,
 'leather-glue': 39,
 'leather-good': 40,
 'leather-poke': 41,
 'metal_nut-ben

In [17]:
# Exactly one of rotate / horizontal flip / vertical flip is applied per image.
flip_or_rotate = A.OneOf(
    [A.Rotate(), A.HorizontalFlip(), A.VerticalFlip()],
    p=1,
)

# Exactly one of affine / grid distortion is applied per image.
geometric_warp = A.OneOf(
    [A.Affine(p=1), A.GridDistortion(p=1)],
    p=1,
)

# Pipeline order: normalize -> resize to 224x224 -> flip/rotate -> warp -> tensor.
aug_transforms = A.Compose([
    A.Normalize(),
    A.Resize(224, 224),
    flip_or_rotate,
    geometric_warp,
    ToTensorV2(),
])

# Smoke test: push every image listed in the augmented CSV through
# aug_transforms and print the resulting tensor shape.
train_df = pd.read_csv('../data/aug_train_df.csv')
for file_name in train_df['file_name']:
    img = cv2.imread(file_name)
    if img is None:
        # cv2.imread returns None instead of raising when a file is missing
        # or cannot be decoded; fail here with the offending path.
        raise FileNotFoundError(f"cv2.imread could not read image: {file_name}")
    # OpenCV decodes to BGR, while A.Normalize()'s default mean/std are
    # per-channel statistics in RGB order, so convert before augmenting.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = aug_transforms(image=img)['image']
    print(img.shape)

torch.Size([3, 1024, 1024])

In [40]:
# Show every model name that timm.list_models() returns.
available_models = timm.list_models()
available_models

['adv_inception_v3',
 'bat_resnext26ts',
 'beit_base_patch16_224',
 'beit_base_patch16_224_in22k',
 'beit_base_patch16_384',
 'beit_large_patch16_224',
 'beit_large_patch16_224_in22k',
 'beit_large_patch16_384',
 'beit_large_patch16_512',
 'botnet26t_256',
 'botnet50ts_256',
 'cait_m36_384',
 'cait_m48_448',
 'cait_s24_224',
 'cait_s24_384',
 'cait_s36_384',
 'cait_xs24_384',
 'cait_xxs24_224',
 'cait_xxs24_384',
 'cait_xxs36_224',
 'cait_xxs36_384',
 'coat_lite_mini',
 'coat_lite_small',
 'coat_lite_tiny',
 'coat_mini',
 'coat_tiny',
 'convit_base',
 'convit_small',
 'convit_tiny',
 'convmixer_768_32',
 'convmixer_1024_20_ks9_p14',
 'convmixer_1536_20',
 'convnext_base',
 'convnext_base_384_in22ft1k',
 'convnext_base_in22ft1k',
 'convnext_base_in22k',
 'convnext_large',
 'convnext_large_384_in22ft1k',
 'convnext_large_in22ft1k',
 'convnext_large_in22k',
 'convnext_small',
 'convnext_tiny',
 'convnext_tiny_hnf',
 'convnext_xlarge_384_in22ft1k',
 'convnext_xlarge_in22ft1k',
 'convnext_x

In [68]:
# Stratified 90/10 train/validation split over the augmented table.
train_df = pd.read_csv('../data/aug_train_df.csv').reset_index(drop=True)

all_files = list(train_df['file_name'])
all_labels = list(train_df['label'])

# Stratifying on the label keeps class proportions equal in both splits;
# the fixed random_state makes the shuffle repeatable.
train_imgs, valid_imgs, train_labels, valid_labels = train_test_split(
    all_files,
    all_labels,
    train_size=0.9,
    shuffle=True,
    random_state=51,
    stratify=all_labels,
)

# Split sizes followed by the first five labels of each split.
display(len(train_imgs), len(valid_imgs), train_labels[:5], valid_labels[:5])



7348

817

['bottle-contamination',
 'cable-combined',
 'hazelnut-hole',
 'zipper-good',
 'tile-crack']

['metal_nut-good',
 'leather-good',
 'capsule-poke',
 'zipper-fabric_interior',
 'pill-color']