In [110]:
import os
import random
import uuid

import albumentations as A
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torchvision.transforms as transforms
from albumentations.pytorch import ToTensorV2
from augraphy import AugraphyPipeline,  VoronoiTessellation
from PIL import Image
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [27]:
# Seed every global RNG the augmentation cells draw from, so that a fresh
# "Restart & Run All" regenerates the same dataset.
#   * NumPy's global RNG drives the mix-in decision and the mix-in file choice.
#   * Python's `random` module is seeded as well.
#     NOTE(review): Augraphy (and some Albumentations versions) sample from
#     `random` rather than NumPy -- confirm against the installed versions.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

In [37]:
# --- Albumentations transforms ------------------------------------------------
# Vertical flip (50% chance); the augmentation cells apply it only to the
# secondary image that gets blended into a sample.
vertical_transform = A.VerticalFlip(p=0.5)

WHITE = (255, 255, 255)

transform = A.Compose(
    [
        A.ISONoise(color_shift=(0.01, 0.07), intensity=(0.3, 0.8), p=0.7),
        # Hue-only jitter: brightness/contrast/saturation are switched off and
        # hue is set to 0.5; applied with probability 0.7.
        A.ColorJitter(brightness=0, contrast=0, saturation=0, hue=0.5, p=0.7),
        A.GaussianBlur(blur_limit=(3, 7), p=0.5),
        # Rotation by up to 360 degrees; the uncovered border is filled with white.
        A.Rotate(limit=360, border_mode=cv2.BORDER_CONSTANT, value=WHITE, p=0.7),
    ]
)

# --- Augraphy pipeline --------------------------------------------------------
# A single Voronoi-tessellation augmentation (fixed at 2000 cells), applied
# with probability 0.7.
voronoi = VoronoiTessellation(
    num_cells_range=(2000, 2000),
    mult_range=(50, 80),
    noise_type="random",
    background_value=(100, 150),
    numba_jit=1,
    p=0.7,
)
my_sequence = [voronoi]
pipeline = AugraphyPipeline(my_sequence)

In [29]:
# Load the labelled training index; later cells read its 'ID' and 'target' columns.
train_csv_path = './data/train.csv'
df = pd.read_csv(train_csv_path)

In [30]:
# Split the labelled rows 50/50 into a training half and a validation half.
# The 'target' column is looked up by name (more robust than by position) and
# used for stratification when it exists; otherwise the split is unstratified.
stratify_labels = df['target'] if 'target' in df.columns else None

train_data, val_data = train_test_split(
    df,
    test_size=0.5,
    random_state=42,
    stratify=stratify_labels,
)

In [111]:
# Build the augmented training set: `fit_count` images for every class in
# `train_data`, written to ./data/train_augmented/ with an index CSV alongside.
from pandas import DataFrame  # NOTE(review): unused in this cell; kept in case other cells rely on the name

alpha = 0.8            # blend weight of the primary image (the mix-in gets 1 - alpha)
fit_count = 200        # number of augmented images generated per class
mix_probability = 0.4  # chance that a second training image is blended in

os.makedirs('./data/train_augmented', exist_ok=True)
train_augmented = []

# The context manager closes the progress bar even if an image fails to load.
with tqdm(total=train_data['target'].nunique() * fit_count) as pbar:
    for target, group in train_data.groupby('target'):
        class_files = group['ID'].values
        for sample_idx in range(fit_count):
            # Cycle through the class's files until `fit_count` samples exist.
            file_name = class_files[sample_idx % len(class_files)]

            image = cv2.imread('./data/train/' + file_name)
            if image is None:
                raise FileNotFoundError(f"could not read image: ./data/train/{file_name}")
            # OpenCV decodes to BGR; everything downstream (colour transforms,
            # PIL save) assumes RGB, so convert once right after loading.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            if np.random.rand() < mix_probability:
                # Blend in a randomly chosen training image (any class),
                # resized to match and possibly flipped vertically.
                random_file_name = np.random.choice(train_data['ID'].values)
                sum_image = cv2.imread('./data/train/' + random_file_name)
                if sum_image is None:
                    raise FileNotFoundError(f"could not read image: ./data/train/{random_file_name}")
                sum_image = cv2.cvtColor(sum_image, cv2.COLOR_BGR2RGB)
                sum_image = cv2.resize(sum_image, (image.shape[1], image.shape[0]))
                sum_image = vertical_transform(image=sum_image)['image']
                image = cv2.addWeighted(image, alpha, sum_image, 1 - alpha, 0)

            augmented_image = pipeline.augment(image)['output']
            augmented_image = transform(image=augmented_image)['image']

            train_augmented_file_name = f"{uuid.uuid4()}.jpg"
            Image.fromarray(augmented_image).save(f"./data/train_augmented/{train_augmented_file_name}")
            train_augmented.append({'ID': train_augmented_file_name, 'target': target})

            # Count a sample only after it has actually been written.
            pbar.update(1)

pd.DataFrame(train_augmented).to_csv('./data/train_augmented.csv', index=False)








100%|█████████▉| 3399/3400 [23:52<00:00,  3.96it/s]

In [114]:
# Build the augmented validation set: `fit_count` images for every class in
# `val_data`, written to ./data/val_augmented/ with an index CSV alongside.
# Source images still live in ./data/train/; mix-in images are drawn only from
# `val_data`, so no training-half image is blended into validation samples.
from pandas import DataFrame  # NOTE(review): unused in this cell; kept in case other cells rely on the name

alpha = 0.8            # blend weight of the primary image (the mix-in gets 1 - alpha)
fit_count = 200        # number of augmented images generated per class
mix_probability = 0.4  # chance that a second validation image is blended in

os.makedirs('./data/val_augmented', exist_ok=True)
val_augmented = []

# The context manager closes the progress bar even if an image fails to load.
with tqdm(total=val_data['target'].nunique() * fit_count) as pbar:
    for target, group in val_data.groupby('target'):
        class_files = group['ID'].values
        for sample_idx in range(fit_count):
            # Cycle through the class's files until `fit_count` samples exist.
            file_name = class_files[sample_idx % len(class_files)]

            image = cv2.imread('./data/train/' + file_name)
            if image is None:
                raise FileNotFoundError(f"could not read image: ./data/train/{file_name}")
            # OpenCV decodes to BGR; everything downstream (colour transforms,
            # PIL save) assumes RGB, so convert once right after loading.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            if np.random.rand() < mix_probability:
                # Blend in a randomly chosen validation image (any class),
                # resized to match and possibly flipped vertically.
                random_file_name = np.random.choice(val_data['ID'].values)
                sum_image = cv2.imread('./data/train/' + random_file_name)
                if sum_image is None:
                    raise FileNotFoundError(f"could not read image: ./data/train/{random_file_name}")
                sum_image = cv2.cvtColor(sum_image, cv2.COLOR_BGR2RGB)
                sum_image = cv2.resize(sum_image, (image.shape[1], image.shape[0]))
                sum_image = vertical_transform(image=sum_image)['image']
                image = cv2.addWeighted(image, alpha, sum_image, 1 - alpha, 0)

            augmented_image = pipeline.augment(image)['output']
            augmented_image = transform(image=augmented_image)['image']

            val_augmented_file_name = f"{uuid.uuid4()}.jpg"
            Image.fromarray(augmented_image).save(f"./data/val_augmented/{val_augmented_file_name}")
            val_augmented.append({'ID': val_augmented_file_name, 'target': target})

            # Count a sample only after it has actually been written.
            pbar.update(1)

pd.DataFrame(val_augmented).to_csv('./data/val_augmented.csv', index=False)








  0%|          | 1/3400 [00:46<44:16:58, 46.90s/it]
100%|██████████| 3400/3400 [22:57<00:00,  2.47it/s]
