In [None]:
import os
import sys
import torch

import pandas as pd
import numpy as np
import torch.nn as nn
import wandb

from torch.optim import Adam
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, ConcatDataset, random_split
from PIL import Image
from tqdm import tqdm

import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from dotenv import load_dotenv, dotenv_values

# Hydra and Jupyter conflict over command-line arguments, so reset argv to a
# single empty entry before anything tries to parse it.
sys.argv = ['']

# Read the variables defined in the .env file.
load_dotenv()

# Make the project importable. PYTHONPATH may hold several directories joined
# by os.pathsep, so register each entry on its own instead of appending the
# raw string as if it were a single directory.
python_path = dotenv_values().get('PYTHONPATH')
for _path_entry in (python_path or '').split(os.pathsep):
    if _path_entry and _path_entry not in sys.path:
        sys.path.append(_path_entry)

from src.dataset.CvImageDatasetFast import get_datasets
#from src.dataset.CvImageDataset import get_datasets
from src.models.CustomModel import CustomModel
from src.utils import config, utils

# Fix the random seeds
def random_seed(seed_num=42):
    """Seed the random number generators through PyTorch Lightning.

    Delegates to ``pl.seed_everything``, which replaces the manual
    per-library seeding calls this function used to list.

    Args:
        seed_num: Seed value forwarded to ``pl.seed_everything``.
            Defaults to 42.
    """
    pl.seed_everything(seed_num)
    # NOTE(review): the cuDNN flags are left untouched here. If bit-exact
    # reproducibility is needed, presumably also set
    # torch.backends.cudnn.deterministic = True and
    # torch.backends.cudnn.benchmark = False -- confirm against the
    # Lightning version in use.

# Data preparation helper
def prepare_data(batch_size=32, num_workers=4):
    """Create the train, validation and test data loaders.

    Args:
        batch_size: Batch size used by all three loaders.
        num_workers: Worker count for the train and validation loaders.
            The test loader always runs with 0 workers.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``.
    """
    # Build the three dataset splits.
    train_split, val_split, test_split = get_datasets()

    # Options that are identical for every loader.
    shared_options = dict(batch_size=batch_size, pin_memory=True, drop_last=False)

    # Only the training split is shuffled.
    loader_for_training = DataLoader(
        train_split, shuffle=True, num_workers=num_workers, **shared_options
    )
    # Validation uses its own dataset and keeps a fixed order.
    loader_for_validation = DataLoader(
        val_split, shuffle=False, num_workers=num_workers, **shared_options
    )
    # Test data is read in order, in the main process.
    loader_for_testing = DataLoader(
        test_split, shuffle=False, num_workers=0, **shared_options
    )

    return loader_for_training, loader_for_validation, loader_for_testing

def test(trainer, model, test_loader):
    """Run the test pass and write the predictions to ``pred.csv``.

    Calls ``trainer.test`` and then reads ``model.test_predictions``. When
    predictions are available they are stored in the ``target`` column of a
    frame built from ``test_loader.dataset.df`` and saved as ``pred.csv``
    inside ``config.OUTPUTS_DIR``.

    Args:
        trainer: Object whose ``test(model, dataloader)`` runs the test loop.
        model: Model exposing a ``test_predictions`` sequence after testing.
        test_loader: Loader whose ``dataset.df`` provides the ``ID`` column.

    Raises:
        AssertionError: If the prediction IDs do not match, in order, the
            ``ID`` column of the CSV at ``config.CV_CLS_TEST_CSV``.
    """
    # Run the test loop
    trainer.test(model, test_loader)

    predictions = model.test_predictions
    print("테스트 갯수=",len(predictions))

    if len(predictions) == 0:
        print("테스트 결과를 가져올 수 없습니다.")
        return

    # Attach the predictions to the test IDs
    pred_df = pd.DataFrame(test_loader.dataset.df, columns=['ID', 'target'])
    pred_df['target'] = predictions

    # Compare IDs positionally as plain lists: a Series == Series comparison
    # raises an unrelated ValueError when lengths or index labels differ,
    # which would hide the real problem.
    sample_submission_df = pd.read_csv(config.CV_CLS_TEST_CSV)
    assert pred_df['ID'].tolist() == sample_submission_df['ID'].tolist(), \
        "Prediction IDs do not match the sample submission IDs"

    # Make sure the output directory exists before writing.
    os.makedirs(config.OUTPUTS_DIR, exist_ok=True)
    pred_df.to_csv(os.path.join(config.OUTPUTS_DIR, "pred.csv"), index=False)

def main():
    """Train the classifier, optionally run the test pass, and close W&B.

    Builds the W&B logger, seeds the RNGs, prepares the data loaders, creates
    the ``CustomModel`` and the Lightning ``Trainer`` (with early stopping and
    learning-rate monitoring), fits the model and, when ``do_test`` is set,
    calls ``test``. ``wandb.finish()`` runs in a ``finally`` clause so the
    W&B run is closed even when training or testing raises.
    """
    # model config
    model_name = 'efficientnet_b4'  # e.g. 'resnet50', 'efficientnet_b4', ...

    # training config
    EPOCHS = 2
    BATCH_SIZE = 16
    num_workers = 0
    num_classes = 17
    learning_rate = 1e-3
    drop_out = 0.4
    do_test = True

    # Must be set before the model is created
    torch.set_float32_matmul_precision('medium')

    # Initialise the W&B logger
    wandb_logger = WandbLogger(
        project="cv-classification",  # project name
        name=utils.generate_experiment_name(model_name, learning_rate, BATCH_SIZE),  # experiment name (optional)
        job_type="train",  # job type (optional)
    )

    try:
        random_seed(42)

        # Prepare the data loaders
        train_loader, val_loader, test_loader = prepare_data(
            batch_size=BATCH_SIZE, num_workers=num_workers
        )

        model = CustomModel(
            model_name=model_name,
            num_classes=num_classes,
            learning_rate=learning_rate,
            drop_rate=drop_out,
        )

        # Callbacks are created explicitly.
        # NOTE(review): patience=5 is larger than EPOCHS=2, so early stopping
        # presumably never triggers with the current settings.
        callbacks = [
            EarlyStopping(
                monitor='val_loss',
                patience=5,
                mode='min',
                min_delta=0.001,
                verbose=True,
            ),
            LearningRateMonitor(
                logging_interval='epoch',
                log_momentum=False,
            ),
        ]

        trainer = Trainer(
            default_root_dir=config.OUTPUTS_DIR,
            max_epochs=EPOCHS,
            accelerator='auto',
            callbacks=callbacks,
            logger=wandb_logger,
        )

        # Training. To resume from a checkpoint, pass ckpt_path=<path to .ckpt>
        # as an extra argument to trainer.fit.
        trainer.fit(model, train_loader, val_loader)

        # Test
        if do_test:
            test(trainer, model, test_loader)
    finally:
        # Close the W&B run even if training or testing failed.
        wandb.finish()


if __name__ == "__main__":
    main()

In [None]:
import os
import sys
import torch

import pandas as pd
import numpy as np
import torch.nn as nn
import wandb

from torch.optim import Adam
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, ConcatDataset, random_split
from PIL import Image
from tqdm import tqdm
import timm

import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from dotenv import load_dotenv, dotenv_values

# Hydra and Jupyter conflict over command-line arguments, so reset argv to a
# single empty entry before anything tries to parse it.
sys.argv = ['']

# Read the variables defined in the .env file.
load_dotenv()

# Make the project importable. PYTHONPATH may hold several directories joined
# by os.pathsep, so register each entry on its own instead of appending the
# raw string as if it were a single directory.
python_path = dotenv_values().get('PYTHONPATH')
for _path_entry in (python_path or '').split(os.pathsep):
    if _path_entry and _path_entry not in sys.path:
        sys.path.append(_path_entry)

from src.dataset.CvImageDatasetFast import get_datasets
#from src.dataset.CvImageDataset import get_datasets
from src.models.CustomModel import CustomModel
from src.utils import config, utils


# model config
model_name = 'tf_efficientnet_b4'  # e.g. 'resnet50', 'efficientnet_b4', ...

# training config
EPOCHS = 2
BATCH_SIZE = 16
num_workers = 0
num_classes = 17
learning_rate = 1e-3
drop_out = 0.4
do_test = True


model = CustomModel(model_name=model_name, num_classes=num_classes, learning_rate=learning_rate, drop_rate=drop_out)

# Basic usage: resolve the timm data configuration for the model.
# The result gets its own name so it does not overwrite the `config` module
# imported from src.utils, which other cells use (e.g. config.OUTPUTS_DIR).
# NOTE(review): `model` is the CustomModel wrapper, not a raw timm model --
# confirm that resolve_data_config can read its pretrained configuration.
data_config = timm.data.resolve_data_config({}, model=model)

print(data_config)

In [None]:
import timm

# Inspect the model's default setup: create 'tf_efficientnet_b4' through timm
# with pretrained=True and print the resulting model object.
# NOTE(review): this rebinds the notebook-global `model`, which an earlier
# cell assigns to a CustomModel instance.
model = timm.create_model('tf_efficientnet_b4', pretrained=True)
print(model)


In [None]:
from augraphy import *
import cv2
import matplotlib.pyplot as plt

# Load the image. cv2.imread returns the pixels in BGR order, or None when
# the file cannot be read.
image_path = "/data/ephemeral/home/python_work/git/gx-train/data/row/train/0a4adccbb7fe73e0.jpg"
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Noise in the 25-90 range overlays a roughly uniform grey noise on the image.
# Create the SubtleNoise instance
subtle_noise = SubtleNoise(
    subtle_range=90,  # noise strength
    p=1.0             # always applied
)

# Apply the noise
noisy_image = subtle_noise(image)


plt.figure(figsize=(30, 10))

# Convert BGR -> RGB so matplotlib shows the true colours.
plt.imshow(cv2.cvtColor(noisy_image, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.tight_layout()
plt.show()



In [None]:
from augraphy import *
import cv2
import matplotlib.pyplot as plt  # previously only available from an earlier cell

# Seven watermark augmentations, each with its own rotation window.
paper_phase = [
    WaterMark(
        watermark_word="random",
        watermark_font_size=(1, 2),
        watermark_font_thickness=(1, 2),
        watermark_rotation=(i * 30, i * 30 + 60),  # a different rotation per watermark
        watermark_location="random",
        watermark_color=(200, 200, 200),
        watermark_method="darken",
        p=1.0
    )
    for i in range(7)
]

# Full pipeline
pipeline = AugraphyPipeline(
    ink_phase=[
        SubtleNoise(subtle_range=5, p=0.5)
    ],
    paper_phase=paper_phase,
    post_phase=[
        Brightness(brightness_range=(0.95, 1.05), p=0.3)
    ]
)

# Load the image. cv2.imread returns BGR pixels, or None when the file cannot
# be read.
image_path = "/data/ephemeral/home/python_work/git/gx-train/data/row/train/0a4adccbb7fe73e0.jpg"
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Run the pipeline
result = pipeline.augment(image)


plt.figure(figsize=(20, 7))

# Convert BGR -> RGB for display, as the Geometric demo cell already does.
plt.imshow(cv2.cvtColor(result['output'], cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.tight_layout()
plt.show()

In [None]:
from augraphy import *
import cv2
import matplotlib.pyplot as plt

# Load the image. cv2.imread returns BGR pixels, or None when the file cannot
# be read.
image_path = "/data/ephemeral/home/python_work/git/gx-train/data/row/train/f2079413e6c49f35.jpg"
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Near-monochrome "fax" look: any hue, very low saturation.
cp = ColorPaper(
    hue_range=(0, 360),
    saturation_range=(0, 5),  # very low saturation
    p=0.7
)

monochrome_image = cp(image)
# With p=0.7 the augmentation is sometimes skipped.
# NOTE(review): a skipped augraphy augmentation presumably returns None --
# confirm. Fall back to the input so the plot always renders.
if monochrome_image is None:
    monochrome_image = image


plt.figure(figsize=(30, 10))

# Convert BGR -> RGB so matplotlib shows the true colours.
plt.imshow(cv2.cvtColor(monochrome_image, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.tight_layout()
plt.show()



In [None]:
from augraphy import *
import cv2
import matplotlib.pyplot as plt

# Pipeline demonstrating rotation with cropped corners.
# The ink and paper phases are left empty; only the post phase does work.
pipeline = AugraphyPipeline(
    ink_phase=[],
    paper_phase=[],
    post_phase=[
        Geometric(
            rotate_range=(1, 300),
            scale=(1, 1.2),  # correct parameter name
            translation=(100, -100),
            fliplr=0.5,
            flipud=0.5,
            p=1.0,
        ),
        Brightness(brightness_range=(0.95, 1.05), p=0.3),
    ],
)

# Run the image through the pipeline
image = cv2.imread("/data/ephemeral/home/python_work/git/gx-train/data/row/train/0a4adccbb7fe73e0.jpg")
result = pipeline.augment(image)

# Show the original and the augmented image side by side (BGR -> RGB for display)
plt.figure(figsize=(15, 5))
for panel_index, (bgr_pixels, panel_title) in enumerate(
    [(image, "Original"), (result["output"], "Rotated with Cropped Edges")],
    start=1,
):
    plt.subplot(1, 2, panel_index)
    plt.imshow(cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB))
    plt.title(panel_title)
    plt.axis('off')
plt.show()

In [8]:
import pandas as pd

# Load the augmented-data CSV.
df = pd.read_csv("/data/ephemeral/home/python_work/git/gx-train/data/row/augment.csv")

# NOTE(review): bare expression that is not the cell's last statement, so the
# row count is computed but not displayed.
len(df)

# Number of rows per `target` value, ordered by target.
df.value_counts('target').sort_index()

target
0     631
1     293
2     635
3     640
4     628
5     644
6     631
7     636
8     636
9     639
10    637
11    635
12    642
13    471
14    323
15    639
16    640
Name: count, dtype: int64

In [1]:
import pandas as pd

# Load the training CSV.
df = pd.read_csv("/data/ephemeral/home/python_work/git/gx-train/data/row/train.csv")

# NOTE(review): bare expression that is not the cell's last statement, so the
# row count is computed but not displayed.
len(df)

# Number of rows per `target` value (no sort_index here, so rows follow
# value_counts' own ordering).
df.value_counts('target')

target
0     100
2     100
3     100
4     100
5     100
7     100
6     100
8     100
9     100
16    100
10    100
11    100
12    100
15    100
13     74
14     50
1      46
Name: count, dtype: int64

In [None]:
import pandas as pd



# (Original note, translated) Add an index number to the id field of df2.
# NOTE(review): no `df2` exists in this cell and no id field is modified; the
# note looks stale. The code below only loads the training CSV.
df = pd.read_csv("/data/ephemeral/home/python_work/git/gx-train/data/row/train.csv")


# Run the concat: stacks `df` on top of itself with a fresh 0..n-1 index,
# so every row appears twice.
# NOTE(review): this rebinds the notebook-global `result` used by earlier cells.
# NOTE(review): the recorded output (6280) is not twice the row count implied
# by the value counts shown for this file in the `In [1]` cell -- presumably
# produced from stale kernel state; re-run to confirm.
result = pd.concat([df, df], ignore_index=True)

len(result)

6280