In [None]:
import bitsandbytes as bnb
import transformers

# Record the installed library versions alongside the run.
print(f"bitsandbytes version: {bnb.__version__}")
print(f"transformers version: {transformers.__version__}")

bitsandbytes version: 0.47.0
transformers version: 4.55.4


In [None]:
import os, io
import pandas as pd
from PIL import Image
from urllib.request import Request, urlopen
from urllib.parse import urlparse
from urllib.error import URLError, HTTPError

# ===== Configuration =====
inputs_file = "/content/drive/MyDrive/Colab Notebooks/wook/deeplearningchallenge/deep_chal_multitask_dataset.parquet"  # source parquet that gets loaded and cleaned
URL_TIMEOUT = 20  # default timeout handed to urlopen() by is_valid_image_url

# ===== 유틸 =====
def is_url(s: str) -> bool:
    """Tell whether ``s`` (coerced to text) has an ``http`` or ``https`` scheme.

    Any error raised while stringifying or parsing the value is treated as
    "not a URL" and yields ``False``.
    """
    try:
        scheme = urlparse(str(s)).scheme
    except Exception:
        return False
    return scheme in ("http", "https")

def is_valid_image_url(u: str, timeout: int = URL_TIMEOUT) -> bool:
    """Check that ``u`` can be fetched and that its body decodes as an image.

    Args:
        u: URL to download.
        timeout: Timeout forwarded to ``urlopen``.

    Returns:
        ``True`` only when the download succeeds, the payload has at least
        32 bytes, it passes ``Image.verify()`` and it can be converted to RGB.
        Every failure (HTTP/URL error, timeout, unidentified or corrupt image,
        or anything else) yields ``False``; nothing is raised.
    """
    try:
        req = Request(u, headers={"User-Agent": "Mozilla/5.0"})
        with urlopen(req, timeout=timeout) as r:
            raw = r.read()
        # Tiny payloads cannot be real images; skip decoding them.
        if len(raw) < 32:
            return False
        # Integrity check; this also rejects HTML or otherwise corrupt bodies.
        with Image.open(io.BytesIO(raw)) as probe:
            probe.verify()
        # An image cannot be used after verify(), so open a fresh one to make
        # sure the pixel data really decodes.
        with Image.open(io.BytesIO(raw)) as decoded:
            decoded.convert("RGB")
        return True
    except Exception:
        # Network errors and decode errors all mean "not a usable image URL".
        return False

# ===== Load =====
df = pd.read_parquet(inputs_file)
n_total = len(df)

# Both columns are used below; stop immediately if either one is absent.
for col in ("input_type", "input"):
    if col in df.columns:
        continue
    raise ValueError(f"'{col}' column is required, but not found in the dataset.")

# Only rows that are images AND whose input is an http(s) URL get checked.
mask_image = df["input_type"].astype(str).str.lower() == "image"
mask_url = df["input"].astype(str).apply(is_url)
target_idx = df.loc[mask_image & mask_url].index

# Use a pass-through stand-in when tqdm cannot be imported.
try:
    from tqdm import tqdm
except Exception:
    def tqdm(x, **k):
        return x

# Probe every targeted URL and collect the index labels that fail.
bad_idx = []
for i in tqdm(target_idx, total=len(target_idx), desc="Checking image URLs"):
    u = str(df.at[i, "input"])
    if is_valid_image_url(u):
        continue
    bad_idx.append(i)

# Drop the failing rows and save the result next to the source file.
clean_df = df.drop(index=bad_idx).reset_index(drop=True)

base_dir, base_name = os.path.split(inputs_file)
stem = os.path.splitext(base_name)[0]
out_path = os.path.join(base_dir, stem + "_clean.parquet")
clean_df.to_parquet(out_path, index=False)

# Report
print("=== URL 이미지 정리 결과 ===")
print(f"총 행         : {n_total}")
print(f"URL 이미지 행 : {len(target_idx)}")
print(f"제거된 행     : {len(bad_idx)}")
print(f"남은 행       : {len(clean_df)}")
print(f"저장 경로     : {out_path}")

In [None]:
import pandas as pd
import json
import os

# Precondition: `df` (the original DataFrame) and `bad_idx` (index labels to
# remove) must already exist in this session.
for required_name in ("df", "bad_idx"):
    assert required_name in globals(), f"{required_name}가 세션에 없습니다."

# 1) Preview the first five removed index labels
print("=== 제거 인덱스 미리보기(5) ===")
preview_idx = bad_idx[:5]
print(preview_idx)

# 2) Show the URLs stored at those five labels
print("\n=== 제거된 행의 URL(5) ===")
preview_urls = df.loc[preview_idx, "input"].astype(str).tolist()
for i, u in enumerate(preview_urls, start=1):
    print(f"{i}. {u}")

# 3) Save every removed index/URL in the same folder as the source parquet
inputs_file = "/content/drive/MyDrive/Colab Notebooks/wook/deeplearningchallenge/deep_chal_multitask_dataset.parquet"
base_dir, base_name = os.path.split(inputs_file)
stem = os.path.splitext(base_name)[0]

idx_json = os.path.join(base_dir, stem + "_removed_indices.json")
with open(idx_json, "w") as f:
    # Cast to built-in int so the labels are JSON serialisable.
    json.dump([int(label) for label in bad_idx], f)
print(f"\n[저장] 제거 인덱스 JSON: {idx_json}")

idx_csv = os.path.join(base_dir, stem + "_removed_indices.csv")
pd.DataFrame(
    {
        "index": [int(label) for label in bad_idx],
        "input": df.loc[bad_idx, "input"].astype(str).to_numpy(),
    }
).to_csv(idx_csv, index=False)
print(f"[저장] 제거 인덱스+URL CSV: {idx_csv}")


1. 데이터 분할 (train, val)

In [None]:
from qlora_vl_qwen_25 import build_train_valid

# Folder passed to build_train_valid as out_root.
OUT_DIR = "/content/drive/MyDrive/Colab Notebooks/wook/fine-tuning"

# 1) Split (the input file already contains the labels, so labels_file=None)
# Reads the "_clean" parquet written by the URL-cleaning cell.
train_df, valid_df = build_train_valid(
    out_root=OUT_DIR,
    inputs_file="/content/drive/MyDrive/Colab Notebooks/wook/deeplearningchallenge/deep_chal_multitask_dataset_clean.parquet",
    labels_file=None,
    valid_ratio=0.1  # presumably the fraction held out for validation; confirm in qlora_vl_qwen_25
)

2. LLM fine-tuning 준비

In [None]:
import importlib, qlora_vl_qwen_25 as m
import torch

m.IMG_BASE = "/content"  # adjust this if the dataset uses relative image paths
OUT_DIR = "/content/drive/MyDrive/Colab Notebooks/wook/fine-tuning"

# Start training through the project module, using its default base model and
# the train/valid parquet files under OUT_DIR/datasets/qlora.
# NOTE(review): the meaning of `profile` and `add_task_hint` is defined inside
# qlora_vl_qwen_25 and is not visible here; confirm there before changing them.
m.train_model(
    base_model=m.DEFAULT_BASE_MODEL,
    train_file=f"{OUT_DIR}/datasets/qlora/train.parquet",
    valid_file=f"{OUT_DIR}/datasets/qlora/valid.parquet",
    out_root=OUT_DIR,
    profile="balanced",
    add_task_hint=True,
    lora_r=64, lora_alpha=128, lora_dropout=0.05,
)


In [None]:
from urllib.request import Request, urlopen
from urllib.error import HTTPError, URLError

# Manual probe of a single URL: print the status and payload size, or the error.
URL_TIMEOUT = 10
url = "https://pulpcovers.com/wp-content/uploads/2012/01/36591544-6652526511_fe9af8fcd6_o1.jpg"

try:
    req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
    with urlopen(req, timeout=URL_TIMEOUT) as r:
        print(f"Response status: {r.status}")
        content_length = len(r.read())
        print(f"Downloaded bytes: {content_length}")
except HTTPError as e:
    # Must be handled before URLError, its base class.
    print(f"HTTPError: {e.code} {e.reason}")
except URLError as e:
    print(f"URLError: {e.reason}")
except Exception as e:
    print(f"Other Error: {e}")


In [None]:
import pandas as pd
import requests
from PIL import Image
import os
import io
from tqdm.auto import tqdm
from urllib.parse import urlparse
import warnings

# ===============================================================
# ⚠️ Settings: adjust these to match your own environment.
# ===============================================================
# Path of the source dataset file
DATASET_PATH = "/content/drive/MyDrive/Colab Notebooks/wook/fine-tuning/datasets/qlora/valid.parquet"

# Google Drive folder that receives the downloaded images
IMAGE_SAVE_DIR = "/content/drive/MyDrive/Colab Notebooks/wook/fine-tuning/datasets/images_v"
# ===============================================================

# Create the target directory
os.makedirs(IMAGE_SAVE_DIR, exist_ok=True)
print(f"이미지를 저장할 경로: {IMAGE_SAVE_DIR}")

df = pd.read_parquet(DATASET_PATH)
image_rows = df[df['input_type'] == 'image'].copy()
print(f"총 {len(image_rows)}개의 이미지를 다운로드합니다.")

# One entry per image row, in row order: the local path, or None on failure.
# NOTE(review): image rows whose `input` is not a fetchable URL fail in
# requests.get and are therefore dropped further down; confirm this is intended.
downloaded_paths = []

for index, row in tqdm(image_rows.iterrows(), total=len(image_rows), desc="이미지 다운로드 중"):
    url = row['input']
    try:
        # Derive the file name from the URL path
        parsed_url = urlparse(url)
        # Prefix the row index so names stay unique
        filename = f"{index}_{os.path.basename(parsed_url.path)}"
        save_path = os.path.join(IMAGE_SAVE_DIR, filename)

        # Skip the download when the file is already there
        if os.path.exists(save_path):
            downloaded_paths.append(save_path)
            continue

        response = requests.get(url, timeout=20, headers={"User-Agent": "Mozilla/5.0"})
        response.raise_for_status()  # raise on HTTP error status

        # Make sure the payload is a valid image (and release the handle)
        with Image.open(io.BytesIO(response.content)) as img:
            img.verify()

        # Write to a temporary name first and rename afterwards: an interrupted
        # write must never leave a truncated file under `save_path`, because
        # the existence check above would treat it as a finished download.
        tmp_path = save_path + ".part"
        with open(tmp_path, "wb") as f:
            f.write(response.content)
        os.replace(tmp_path, save_path)
        downloaded_paths.append(save_path)

    except Exception as e:
        warnings.warn(f"다운로드 실패 (인덱스: {index}, URL: {url}): {e}")
        downloaded_paths.append(None)  # mark the failure with None

# Point the 'input' column of the image rows at the local files
image_rows['input'] = downloaded_paths

# Recombine with the non-image rows, restoring the original order
non_image_rows = df[df['input_type'] != 'image']
updated_df = pd.concat([non_image_rows, image_rows]).sort_index()

# Exclude rows whose download failed
updated_df = updated_df.dropna(subset=['input'])

# Save the modified dataset under a new name
UPDATED_DATASET_PATH = "/content/drive/MyDrive/Colab Notebooks/wook/fine-tuning/datasets/qlora/valid_local.parquet"
updated_df.to_parquet(UPDATED_DATASET_PATH, index=False)

print("\n" + "="*50)
print("✅ 이미지 다운로드 및 데이터셋 업데이트 완료!")
print(f"업데이트된 데이터셋이 다음 경로에 저장되었습니다:\n{UPDATED_DATASET_PATH}")
print("이제 파인튜닝 스크립트의 'train_path'를 이 경로로 변경하여 사용하세요.")
print("="*50)

In [None]:
import os

folder_path = "/content/images"  # folder to inspect
# Count entries whose name ends in ".jpg", ignoring case.
count = len([f for f in os.listdir(folder_path) if f.lower().endswith(".jpg")])

print(f"JPG 파일 개수: {count}")


In [None]:
# ==========================
# Load data & Run
# ==========================
MYTEST_PATH = "/content/drive/MyDrive/Colab Notebooks/wook/fine-tuning/datasets/qlora/valid_local.parquet"

# Load df
df = pd.read_parquet(MYTEST_PATH)
# Accept the misspelled column name when the correct one is absent.
if "input_tpye" in df.columns and "input_type" not in df.columns:
    df = df.rename(columns={"input_tpye": "input_type"})
df = df.reset_index(drop=True)
# Leading "id" column: the fresh 0..n-1 index rendered as strings.
df.insert(loc=0, column="id", value=df.index.astype(str))

# Quick stats
print(f"Total rows: {len(df):,}")
for column in ("task", "input_type"):
    print(f"\nBy {column}:\n", df[column].str.lower().value_counts(dropna=False))

In [None]:
# ==========================
# Load data & Run
# ==========================
MYTEST_PATH = "/content/drive/MyDrive/Colab Notebooks/wook/fine-tuning/datasets/qlora/valid.parquet"

# Load df
df = pd.read_parquet(MYTEST_PATH)
# Accept the misspelled column name when the correct one is absent.
if "input_tpye" in df.columns and "input_type" not in df.columns:
    df = df.rename(columns={"input_tpye": "input_type"})
df = df.reset_index(drop=True)
# Leading "id" column: the fresh 0..n-1 index rendered as strings.
df.insert(loc=0, column="id", value=df.index.astype(str))

# Quick stats
print(f"Total rows: {len(df):,}")
for column in ("task", "input_type"):
    print(f"\nBy {column}:\n", df[column].str.lower().value_counts(dropna=False))

In [None]:
import pandas as pd
import os

# ===============================================================
# ⚠️ Settings: check that the file paths match your environment.
# ===============================================================
# Path of the original dataset file
ORIGINAL_TRAIN_PATH = "/content/drive/MyDrive/Colab Notebooks/wook/fine-tuning/datasets/qlora/valid.parquet"

# File written by the image downloader (URL images replaced by local paths)
PROCESSED_LOCAL_PATH = "/content/drive/MyDrive/Colab Notebooks/wook/fine-tuning/datasets/qlora/valid_local.parquet"

# Output file that will also contain the vqa rows
FINAL_TRAIN_PATH = "/content/drive/MyDrive/Colab Notebooks/wook/fine-tuning/datasets/qlora/valid_final_with_vqa.parquet"
# ===============================================================

print("데이터 병합을 시작합니다...")

# 1. Take only the 'vqa' task rows from the original dataset.
#    The task is compared case-insensitively, matching the other task filters
#    in this notebook, which lower-case the column first.
print(f"'{ORIGINAL_TRAIN_PATH}'에서 'vqa' 데이터를 읽는 중...")
original_df = pd.read_parquet(ORIGINAL_TRAIN_PATH)
vqa_df = original_df[original_df['task'].str.lower() == 'vqa'].copy()
print(f"-> {len(vqa_df)}개의 'vqa' 행을 찾았습니다.")

# 2. Load the rows processed by the image downloader
print(f"'{PROCESSED_LOCAL_PATH}'에서 로컬 이미지 경로 데이터를 읽는 중...")
local_df = pd.read_parquet(PROCESSED_LOCAL_PATH)
print(f"-> {len(local_df)}개의 처리된 행을 찾았습니다.")

# 3. Concatenate both frames and order the result by task.
# NOTE(review): if the processed file already contains vqa rows, this concat
# duplicates them; confirm with the statistics printed after the merge.
print("두 데이터셋을 병합하는 중...")
final_df = pd.concat([local_df, vqa_df], ignore_index=True).sort_values(by='task')
print(f"-> 총 {len(final_df)}개의 행으로 병합되었습니다.")

# 4. Save the final result
final_df.to_parquet(FINAL_TRAIN_PATH, index=False)

print("\n" + "="*50)
print("✅ 데이터 병합 완료!")
print(f"최종 데이터셋이 다음 경로에 저장되었습니다:\n{FINAL_TRAIN_PATH}")
print("\n이제 파인튜닝 스크립트의 'train_path'를 이 최종 파일 경로로 변경하여 사용하세요.")
print("="*50)

In [None]:
# Statistics of the merged dataset
print("\n[병합 후 데이터 통계]")
task_counts = final_df["task"].str.lower().value_counts(dropna=False)
print("By task:\n", task_counts)
input_type_counts = final_df["input_type"].value_counts()
print("\nBy input_type:\n", input_type_counts)

In [None]:
import json
import os
import pandas as pd

# ⚠️ Settings: change this to the checkpoint path in your environment.
output_dir = "/content/drive/MyDrive/Colab Notebooks/wook/fine-tuning/datasets/qlora/qlora-out/checkpoint-5018"

# File in which the Trainer stores its log history
log_history_path = os.path.join(output_dir, "trainer_state.json")


def select_loss_rows(log_frame, column):
    """Return the ``step``/``column`` pairs of a log frame that carry a value.

    Args:
        log_frame: DataFrame built from the ``log_history`` list.
        column: Name of the metric column, e.g. ``"loss"`` or ``"eval_loss"``.

    Returns:
        A two-column DataFrame without missing values. When ``step`` or
        ``column`` has not been logged at all, an empty frame with those two
        columns is returned instead of raising ``KeyError``.
    """
    if "step" not in log_frame.columns or column not in log_frame.columns:
        return pd.DataFrame(columns=["step", column])
    return log_frame[["step", column]].dropna()


try:
    with open(log_history_path, "r") as f:
        log_history = json.load(f)["log_history"]

    # Convert to a DataFrame for readability
    df_log = pd.DataFrame(log_history)

    print("✅ 학습 로그를 성공적으로 불러왔습니다.")

    # Training entries have no eval_loss and evaluation entries have no loss,
    # so each table keeps only the rows where its own metric is present.
    # Each metric is selected on its own, so a log without evaluation entries
    # still shows the training loss.
    df_train_loss = select_loss_rows(df_log, "loss")
    df_eval_loss = select_loss_rows(df_log, "eval_loss")

    print("\n--- 훈련 손실 (Training Loss) ---")
    print(df_train_loss.to_string(index=False))

    print("\n--- 검증 손실 (Validation Loss) ---")
    print(df_eval_loss.to_string(index=False))

except FileNotFoundError:
    print(f"🚨 오류: '{log_history_path}' 파일을 찾을 수 없습니다. 경로를 다시 확인해주세요.")
except (KeyError, IndexError, json.JSONDecodeError):
    # Missing "log_history" key or a file that is not valid JSON.
    print("아직 로그 기록이 충분히 쌓이지 않았거나 파일에 문제가 있습니다.")

In [None]:
# Delete /content/qwen25_vl_qlora_finetune.py; -f keeps rm quiet when the file does not exist.
!rm -f /content/qwen25_vl_qlora_finetune.py

In [None]:
import json
import os

# Checkpoint directory (or whichever checkpoint is in use)
checkpoint_dir = "/content/drive/MyDrive/Colab Notebooks/wook/fine-tuning/datasets/qlora/qlora-out/checkpoint-4000"

# Read trainer_state.json
state_path = os.path.join(checkpoint_dir, "trainer_state.json")
with open(state_path, 'r') as f:
    state = json.load(f)

# Show the values currently stored
print("Current settings in trainer_state.json:")
for key in ("eval_steps", "save_steps"):
    print(f"  {key}: {state.get(key, 'not set')}")

# Overwrite both intervals
state.update(eval_steps=100, save_steps=100)

# Write the file back in place
with open(state_path, 'w') as f:
    json.dump(state, f, indent=2)

print("\nUpdated to:")
for key in ("eval_steps", "save_steps"):
    print(f"  {key}: {state[key]}")

3. image fine-tuning 준비

In [None]:
import os
import pandas as pd
# ==========================
# Load data & Run
# ==========================
IG_PATH = "/content/drive/MyDrive/Colab Notebooks/wook/fine-tuning/datasets/qlora/train_final.parquet"

# Load df
df = pd.read_parquet(IG_PATH)
df = df.reset_index(drop=True)
# Leading "id" column: the fresh 0..n-1 index rendered as strings.
df.insert(loc=0, column="id", value=df.index.astype(str))

# Quick stats
print(f"Total rows: {len(df):,}")
for column in ("task", "input_type"):
    print(f"\nBy {column}:\n", df[column].str.lower().value_counts(dropna=False))

In [None]:
# Keep only the captioning and vqa rows (task compared in lower case)
df_filtered = df.loc[df["task"].str.lower().isin(("captioning", "vqa"))]

print(f"Filtered rows: {len(df_filtered):,}")
print(df_filtered["task"].value_counts())

# Save (parquet & JSON Lines)
out_parquet = "/content/drive/MyDrive/Colab Notebooks/wook/fine-tuning/datasets/qlora/image_train.parquet"
out_json = "/content/drive/MyDrive/Colab Notebooks/wook/fine-tuning/datasets/qlora/image_train.json"

df_filtered.to_parquet(path=out_parquet, index=False)
df_filtered.to_json(path_or_buf=out_json, orient="records", lines=True, force_ascii=False)

print(f"Saved parquet -> {out_parquet}")
print(f"Saved json -> {out_json}")

In [None]:
# ==========================
# Load data & Run
# ==========================
IMV_PATH = "/content/drive/MyDrive/Colab Notebooks/wook/fine-tuning/datasets/qlora/valid_final.parquet"

# Load df
df = pd.read_parquet(IMV_PATH)
df = df.reset_index(drop=True)
# Leading "id" column: the fresh 0..n-1 index rendered as strings.
df.insert(loc=0, column="id", value=df.index.astype(str))

# Quick stats
print(f"Total rows: {len(df):,}")
for column in ("task", "input_type"):
    print(f"\nBy {column}:\n", df[column].str.lower().value_counts(dropna=False))

In [None]:
# Keep only the captioning and vqa rows (task compared in lower case)
df_filtered = df.loc[df["task"].str.lower().isin(("captioning", "vqa"))]

print(f"Filtered rows: {len(df_filtered):,}")
print(df_filtered["task"].value_counts())

# Save (parquet & JSON Lines)
out_parquet = "/content/drive/MyDrive/Colab Notebooks/wook/fine-tuning/datasets/qlora/image_valid.parquet"
out_json = "/content/drive/MyDrive/Colab Notebooks/wook/fine-tuning/datasets/qlora/image_valid.json"

df_filtered.to_parquet(path=out_parquet, index=False)
df_filtered.to_json(path_or_buf=out_json, orient="records", lines=True, force_ascii=False)

print(f"Saved parquet -> {out_parquet}")
print(f"Saved json -> {out_json}")

4. LLM 학습

In [None]:
# Download llm_finetune.py from the project's GitHub repository into the working directory (-O overwrites any existing copy).
!wget -O llm_finetune.py https://raw.githubusercontent.com/ksw0425/deeplearning_challenge/refs/heads/main/llm_finetune.py

In [None]:
from llm_finetune import train

# Run the training entry point of llm_finetune.py on the *_final parquet files.
# NOTE(review): the name suggests the return value is the adapter output
# directory, and `profile` is interpreted inside llm_finetune; confirm both there.
adapter_dir = train(
    base_model="Qwen/Qwen2.5-VL-7B-Instruct",
    train_path="/content/drive/MyDrive/Colab Notebooks/wook/fine-tuning/datasets/qlora/train_final.parquet",
    valid_path="/content/drive/MyDrive/Colab Notebooks/wook/fine-tuning/datasets/qlora/valid_final.parquet",
    out_dir="/content/drive/MyDrive/Colab Notebooks/wook/fine-tuning/datasets/qlora/qlora-out",
    profile="dev",
)

5. projector 학습 (이미지 증강은 적용 안했음)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image
import random
import numpy as np

# 증강 확인 함수
def _draw_augmentation_rows(axes, start_row, samples, augmentor, label, title_tag,
                            answer_label, show_question=False):
    """Print and plot one grid row (original + 3 augmentations) per sample.

    Samples whose image cannot be loaded are reported and skipped without
    consuming a grid row. Returns the index of the next unused row.
    """
    row = start_row
    for idx, (_, sample) in enumerate(samples.iterrows()):
        print(f"\n{label} Sample {idx+1}:")
        print(f"Task: {sample['task']}")
        if show_question:
            print(f"Question: {sample.get('question', 'N/A')}")
        # str() keeps the preview working when the output is not a string.
        print(f"{answer_label}: {str(sample['output'])[:100]}...")

        # Load the original image
        try:
            original_img = load_image_strict(sample['input'])
        except Exception as e:
            print(f"Error loading image: {e}")
            continue

        # Apply the augmentation three times, each time on a fresh copy
        augmented_imgs = [
            augmentor.augment(original_img.copy(), sample['task']) for _ in range(3)
        ]

        # Plot the original in column 0 and the variants in columns 1-3
        axes[row, 0].imshow(original_img)
        axes[row, 0].set_title(f'Original ({title_tag} {idx+1})', fontsize=10)
        axes[row, 0].axis('off')

        for j, aug_img in enumerate(augmented_imgs):
            axes[row, j+1].imshow(aug_img)
            axes[row, j+1].set_title(f'Augmented {j+1}', fontsize=10)
            axes[row, j+1].axis('off')

        row += 1
    return row


# Augmentation check
def test_augmentation(train_path, num_samples=3, random_state=None):
    """Compare images before and after augmentation for captioning and VQA.

    Args:
        train_path: Parquet file with at least the ``task``, ``input`` and
            ``output`` columns.
        num_samples: Maximum number of samples drawn per task group. The figure
            always has ``num_samples * 2`` rows; rows without a sample stay
            blank.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            same rows can be drawn again.

    Returns:
        The matplotlib figure holding the comparison grid.
    """
    # Load the data
    df = pd.read_parquet(train_path)

    # Pick samples per task. The sample size is capped by the size of each
    # filtered subset (not of the whole frame), and rows with a missing task
    # are treated as non-matching.
    caption_pool = df[df['task'].str.contains('caption', case=False, na=False)]
    vqa_pool = df[df['task'].str.contains('vqa|question', case=False, na=False)]
    captioning_samples = caption_pool.sample(
        min(num_samples, len(caption_pool)), random_state=random_state
    )
    vqa_samples = vqa_pool.sample(
        min(num_samples, len(vqa_pool)), random_state=random_state
    )

    # Create the augmentor
    augmentor = ImageAugmentor()

    # Set up the figure; squeeze=False keeps `axes` two-dimensional
    fig, axes = plt.subplots(num_samples * 2, 4, figsize=(16, num_samples * 8), squeeze=False)
    fig.suptitle('Image Augmentation Test: Captioning vs VQA', fontsize=16)

    row = 0

    # Captioning samples
    print("=" * 60)
    print("CAPTIONING SAMPLES (Strong Augmentation)")
    print("=" * 60)
    row = _draw_augmentation_rows(
        axes, row, captioning_samples, augmentor,
        label="Captioning", title_tag="Caption", answer_label="Output",
    )

    # VQA samples
    print("\n" + "=" * 60)
    print("VQA SAMPLES (Minimal Augmentation)")
    print("=" * 60)
    row = _draw_augmentation_rows(
        axes, row, vqa_samples, augmentor,
        label="VQA", title_tag="VQA", answer_label="Answer", show_question=True,
    )

    # Hide the rows left unused by short subsets or skipped images
    for unused_ax in axes[row:].ravel():
        unused_ax.axis('off')

    plt.tight_layout()
    plt.show()

    return fig

# 증강 차이 정량적 분석
def image_difference_metrics(original_array, augmented_array):
    """Return ``(pixel_diff, color_diff)`` for two equally shaped image arrays.

    ``pixel_diff`` is the mean absolute per-element difference. The inputs are
    converted to float first, because subtracting unsigned 8-bit arrays wraps
    around (0 - 5 becomes 251) and would inflate the result.

    ``color_diff`` is the summed absolute difference of the two value
    histograms. Both use the same 256 unit-wide bins over [0, 256], so equal
    bin indices mean equal intensities.
    # assumes 8-bit intensities (0-255), which is what np.array() gives for an
    # RGB PIL image; TODO confirm for other image modes.
    """
    original_values = np.asarray(original_array, dtype=np.float64)
    augmented_values = np.asarray(augmented_array, dtype=np.float64)

    pixel_diff = float(np.mean(np.abs(original_values - augmented_values)))

    orig_hist = np.histogram(original_values, bins=256, range=(0, 256))[0]
    aug_hist = np.histogram(augmented_values, bins=256, range=(0, 256))[0]
    color_diff = int(np.sum(np.abs(orig_hist - aug_hist)))

    return pixel_diff, color_diff


# Quantitative analysis of the augmentation difference
def analyze_augmentation_difference(train_path, samples_per_task=10):
    """Measure how strongly the augmentation changes captioning and VQA images.

    Args:
        train_path: Parquet file with at least the ``task`` and ``input``
            columns.
        samples_per_task: Number of leading rows analysed per task group.

    Prints the average differences per task with a coarse strength label, and
    shows a two-panel bar chart. Samples that fail to load or compare (for
    example because the augmentation changed the image size) are reported and
    skipped. Returns ``None``.
    """
    df = pd.read_parquet(train_path)
    augmentor = ImageAugmentor()

    results = {
        'captioning': {'pixel_diff': [], 'color_diff': []},
        'vqa': {'pixel_diff': [], 'color_diff': []}
    }

    # Analyse the first rows of each task group; rows with a missing task
    # are treated as non-matching.
    for task_key, pattern in (('captioning', 'caption'), ('vqa', 'vqa')):
        samples = df[df['task'].str.contains(pattern, case=False, na=False)].head(samples_per_task)

        for _, sample in samples.iterrows():
            try:
                # Original image and one augmented copy
                original = load_image_strict(sample['input'])
                augmented = augmentor.augment(original.copy(), sample['task'])

                pixel_diff, color_diff = image_difference_metrics(
                    np.array(original), np.array(augmented)
                )
            except Exception as e:
                print(f"Error processing sample: {e}")
                continue

            results[task_key]['pixel_diff'].append(pixel_diff)
            results[task_key]['color_diff'].append(color_diff)

    def mean_or_nan(values):
        # np.mean([]) warns and returns nan; make the empty case explicit.
        return float(np.mean(values)) if values else float('nan')

    # Print the results
    print("\n" + "=" * 60)
    print("AUGMENTATION STRENGTH ANALYSIS")
    print("=" * 60)

    for task in results:
        if results[task]['pixel_diff']:
            avg_pixel = mean_or_nan(results[task]['pixel_diff'])
            avg_color = mean_or_nan(results[task]['color_diff'])

            print(f"\n{task.upper()}:")
            print(f"  Average Pixel Difference: {avg_pixel:.2f}")
            print(f"  Average Color Histogram Difference: {avg_color:.2f}")

            # Classify the strength from the average pixel difference
            if avg_pixel < 5:
                strength = "Very Weak"
            elif avg_pixel < 15:
                strength = "Weak"
            elif avg_pixel < 30:
                strength = "Moderate"
            else:
                strength = "Strong"

            print(f"  Augmentation Strength: {strength}")

    # Visualisation
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))

    # Pixel difference comparison
    axes[0].bar(['Captioning', 'VQA'],
                [mean_or_nan(results['captioning']['pixel_diff']),
                 mean_or_nan(results['vqa']['pixel_diff'])])
    axes[0].set_title('Average Pixel Difference')
    axes[0].set_ylabel('Pixel Difference')

    # Color difference comparison
    axes[1].bar(['Captioning', 'VQA'],
                [mean_or_nan(results['captioning']['color_diff']),
                 mean_or_nan(results['vqa']['color_diff'])])
    axes[1].set_title('Average Color Histogram Difference')
    axes[1].set_ylabel('Histogram Difference')

    plt.suptitle('Augmentation Strength Comparison: Captioning vs VQA')
    plt.tight_layout()
    plt.show()

# 단일 이미지 증강 테스트
def test_single_augmentation(image_path, task="captioning"):
    """Show one image next to seven independently augmented versions of it.

    Args:
        image_path: Path of the image file; it is opened and converted to RGB.
        task: Task name handed to the augmentor for every variant.

    Draws a 2x4 grid (original in the top-left cell) and returns ``None``.
    """
    augmentor = ImageAugmentor()

    # Load the image
    img = Image.open(image_path).convert('RGB')

    fig, axes = plt.subplots(2, 4, figsize=(16, 8))
    fig.suptitle(f'Single Image Augmentation Test - Task: {task}', fontsize=14)

    # Row-major view of the grid: cell 0 is the original, cells 1-7 the variants
    panels = axes.ravel()

    original_panel = panels[0]
    original_panel.imshow(img)
    original_panel.set_title('Original')
    original_panel.axis('off')

    # Seven augmentations, each computed from a fresh copy of the original
    for number, panel in enumerate(panels[1:], start=1):
        aug_img = augmentor.augment(img.copy(), task)
        panel.imshow(aug_img)
        panel.set_title(f'Augmented {number}')
        panel.axis('off')

    plt.tight_layout()
    plt.show()

# Example run
if __name__ == "__main__":
    train_path = "/content/drive/MyDrive/Colab Notebooks/wook/fine-tuning/datasets/qlora/image_train.parquet"

    # 1. Visual augmentation test
    print("Testing augmentation on samples...")
    fig = test_augmentation(train_path, num_samples=3)

    # 2. Augmentation strength analysis
    print("\nAnalyzing augmentation strength...")
    analyze_augmentation_difference(train_path)

    # 3. Save the comparison figure (optional)
    fig.savefig('/content/drive/MyDrive/Colab Notebooks/wook/augmentation_test.png', dpi=150, bbox_inches='tight')

In [None]:
import os
import shutil

# Step 1: back up the checkpoint (only if no backup exists yet)
checkpoint_4000 = "/content/drive/MyDrive/Colab Notebooks/wook/fine-tuning/datasets/qlora/qlora-out/checkpoint-4000"
backup_path = checkpoint_4000 + "_backup"

if not os.path.exists(backup_path):
    shutil.copytree(checkpoint_4000, backup_path)
    print(f"Backup created: {backup_path}")

# Step 2: remove optimizer.pt (to avoid conflicts)
optimizer_path = os.path.join(checkpoint_4000, "optimizer.pt")
if os.path.exists(optimizer_path):
    os.remove(optimizer_path)
    print("Removed optimizer.pt")

# Step 3: edit trainer_state.json
import json
state_path = os.path.join(checkpoint_4000, "trainer_state.json")
if os.path.exists(state_path):
    with open(state_path, 'r') as f:
        state = json.load(f)

    # Update the settings
    state['eval_steps'] = 500
    state['save_steps'] = 500
    # Do NOT store a learning rate here. On resume, transformers rebuilds
    # TrainerState from this file by passing every key as a keyword argument,
    # and TrainerState has no `learning_rate` field, so the extra key makes
    # loading the checkpoint fail. Set the learning rate through the training
    # arguments instead. pop() also repairs a file that already carries the
    # key from an earlier run of this cell.
    state.pop('learning_rate', None)

    with open(state_path, 'w') as f:
        json.dump(state, f, indent=2)
    print("Updated trainer_state.json")

Backup created: /content/drive/MyDrive/Colab Notebooks/wook/fine-tuning/datasets/qlora/qlora-out/checkpoint-4000_backup
Updated trainer_state.json


In [None]:
# Download projector_finetune.py from the project's GitHub repository into the working directory (-O overwrites any existing copy).
!wget -O projector_finetune.py https://raw.githubusercontent.com/ksw0425/deeplearning_challenge/refs/heads/main/projector_finetune.py

--2025-08-25 10:31:14--  https://raw.githubusercontent.com/ksw0425/deeplearning_challenge/refs/heads/main/projector_finetune.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 27026 (26K) [text/plain]
Saving to: ‘projector_finetune.py’


2025-08-25 10:31:14 (10.4 MB/s) - ‘projector_finetune.py’ saved [27026/27026]



In [None]:
# Install/upgrade the fine-tuning dependencies. transformers, accelerate and peft only have lower bounds and the
# remaining packages are unpinned, so the versions that get installed depend on when the cell is run.
# NOTE(review): bitsandbytes and transformers are already imported by the first cell; a runtime restart may be
# needed before an upgraded version is actually used. Confirm.
!pip install -U "transformers>=4.46" "accelerate>=0.34" "peft>=0.11" bitsandbytes pandas pillow requests scikit-learn

Collecting transformers>=4.46
  Downloading transformers-4.55.4-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.0/42.0 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Collecting peft>=0.11
  Downloading peft-0.17.1-py3-none-any.whl.metadata (14 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Collecting pandas
  Downloading pandas-2.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.2/91.2 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
Collecting requests
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting scikit-learn
  Downloading scikit_learn-1.7.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)
Downloading transformers-4.55.4-py3-none-any.whl (11.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 

In [None]:
# Download llm_projector_finetune.py from the project's GitHub repository into the working directory (-O overwrites any existing copy).
!wget -O llm_projector_finetune.py https://raw.githubusercontent.com/ksw0425/deeplearning_challenge/refs/heads/main/llm_projector_finetune.py

--2025-08-25 10:55:01--  https://raw.githubusercontent.com/ksw0425/deeplearning_challenge/refs/heads/main/llm_projector_finetune.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 26002 (25K) [text/plain]
Saving to: ‘llm_projector_finetune.py’


2025-08-25 10:55:01 (9.32 MB/s) - ‘llm_projector_finetune.py’ saved [26002/26002]

