In [None]:
import torch
import pandas as pd
import yaml, sys
from pathlib import Path
from sklearn.model_selection import train_test_split
from transformers import (
    AutoModelForCausalLM, Blip2Model, BlipImageProcessor, 
    AutoTokenizer, BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model
from datasets import load_dataset
from torch.utils.data import Dataset, DataLoader, Subset, ConcatDataset

# Project root: the parent of the current working directory.
BASE_DIR = Path("..").resolve()

# Main project directories, all located directly under the project root.
SRC_DIR, CONFIG_DIR, OUTPUT_DIR = (
    BASE_DIR / folder for folder in ("src", "configs", "outputs")
)

# Add the src folder to the import path, unless it is already registered.
if sys.path.count(str(SRC_DIR)) == 0:
    sys.path.append(str(SRC_DIR))

# From src dir
from model import BLIP2ForPhi, setup_model, select_train_params
from dataset import ImageCaptioningDataset, get_datasets
from trainer import CustomTrainer


In [3]:
import os
import warnings

# The Hugging Face token is taken from the environment (e.g. run
# `export HF_TOKEN=...` before starting Jupyter). Assigning a literal here
# would store a secret in the notebook and would overwrite a valid token that
# is already exported, so this cell only checks that one is present.
if not os.environ.get("HF_TOKEN"):
    warnings.warn(
        "HF_TOKEN is not set; export it before starting Jupyter if the model "
        "or the target Hub repository requires authentication."
    )

In [None]:
# Read the experiment settings from configs/config.yaml.
config_path = CONFIG_DIR / "config.yaml"
with config_path.open('r') as config_file:
    config = yaml.safe_load(config_file)

# setup_model yields the model with its image processor and tokenizer;
# select_train_params is called with language_model=False and its return
# value is reported below as the number of parameters being trained.
model, image_processor, tokenizer = setup_model(config)
num_trainable_params = select_train_params(model, language_model=False)

print(f"training {num_trainable_params} params...")
print("Preparing dataset...")

In [None]:
# Share of the combined COCO + Flickr sample count that is drawn from LLaVA.
LLAVA_SAMPLE_RATIO = 0.1
# Fixed seed so the shuffled LLaVA subset is the same on every run.
LLAVA_SHUFFLE_SEED = 42

# get_datasets returns four objects per source: train, valid and two
# "debug" datasets (kept here under their original names).
train_dataset1, valid_dataset1, train_debug1, valid_debug1 = get_datasets(
    config['dataset']['cocoCaption'], config, image_processor, tokenizer
)
train_dataset2, valid_dataset2, train_debug2, valid_debug2 = get_datasets(
    config['dataset']['flickr'], config, image_processor, tokenizer
)
train_dataset3, valid_dataset3, train_debug3, valid_debug3 = get_datasets(
    config['dataset']['llava'], config, image_processor, tokenizer
)

# COCO and Flickr are used in full.
concat_train_dataset = ConcatDataset([train_dataset1, train_dataset2])
concat_valid_dataset = ConcatDataset([valid_dataset1, valid_dataset2])

# LLaVA contributes LLAVA_SAMPLE_RATIO of the combined size. The count is
# clamped to the size of the LLaVA split because selecting indices past the
# end raises instead of truncating (this is how Hugging Face datasets behave;
# TODO confirm get_datasets returns one for the llava source).
num_samples_to_train = min(
    int(len(concat_train_dataset) * LLAVA_SAMPLE_RATIO), len(train_dataset3)
)
num_samples_to_valid = min(
    int(len(concat_valid_dataset) * LLAVA_SAMPLE_RATIO), len(valid_dataset3)
)

subset_of_train_dataset3 = train_dataset3.shuffle(seed=LLAVA_SHUFFLE_SEED).select(range(num_samples_to_train))
subset_of_valid_dataset3 = valid_dataset3.shuffle(seed=LLAVA_SHUFFLE_SEED).select(range(num_samples_to_valid))

final_train_dataset = ConcatDataset([concat_train_dataset, subset_of_train_dataset3])
final_valid_dataset = ConcatDataset([concat_valid_dataset, subset_of_valid_dataset3])

print(f"Train Dataset length: {len(final_train_dataset)}, Valid Dataset length: {len(final_valid_dataset)}")

In [None]:
# Lazily pick out only the parameters that still require gradients.
trainable_params = (param for param in model.parameters() if param.requires_grad)

training_cfg = config['training']
optimizer = torch.optim.AdamW(
    trainable_params,
    lr=float(training_cfg['learning_rate']),  # cast in case YAML yields a string
    weight_decay=training_cfg['weight_decay'],
)

# Hand the model, optimizer and both dataset splits to the project trainer.
trainer = CustomTrainer(
    model=model,
    optimizer=optimizer,
    tokenizer=tokenizer,
    train_dataset=final_train_dataset,
    val_dataset=final_valid_dataset,
    dataset_name='ImageCaptioning',
    batch_size=training_cfg['batch_size'],
    save_dir=config['path']['save_dir'],
    repo_id=config['hf']['repo_id'],
)

print("Starting training...")
# resume_from_checkpoint is optional in the config, so .get() passes None
# when the key is absent.
trainer.train(
    num_epochs=training_cfg['num_epochs'],
    resume_from_checkpoint=config['path'].get('resume_from_checkpoint'),
)
print("Training finished!")

In [None]:
# ==========================================================
# 1. Main function that holds the entire training pipeline
# ==========================================================
def main():
    """Run the complete training pipeline.

    Loads ``configs/config.yaml``, builds the model and the combined
    training/validation datasets, creates the AdamW optimizer and the
    ``CustomTrainer``, then trains for the configured number of epochs.
    Takes no arguments and returns ``None``.

    Imports and path setup are deliberately performed inside the function
    (the original author marked this as important); the function is handed
    to ``notebook_launcher`` below.
    """
    # --- Imports (only what this function actually uses) ---
    import os
    import sys
    import warnings
    from pathlib import Path

    import torch
    import yaml
    from torch.utils.data import ConcatDataset

    # --- Path setup (important: done inside the function) ---
    BASE_DIR = Path("..").resolve()
    SRC_DIR = BASE_DIR / "src"
    CONFIG_DIR = BASE_DIR / "configs"

    if str(SRC_DIR) not in sys.path:
        sys.path.append(str(SRC_DIR))

    # --- Custom module imports (important: done inside the function) ---
    from model import setup_model, select_train_params
    from dataset import get_datasets
    from trainer import CustomTrainer

    # --- Config & token setup ---
    # The token is read from the environment. Writing a literal into
    # os.environ here would store a secret in the notebook and overwrite a
    # valid token that is already exported.
    if not os.environ.get("HF_TOKEN"):
        warnings.warn(
            "HF_TOKEN is not set; export it before starting Jupyter if the "
            "model or the target Hub repository requires authentication."
        )

    config_path = CONFIG_DIR / "config.yaml"
    # Explicit encoding so the file is decoded the same way on every platform.
    with open(config_path, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)

    # --- Model & data preparation ---
    model, image_processor, tokenizer = setup_model(config)
    num_trainable_params = select_train_params(model, language_model=False)
    print(f"training {num_trainable_params} params...")
    print(f"Preparing dataset...")

    # The last two return values of get_datasets are not needed here.
    train_dataset1, valid_dataset1, _, _ = get_datasets(config['dataset']['cocoCaption'], config, image_processor, tokenizer)
    train_dataset2, valid_dataset2, _, _ = get_datasets(config['dataset']['flickr'], config, image_processor, tokenizer)
    train_dataset3, valid_dataset3, _, _ = get_datasets(config['dataset']['llava'], config, image_processor, tokenizer)

    # COCO and Flickr are used in full.
    concat_train_dataset = ConcatDataset([train_dataset1, train_dataset2])
    concat_valid_dataset = ConcatDataset([valid_dataset1, valid_dataset2])

    # LLaVA contributes 10% of the combined COCO + Flickr size. The count is
    # clamped to the size of the LLaVA split because selecting indices past
    # the end raises instead of truncating (this is how Hugging Face datasets
    # behave; TODO confirm get_datasets returns one for the llava source).
    llava_sample_ratio = 0.1
    shuffle_seed = 42  # fixed so the sampled subset is reproducible
    num_samples_to_train = min(int(len(concat_train_dataset) * llava_sample_ratio), len(train_dataset3))
    num_samples_to_valid = min(int(len(concat_valid_dataset) * llava_sample_ratio), len(valid_dataset3))

    subset_of_train_dataset3 = train_dataset3.shuffle(seed=shuffle_seed).select(range(num_samples_to_train))
    subset_of_valid_dataset3 = valid_dataset3.shuffle(seed=shuffle_seed).select(range(num_samples_to_valid))

    final_train_dataset = ConcatDataset([concat_train_dataset, subset_of_train_dataset3])
    final_valid_dataset = ConcatDataset([concat_valid_dataset, subset_of_valid_dataset3])

    print(f"Train Dataset length: {len(final_train_dataset)}, Valid Dataset length: {len(final_valid_dataset)}")

    # --- Optimizer & trainer setup ---
    # Only parameters that still require gradients are optimised.
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = torch.optim.AdamW(
        trainable_params,
        lr=float(config['training']['learning_rate']),  # cast in case YAML yields a string
        weight_decay=config['training']['weight_decay']
    )

    trainer = CustomTrainer(
        model=model,
        optimizer=optimizer,
        tokenizer=tokenizer,
        train_dataset=final_train_dataset,
        val_dataset=final_valid_dataset,
        dataset_name='ImageCaptioning',
        batch_size=config['training']['batch_size'],
        save_dir=config['path']['save_dir'],
        repo_id=config['hf']['repo_id']
    )

    # --- Start training ---
    print("Starting training...")
    # resume_from_checkpoint is optional in the config; .get() passes None
    # when the key is absent.
    trainer.train(
        num_epochs=config['training']['num_epochs'],
        resume_from_checkpoint=config['path'].get('resume_from_checkpoint'),
    )
    print("Training finished!")


# ==========================================================
# 2. Run the main function defined above with notebook_launcher
# ==========================================================
from accelerate import notebook_launcher

# Run main on 2 GPUs.
# NOTE(review): the single-process cells earlier in this notebook build the
# model in this kernel; if that initialises CUDA, launching multi-GPU workers
# from the same kernel is expected to fail -- confirm, and run only one of
# the two paths after a kernel restart.
notebook_launcher(function=main, num_processes=2)