# 01. Setup and Configuration

## 개요
라이브러리 import, 설정값 정의, 메모리 관리 함수 등 기본 설정을 담당하는 노트북입니다.

## 주요 구성
- 필수 라이브러리 import
- CFG 설정값 정의
- Device 설정 및 Seed 고정
- 메모리 관리 유틸리티 함수들


## Library Imports


In [None]:
import pandas as pd
import numpy as np
import os
import random
import gc
from datetime import datetime
import glob

from sklearn.model_selection import train_test_split
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split

import pyarrow as pa
import pyarrow.parquet as pq


## Configuration Settings


In [None]:
# Notebook-wide settings, looked up by string key (e.g. CFG['MODELS_PATH'], CFG['SEED']).
# Both paths are relative, so they resolve against the current working directory.
CFG = {
    'BATCH_SIZE': 4096,
    'EPOCHS_PER_SPLIT': 3,  # number of epochs to run on each split of the data
    'LEARNING_RATE': 1e-3,
    'SEED': 42,
    'DOWNSAMPLE_RATIO': 2,  # how many clicked=0 rows to sample, as a multiple of the clicked=1 count
    'SPLIT_DATA_PATH': '../../data/processed/split_data/',
    'MODELS_PATH': '../../models/',
    'MODEL_NAME': 'ctr_lstm_mlp_model',
    # Enhanced Gradient Descent Parameters
    # NOTE(review): the code that consumes the five keys below is not in this cell;
    # their exact semantics (e.g. what CATASTROPHIC_THRESHOLD is compared against,
    # and what unit SAVE_CHECKPOINT_EVERY counts) should be confirmed at the use site.
    'WEIGHT_DECAY': 1e-5,
    'GRADIENT_CLIP_NORM': 1.0,
    'CATASTROPHIC_THRESHOLD': 1.2,
    'LR_REDUCTION_FACTOR': 0.7,
    'SAVE_CHECKPOINT_EVERY': 3
}

# Use the "cuda" device string when torch reports CUDA as available, otherwise "cpu".
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Create the models folder if it does not exist yet (exist_ok=True makes a
# pre-existing folder a no-op instead of an error).
os.makedirs(CFG['MODELS_PATH'], exist_ok=True)
print(f"Model directory: {CFG['MODELS_PATH']}")

# When CUDA is available, report the GPU name and its total memory in GB
# (total_memory divided by 1024**3).
# NOTE(review): the name is queried without a device index while the properties
# are read for index 0 — confirm both refer to the same GPU on multi-GPU hosts.
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name()}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")


## Seed Setting


In [None]:
def seed_everything(seed):
    """Seed the Python, NumPy and PyTorch random generators with one value.

    Args:
        seed: Integer seed passed unchanged to every seeding call and stored
            (as a string) in the ``PYTHONHASHSEED`` environment variable.

    Side effects:
        Sets ``torch.backends.cudnn.deterministic`` to True and
        ``torch.backends.cudnn.benchmark`` to False.
    """
    random.seed(seed)
    # NOTE(review): Python reads PYTHONHASHSEED at interpreter startup, so this
    # assignment presumably only influences child processes — confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)

    # NumPy first, then the torch CPU and CUDA generators, all with the same seed.
    for reseed in (np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        reseed(seed)

    cudnn_flags = torch.backends.cudnn
    cudnn_flags.deterministic = True
    cudnn_flags.benchmark = False

seed_everything(CFG['SEED'])  # apply the configured seed to all generators handled by seed_everything
print(f"Seed set to: {CFG['SEED']}")


## Memory Management Utilities


In [None]:
def clear_memory():
    """Run Python garbage collection and, when CUDA is available, empty the CUDA cache.

    Always calls ``gc.collect()``. If ``torch.cuda.is_available()`` is true it
    then calls ``torch.cuda.empty_cache()`` followed by
    ``torch.cuda.synchronize()``. Returns None.
    """
    gc.collect()

    # Nothing GPU-related to do on a CPU-only setup.
    if not torch.cuda.is_available():
        return

    torch.cuda.empty_cache()
    torch.cuda.synchronize()

def get_memory_usage():
    """Return a one-line, human-readable summary of current GPU memory usage.

    Returns:
        A string with ``torch.cuda.memory_allocated()`` and
        ``torch.cuda.memory_reserved()`` converted to MB (bytes / 1024**2) and
        formatted to one decimal place, or a fixed "CPU mode" message when
        CUDA is not available.
    """
    if not torch.cuda.is_available():
        return "CPU mode - No GPU memory tracking"

    bytes_per_mb = 1024**2
    allocated_mb = torch.cuda.memory_allocated() / bytes_per_mb
    reserved_mb = torch.cuda.memory_reserved() / bytes_per_mb
    return f"GPU Memory - Allocated: {allocated_mb:.1f}MB, Cached: {reserved_mb:.1f}MB"

# Report the memory state right after setup, then confirm the cell finished
print(f"Initial memory usage: {get_memory_usage()}")
print("\nSetup completed successfully!")
