# TFT Simulator - RL Training on Colab

Google Colab에서 TFT RL 모델을 학습합니다.

**사용 가능한 모델:**
- `CustomPPO`: 커스텀 Masked PPO (추천)
- `DuelingDQN`: Dueling DQN with Noisy Networks + Munchausen RL
- `TransformerPPO`: Transformer 기반 PPO

## 1. 환경 설정

In [None]:
# Check which GPU (if any) is attached to this runtime
!nvidia-smi

In [None]:
# Clone the repository
!git clone https://github.com/kue0806/tft-simulator.git
# Make the clone the working directory for the rest of the notebook
%cd tft-simulator

In [None]:
# Install dependencies (-q keeps pip's output quiet)
!pip install -q torch gymnasium tqdm numpy pandas pydantic

In [None]:
# Confirm that PyTorch is importable and report whether it can see a CUDA device
import torch

cuda_ready = torch.cuda.is_available()
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    # Device index 0 is the first visible CUDA device
    print(f"GPU: {torch.cuda.get_device_name(0)}")

## 2. 환경 테스트

In [None]:
# Smoke-test the TFT environment: construct it, reset once, and print basic sizes
import numpy as np
from src.rl.env.tft_env import TFTEnv

env = TFTEnv()
obs, info = env.reset()

print(f"Observation shape: {obs.shape}")
print(f"Action space: {env.action_space.n}")
# Sum over the mask returned by reset() to count the currently valid actions
valid_action_count = np.sum(info['valid_action_mask'])
print(f"Valid actions: {valid_action_count}")

## 3. 모델 학습

In [None]:
# Training configuration (read by the training and evaluation cells below)
MODEL_NAME = "CustomPPO"  # CustomPPO, DuelingDQN, TransformerPPO
TIMESTEPS = 500000        # number of training steps
CHECKPOINT_INTERVAL = 100 # checkpoint save interval (episodes)

In [None]:
# Run training
# IPython replaces $NAME in a `!` shell line with the value of the Python
# variable NAME, so MODEL_NAME, TIMESTEPS and CHECKPOINT_INTERVAL must already
# be defined in this notebook session.
!python train_single_model.py \
    --model $MODEL_NAME \
    --timesteps $TIMESTEPS \
    --checkpoint-interval $CHECKPOINT_INTERVAL \
    --save-dir models/colab_trained

## 4. 학습된 모델 테스트

In [None]:
# List the files saved for the selected model
import os

model_dir = f"models/colab_trained/{MODEL_NAME}/"
# os.listdir needs a directory, so test for one explicitly
if os.path.isdir(model_dir):
    # os.listdir returns entries in arbitrary order; sort for a stable listing
    for f in sorted(os.listdir(model_dir)):
        print(f)
else:
    print("Model directory not found")

In [None]:
# Evaluate the most recently saved "best" checkpoint of the selected model
import glob
import os

from src.rl.env.tft_env import TFTEnv
from src.rl.models.base import ModelConfig

# Resolve the model class for MODEL_NAME; only the selected model's module is imported
if MODEL_NAME == "CustomPPO":
    from src.rl.models.custom_masked_ppo import CustomMaskedPPO as ModelClass
elif MODEL_NAME == "DuelingDQN":
    from src.rl.models.dueling_dqn import DuelingDQNModel as ModelClass
elif MODEL_NAME == "TransformerPPO":
    from src.rl.models.transformer_ppo import TransformerPPO as ModelClass
else:
    # Fail here rather than hitting a NameError (or silently reusing a
    # ModelClass left over from an earlier run of this cell) further down.
    raise ValueError(
        f"Unknown MODEL_NAME: {MODEL_NAME!r} "
        "(expected CustomPPO, DuelingDQN, or TransformerPPO)"
    )

# Find the most recent best model
model_files = glob.glob(f"models/colab_trained/{MODEL_NAME}/*best*")
if model_files:
    # Choose by modification time: a plain max() compares the paths as strings,
    # which is lexicographic rather than chronological.
    latest_model = max(model_files, key=os.path.getmtime)
    print(f"Loading: {latest_model}")

    env = TFTEnv()
    config = ModelConfig()
    model = ModelClass(env, config)
    model.load(latest_model)

    # Evaluate over 20 episodes with deterministic=True and report the summary metrics
    results = model.evaluate(n_episodes=20, deterministic=True)
    print("\nEvaluation Results:")
    print(f"  Avg Placement: {results['avg_placement']:.2f}")
    print(f"  Top 4 Rate: {results['top4_rate']:.1%}")
    print(f"  Win Rate: {results['win_rate']:.1%}")
else:
    print("No model found!")

## 5. 모델 다운로드

In [None]:
# Compress the trained models into a single archive (-r recurses into subdirectories)
!zip -r trained_models.zip models/colab_trained/

In [None]:
# Save to Google Drive
from google.colab import drive
drive.mount('/content/drive')
# Copy the archive into the MyDrive folder under the mount point
!cp trained_models.zip /content/drive/MyDrive/

In [None]:
# Or download the archive directly
from google.colab import files
files.download('trained_models.zip')