## Gdrive mount and Data_DIR configure

In [11]:
from google.colab import drive
drive.mount('/content/drive')

DATA_DIR='/content/drive/MyDrive/[2025]k-league/open_track1'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Gpytorch 설치.


> https://docs.gpytorch.ai/en/latest/index.html



In [12]:
!pip install gpytorch



# Configuration
- Import Packages
- Some Constant

In [13]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.model_selection import GroupKFold, train_test_split
from sklearn.cluster import KMeans
import os
import warnings
from typing import Dict, Any

# 경고 무시
warnings.filterwarnings('ignore')

# [설정]
DATA_DIR = '/content/drive/MyDrive/[2025]k-league/open_track1'
VAL_RATIO = 0.10 # 검증에 사용할 데이터 비율 (10%)
NUM_LAG = 1
BATCH_SIZE = 512
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f"Using device: {device}")

Using device: cuda


# Load Data
- from train.csv and match_info.csv
- some function for augment data
- some function for processing
  - role (estimated FW, MF, DF, GK),
  - some features from match_info
  

In [14]:
print("1. 데이터 로드 및 전처리 시작...")

# ====================================================
# 1-1. 데이터 로드 및 Augmentation (동일)
# ====================================================
train_df = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
match_info = pd.read_csv(os.path.join(DATA_DIR, 'match_info.csv'))

def augment_data(df):
    aug_df = df.copy()
    aug_df['start_y'] = 68.0 - aug_df['start_y']
    aug_df['end_y'] = 68.0 - aug_df['end_y']
    aug_df['game_episode'] = aug_df['game_episode'].astype(str) + '_aug'
    return pd.concat([df, aug_df], ignore_index=True)

train_aug_df = augment_data(train_df)

# Context Feature Generation (동일)
event_types = train_aug_df['type_name'].unique().tolist()
type_map = {t: i for i, t in enumerate(event_types)}

def process_match_info(matches):
    df = matches.copy()
    df['match_date_kst'] = pd.to_datetime(df['game_date']) + pd.Timedelta(hours=9)
    df['match_hour'] = df['match_date_kst'].dt.hour
    df['is_weekend'] = (df['match_date_kst'].dt.weekday >= 5).astype(int)

    home = df[['match_date_kst', 'home_team_id']].rename(columns={'home_team_id': 'team_id'})
    away = df[['match_date_kst', 'away_team_id']].rename(columns={'away_team_id': 'team_id'})
    full_schedule = pd.concat([home, away])
    full_schedule['date_only'] = full_schedule['match_date_kst'].dt.normalize()
    full_schedule = full_schedule.drop_duplicates().sort_values(['team_id', 'date_only'])
    full_schedule['prev_date'] = full_schedule.groupby('team_id')['date_only'].shift(1)
    full_schedule['rest_days'] = (full_schedule['date_only'] - full_schedule['prev_date']).dt.days
    full_schedule['rest_days'] = full_schedule['rest_days'].fillna(7).clip(0, 14)

    rest_map = dict(zip(zip(full_schedule['team_id'], full_schedule['date_only'].dt.date.astype(str)), full_schedule['rest_days']))
    df['date_str'] = df['match_date_kst'].dt.date.astype(str)

    df['home_rest'] = df.apply(lambda x: rest_map.get((x['home_team_id'], x['date_str']), 7), axis=1)
    df['away_rest'] = df.apply(lambda x: rest_map.get((x['away_team_id'], x['date_str']), 7), axis=1)
    df['rest_diff'] = df['home_rest'] - df['away_rest']

    df['time_slot'] = pd.cut(df['match_hour'], bins=[-1, 17, 24], labels=[0, 1]).astype(int)

    return df

match_info_processed = process_match_info(match_info)
match_context = match_info_processed[['game_id', 'match_hour', 'time_slot', 'is_weekend', 'home_rest', 'away_rest', 'rest_diff']]

# Player Role Mapping (KMeans)
orig_train = train_aug_df[~train_aug_df['game_episode'].str.contains('_aug')].copy()
def create_player_roles(df):
    stats = df.groupby('player_id').agg({'start_x': 'mean', 'type_name': lambda x: list(x)}).reset_index()
    stats['is_gk'] = stats['type_name'].apply(lambda x: any(t in ['Catch','Parry','Goal Keeping'] for t in x))
    stats['role'] = 0
    field = stats[~stats['is_gk']].copy()
    if len(field) > 0:
        km = KMeans(n_clusters=3, random_state=42, n_init=10).fit(field[['start_x']])
        idx = np.argsort(km.cluster_centers_.flatten())
        cmap = {old: new + 1 for new, old in enumerate(idx)}
        field['role'] = pd.Series(km.labels_, index=field.index).map(cmap)
        stats.loc[field.index, 'role'] = field['role']
    return dict(zip(stats['player_id'], stats['role']))

role_map = create_player_roles(orig_train)
home_gm = match_info.groupby('home_team_id')['home_score'].mean().to_dict()
away_gm = match_info.groupby('away_team_id')['away_score'].mean().to_dict()
orig_train['step_dx'] = orig_train['end_x'] - orig_train['start_x']
orig_train['step_dy'] = orig_train['end_y'] - orig_train['start_y']
orig_train['step_dist'] = np.sqrt(orig_train['step_dx']**2 + orig_train['step_dy']**2)
player_stats_map = orig_train.groupby('player_id').agg({'step_dist': 'mean', 'step_dx': 'mean'}).to_dict('index')


1. 데이터 로드 및 전처리 시작...


# Load Data section 2
- create features for training
- feature created from author. just selected features that would fit the soccer strategy.

In [15]:
# 2. PyTorch Feature Engineering
def create_features(df):
    df = df.merge(match_context, on='game_id', how='left')
    df = df.sort_values(['game_episode', 'action_id'])

    df['player_id_num'] = df['player_id'].astype(str).str.extract(r'(\d+)').astype(float)
    df['game_id_str'] = df['game_id'].astype(str)

    if 'is_home' not in df.columns: df['is_home'] = 0
    df['is_home'] = df['is_home'].astype(int)

    df['current_team_rest'] = np.where(df['is_home'] == 1, df['home_rest'], df['away_rest'])
    df['opp_team_rest'] = np.where(df['is_home'] == 1, df['away_rest'], df['home_rest'])

    df['type_id'] = df['type_name'].map(type_map)
    df['role'] = df['player_id'].map(role_map).fillna(0).astype(int)

    df['team_id_num'] = df['game_id_str'].str.split('_').str[0].astype(int)
    df['team_home_avg'] = df['team_id_num'].map(home_gm)
    df['team_away_avg'] = df['team_id_num'].map(away_gm)

    sp_keys = ['Corner', 'Freekick', 'Penalty', 'Kick Off']
    df['is_set_piece'] = df['type_name'].astype(str).apply(lambda x: 1 if any(k in x for k in sp_keys) else 0)

    df['zone_x'] = np.clip((df['start_x'] / 105.0 * 6).astype(int), 0, 5)
    df['zone_y'] = np.clip((df['start_y'] / 68.0 * 3).astype(int), 0, 2)
    df['tactical_zone'] = df['zone_y'] * 6 + df['zone_x']

    df['is_final_third'] = (df['start_x'] > 70).astype(int)
    df['min_dist_to_touchline'] = np.minimum(df['start_y'], 68 - df['start_y'])
    df['is_near_touchline'] = (df['min_dist_to_touchline'] < 5).astype(int)

    df['player_avg_dist'] = df['player_id'].map(lambda x: player_stats_map.get(x, {}).get('step_dist', np.nan))
    df['player_avg_dx'] = df['player_id'].map(lambda x: player_stats_map.get(x, {}).get('step_dx', np.nan))

    # Current Action Dynamics (유출 방지용)
    df['current_dx'] = df['end_x'] - df['start_x']
    df['current_dy'] = df['end_y'] - df['start_y']
    df['current_dist'] = np.sqrt(df['current_dx']**2 + df['current_dy']**2)
    df['current_angle'] = np.arctan2(df['current_dy'], df['current_dx'])

    # Goal Metrics
    df['dist_to_goal'] = np.sqrt((105 - df['start_x'])**2 + (34 - df['start_y'])**2)
    df['angle_to_goal'] = np.arctan2(34 - df['start_y'], 105 - df['start_x'])

    Y_NEAR = 30.34
    Y_FAR = 37.66
    X_GOAL = 105
    angle_near = np.arctan2(Y_NEAR - df['start_y'], X_GOAL - df['start_x'])
    angle_far = np.arctan2(Y_FAR - df['start_y'], X_GOAL - df['start_x'])
    df['goal_open_angle'] = np.abs(angle_far - angle_near)

    # Match State
    df['goal_event'] = np.where((df['type_name'] == 'Goal'), np.where(df['is_home'] == 1, 1, -1), 0)
    df['cumulative_score_diff'] = df.groupby('game_episode')['goal_event'].cumsum().shift(1).fillna(0)
    df['is_draw'] = (df['cumulative_score_diff'] == 0).astype(int)
    df['next_action_is_shot'] = ((df['type_name'].shift(-1) == 'Shoot') | (df['type_name'].shift(-1) == 'Goal')).astype(int)
    is_new_episode = df['game_episode'] != df['game_episode'].shift(1)
    df.loc[is_new_episode, 'next_action_is_shot'] = 0
    df['is_draw_next_shot'] = (df['is_draw'] * df['next_action_is_shot']).astype(int)

    grp = df.groupby('game_episode')

    # Time/Order Metrics
    df['time_delta'] = grp['time_seconds'].diff().fillna(0)
    df['episode_time'] = df['time_seconds'] - grp['time_seconds'].transform('min')
    df['event_order'] = grp.cumcount()
    df['match_phase'] = (df['time_seconds'] // 900).astype(int)

    # Lag 1 Features
    cols_to_shift = ['start_x', 'start_y', 'time_seconds', 'role', 'type_id', 'is_home', 'is_set_piece', 'tactical_zone',
                     'current_dx', 'current_dy', 'current_dist', 'current_angle', 'player_id_num']

    for i in range(1, NUM_LAG + 1):
        s = grp[cols_to_shift].shift(i)
        s.columns = [f'lag_{i}_{c}' for c in cols_to_shift]
        df = pd.concat([df, s], axis=1)

        df[f'lag_{i}_dx'] = df[f'lag_{i}_current_dx']
        df[f'lag_{i}_dy'] = df[f'lag_{i}_current_dy']
        df[f'lag_{i}_dist'] = df[f'lag_{i}_current_dist']
        df[f'lag_{i}_angle'] = df[f'lag_{i}_current_angle']

        df['angle_to_goal_diff'] = np.abs(df['angle_to_goal'] - df[f'lag_{i}_angle'])
        df.loc[df['angle_to_goal_diff'] > np.pi, 'angle_to_goal_diff'] = 2 * np.pi - df['angle_to_goal_diff']

        df[f'is_same_player_{i}'] = (df['player_id_num'] == df[f'lag_{i}_player_id_num']).astype(int)
        df[f'is_same_role_{i}'] = (df['role'] == df[f'lag_{i}_role']).astype(int)
        df[f'type_transition_{i}'] = df['type_id'] * 100 + df[f'lag_{i}_type_id'].fillna(-100)

    final_df = df.groupby('game_episode').tail(1).reset_index(drop=True)

    final_df['target_dx'] = final_df['end_x'] - final_df['start_x']
    final_df['target_dy'] = final_df['end_y'] - final_df['start_y']

    return final_df

print("2. 피처 엔지니어링 수행 중...")
train_data = create_features(train_aug_df)

2. 피처 엔지니어링 수행 중...


In [16]:
# 3. 최종 피처 및 Null 처리

feature_cols = [
    'start_x', 'start_y', 'dist_to_goal', 'angle_to_goal', 'goal_open_angle', 'is_near_touchline',
    'is_home', 'type_id', 'role', 'team_home_avg', 'team_away_avg', 'is_set_piece',
    'tactical_zone', 'is_final_third', 'min_dist_to_touchline',
    'current_team_rest', 'opp_team_rest', 'rest_diff', 'match_hour', 'time_slot', 'is_weekend',
    'time_delta', 'episode_time', 'event_order', 'match_phase',
    'cumulative_score_diff', 'is_draw', 'next_action_is_shot', 'is_draw_next_shot',
    'player_avg_dist', 'player_avg_dx',
]

i = 1
feature_cols.extend([f'lag_{i}_{c}' for c in ['start_x', 'start_y', 'role', 'type_id', 'is_set_piece', 'is_home', 'tactical_zone']])
feature_cols.extend([f'lag_{i}_{c}' for c in ['dx', 'dy', 'dist', 'angle']])
feature_cols.extend([f'is_same_player_{i}', f'is_same_role_{i}', f'type_transition_{i}', f'lag_{i}_time_seconds', 'angle_to_goal_diff'])

# 중복 제거 및 Null 처리
feature_cols = sorted(list(set(feature_cols)))

# Null 처리 (PyTorch는 NaN을 허용하지 않음)
for c in feature_cols:
    train_data[c] = train_data[c].fillna(train_data[c].median())

META_COLS = ['start_x', 'start_y', 'end_x', 'end_y', 'game_episode', 'game_id', 'target_dx', 'target_dy']
data_for_split = train_data[feature_cols + META_COLS].copy()

print(f"   최종 사용 피처 수: {len(feature_cols)}")
print(f"   전체 데이터 건수: {len(data_for_split)}")


   최종 사용 피처 수: 47
   전체 데이터 건수: 30870


# Dataset Class

In [17]:
# 4. PyTorch Dataset 및 DataLoader 정의 (수정본)

class KLeagueDataset(Dataset):
    """K-리그 DGP 학습을 위한 PyTorch Dataset"""
    def __init__(self, data_df, feature_cols, meta_cols, target_mode='delta'):
        # Input Features (X)
        self.X = torch.tensor(data_df[feature_cols].values, dtype=torch.float32).to(device)

        # Target 선택 로직 추가
        if target_mode == 'delta':
            # 상대적 이동 거리 예측 (기존 방식)
            y_cols = ['target_dx', 'target_dy']
        elif target_mode == 'absolute':
            # 최종 목적지 좌표 직접 예측
            y_cols = ['end_x', 'end_y']
        else:
            raise ValueError("target_mode must be either 'delta' or 'absolute'")

        self.Y = torch.tensor(data_df[y_cols].values, dtype=torch.float32).to(device)

        # Meta Data (ADE 복원 및 분석용)
        self.meta_df = data_df[meta_cols].reset_index(drop=True)
        self.meta_data = {col: self.meta_df[col].values for col in meta_cols}
        self.target_mode = target_mode # 모드 저장

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        return self.X[idx], self.Y[idx], idx

# Create DataLoader
- Train-Valid split (9:1)
- Normalization for X feature (necessity is now known)

In [18]:
# 5. Train-Validation (90:10) 분할 및 DataLoader 생성
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

print("3. Train-Validation (90:10) 분할 및 DataLoader 생성...")

# 모드 설정
TARGET_MODE = 'delta'  # 'delta' 또는 'absolute' 중 선택.

# 1. Game ID 기반 분할 로직 (기존과 동일)
groups_unique = data_for_split['game_id'].unique()
train_game_ids, val_game_ids = train_test_split(groups_unique, test_size=VAL_RATIO, random_state=42)

train_df_final = data_for_split[data_for_split['game_id'].isin(train_game_ids)].copy().reset_index(drop=True)
val_df_final = data_for_split[data_for_split['game_id'].isin(val_game_ids)].copy().reset_index(drop=True)

# 2. X 피처 정규화
scaler = StandardScaler()
train_df_final[feature_cols] = scaler.fit_transform(train_df_final[feature_cols])
val_df_final[feature_cols] = scaler.transform(val_df_final[feature_cols])

# 3. Dataset 생성 시 target_mode 전달
train_dataset = KLeagueDataset(train_df_final, feature_cols, META_COLS, target_mode=TARGET_MODE)
val_dataset = KLeagueDataset(val_df_final, feature_cols, META_COLS, target_mode=TARGET_MODE)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

data_loaders = {
    'train': train_loader,
    'val': val_loader,
    'val_dataset': val_dataset,
    'val_meta': val_dataset.meta_data,
}

print(f"  [Mode: {TARGET_MODE}]")
print(f"  Training Set Size: {len(train_dataset)} actions")
print(f"  Validation Set Size: {len(val_dataset)} actions")

3. Train-Validation (90:10) 분할 및 DataLoader 생성...
  [Mode: delta]
  Training Set Size: 27738 actions
  Validation Set Size: 3132 actions


## X/Y multivariate Prediction with Deep Gaussian Process.
- Commented for performance reasons.

In [19]:
# import torch
# import gpytorch
# from gpytorch.models.deep_gps import DeepGPLayer, DeepGP
# from gpytorch.means import ConstantMean, LinearMean
# from gpytorch.kernels import RBFKernel, ScaleKernel, MaternKernel
# from gpytorch.variational import (
#     VariationalStrategy,
#     CholeskyVariationalDistribution,
# )
# from gpytorch.likelihoods import MultitaskGaussianLikelihood
# from gpytorch.mlls import DeepApproximateMLL
# import math

# # Set up training data (multivariate input, two-dimensional output)
# # train_x: [n x input_dims], train_y: [n x output_dims]
# N = 100
# INPUT_DIMS = 49
# OUTPUT_DIMS = 2

# SCALE_Y = 1.5

# # Define a hidden layer for the deep GP
# class ToyDeepGPHiddenLayer(DeepGPLayer):
#     def __init__(self, input_dims, output_dims, num_inducing=128):
#         # Each layer uses a variational strategy
#         if input_dims == 49:
#             # 데이터 로더에서 샘플을 가져와 초기화 (매우 좋은 접근입니다)
#             train_batch_x, _, _ = next(iter(train_loader))
#             train_batch_x = torch.nan_to_num(train_batch_x, nan=0.0)
#             inducing_points = train_batch_x[:num_inducing, :].clone()
#         else:
#             # 중간 레이어는 이전 레이어의 출력을 받으므로
#             # 해당 차원(input_dims)에 맞는 랜덤 포인트를 생성합니다.
#             inducing_points = torch.randn(num_inducing, input_dims)


#         batch_shape = torch.Size([output_dims]) if output_dims is not None else torch.Size([])
#         variational_distribution = CholeskyVariationalDistribution(
#             num_inducing_points=num_inducing,
#             batch_shape=batch_shape
#         )
#         variational_strategy = VariationalStrategy(
#             self,
#             inducing_points,
#             variational_distribution,
#             learn_inducing_locations=True
#         )
#         super().__init__(variational_strategy, input_dims, output_dims)

#         #self.mean_module = ConstantMean()
#         self.mean_module = LinearMean(input_size=input_dims)
#         # self.covar_module = ScaleKernel(
#         #     RBFKernel(ard_num_dims=input_dims, batch_shape=batch_shape),
#         #     batch_shape=batch_shape
#         # )
#         self.covar_module = ScaleKernel(
#             MaternKernel(nu=2.5, ard_num_dims=input_dims, batch_shape=batch_shape),
#             batch_shape=batch_shape
#         )

#     def forward(self, x):
#         mean_x = self.mean_module(x)
#         covar_x = self.covar_module(x)
#         return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

# # Define the full Deep GP model
# class DeepGPRegressionModel(DeepGP):
#     def __init__(self, input_dims, output_dims):
#         # Define the layers
#         hidden_layer = ToyDeepGPHiddenLayer(input_dims, 5, num_inducing=64) # Example hidden layer
#         # hidden_layer2 = ToyDeepGPHiddenLayer(10, 2, num_inducing=64)
#         # The final layer has output_dims=OUTPUT_DIMS
#         last_layer = ToyDeepGPHiddenLayer(5, output_dims, num_inducing=64)
#         super().__init__()
#         self.hidden_layer = hidden_layer
#         # self.hidden_layer2 = hidden_layer2
#         self.last_layer = last_layer

#     def forward(self, x):
#         h = self.hidden_layer(x)
#         # h = self.hidden_layer2(h)
#         output = self.last_layer(h)
#         return output

# # Define the full Deep GP model
# class DeepGPRegressionModel_XY(DeepGP):
#     def __init__(self, input_dims, output_dims):
#         # Define the layers
#         hidden_layer_x = ToyDeepGPHiddenLayer(input_dims, 5, num_inducing=64) # Example hidden layer
#         last_layer_x = ToyDeepGPHiddenLayer(5, 1, num_inducing=64)

#         hidden_layer_y = ToyDeepGPHiddenLayer(input_dims, 5, num_inducing=64) # Example hidden layer
#         last_layer_y = ToyDeepGPHiddenLayer(5, 1, num_inducing=64)

#         super().__init__()
#         self.hidden_layer_x = hidden_layer_x
#         self.last_layer_x = last_layer_x
#         self.hidden_layer_y = hidden_layer_y
#         self.last_layer_y = last_layer_y

#     def forward(self, x):

#         h_x = self.hidden_layer_x(x)
#         output_x = self.last_layer_x(h_x)

#         h_y = self.hidden_layer_x(x)
#         output_y = self.last_layer_x(h_y)

#         return output_x, output_y

# # Initialize model and likelihood
# model = DeepGPRegressionModel(INPUT_DIMS, OUTPUT_DIMS)
# # model = DeepGPRegressionModel_XY(INPUT_DIMS, OUTPUT_DIMS)
# # Use MultitaskGaussianLikelihood for multi-dimensional output
# likelihood = MultitaskGaussianLikelihood(num_tasks=OUTPUT_DIMS)
# likelihood.initialize(noise=0.01)

# model.to(device)
# likelihood.to(device)

# # Training loop set up
# model.train()
# likelihood.train()

# optimizer = torch.optim.Adam([
#     {'params': model.parameters()},
#     {'params': likelihood.parameters()},
# ], lr=0.01)
# # Use DeepApproximateMLL for training Deep GPs
# mll = DeepApproximateMLL(
#     gpytorch.mlls.VariationalELBO(likelihood, model, 512)
#     ) # num_samples for stochastic training

# num_epochs = 100
# for i in range(num_epochs+1):
#     for train_x, train_y, _ in train_loader:
#       # if torch.isnan(train_x).any() or torch.isnan(train_y).any():
#       #   print("input tensor has nan!!!")

#       train_x = train_x.to(device)
#       target_y = train_y.to(device).clone()
#       target_y[:, 1] = target_y[:, 1] * SCALE_Y

#       train_x = torch.nan_to_num(train_x, nan=0.0)

#       optimizer.zero_grad()
#       output = model(train_x)
#       loss = -mll(output, target_y)
#       loss.backward()
#       optimizer.step()
#     if i % 10 == 0:
#         print(f'Epoch {i+1}/{num_epochs}, Loss: {loss.item():.4f}')

#     ADE = []
#     model.eval()
#     likelihood.eval()
#     with torch.no_grad():
#         for valid_x, valid_y, _ in val_loader:
#             valid_x = valid_x.to(device)
#             valid_y = valid_y.to(device)
#             valid_x = torch.nan_to_num(valid_x, nan=0.0)

#             predictions = likelihood(model(valid_x))
#             raw_mean = predictions.mean

#             if raw_mean.dim() == 3: # (Samples, Batch, 2) 인 경우
#                 mean_ = raw_mean.mean(0)
#             else:
#                 mean_ = raw_mean

#             real_pred = mean_.clone()
#             real_pred[:, 1] = real_pred[:, 1] / SCALE_Y

#             diff = real_pred - valid_y
#             dist = torch.sqrt(torch.sum(diff**2, dim=1)) # (Batch_Size,)

#             if i % 10 == 0:
#                 print(f"Sample Pred (m): {real_pred[0].cpu().numpy()}, Actual (m): {valid_y[0].cpu().numpy()}")

#             ADE.extend(dist.cpu().tolist())

#     print(f'Epoch {i+1}/{num_epochs}, ADE: {np.mean(ADE):.4f} m')

## X-Y Prediction separately with Deep Gaussian Process.
- Selected for performance reasons.
- Used RBF Kernel. if you can, use Matern, Linear, or other polynomial kernel to fit the data!
  - Average Distance Errors (ADEs) calculated for valid data
  - BUT while watching sample, it's not suitable enough. It's choatic process though!

In [20]:
import torch
import gpytorch
from gpytorch.models.deep_gps import DeepGPLayer, DeepGP
from gpytorch.means import ConstantMean, LinearMean
from gpytorch.kernels import RBFKernel, ScaleKernel, MaternKernel
from gpytorch.variational import (
    VariationalStrategy,
    CholeskyVariationalDistribution,
)
from gpytorch.likelihoods import MultitaskGaussianLikelihood
from gpytorch.mlls import DeepApproximateMLL
import math

# [설정]
INPUT_DIMS = 49 # 실제 피처 개수
OUTPUT_DIMS = 2
SCALE_Y = 1.0 # 필요시 10.0 등으로 조정

# Layer 정의
class ToyDeepGPHiddenLayer(DeepGPLayer):
    def __init__(self, input_dims, output_dims, num_inducing=128):
        if input_dims == 49: # INPUT_DIMS와 일치 확인
            train_batch_x, _, _ = next(iter(train_loader))
            train_batch_x = torch.nan_to_num(train_batch_x, nan=0.0)
            inducing_points = train_batch_x[:num_inducing, :].clone()
        else:
            inducing_points = torch.randn(num_inducing, input_dims)

        # output_dims가 None일 때(마지막 레이어)를 위한 처리
        batch_shape = torch.Size([output_dims]) if output_dims is not None else torch.Size([])

        variational_distribution = CholeskyVariationalDistribution(
            num_inducing_points=num_inducing,
            batch_shape=batch_shape
        )
        variational_strategy = VariationalStrategy(
            self,
            inducing_points,
            variational_distribution,
            learn_inducing_locations=True
        )
        super().__init__(variational_strategy, input_dims, output_dims)

        self.mean_module = LinearMean(input_size=input_dims)
        self.covar_module = ScaleKernel(
            RBFKernel(ard_num_dims=input_dims, batch_shape=batch_shape),
            batch_shape=batch_shape
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

# 모델 정의: X, Y 독립 경로
class DeepGPRegressionModel_XY(DeepGP):
    def __init__(self, input_dims, output_dims):
        super().__init__()
        # Intermediate layers can have output_dims (e.g. 5 or 16)
        self.hidden_layer_x = ToyDeepGPHiddenLayer(input_dims, 16, num_inducing=128)
        # Last layers must have output_dims=None for independent scalar output
        self.last_layer_x = ToyDeepGPHiddenLayer(16, None, num_inducing=128)

        self.hidden_layer_y = ToyDeepGPHiddenLayer(input_dims, 16, num_inducing=128)
        self.last_layer_y = ToyDeepGPHiddenLayer(16, None, num_inducing=128)

    def forward(self, x):
        h_x = self.hidden_layer_x(x)
        out_x = self.last_layer_x(h_x) # MultivariateNormal

        h_y = self.hidden_layer_y(x)
        out_y = self.last_layer_y(h_y) # MultivariateNormal

        # Combine independent MVNs into a Multitask MVN
        return gpytorch.distributions.MultitaskMultivariateNormal.from_independent_mvns([out_x, out_y])

# 초기화
model = DeepGPRegressionModel_XY(INPUT_DIMS, OUTPUT_DIMS).to(device)
likelihood = MultitaskGaussianLikelihood(num_tasks=2).to(device)
likelihood.initialize(noise=0.01)

# MLL에 전체 모델 전달
mll = DeepApproximateMLL(gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(train_dataset)))

optimizer = torch.optim.Adam([
    {'params': model.parameters()},
    {'params': likelihood.parameters()},
], lr=0.01)

# 학습 루프
num_epochs = 300
for i in range(num_epochs+1):
    model.train()
    likelihood.train()
    for train_x, train_y, _ in train_loader:
        train_x = train_x.to(device)
        target_y = train_y.to(device).clone()
        target_y[:, 1] *= SCALE_Y
        train_x = torch.nan_to_num(train_x, nan=0.0)

        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, target_y)
        loss.backward()
        optimizer.step()

    if i % 10 == 0:
        print(f'Epoch {i+1}/{num_epochs}, Loss: {loss.item():.4f}')

    # 검증 루프
    ADE = []
    model.eval()
    likelihood.eval()
    with torch.no_grad():
        for valid_x, valid_y, _ in val_loader:
            valid_x = valid_x.to(device)
            valid_y = valid_y.to(device)
            valid_x = torch.nan_to_num(valid_x, nan=0.0)

            predictions = likelihood(model(valid_x))

            # 샘플링 차원 평균 (Samples, Batch, 2) -> (Batch, 2)
            if predictions.mean.dim() == 3:
                mean_ = predictions.mean.mean(0)
            else:
                mean_ = predictions.mean

            real_pred = mean_.clone()
            real_pred[:, 1] /= SCALE_Y # 스케일 복원

            dist = torch.sqrt(torch.sum((real_pred - valid_y)**2, dim=1))
            ADE.extend(dist.cpu().tolist())

            if i % 10 == 0:
                print(f"Sample Pred (m): {real_pred[0].cpu().numpy()}, Actual (m): {valid_y[0].cpu().numpy()}")

    print(f'Epoch {i+1}/{num_epochs}, ADE: {np.mean(ADE):.4f} m')

Epoch 1/300, Loss: 323.8426
Sample Pred (m): [-8.958214  -0.5889279], Actual (m): [ 0.656985 -0.874684]
Sample Pred (m): [4.3899317  0.56050056], Actual (m): [16.35648  47.492832]
Sample Pred (m): [ 5.006949  -2.7299352], Actual (m): [-0.192675 -7.334344]
Sample Pred (m): [-13.380256   -0.6447384], Actual (m): [-0.5544   8.72134]
Sample Pred (m): [10.844568  1.595641], Actual (m): [-5.25756  -8.045828]
Sample Pred (m): [-4.0640235 -1.0359652], Actual (m): [7.55496  0.331228]
Sample Pred (m): [-5.6383357  0.8012058], Actual (m): [12.66237  -1.391892]
Epoch 1/300, ADE: 19.7809 m
Epoch 2/300, ADE: 17.7965 m
Epoch 3/300, ADE: 16.3778 m
Epoch 4/300, ADE: 15.4293 m
Epoch 5/300, ADE: 15.1043 m
Epoch 6/300, ADE: 14.7657 m
Epoch 7/300, ADE: 14.5798 m
Epoch 8/300, ADE: 14.5512 m
Epoch 9/300, ADE: 14.3287 m
Epoch 10/300, ADE: 14.3087 m
Epoch 11/300, Loss: 50.1223
Sample Pred (m): [13.66391    0.8697136], Actual (m): [ 0.656985 -0.874684]
Sample Pred (m): [27.338812   5.8979893], Actual (m): [16.3