In [6]:
!pip install -Uq tabpfn

In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence

from tabpfn import TabPFNRegressor
from tabpfn.constants import ModelVersion

In [8]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


In [13]:
def set_seed(seed: int):
    """Seed every random number generator this notebook can draw from.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU plus all CUDA devices), then forces cuDNN into deterministic mode
    with autotuning disabled.

    Args:
        seed: Integer seed applied to every generator.

    Returns:
        The same ``seed``, so the caller can reuse it (e.g. as ``random_state``).
    """
    # Imported locally because the notebook's import cell does not bring in `random`.
    import random

    random.seed(seed)
    np.random.seed(seed)

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Trade cuDNN speed for run-to-run repeatability.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    return seed


seed = set_seed(810)

In [12]:
# Load the prepared train / test tables from Google Drive.
# NOTE(review): the submission cell at the end of the notebook reads and writes under a
# Drive folder with a different name than this one — confirm both locations exist.
file_path = '/content/drive/MyDrive/1데이콘/K리그-서울시립대공개AI경진대회/dataset/'

train_df, test_df = (
    pd.read_csv(file_path + csv_name)
    for csv_name in ('final_train_df.csv', 'final_test_df.csv')
)

print(train_df.shape, test_df.shape)

(30870, 84) (2414, 84)


# LSTM

## Dataset & DataLoader

In [5]:
class EpisodeDataset(Dataset):
    """Per-episode sequence dataset built from a row-per-event DataFrame.

    Rows are grouped by ``game_episode`` and ordered by ``time_seconds``.
    Each item is the episode's feature matrix, its number of rows, and the
    target values taken from the episode's last row.

    Args:
        df: DataFrame containing ``game_episode``, ``time_seconds``, the
            feature columns and the target columns.
        feature_cols: Column names used as per-step model inputs.
        target_cols: Column names read from the final row as the label.
    """

    def __init__(self, df, feature_cols, target_cols):
        self.feature_cols = feature_cols
        self.target_cols = target_cols

        self.episodes = []
        self.targets = []

        for _, rows in df.groupby("game_episode"):
            # Chronological order matters: the label is read from the last row.
            ordered = rows.sort_values("time_seconds").reset_index(drop=True)

            features = ordered[feature_cols].values
            last_row_targets = ordered[target_cols].iloc[-1].values

            self.episodes.append(torch.tensor(features, dtype=torch.float32))
            self.targets.append(torch.tensor(last_row_targets, dtype=torch.float32))

    def __len__(self):
        """Return the number of episodes."""
        return len(self.episodes)

    def __getitem__(self, idx):
        """Return ``(sequence [T, F], T, target)`` for episode ``idx``."""
        sequence = self.episodes[idx]
        target = self.targets[idx]
        return sequence, sequence.shape[0], target


def collate_fn(batch):
    """Merge ``(sequence, length, target)`` samples into one padded batch.

    Args:
        batch: Iterable of ``(sequence [T_i, F], length, target)`` triples.

    Returns:
        ``(padded, lengths, targets)`` where ``padded`` is ``[B, T_max, F]``
        (zero-padded on the time axis), ``lengths`` is a ``long`` tensor of the
        original sequence lengths, and ``targets`` stacks the per-sample targets.
    """
    sequences, seq_lengths, targets = [], [], []
    for sequence, seq_length, target in batch:
        sequences.append(sequence)
        seq_lengths.append(seq_length)
        targets.append(target)

    padded = pad_sequence(sequences, batch_first=True)  # [B, Tmax, F]
    return padded, torch.tensor(seq_lengths, dtype=torch.long), torch.stack(targets)

In [None]:
# Hold out 20% of the episodes for validation. The split is made on episode ids rather
# than on rows, so every row of an episode lands on the same side.
# NOTE(review): `final_df` is not created in any visible cell — confirm it exists on a
# fresh kernel. This cell also rebinds `train_df`, replacing the frame loaded from CSV.
episodes_unique = final_df["game_episode"].unique()

train_eps, val_eps = train_test_split(episodes_unique, test_size=0.2, random_state=seed)

train_df = final_df.loc[lambda frame: frame["game_episode"].isin(train_eps)].reset_index(drop=True)
val_df = final_df.loc[lambda frame: frame["game_episode"].isin(val_eps)].reset_index(drop=True)

In [None]:
# target_mode = 'delta'
# Number of episodes per mini-batch; used by both the train and validation DataLoaders.
batch_size = 32

In [None]:
# Datasets: one per split, sharing the same feature / target column lists.
train_dataset = EpisodeDataset(train_df, feature_cols, target_cols)
val_dataset = EpisodeDataset(val_df, feature_cols, target_cols)

# Training batches are reshuffled each epoch and the trailing partial batch is dropped.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
    collate_fn=collate_fn,
)
# Validation keeps dataset order and every episode.
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=collate_fn,
)

## LSTM Modeling

In [None]:
# Training hyperparameters.
EPOCHS = 100      # number of passes over train_loader
LR = 1e-3         # learning rate handed to Adam
hidden_dim = 64   # LSTM hidden-state width

# LSTM input width. Derived from the feature list (previously hard-coded to 108) so it
# always matches the width of the sequences EpisodeDataset builds from `feature_cols`.
input_dim = len(feature_cols)

In [None]:
class LSTMBaseline(nn.Module):
    """Single-layer LSTM regressor over variable-length sequences.

    The last layer's final hidden state is mapped by one linear layer to two
    output values per sequence.

    Note: the constructor defaults are bound to the module-level ``input_dim``
    and ``hidden_dim`` at class-definition time, so those names must already
    exist when this cell runs.
    """

    def __init__(self, input_dim=input_dim, hidden_dim=hidden_dim):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=1, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 2)  # two regression outputs

    def forward(self, x, lengths):
        """Predict two values per sequence.

        Args:
            x: Padded batch of sequences, ``[B, T, input_dim]``.
            lengths: Unpadded length of each sequence, ``[B]``.

        Returns:
            Tensor of shape ``[B, 2]``.
        """
        # Packing lets the LSTM stop at each sequence's real end instead of consuming
        # padding; pack_padded_sequence needs the lengths on the CPU.
        packed_batch = pack_padded_sequence(
            x, lengths.cpu(), batch_first=True, enforce_sorted=False
        )
        _, (hidden, _cell) = self.lstm(packed_batch)
        # hidden[-1] is the final hidden state of the last LSTM layer: [B, H].
        return self.fc(hidden[-1])

# Build the network on the selected device, then the MSE loss and an Adam optimizer
# over all of its parameters.
model = LSTMBaseline(input_dim=input_dim, hidden_dim=hidden_dim)
model = model.to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

In [None]:
# Train for EPOCHS epochs, scoring each epoch by the mean Euclidean distance between
# predicted and true target vectors on the validation set, and keep the best weights.
best_dist = float("inf")
best_model_state = None

for epoch in range(1, EPOCHS + 1):
    # --- Train ---
    model.train()
    total_loss = 0.0
    n_seen = 0  # episodes actually processed this epoch

    for seq, length, tgt in tqdm(train_loader):
        seq, length, tgt = seq.to(device), length.to(device), tgt.to(device)

        optimizer.zero_grad()
        pred = model(seq, length)
        loss = criterion(pred, tgt)
        loss.backward()
        optimizer.step()

        # criterion returns a batch mean; weight it by batch size before summing.
        total_loss += loss.item() * seq.size(0)
        n_seen += seq.size(0)

    # Average over the episodes actually seen. train_loader is built with drop_last=True,
    # so dividing by len(train_loader.dataset) would under-report the loss.
    train_loss = total_loss / max(n_seen, 1)

    # --- Validation: mean Euclidean distance between predicted and true vectors ---
    model.eval()
    dists = []

    with torch.no_grad():
      for seq, length, tgt in tqdm(val_loader):
          seq, length, tgt = seq.to(device), length.to(device), tgt.to(device)
          pred = model(seq, length)

          diff = pred - tgt
          dist = torch.sqrt(torch.sum(diff * diff, dim=1))  # [B]
          dists.append(dist.cpu().numpy())

    mean_dist = np.concatenate(dists).mean()


    print(
        f"[Epoch {epoch}] "
        f"train_loss={train_loss:.4f} | "
        f"valid_mean_dist={mean_dist:.4f}"
    )

    # ----- Track the best model -----
    if mean_dist < best_dist:
        best_dist = mean_dist
        # state_dict() hands back tensors that share storage with the live parameters,
        # and dict.copy() is shallow — so the snapshot must clone every tensor, otherwise
        # later optimizer steps would silently overwrite the "best" weights.
        best_model_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
        print(f" --> Best model updated! (dist={best_dist:.4f})")

# Leave `model` holding the best validation weights so later cells predict with them
# rather than with whatever the final epoch produced.
if best_model_state is not None:
    model.load_state_dict(best_model_state)

100%|██████████| 446/446 [00:01<00:00, 284.71it/s]
100%|██████████| 112/112 [00:00<00:00, 369.71it/s]


[Epoch 1] train_loss=1.0096 | valid_mean_dist=1.0951
 --> Best model updated! (dist=1.0951)


100%|██████████| 446/446 [00:01<00:00, 334.81it/s]
100%|██████████| 112/112 [00:00<00:00, 932.58it/s]


[Epoch 2] train_loss=1.0052 | valid_mean_dist=1.1013


100%|██████████| 446/446 [00:01<00:00, 371.90it/s]
100%|██████████| 112/112 [00:00<00:00, 949.83it/s]


[Epoch 3] train_loss=1.0046 | valid_mean_dist=1.1009


100%|██████████| 446/446 [00:01<00:00, 371.16it/s]
100%|██████████| 112/112 [00:00<00:00, 927.77it/s]


[Epoch 4] train_loss=1.0047 | valid_mean_dist=1.1026


100%|██████████| 446/446 [00:01<00:00, 375.42it/s]
100%|██████████| 112/112 [00:00<00:00, 938.29it/s]


[Epoch 5] train_loss=1.0055 | valid_mean_dist=1.0970


100%|██████████| 446/446 [00:01<00:00, 368.94it/s]
100%|██████████| 112/112 [00:00<00:00, 927.91it/s]


[Epoch 6] train_loss=1.0046 | valid_mean_dist=1.0807
 --> Best model updated! (dist=1.0807)


100%|██████████| 446/446 [00:01<00:00, 375.66it/s]
100%|██████████| 112/112 [00:00<00:00, 913.46it/s]


[Epoch 7] train_loss=1.0041 | valid_mean_dist=1.0906


100%|██████████| 446/446 [00:01<00:00, 373.64it/s]
100%|██████████| 112/112 [00:00<00:00, 838.59it/s]


[Epoch 8] train_loss=1.0038 | valid_mean_dist=1.1029


100%|██████████| 446/446 [00:01<00:00, 354.55it/s]
100%|██████████| 112/112 [00:00<00:00, 711.58it/s]


[Epoch 9] train_loss=1.0039 | valid_mean_dist=1.0913


100%|██████████| 446/446 [00:01<00:00, 283.52it/s]
100%|██████████| 112/112 [00:00<00:00, 715.26it/s]


[Epoch 10] train_loss=1.0038 | valid_mean_dist=1.1010


100%|██████████| 446/446 [00:01<00:00, 296.54it/s]
100%|██████████| 112/112 [00:00<00:00, 932.80it/s]


[Epoch 11] train_loss=1.0037 | valid_mean_dist=1.0942


100%|██████████| 446/446 [00:01<00:00, 376.65it/s]
100%|██████████| 112/112 [00:00<00:00, 823.50it/s]


[Epoch 12] train_loss=1.0042 | valid_mean_dist=1.0970


100%|██████████| 446/446 [00:01<00:00, 378.06it/s]
100%|██████████| 112/112 [00:00<00:00, 942.35it/s]


[Epoch 13] train_loss=1.0036 | valid_mean_dist=1.0883


100%|██████████| 446/446 [00:01<00:00, 377.34it/s]
100%|██████████| 112/112 [00:00<00:00, 947.62it/s]


[Epoch 14] train_loss=1.0045 | valid_mean_dist=1.1033


100%|██████████| 446/446 [00:01<00:00, 369.52it/s]
100%|██████████| 112/112 [00:00<00:00, 764.04it/s]


[Epoch 15] train_loss=1.0034 | valid_mean_dist=1.0872


100%|██████████| 446/446 [00:02<00:00, 214.52it/s]
100%|██████████| 112/112 [00:00<00:00, 846.01it/s]


[Epoch 16] train_loss=1.0041 | valid_mean_dist=1.1026


100%|██████████| 446/446 [00:01<00:00, 375.86it/s]
100%|██████████| 112/112 [00:00<00:00, 933.45it/s]


[Epoch 17] train_loss=1.0039 | valid_mean_dist=1.0922


100%|██████████| 446/446 [00:01<00:00, 315.64it/s]
100%|██████████| 112/112 [00:00<00:00, 637.84it/s]


[Epoch 18] train_loss=1.0039 | valid_mean_dist=1.0977


100%|██████████| 446/446 [00:01<00:00, 282.40it/s]
100%|██████████| 112/112 [00:00<00:00, 637.83it/s]


[Epoch 19] train_loss=1.0039 | valid_mean_dist=1.0878


100%|██████████| 446/446 [00:01<00:00, 347.08it/s]
100%|██████████| 112/112 [00:00<00:00, 901.84it/s]


[Epoch 20] train_loss=1.0041 | valid_mean_dist=1.1023


100%|██████████| 446/446 [00:01<00:00, 374.41it/s]
100%|██████████| 112/112 [00:00<00:00, 937.08it/s]


[Epoch 21] train_loss=1.0035 | valid_mean_dist=1.1128


100%|██████████| 446/446 [00:01<00:00, 363.93it/s]
100%|██████████| 112/112 [00:00<00:00, 927.90it/s]


[Epoch 22] train_loss=1.0042 | valid_mean_dist=1.1022


100%|██████████| 446/446 [00:01<00:00, 375.08it/s]
100%|██████████| 112/112 [00:00<00:00, 932.54it/s]


[Epoch 23] train_loss=1.0038 | valid_mean_dist=1.0936


100%|██████████| 446/446 [00:01<00:00, 376.16it/s]
100%|██████████| 112/112 [00:00<00:00, 848.93it/s]


[Epoch 24] train_loss=1.0037 | valid_mean_dist=1.1049


100%|██████████| 446/446 [00:01<00:00, 370.08it/s]
100%|██████████| 112/112 [00:00<00:00, 948.23it/s]


[Epoch 25] train_loss=1.0042 | valid_mean_dist=1.0983


100%|██████████| 446/446 [00:01<00:00, 370.52it/s]
100%|██████████| 112/112 [00:00<00:00, 933.63it/s]


[Epoch 26] train_loss=1.0034 | valid_mean_dist=1.0948


100%|██████████| 446/446 [00:01<00:00, 331.93it/s]
100%|██████████| 112/112 [00:00<00:00, 644.82it/s]


[Epoch 27] train_loss=1.0037 | valid_mean_dist=1.0881


100%|██████████| 446/446 [00:01<00:00, 279.26it/s]
100%|██████████| 112/112 [00:00<00:00, 701.63it/s]


[Epoch 28] train_loss=1.0032 | valid_mean_dist=1.0948


100%|██████████| 446/446 [00:01<00:00, 315.10it/s]
100%|██████████| 112/112 [00:00<00:00, 899.93it/s]


[Epoch 29] train_loss=1.0030 | valid_mean_dist=1.0996


100%|██████████| 446/446 [00:01<00:00, 362.31it/s]
100%|██████████| 112/112 [00:00<00:00, 918.12it/s]


[Epoch 30] train_loss=1.0033 | valid_mean_dist=1.1016


100%|██████████| 446/446 [00:01<00:00, 356.63it/s]
100%|██████████| 112/112 [00:00<00:00, 300.36it/s]


[Epoch 31] train_loss=1.0036 | valid_mean_dist=1.1029


100%|██████████| 446/446 [00:01<00:00, 303.83it/s]
100%|██████████| 112/112 [00:00<00:00, 933.78it/s]


[Epoch 32] train_loss=1.0037 | valid_mean_dist=1.1049


100%|██████████| 446/446 [00:01<00:00, 375.23it/s]
100%|██████████| 112/112 [00:00<00:00, 956.38it/s]


[Epoch 33] train_loss=1.0035 | valid_mean_dist=1.0988


100%|██████████| 446/446 [00:01<00:00, 368.86it/s]
100%|██████████| 112/112 [00:00<00:00, 927.08it/s]


[Epoch 34] train_loss=1.0030 | valid_mean_dist=1.0855


100%|██████████| 446/446 [00:01<00:00, 369.80it/s]
100%|██████████| 112/112 [00:00<00:00, 920.23it/s]


[Epoch 35] train_loss=1.0036 | valid_mean_dist=1.0979


100%|██████████| 446/446 [00:01<00:00, 315.45it/s]
100%|██████████| 112/112 [00:00<00:00, 663.78it/s]


[Epoch 36] train_loss=1.0034 | valid_mean_dist=1.0946


100%|██████████| 446/446 [00:01<00:00, 277.09it/s]
100%|██████████| 112/112 [00:00<00:00, 605.61it/s]


[Epoch 37] train_loss=1.0041 | valid_mean_dist=1.0923


100%|██████████| 446/446 [00:01<00:00, 350.56it/s]
100%|██████████| 112/112 [00:00<00:00, 962.78it/s]


[Epoch 38] train_loss=1.0038 | valid_mean_dist=1.0981


100%|██████████| 446/446 [00:01<00:00, 375.96it/s]
100%|██████████| 112/112 [00:00<00:00, 953.06it/s]


[Epoch 39] train_loss=1.0038 | valid_mean_dist=1.0919


100%|██████████| 446/446 [00:01<00:00, 374.57it/s]
100%|██████████| 112/112 [00:00<00:00, 912.39it/s]


[Epoch 40] train_loss=1.0040 | valid_mean_dist=1.0975


100%|██████████| 446/446 [00:01<00:00, 366.73it/s]
100%|██████████| 112/112 [00:00<00:00, 943.99it/s]


[Epoch 41] train_loss=1.0029 | valid_mean_dist=1.0939


100%|██████████| 446/446 [00:01<00:00, 375.58it/s]
100%|██████████| 112/112 [00:00<00:00, 858.38it/s]


[Epoch 42] train_loss=1.0036 | valid_mean_dist=1.0837


100%|██████████| 446/446 [00:01<00:00, 376.29it/s]
100%|██████████| 112/112 [00:00<00:00, 888.45it/s]


[Epoch 43] train_loss=1.0039 | valid_mean_dist=1.0924


100%|██████████| 446/446 [00:01<00:00, 369.92it/s]
100%|██████████| 112/112 [00:00<00:00, 932.05it/s]


[Epoch 44] train_loss=1.0041 | valid_mean_dist=1.0928


100%|██████████| 446/446 [00:01<00:00, 348.18it/s]
100%|██████████| 112/112 [00:00<00:00, 677.55it/s]


[Epoch 45] train_loss=1.0035 | valid_mean_dist=1.0906


100%|██████████| 446/446 [00:01<00:00, 281.62it/s]
100%|██████████| 112/112 [00:00<00:00, 724.88it/s]


[Epoch 46] train_loss=1.0034 | valid_mean_dist=1.0974


100%|██████████| 446/446 [00:01<00:00, 308.36it/s]
100%|██████████| 112/112 [00:00<00:00, 943.55it/s]


[Epoch 47] train_loss=1.0039 | valid_mean_dist=1.0901


100%|██████████| 446/446 [00:01<00:00, 377.07it/s]
100%|██████████| 112/112 [00:00<00:00, 952.40it/s]


[Epoch 48] train_loss=1.0034 | valid_mean_dist=1.0911


100%|██████████| 446/446 [00:01<00:00, 371.83it/s]
100%|██████████| 112/112 [00:00<00:00, 908.18it/s]


[Epoch 49] train_loss=1.0039 | valid_mean_dist=1.0948


100%|██████████| 446/446 [00:01<00:00, 371.93it/s]
100%|██████████| 112/112 [00:00<00:00, 921.77it/s]


[Epoch 50] train_loss=1.0032 | valid_mean_dist=1.1080


100%|██████████| 446/446 [00:01<00:00, 369.80it/s]
100%|██████████| 112/112 [00:00<00:00, 919.71it/s]


[Epoch 51] train_loss=1.0040 | valid_mean_dist=1.0902


100%|██████████| 446/446 [00:01<00:00, 366.89it/s]
100%|██████████| 112/112 [00:00<00:00, 915.59it/s]


[Epoch 52] train_loss=1.0039 | valid_mean_dist=1.1008


100%|██████████| 446/446 [00:01<00:00, 380.87it/s]
100%|██████████| 112/112 [00:00<00:00, 934.93it/s]


[Epoch 53] train_loss=1.0035 | valid_mean_dist=1.0995


100%|██████████| 446/446 [00:01<00:00, 375.78it/s]
100%|██████████| 112/112 [00:00<00:00, 818.94it/s]


[Epoch 54] train_loss=1.0038 | valid_mean_dist=1.0978


100%|██████████| 446/446 [00:01<00:00, 280.81it/s]
100%|██████████| 112/112 [00:00<00:00, 724.60it/s]


[Epoch 55] train_loss=1.0039 | valid_mean_dist=1.0979


100%|██████████| 446/446 [00:01<00:00, 287.89it/s]
100%|██████████| 112/112 [00:00<00:00, 918.10it/s]


[Epoch 56] train_loss=1.0036 | valid_mean_dist=1.0928


100%|██████████| 446/446 [00:01<00:00, 373.43it/s]
100%|██████████| 112/112 [00:00<00:00, 942.21it/s]


[Epoch 57] train_loss=1.0034 | valid_mean_dist=1.0855


100%|██████████| 446/446 [00:01<00:00, 371.05it/s]
100%|██████████| 112/112 [00:00<00:00, 929.24it/s]


[Epoch 58] train_loss=1.0039 | valid_mean_dist=1.0966


100%|██████████| 446/446 [00:01<00:00, 358.18it/s]
100%|██████████| 112/112 [00:00<00:00, 934.80it/s]


[Epoch 59] train_loss=1.0036 | valid_mean_dist=1.1064


100%|██████████| 446/446 [00:01<00:00, 372.06it/s]
100%|██████████| 112/112 [00:00<00:00, 935.33it/s]


[Epoch 60] train_loss=1.0033 | valid_mean_dist=1.0854


100%|██████████| 446/446 [00:01<00:00, 374.35it/s]
100%|██████████| 112/112 [00:00<00:00, 913.75it/s]


[Epoch 61] train_loss=1.0040 | valid_mean_dist=1.0955


100%|██████████| 446/446 [00:01<00:00, 371.61it/s]
100%|██████████| 112/112 [00:00<00:00, 702.27it/s]


[Epoch 62] train_loss=1.0035 | valid_mean_dist=1.0870


100%|██████████| 446/446 [00:01<00:00, 371.62it/s]
100%|██████████| 112/112 [00:00<00:00, 922.23it/s]


[Epoch 63] train_loss=1.0036 | valid_mean_dist=1.0796
 --> Best model updated! (dist=1.0796)


100%|██████████| 446/446 [00:01<00:00, 290.85it/s]
100%|██████████| 112/112 [00:00<00:00, 605.99it/s]


[Epoch 64] train_loss=1.0030 | valid_mean_dist=1.0998


100%|██████████| 446/446 [00:01<00:00, 274.57it/s]
100%|██████████| 112/112 [00:00<00:00, 865.80it/s]


[Epoch 65] train_loss=1.0034 | valid_mean_dist=1.0960


100%|██████████| 446/446 [00:01<00:00, 373.49it/s]
100%|██████████| 112/112 [00:00<00:00, 907.98it/s]


[Epoch 66] train_loss=1.0033 | valid_mean_dist=1.0923


100%|██████████| 446/446 [00:01<00:00, 366.52it/s]
100%|██████████| 112/112 [00:00<00:00, 917.12it/s]


[Epoch 67] train_loss=1.0033 | valid_mean_dist=1.0912


100%|██████████| 446/446 [00:01<00:00, 369.14it/s]
100%|██████████| 112/112 [00:00<00:00, 937.23it/s]


[Epoch 68] train_loss=1.0031 | valid_mean_dist=1.1057


100%|██████████| 446/446 [00:01<00:00, 375.17it/s]
100%|██████████| 112/112 [00:00<00:00, 926.31it/s]


[Epoch 69] train_loss=1.0043 | valid_mean_dist=1.1000


100%|██████████| 446/446 [00:01<00:00, 374.33it/s]
100%|██████████| 112/112 [00:00<00:00, 884.59it/s]


[Epoch 70] train_loss=1.0035 | valid_mean_dist=1.0845


100%|██████████| 446/446 [00:01<00:00, 377.17it/s]
100%|██████████| 112/112 [00:00<00:00, 964.65it/s]


[Epoch 71] train_loss=1.0041 | valid_mean_dist=1.0805


100%|██████████| 446/446 [00:01<00:00, 374.26it/s]
100%|██████████| 112/112 [00:00<00:00, 906.11it/s]


[Epoch 72] train_loss=1.0037 | valid_mean_dist=1.1034


100%|██████████| 446/446 [00:01<00:00, 317.36it/s]
100%|██████████| 112/112 [00:00<00:00, 700.52it/s]


[Epoch 73] train_loss=1.0033 | valid_mean_dist=1.0842


100%|██████████| 446/446 [00:01<00:00, 274.10it/s]
100%|██████████| 112/112 [00:00<00:00, 650.03it/s]


[Epoch 74] train_loss=1.0032 | valid_mean_dist=1.0992


100%|██████████| 446/446 [00:01<00:00, 339.44it/s]
100%|██████████| 112/112 [00:00<00:00, 928.07it/s]


[Epoch 75] train_loss=1.0035 | valid_mean_dist=1.0858


100%|██████████| 446/446 [00:01<00:00, 372.89it/s]
100%|██████████| 112/112 [00:00<00:00, 946.88it/s]


[Epoch 76] train_loss=1.0031 | valid_mean_dist=1.0858


100%|██████████| 446/446 [00:01<00:00, 375.94it/s]
100%|██████████| 112/112 [00:00<00:00, 931.11it/s]


[Epoch 77] train_loss=1.0026 | valid_mean_dist=1.0959


100%|██████████| 446/446 [00:01<00:00, 378.21it/s]
100%|██████████| 112/112 [00:00<00:00, 842.06it/s]


[Epoch 78] train_loss=1.0040 | valid_mean_dist=1.0965


100%|██████████| 446/446 [00:01<00:00, 373.47it/s]
100%|██████████| 112/112 [00:00<00:00, 959.77it/s]


[Epoch 79] train_loss=1.0035 | valid_mean_dist=1.0959


100%|██████████| 446/446 [00:01<00:00, 375.56it/s]
100%|██████████| 112/112 [00:00<00:00, 958.98it/s]


[Epoch 80] train_loss=1.0036 | valid_mean_dist=1.0950


100%|██████████| 446/446 [00:01<00:00, 380.81it/s]
100%|██████████| 112/112 [00:00<00:00, 936.94it/s]


[Epoch 81] train_loss=1.0034 | valid_mean_dist=1.0987


100%|██████████| 446/446 [00:01<00:00, 337.12it/s]
100%|██████████| 112/112 [00:00<00:00, 737.38it/s]


[Epoch 82] train_loss=1.0036 | valid_mean_dist=1.0910


100%|██████████| 446/446 [00:01<00:00, 280.79it/s]
100%|██████████| 112/112 [00:00<00:00, 719.56it/s]


[Epoch 83] train_loss=1.0036 | valid_mean_dist=1.0841


100%|██████████| 446/446 [00:01<00:00, 302.79it/s]
100%|██████████| 112/112 [00:00<00:00, 919.29it/s]


[Epoch 84] train_loss=1.0035 | valid_mean_dist=1.0865


100%|██████████| 446/446 [00:01<00:00, 374.52it/s]
100%|██████████| 112/112 [00:00<00:00, 926.87it/s]


[Epoch 85] train_loss=1.0033 | valid_mean_dist=1.0930


100%|██████████| 446/446 [00:01<00:00, 382.77it/s]
100%|██████████| 112/112 [00:00<00:00, 940.66it/s]


[Epoch 86] train_loss=1.0033 | valid_mean_dist=1.0974


100%|██████████| 446/446 [00:01<00:00, 377.09it/s]
100%|██████████| 112/112 [00:00<00:00, 937.12it/s]


[Epoch 87] train_loss=1.0035 | valid_mean_dist=1.1002


100%|██████████| 446/446 [00:01<00:00, 372.14it/s]
100%|██████████| 112/112 [00:00<00:00, 955.68it/s]


[Epoch 88] train_loss=1.0031 | valid_mean_dist=1.0847


100%|██████████| 446/446 [00:01<00:00, 375.16it/s]
100%|██████████| 112/112 [00:00<00:00, 949.13it/s]


[Epoch 89] train_loss=1.0042 | valid_mean_dist=1.0872


100%|██████████| 446/446 [00:01<00:00, 381.04it/s]
100%|██████████| 112/112 [00:00<00:00, 937.14it/s]


[Epoch 90] train_loss=1.0034 | valid_mean_dist=1.0959


100%|██████████| 446/446 [00:01<00:00, 371.82it/s]
100%|██████████| 112/112 [00:00<00:00, 847.31it/s]


[Epoch 91] train_loss=1.0034 | valid_mean_dist=1.0931


100%|██████████| 446/446 [00:01<00:00, 286.18it/s]
100%|██████████| 112/112 [00:00<00:00, 708.32it/s]


[Epoch 92] train_loss=1.0033 | valid_mean_dist=1.0933


100%|██████████| 446/446 [00:01<00:00, 282.21it/s]
100%|██████████| 112/112 [00:00<00:00, 927.42it/s]


[Epoch 93] train_loss=1.0037 | valid_mean_dist=1.1023


100%|██████████| 446/446 [00:01<00:00, 374.92it/s]
100%|██████████| 112/112 [00:00<00:00, 940.62it/s]


[Epoch 94] train_loss=1.0036 | valid_mean_dist=1.0942


100%|██████████| 446/446 [00:01<00:00, 373.38it/s]
100%|██████████| 112/112 [00:00<00:00, 950.93it/s]


[Epoch 95] train_loss=1.0036 | valid_mean_dist=1.0823


100%|██████████| 446/446 [00:01<00:00, 371.50it/s]
100%|██████████| 112/112 [00:00<00:00, 929.49it/s]


[Epoch 96] train_loss=1.0033 | valid_mean_dist=1.0887


100%|██████████| 446/446 [00:01<00:00, 366.68it/s]
100%|██████████| 112/112 [00:00<00:00, 965.72it/s]


[Epoch 97] train_loss=1.0036 | valid_mean_dist=1.0923


100%|██████████| 446/446 [00:01<00:00, 381.72it/s]
100%|██████████| 112/112 [00:00<00:00, 954.53it/s]


[Epoch 98] train_loss=1.0033 | valid_mean_dist=1.0840


100%|██████████| 446/446 [00:01<00:00, 367.53it/s]
100%|██████████| 112/112 [00:00<00:00, 865.52it/s]


[Epoch 99] train_loss=1.0036 | valid_mean_dist=1.0933


100%|██████████| 446/446 [00:01<00:00, 370.65it/s]
100%|██████████| 112/112 [00:00<00:00, 922.88it/s]

[Epoch 100] train_loss=1.0035 | valid_mean_dist=1.0863





## 추론

In [None]:
class EpisodeDataset(Dataset):
    """Per-episode sequence dataset that also works without target columns.

    This redefines (and therefore replaces) the ``EpisodeDataset`` declared
    earlier in the notebook. Rows are grouped by ``game_episode`` and, when a
    ``time_seconds`` column is present, ordered chronologically so sequences are
    built the same way as in the training dataset. If any target column is
    missing, a zero vector is used as a placeholder target.

    Args:
        df: DataFrame with a ``game_episode`` column and the feature columns.
        feature_cols: Column names used as per-step model inputs.
        target_cols: Column names of the label; they may be absent from ``df``.

    Attributes:
        episodes: List of ``(features [T, F], target)`` NumPy pairs.
        episode_ids: ``game_episode`` key of each entry in ``episodes``, in the
            same order, so predictions can be matched back to episodes.
    """

    def __init__(self, df, feature_cols, target_cols):
        self.feature_cols = feature_cols
        self.target_cols = target_cols

        self.episodes = []
        self.episode_ids = []

        # Every group carries the same columns as df, so check once up front.
        has_targets = set(target_cols).issubset(df.columns)
        has_time = "time_seconds" in df.columns

        for ep, g in df.groupby("game_episode"):
            if has_time:
                # Stable sort: rows with equal timestamps keep their original order,
                # and data that is already chronological is left untouched.
                g = g.sort_values("time_seconds", kind="stable")

            seq = g[feature_cols].values.astype(np.float32)

            # The test set may not carry real targets; fall back to zeros.
            if has_targets:
                tgt = g[target_cols].values[-1].astype(np.float32)
            else:
                tgt = np.zeros(len(target_cols), dtype=np.float32)

            self.episodes.append((seq, tgt))
            self.episode_ids.append(ep)

    def __len__(self):
        """Return the number of episodes."""
        return len(self.episodes)

    def __getitem__(self, idx):
        """Return ``(sequence tensor [T, F], T, target tensor)`` for episode ``idx``."""
        seq, tgt = self.episodes[idx]
        length = len(seq)
        return torch.tensor(seq), length, torch.tensor(tgt)

In [None]:
# Wrap the test table. The target columns named here do not have to exist in test_df.
test_dataset = EpisodeDataset(test_df, feature_cols=feature_cols, target_cols=["target_dx", "target_dy"])

# Keep dataset order (no shuffling) so predictions come out in a fixed episode order.
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, collate_fn=collate_fn)

In [None]:
# Predict every test episode and collect the two outputs as plain Python floats.
model.eval()
preds_x, preds_y = [], []

with torch.no_grad():
    for x, length, y in tqdm(test_loader, desc="Inference"):
        # x: padded batch of sequences, length: unpadded length of each sequence.
        pred = model(x.to(device), length.to(device)).cpu()  # [B, 2]

        # Undo the normalisation by multiplying by 105 (x) and 68 (y).
        # NOTE(review): the target columns are named target_dx / target_dy while these
        # values are later written out as end_x / end_y — confirm the model really
        # outputs normalised absolute coordinates and not deltas.
        preds_x.extend((pred[:, 0] * 105.0).tolist())
        preds_y.extend((pred[:, 1] * 68.0).tolist())

print("Inference Done.")

Inference: 100%|██████████| 38/38 [00:00<00:00, 370.27it/s]

Inference Done.





# TabPFN

In [None]:
def dataset(df, is_train: bool, K: int):
    """Placeholder, presumably for the TabPFN data preparation (not implemented).

    The cell previously held only this signature with no body, which is a syntax
    error and stops "Run All". What ``df``, ``is_train`` and ``K`` are meant to
    control is not shown anywhere in the notebook — TODO: implement.

    Raises:
        NotImplementedError: Always, until the function is written.
    """
    raise NotImplementedError("dataset() has not been implemented yet")


# submission 생성

In [None]:
# Fill the sample submission with the predictions and save it to Drive.
# NOTE(review): this Drive folder name differs from the one used by the data-loading
# cell — confirm both exist.
submission = pd.read_csv(
    "/content/drive/MyDrive/1데이콘/Track1알고리즘부문:K리그-서울시립대공개AI경진대회/dataset/sample_submission.csv")

# Fail with an explicit message if the prediction count does not match the template.
if not (len(preds_x) == len(preds_y) == len(submission)):
    raise ValueError(
        f"Prediction count ({len(preds_x)}, {len(preds_y)}) does not match "
        f"the {len(submission)} rows of the sample submission"
    )

# NOTE(review): values are assigned by position. Predictions come out in the order
# df.groupby("game_episode") yields episodes (sorted by key) — confirm the sample
# submission rows follow that same order.
submission["end_x"] = preds_x
submission["end_y"] = preds_y

submission_path = "/content/drive/MyDrive/1데이콘/Track1알고리즘부문:K리그-서울시립대공개AI경진대회/submission4.csv"
submission.to_csv(submission_path, index=False)
# Report the path actually written (the old message named a different file).
print(f"Saved: {submission_path}")

Saved: baseline_submit.csv
