In [696]:
import pandas as pd
import numpy as np

# Folder (with trailing slash) that holds every CSV read below.
DATA_PATH = "LoLesports_data/"
# Seed constant; not referenced by any of the cells visible in this notebook.
SEED = 42

# Per-game target tables for the train / test splits.
teams_train_target = pd.read_csv(f"{DATA_PATH}teams_train_target.csv")
teams_test_target = pd.read_csv(f"{DATA_PATH}teams_test_target.csv")

# Raw per-team rows for the train / test splits.
teams_train = pd.read_csv(f"{DATA_PATH}teams_train.csv")
teams_test = pd.read_csv(f"{DATA_PATH}teams_test.csv")

# Feature tables; both splits are stacked into one frame without "gameid".
featured_train = pd.read_csv(f"{DATA_PATH}featured_train.csv")
featured_test = pd.read_csv(f"{DATA_PATH}featured_test.csv")
featured_data = pd.concat(
    [featured_train, featured_test], ignore_index=True
).drop("gameid", axis=1)

## 상대 팀 추가

In [697]:
def add_opponent_team(teams):
    """Return a copy of ``teams`` with an ``opp_teamname`` column.

    Within every ``gameid`` group the ``teamname`` values are reversed, so each
    row receives the name found at the mirrored position of its own game.
    NOTE(review): this yields "the opponent" only when a game has exactly two
    rows -- confirm that holds for the input files.

    Parameters
    ----------
    teams : pandas.DataFrame
        Frame with at least the ``gameid`` and ``teamname`` columns.

    Returns
    -------
    pandas.DataFrame
        New frame; the argument itself is not modified.
    """
    opp_teamname = teams.groupby("gameid")["teamname"].transform(
        lambda names: names.iloc[::-1].values
    )
    # assign() replaces an existing "opp_teamname" column instead of appending a
    # duplicate one, so re-running this cell keeps the frame well-formed.
    return teams.assign(opp_teamname=opp_teamname)


teams_train = add_opponent_team(teams_train)
teams_test = add_opponent_team(teams_test)

## 날짜 추가

In [698]:
# Parse "date" and split it into calendar parts; both splits get the same
# treatment, and the columns are written onto the existing frames.
for teams in (teams_train, teams_test):
    teams["date"] = pd.to_datetime(teams["date"])
    teams["year"] = teams["date"].dt.year
    teams["month"] = teams["date"].dt.month
    teams["day"] = teams["date"].dt.day

## 데이터 타입 변경

In [699]:
# Columns stored as pandas "category" dtype in both splits.
cols = [
    "league", "split", "teamname", "opp_teamname",
    "ban1", "ban2", "ban3", "ban4", "ban5",
    "pick1", "pick2", "pick3", "pick4", "pick5",
]

# Each frame is cast on its own, so the category sets are per-split.
for teams in (teams_train, teams_test):
    teams[cols] = teams[cols].astype("category")

# 예측 모델 로드 및 예측

In [700]:
# Per-game columns for which "recent 10 games" averages are computed later.
# The list order is significant: the feature-building loop inserts
# recent10_* / opp_recent10_* keys in this order, which fixes the column order
# of the model input. Entries are grouped by apparent theme; order is unchanged.
stats_columns = [
    # outcome / length
    "result", "gamelength",
    # kills
    "kills", "deaths", "assists", "firstblood", "team kpm", "ckpm",
    # neutral objectives
    "firstdragon", "firstherald", "void_grubs", "firstbaron",
    # structures
    "firsttower", "towers", "firstmidtower", "firsttothreetowers",
    "turretplates", "inhibitors",
    # damage
    "damagetochampions", "dpm", "damagetakenperminute", "damagemitigatedperminute",
    # vision
    "wardsplaced", "wpm", "wardskilled", "wcpm",
    "controlwardsbought", "visionscore", "vspm",
]

In [701]:
# Stack both target tables into one frame with a fresh 0..n-1 index.
target_data = pd.concat([teams_train_target, teams_test_target], ignore_index=True)

# Stack both team tables the same way; "gameid" is dropped from the result.
train_data = pd.concat([teams_train, teams_test], ignore_index=True).drop(
    columns="gameid"
)

## 입력 데이터 특성 추가

In [702]:
# --- Match to predict -------------------------------------------------------
teamname, opp_teamname = "Dplus KIA", "OKSavingsBank BRION"
patch = 14.23
league = "LCK"
side = "Blue"
year, month, day = 2024, 12, 8

# Draft: five bans and five picks for `teamname`.
ban1, ban2, ban3, ban4, ban5 = "Renekton", "Vi", "Skarner", "Ashe", "Kalista"
pick1, pick2, pick3, pick4, pick5 = "K'Sante", "Viego", "Aurora", "Varus", "Maokai"

# Raw feature record. Keyword order defines the key order of the dict, which
# later becomes the column order of the one-row model input.
input_data = dict(
    patch=patch,
    side=side,
    league=league,
    teamname=teamname,
    opp_teamname=opp_teamname,
    ban1=ban1,
    ban2=ban2,
    ban3=ban3,
    ban4=ban4,
    ban5=ban5,
    pick1=pick1,
    pick2=pick2,
    pick3=pick3,
    pick4=pick4,
    pick5=pick5,
    year=year,
    month=month,
    day=day,
)

In [703]:
# Match history of each side, ordered by (year, month, day) so that the
# trailing window below really covers the latest games on record.
input_team_data = (
    train_data[train_data["teamname"] == teamname]
    .sort_values(["year", "month", "day"])
    .reset_index(drop=True)
)
input_opp_data = (
    train_data[train_data["teamname"] == opp_teamname]
    .sort_values(["year", "month", "day"])
    .reset_index(drop=True)
)


def _recent10_mean(history, col):
    """Mean of ``col`` over the last (up to) 10 rows of ``history``.

    ``history`` must already be in chronological order. When it has no rows the
    neutral default is returned: 0.5 for ``"result"`` and 0 for any other column.
    """
    if len(history) == 0:
        return 0.5 if col == "result" else 0
    # Last value of a 10-row rolling mean == mean of the final window;
    # min_periods=1 keeps it defined when fewer than 10 rows exist.
    return history[col].rolling(window=10, min_periods=1).mean().iloc[-1]


# Keys are inserted as recent10_<col>, opp_recent10_<col> for each stat in
# stats_columns order; that order becomes the feature column order.
recent10_stats = {}
for col in stats_columns:
    recent10_stats[f"recent10_{col}"] = _recent10_mean(input_team_data, col)
    recent10_stats[f"opp_recent10_{col}"] = _recent10_mean(input_opp_data, col)

input_data.update(recent10_stats)

In [704]:
# Rows of games between the two sides, seen from either side's perspective.
team_vs_opp = (train_data["teamname"] == teamname) & (
    train_data["opp_teamname"] == opp_teamname
)
opp_vs_team = (train_data["teamname"] == opp_teamname) & (
    train_data["opp_teamname"] == teamname
)
head_to_head = train_data[team_vs_opp | opp_vs_team].sort_values(
    ["year", "month", "day"]
)

if len(head_to_head) == 0:
    # No shared history: fall back to an even split.
    h2h_winrate = 0.5
else:
    # A row counts for `teamname` when it is that team's row with result 1,
    # or the opponent's row with result 0.
    won_on_own_row = (head_to_head["teamname"] == teamname) & (
        head_to_head["result"] == 1
    )
    opp_lost_row = (head_to_head["teamname"] == opp_teamname) & (
        head_to_head["result"] == 0
    )
    team_wins = int((won_on_own_row | opp_lost_row).sum())
    h2h_winrate = team_wins / len(head_to_head)

input_data["h2h_winrate"] = h2h_winrate

In [705]:
# Left empty: nothing in the visible cells reads or fills this dict.
league_stats = {}

# All rows of `teamname` played in the league of the match being predicted.
is_team = train_data["teamname"] == teamname
in_league = train_data["league"] == input_data["league"]
team_league_games = train_data[is_team & in_league]

# Mean of "result" over those rows; 0.5 when the team has none in this league.
team_league_winrate = (
    team_league_games["result"].mean() if len(team_league_games) > 0 else 0.5
)

input_data["league_winrate"] = team_league_winrate

In [706]:
# Turn the feature dict into a one-row DataFrame (columns follow key order),
# then display it.
feature_names = list(input_data.keys())
feature_values = list(input_data.values())
input_data = pd.DataFrame([feature_values], columns=feature_names)
input_data

Unnamed: 0,patch,side,league,teamname,opp_teamname,ban1,ban2,ban3,ban4,ban5,...,recent10_wcpm,opp_recent10_wcpm,recent10_controlwardsbought,opp_recent10_controlwardsbought,recent10_visionscore,opp_recent10_visionscore,recent10_vspm,opp_recent10_vspm,h2h_winrate,league_winrate
0,14.23,Blue,LCK,Dplus KIA,OKSavingsBank BRION,Renekton,Vi,Skarner,Ashe,Kalista,...,1.78643,1.69032,57.7,45.2,319.4,281.2,9.00584,8.75083,0.8,0.573333


In [707]:
# Encode the side as an integer: Blue -> 0, Red -> 1.
side_codes = {"Blue": 0, "Red": 1}
input_data["side"] = input_data["side"].map(side_codes)

# Independent copies for the CatBoost path, taken after the side encoding and
# before the label / one-hot encoding applied later in the notebook.
cat_featured_data = featured_data.copy()
cat_input_data = input_data.copy()

# Team and draft columns are stored as "category" dtype on the CatBoost copy.
cat_cols = [
    "teamname",
    "opp_teamname",
    *[f"ban{slot}" for slot in range(1, 6)],
    *[f"pick{slot}" for slot in range(1, 6)],
]
cat_input_data[cat_cols] = cat_input_data[cat_cols].astype("category")

In [708]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder


def preprocess(input_data, train_data):
    """Encode champion, league and team columns of ``input_data``.

    Every encoder is fitted on ``train_data`` and then applied to
    ``input_data``:

    * ban/pick columns -> integer labels from one ``LabelEncoder`` fitted on
      all champion names found in the ten ban/pick columns;
    * ``league`` -> one-hot ``league_<name>`` columns appended at the end (the
      original ``league`` column is removed);
    * ``teamname`` / ``opp_teamname`` -> integer labels from one
      ``LabelEncoder`` fitted on every name seen in either column.

    Parameters
    ----------
    input_data : pandas.DataFrame
        Rows to encode. Must contain the ten ban/pick columns, ``league``,
        ``teamname`` and ``opp_teamname``. It is not modified.
    train_data : pandas.DataFrame
        Reference frame with the same columns, used only to fit the encoders.

    Returns
    -------
    pandas.DataFrame
        Encoded copy with the same index as ``input_data``.

    Notes
    -----
    Both encoders are used with their default settings; values of
    ``input_data`` that never occur in ``train_data`` are expected to make the
    sklearn ``transform`` calls raise.
    """
    champion_columns_teams = [
        "ban1",
        "ban2",
        "ban3",
        "ban4",
        "ban5",
        "pick1",
        "pick2",
        "pick3",
        "pick4",
        "pick5",
    ]

    # Work on a copy: the column assignments below must not leak into the
    # caller's frame (previously it was left half-encoded).
    input_data = input_data.copy()

    # One shared vocabulary for all ban/pick slots.
    champions = sorted(
        train_data[champion_columns_teams].stack().dropna().unique()
    )
    le = LabelEncoder()
    le.fit(champions)

    for col in champion_columns_teams:
        input_data[col] = le.transform(input_data[col])

    encoder = OneHotEncoder()
    encoder.fit(train_data[["league"]])
    league_encoded = encoder.transform(input_data[["league"]]).toarray()
    league_cols = [f"league_{col}" for col in encoder.categories_[0]]
    # Reuse the input index so the column-wise concat lines rows up one-to-one
    # even when ``input_data`` does not carry a default 0..n-1 index.
    league_frame = pd.DataFrame(
        league_encoded, columns=league_cols, index=input_data.index
    )
    input_data = pd.concat(
        [input_data.drop(columns="league"), league_frame], axis=1
    )

    # Teams share one label space regardless of which column they appear in.
    le_team = LabelEncoder()
    all_team_names = pd.concat(
        [
            train_data["teamname"],
            train_data["opp_teamname"],
        ]
    ).unique()
    le_team.fit(all_team_names)

    input_data["teamname"] = le_team.transform(input_data["teamname"])
    input_data["opp_teamname"] = le_team.transform(input_data["opp_teamname"])

    return input_data


# Encode the single prediction row, then the full feature table, with
# encoders fitted on train_data in both cases.
input_data = preprocess(input_data=input_data, train_data=train_data)
featured_data = preprocess(input_data=featured_data, train_data=train_data)

In [709]:
from sklearn.preprocessing import MinMaxScaler


def scale(input_data, featured_data):
    """Min-max scale the numeric columns of ``input_data``.

    The scaler is fitted on the same-named columns of ``featured_data`` and
    applied to ``input_data``. The numeric columns of ``input_data`` are
    overwritten in place and the same frame object is returned.
    """
    numeric_columns = input_data.select_dtypes("number").columns

    fitted_scaler = MinMaxScaler().fit(featured_data[numeric_columns])
    input_data[numeric_columns] = fitted_scaler.transform(
        input_data[numeric_columns]
    )

    return input_data


# Every frame is scaled against the (already encoded) featured_data.
input_data = scale(input_data=input_data, featured_data=featured_data)
cat_input_data = scale(input_data=cat_input_data, featured_data=featured_data)
cat_featured_data = scale(input_data=cat_featured_data, featured_data=featured_data)

In [710]:
import joblib
import json
from catboost import CatBoostClassifier, Pool

# CatBoost model restored from its native .cbm file.
cat_model = CatBoostClassifier()
cat_model.load_model("output/cat_0107.cbm")

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load artifacts produced by a trusted run.
stacking_model = joblib.load("output/stacking_0107.pkl")

# Names of the categorical features stored next to the models; this replaces
# the cat_cols list defined earlier in the notebook.
with open("output/cat_features.json", "r") as features_file:
    cat_cols = json.load(features_file)

# From here on cat_input_data is a catboost Pool rather than a DataFrame.
cat_input_data = Pool(cat_input_data, cat_features=cat_cols)

In [711]:
# Class probabilities from both models for the single input row.
pred_cat = cat_model.predict_proba(cat_input_data)
pred_stacking = stacking_model.predict_proba(input_data)

In [712]:
# Equal-weight average of the two models' probabilities.
pred = 0.5 * (pred_stacking + pred_cat)

# Column 1 is reported as teamname's win chance, column 0 as the opponent's.
print(f"{teamname} 승리 확률: {round(pred[0][1] * 100, 1)}%")
print(f"{opp_teamname} 승리 확률: {round(pred[0][0] * 100, 1)}%")

Dplus KIA 승리 확률: 64.1%
OKSavingsBank BRION 승리 확률: 35.9%
