In [3]:
from google.colab import drive
# Mount Google Drive into the Colab runtime at /content/drive.
drive.mount('/content/drive')

# Directory prefix for the CSV files read below; the trailing slash matters
# because file names are appended with plain f-string concatenation.
DATA_PATH = "/content/drive/MyDrive/datathon/data/"

# Seed constant. NOTE(review): presumably intended for reset_seeds(), but no
# call to reset_seeds() is visible in this part of the notebook — confirm.
SEED = 42

Mounted at /content/drive


In [4]:
import pandas as pd
import numpy as np
import torch
from tqdm.auto import tqdm
import random
import os

def reset_seeds(seed):
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: Integer seed applied to every generator.

    Returns:
        None. The function only mutates global RNG / backend state.
    """
    random.seed(seed)
    # Hash randomisation of the running interpreter is fixed at start-up; this
    # environment variable only takes effect in processes launched afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every CUDA device, not just the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may pick different kernels between runs, which
    # defeats the determinism requested above, so turn it off.
    torch.backends.cudnn.benchmark = False

In [5]:
train_tr = pd.read_csv(f"{DATA_PATH}players_train(analyze).csv") # training data for match-outcome prediction
#train_target = pd.read_csv(f"{DATA_PATH}train_target_1226.csv") # training labels (currently not loaded)
test_tr = pd.read_csv(f"{DATA_PATH}players_test(analyze).csv") # test data for match-outcome prediction
#test_target = pd.read_csv(f"{DATA_PATH}test_target_1226.csv") # test labels (currently not loaded)

In [6]:
# Sanity check: (rows, columns) of the raw train and test frames.
train_tr.shape, test_tr.shape

((50000, 87), (11670, 87))

# 전처리

In [7]:
# Work on copies so the frames loaded from CSV (train_tr / test_tr) stay untouched.
train_ft = train_tr.copy()
test_ft = test_tr.copy()

In [8]:
# List the columns available in the training frame.
train_ft.columns

Index(['gameid', 'league', 'split', 'playoffs', 'date', 'game', 'patch',
       'side', 'position', 'playername', 'teamname', 'champion', 'ban1',
       'ban2', 'ban3', 'ban4', 'ban5', 'gamelength', 'result', 'kills',
       'deaths', 'assists', 'doublekills', 'triplekills', 'quadrakills',
       'pentakills', 'firstblood', 'firstbloodkill', 'firstbloodassist',
       'firstbloodvictim', 'team kpm', 'ckpm', 'barons', 'opp_barons',
       'inhibitors', 'opp_inhibitors', 'damagetochampions', 'dpm',
       'damageshare', 'damagetakenperminute', 'damagemitigatedperminute',
       'wardsplaced', 'wpm', 'wardskilled', 'wcpm', 'controlwardsbought',
       'visionscore', 'vspm', 'totalgold', 'earnedgold', 'earned gpm',
       'earnedgoldshare', 'goldspent', 'total cs', 'minionkills',
       'monsterkills', 'cspm', 'goldat10', 'xpat10', 'csat10', 'opp_goldat10',
       'opp_xpat10', 'opp_csat10', 'golddiffat10', 'xpdiffat10', 'csdiffat10',
       'killsat10', 'assistsat10', 'deathsat10', 'opp_k

- player

In [9]:
# Columns whose missing entries are replaced by the literal string "unknown".
fill_unknown_cols = ["split", "ban1", "ban2", "ban3", "ban4", "ban5"]

# Apply the same fill to both splits; item assignment updates each frame in place.
for frame in (train_ft, test_ft):
    frame[fill_unknown_cols] = frame[fill_unknown_cols].fillna("unknown")


In [10]:
# Shapes of the working frames after the "unknown" fill.
train_ft.shape, test_ft.shape

((50000, 87), (11670, 87))

In [11]:
# Impute the remaining gaps in train_ft.
# For each missing cell: use the mean of that column over the player's most
# recent 5 earlier games (by gameid); if that is unavailable, use the mean of
# the column over all training rows with the same position.
# train_ft is modified while looping, so later rows can see values imputed for
# earlier rows; `row` itself is a snapshot taken from null_samples.
# NOTE(review): "earlier" is decided by comparing gameid values — this assumes
# gameid ordering matches chronological order; TODO confirm.
has_nan = train_ft.isnull().sum() > 0
missing_columns = train_ft.columns[has_nan]
null_samples = train_ft[train_ft[missing_columns].isnull().any(axis=1)]

for idx, row in null_samples.iterrows():
    same_player = train_ft["playername"] == row["playername"]
    earlier_game = train_ft["gameid"] < row["gameid"]
    player_history = train_ft[same_player & earlier_game].sort_values(
        "gameid", ascending=False
    )

    for col in missing_columns:
        # Only touch cells that were missing in the snapshot of this row.
        if not pd.isnull(row[col]):
            continue

        prev_5_mean = player_history[col].head(5).mean()
        if pd.isnull(prev_5_mean):
            # No usable history for this player: fall back to the position mean.
            same_position = train_ft["position"] == row["position"]
            fill_value = train_ft.loc[same_position, col].mean()
        else:
            fill_value = prev_5_mean
        train_ft.loc[idx, col] = fill_value

In [12]:
# Total number of NaN cells left in train_ft (0 means imputation covered everything).
train_ft.isna().sum().sum()

0

In [13]:
# Impute the remaining gaps in test_ft using statistics from train_ft only.
# For each missing cell: use the mean of that column over the player's most
# recent 5 earlier training games (by gameid); if that is unavailable, use the
# mean of the column over all training rows with the same position.
# NOTE(review): "earlier" is decided by comparing gameid values — this assumes
# gameid ordering matches chronological order; TODO confirm.
missing_columns = test_ft.columns[test_ft.isnull().sum() > 0]
null_samples = test_ft[test_ft[missing_columns].isnull().any(axis=1)]

# train_ft is never modified in this cell, so the fallback mean for a given
# (position, column) pair is loop-invariant. Cache it instead of re-scanning
# the whole training frame for every missing cell.
position_mean_cache = {}

for idx, row in null_samples.iterrows():
    player_history = train_ft[
        (train_ft["playername"] == row["playername"])
        & (train_ft["gameid"] < row["gameid"])
    ].sort_values("gameid", ascending=False)

    for col in missing_columns:
        if pd.isnull(row[col]):
            prev_5_mean = player_history[col].head(5).mean()
            if pd.notnull(prev_5_mean):
                test_ft.loc[idx, col] = prev_5_mean
            else:
                cache_key = (row["position"], col)
                if cache_key not in position_mean_cache:
                    position_mean_cache[cache_key] = train_ft.loc[
                        train_ft["position"] == row["position"], col
                    ].mean()
                test_ft.loc[idx, col] = position_mean_cache[cache_key]

In [14]:
# Total number of NaN cells left in test_ft (0 means imputation covered everything).
test_ft.isna().sum().sum()

0

# feature

In [15]:
# KDA of players (explored per champion further below)
# KDA calculation
def calculate_kda(row):
    """Return (kills + assists) / deaths for one row.

    A deaths value of exactly 0 is treated as 1 so the ratio stays finite.

    Args:
        row: Mapping-like object with 'kills', 'assists' and 'deaths' entries.

    Returns:
        The KDA ratio for that row.
    """
    takedowns = row['kills'] + row['assists']
    deaths = row['deaths']
    divisor = 1 if deaths == 0 else deaths
    return takedowns / divisor

# Compute KDA for every row and sort by it.
# Vectorised form of calculate_kda: a deaths value of 0 is replaced by 1 before
# dividing, which gives the same values as the row-wise apply without a Python
# call per row.
train_ft['kda'] = (train_ft['kills'] + train_ft['assists']) / train_ft['deaths'].replace(0, 1)
kda_sorted = train_ft.sort_values('kda', ascending=False)

# Show the result (player, champion, KDA)
print(kda_sorted[['playername', 'champion', 'kda']])

      playername  champion   kda
48264       Orca      Hwei  33.0
48261        665  Xin Zhao  30.0
2304     Kaiwing     Yuumi  29.0
17389      scuro     Yuumi  28.0
46166    Disamis      Rell  28.0
...          ...       ...   ...
3862     Keaiduo    Viktor   0.0
7520      Hasmed      Ornn   0.0
43255    Odoamne      Gwen   0.0
3860        Huni      Gwen   0.0
18460       Dove    Aatrox   0.0

[50000 rows x 3 columns]


In [16]:
# Mean KDA per player (uses the 'kda' column added to train_ft above)
player_kda_mean = train_ft.groupby('playername')['kda'].mean()

player_kda_mean

Unnamed: 0_level_0,kda
playername,Unnamed: 1_level_1
1116,6.874500
113,4.121457
1Jiang,4.743561
1ssue,3.504762
2T,3.108931
...,...
xiaotu,1.766387
xyno,1.616071
yjy,2.204630
zay,5.840799


In [17]:
# Exploratory look at KDA-related data only; this is not used as a feature.
def get_champion_kda(df, player_name):
    """Return one player's mean KDA for each champion they played.

    Args:
        df: DataFrame with 'playername', 'champion' and 'kda' columns.
        player_name: Value matched exactly against 'playername'.

    Returns:
        Series of mean 'kda' indexed by 'champion'; empty when the player has
        no rows in df.
    """
    is_player = df['playername'] == player_name
    return df.loc[is_player].groupby('champion')['kda'].mean()

# Example: per-champion mean KDA for the player 'Zeus', highest first.
player_champion_kda = get_champion_kda(train_ft, 'Zeus')
player_champion_kda.sort_values(ascending=False)

Unnamed: 0_level_0,kda
champion,Unnamed: 1_level_1
Akali,14.0
Maokai,13.0
Poppy,10.0
Karma,10.0
Fiora,6.357143
Rumble,6.21875
Sejuani,6.2
Gragas,5.996212
Twisted Fate,5.666667
Sion,5.560606


In [18]:
def calculate_win_rate_by_kda(df):
    """Return each player's mean KDA and win rate.

    Despite the name, nothing is bucketed by KDA: the frame is grouped by
    'playername' and the means of 'kda' and 'result' are reported side by side.

    Args:
        df: DataFrame with 'playername', 'kda' and 'result' columns.

    Returns:
        DataFrame indexed by 'playername' with columns 'kda' (mean KDA) and
        'win_rate' (mean of 'result').
    """
    per_player = df.groupby('playername')
    return per_player.agg(
        kda=('kda', 'mean'),
        win_rate=('result', 'mean'),
    )

# Per-player mean KDA and win rate over the training frame.
player_win_rates = calculate_win_rate_by_kda(train_ft)
player_win_rates

Unnamed: 0_level_0,kda,win_rate
playername,Unnamed: 1_level_1,Unnamed: 2_level_1
1116,6.874500,0.600000
113,4.121457,0.404762
1Jiang,4.743561,0.590909
1ssue,3.504762,0.285714
2T,3.108931,0.352273
...,...,...
xiaotu,1.766387,0.102564
xyno,1.616071,0.125000
yjy,2.204630,0.166667
zay,5.840799,0.468750


In [19]:
# Per-champion average for a single player (hard-coded to 'Keria').
# NOTE(review): the original heading describes this as KDA per champion, but
# the aggregated column is 'assists' — confirm which metric is intended.

pos_pn = train_ft[(train_ft['playername'] == 'Keria')]
pos_pn_cham = pos_pn.groupby(['playername', 'champion'])['assists'].mean() # original note mentions switching the metric to/from kda — TODO confirm

pos_pn_cham.sort_values(ascending=False) # display sorted in descending order

Unnamed: 0_level_0,Unnamed: 1_level_0,assists
playername,champion,Unnamed: 2_level_1
Keria,Lee Sin,14.0
Keria,Neeko,14.0
Keria,Sona,13.0
Keria,Morgana,12.0
Keria,Annie,11.0
Keria,Kalista,11.0
Keria,Nami,10.875
Keria,Rumble,10.5
Keria,Yuumi,10.4375
Keria,Renata Glasc,10.263158


In [20]:
# List train_ft's columns again (the frame gained a 'kda' column in an earlier cell).
train_ft.columns

Index(['gameid', 'league', 'split', 'playoffs', 'date', 'game', 'patch',
       'side', 'position', 'playername', 'teamname', 'champion', 'ban1',
       'ban2', 'ban3', 'ban4', 'ban5', 'gamelength', 'result', 'kills',
       'deaths', 'assists', 'doublekills', 'triplekills', 'quadrakills',
       'pentakills', 'firstblood', 'firstbloodkill', 'firstbloodassist',
       'firstbloodvictim', 'team kpm', 'ckpm', 'barons', 'opp_barons',
       'inhibitors', 'opp_inhibitors', 'damagetochampions', 'dpm',
       'damageshare', 'damagetakenperminute', 'damagemitigatedperminute',
       'wardsplaced', 'wpm', 'wardskilled', 'wcpm', 'controlwardsbought',
       'visionscore', 'vspm', 'totalgold', 'earnedgold', 'earned gpm',
       'earnedgoldshare', 'goldspent', 'total cs', 'minionkills',
       'monsterkills', 'cspm', 'goldat10', 'xpat10', 'csat10', 'opp_goldat10',
       'opp_xpat10', 'opp_csat10', 'golddiffat10', 'xpdiffat10', 'csdiffat10',
       'killsat10', 'assistsat10', 'deathsat10', 'opp_k

In [21]:
# For each player, over the most recent games: treat the 10-minute KDA as the
# "first half" and the 15-minute KDA as the "second half", then extrapolate a
# KDA for the next game.

def predict_kda(df, player_name, n_games=5):
    """Extrapolate a player's next-game KDA from their most recent games.

    The most recent games are the rows for `player_name` with the largest
    'gameid' values. For those rows the KDA at 10 and at 15 minutes is computed
    as (kills + assists) / deaths, with a deaths value of 0 replaced by 1.

    Args:
        df: DataFrame with 'playername', 'gameid' and the
            kills/assists/deaths columns suffixed 'at10' and 'at15'.
        player_name: Value matched exactly against 'playername'.
        n_games: Number of most recent games to average over (default 5).

    Returns:
        Tuple (mean_kda_10, mean_kda_15, predicted_kda). All three are NaN when
        the player has no rows in df.
    """
    # NOTE(review): recency is decided by sorting on gameid — this assumes
    # gameid ordering matches chronological order; TODO confirm.
    recent_games = (
        df.loc[df['playername'] == player_name]
        .sort_values('gameid', ascending=False)
        .head(n_games)
    )

    # Compute the ratios as standalone Series instead of assigning new columns
    # onto the filtered slice, which can raise SettingWithCopyWarning.
    kda_at_10 = (
        (recent_games['killsat10'] + recent_games['assistsat10'])
        / recent_games['deathsat10'].replace(0, 1)
    )
    kda_at_15 = (
        (recent_games['killsat15'] + recent_games['assistsat15'])
        / recent_games['deathsat15'].replace(0, 1)
    )

    # Average KDA over the selected games
    mean_kda_10 = kda_at_10.mean()
    mean_kda_15 = kda_at_15.mean()

    # Prediction = 15-minute mean plus the change from the 10-minute mean to
    # the 15-minute mean (the author notes this formula may still be revised).
    kda_diff = mean_kda_15 - mean_kda_10
    predicted_kda = mean_kda_15 + kda_diff

    return mean_kda_10, mean_kda_15, predicted_kda

# Run the extrapolation for every distinct player in the training frame and
# print a four-line report per player followed by a separator line.
for player_name in train_ft['playername'].unique():
    kda_10, kda_15, predicted_kda = predict_kda(train_ft, player_name)
    report_lines = (
        f" 선수: {player_name}",
        f" 최근 5경기 전반 KDA: {kda_10:.2f}",
        f" 최근 5경기 후반 KDA: {kda_15:.2f}",
        f" 이후 예측 KDA: {predicted_kda:.2f}",
        "-" * 20,
    )
    print(*report_lines, sep="\n")

 선수: Kingen
 최근 5경기 전반 KDA: 0.20
 최근 5경기 후반 KDA: 1.00
 이후 예측 KDA: 1.80
--------------------
 선수: Pyosik
 최근 5경기 전반 KDA: 1.30
 최근 5경기 후반 KDA: 2.20
 이후 예측 KDA: 3.10
--------------------
 선수: Zeka
 최근 5경기 전반 KDA: 0.80
 최근 5경기 후반 KDA: 2.00
 이후 예측 KDA: 3.20
--------------------
 선수: Deft
 최근 5경기 전반 KDA: 1.50
 최근 5경기 후반 KDA: 3.70
 이후 예측 KDA: 5.90
--------------------
 선수: BeryL
 최근 5경기 전반 KDA: 1.00
 최근 5경기 후반 KDA: 1.60
 이후 예측 KDA: 2.20
--------------------
 선수: Dove
 최근 5경기 전반 KDA: 0.40
 최근 5경기 후반 KDA: 1.40
 이후 예측 KDA: 2.40
--------------------
 선수: Croco
 최근 5경기 전반 KDA: 0.40
 최근 5경기 후반 KDA: 0.80
 이후 예측 KDA: 1.20
--------------------
 선수: Clozer
 최근 5경기 전반 KDA: 0.80
 최근 5경기 후반 KDA: 2.00
 이후 예측 KDA: 3.20
--------------------
 선수: Envyy
 최근 5경기 전반 KDA: 0.20
 최근 5경기 후반 KDA: 0.90
 이후 예측 KDA: 1.60
--------------------
 선수: Kael
 최근 5경기 전반 KDA: 0.40
 최근 5경기 후반 KDA: 0.60
 이후 예측 KDA: 0.80
--------------------
 선수: Zeus
 최근 5경기 전반 KDA: 0.00
 최근 5경기 후반 KDA: 0.90
 이후 예측 KDA: 1.80
--------------------
 

In [22]:
# List train_ft's columns.
train_ft.columns

Index(['gameid', 'league', 'split', 'playoffs', 'date', 'game', 'patch',
       'side', 'position', 'playername', 'teamname', 'champion', 'ban1',
       'ban2', 'ban3', 'ban4', 'ban5', 'gamelength', 'result', 'kills',
       'deaths', 'assists', 'doublekills', 'triplekills', 'quadrakills',
       'pentakills', 'firstblood', 'firstbloodkill', 'firstbloodassist',
       'firstbloodvictim', 'team kpm', 'ckpm', 'barons', 'opp_barons',
       'inhibitors', 'opp_inhibitors', 'damagetochampions', 'dpm',
       'damageshare', 'damagetakenperminute', 'damagemitigatedperminute',
       'wardsplaced', 'wpm', 'wardskilled', 'wcpm', 'controlwardsbought',
       'visionscore', 'vspm', 'totalgold', 'earnedgold', 'earned gpm',
       'earnedgoldshare', 'goldspent', 'total cs', 'minionkills',
       'monsterkills', 'cspm', 'goldat10', 'xpat10', 'csat10', 'opp_goldat10',
       'opp_xpat10', 'opp_csat10', 'golddiffat10', 'xpdiffat10', 'csdiffat10',
       'killsat10', 'assistsat10', 'deathsat10', 'opp_k

In [23]:
# Parse the 'date' column to datetimes and derive a calendar 'year' column,
# identically for the train and test frames (each frame is updated in place).
for frame in (train_ft, test_ft):
    frame["date"] = pd.to_datetime(frame["date"])
    frame["year"] = frame["date"].dt.year

In [24]:
# Divide the 15-minute CS figure by 15 to get CS per minute
def calculate_avg_cs_per_minute(df):
    """Add an 'avg_cs_per_minute' column equal to 'csat15' / 15.

    The column is written onto the frame that was passed in, and that same
    frame object is returned.

    Args:
        df: DataFrame with a 'csat15' column.

    Returns:
        The input DataFrame, now carrying 'avg_cs_per_minute'.
    """
    cs_per_minute = df['csat15'].div(15)
    df['avg_cs_per_minute'] = cs_per_minute
    return df

# Add the CS-per-minute column to both splits.
train_ft = calculate_avg_cs_per_minute(train_ft)
test_ft = calculate_avg_cs_per_minute(test_ft)

# Mean CS per minute for each player, broken down by year
print(train_ft.groupby(['playername', 'year'])['avg_cs_per_minute'].mean())

playername  year
1116        2023    9.633333
            2024    9.361404
113         2023    5.247619
1Jiang      2024    8.612121
1ssue       2022    1.698413
                      ...   
xyno        2024    7.916667
yjy         2023    7.248148
zay         2023    1.034722
            2024    1.404167
zorenous    2024    8.233333
Name: avg_cs_per_minute, Length: 1352, dtype: float64


In [31]:
# Yearly mean CS per minute for the player 'Chovy'.
train_ft.groupby(['playername', 'year'])['avg_cs_per_minute'].mean().loc['Chovy']

Unnamed: 0_level_0,avg_cs_per_minute
year,Unnamed: 1_level_1
2022,10.183019
2023,9.886216
2024,10.018803


In [26]:
# TODO: the test-set columns for the player data still need to be cleaned up.