In [2]:
# Mount Google Drive at /content/drive so files stored under MyDrive can be read.
from google.colab import drive
drive.mount('/content/drive')

# Directory prefix (note the trailing slash) for the datathon data files on Drive.
DATA_PATH = "/content/drive/MyDrive/datathon/data/"

# Global random seed; presumably meant to be passed to reset_seeds — TODO confirm where it is used.
SEED = 42

Mounted at /content/drive


In [3]:
import pandas as pd
import numpy as np
import torch
from tqdm.auto import tqdm
import random
import os

def reset_seeds(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch (CPU and
    every CUDA device), and configures cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Exported for child processes; hash randomisation of the already-running
    # interpreter is fixed at start-up and is not changed by this assignment.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed all GPUs, not only the current device (a no-op when CUDA is unavailable).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # cuDNN autotuning may select different algorithms from run to run; turn it off.
    torch.backends.cudnn.benchmark = False

In [4]:
train_tr = pd.read_csv(f"{DATA_PATH}players_train(analyze).csv") # training data for match-outcome prediction
#train_target = pd.read_csv(f"{DATA_PATH}train_target_1226.csv") # training labels (ground truth)
test_tr = pd.read_csv(f"{DATA_PATH}players_test(analyze).csv") # test data for match-outcome prediction
#test_target = pd.read_csv(f"{DATA_PATH}test_target_1226.csv") # test labels (ground truth)

In [6]:
# (rows, columns) of the raw train and test frames
train_tr.shape, test_tr.shape

((50000, 87), (11670, 87))

# 전처리 (Preprocessing)

In [8]:
# Preprocess copies so the frames loaded from CSV (train_tr / test_tr) are not modified.
train_ft = train_tr.copy()
test_ft = test_tr.copy()

In [9]:
# Show the column names available in the working training frame
train_ft.columns

Index(['gameid', 'league', 'split', 'playoffs', 'date', 'game', 'patch',
       'side', 'position', 'playername', 'teamname', 'champion', 'ban1',
       'ban2', 'ban3', 'ban4', 'ban5', 'gamelength', 'result', 'kills',
       'deaths', 'assists', 'doublekills', 'triplekills', 'quadrakills',
       'pentakills', 'firstblood', 'firstbloodkill', 'firstbloodassist',
       'firstbloodvictim', 'team kpm', 'ckpm', 'barons', 'opp_barons',
       'inhibitors', 'opp_inhibitors', 'damagetochampions', 'dpm',
       'damageshare', 'damagetakenperminute', 'damagemitigatedperminute',
       'wardsplaced', 'wpm', 'wardskilled', 'wcpm', 'controlwardsbought',
       'visionscore', 'vspm', 'totalgold', 'earnedgold', 'earned gpm',
       'earnedgoldshare', 'goldspent', 'total cs', 'minionkills',
       'monsterkills', 'cspm', 'goldat10', 'xpat10', 'csat10', 'opp_goldat10',
       'opp_xpat10', 'opp_csat10', 'golddiffat10', 'xpdiffat10', 'csdiffat10',
       'killsat10', 'assistsat10', 'deathsat10', 'opp_k

- player

In [10]:
# Columns whose missing entries are replaced by the placeholder string "unknown":
# "split" plus the five ban slots "ban1" .. "ban5".
fill_unknown_cols = ["split"] + [f"ban{slot}" for slot in range(1, 6)]

# Apply the same fill to the train and test frames.
for frame in (train_ft, test_ft):
    frame[fill_unknown_cols] = frame[fill_unknown_cols].fillna("unknown")


In [12]:
# Shapes of the working frames after filling the "unknown" placeholder
train_ft.shape, test_ft.shape

((50000, 87), (11670, 87))

In [13]:
# Impute the remaining missing values in train_ft, row by row.
# For each null cell: use the mean of that column over the player's 5 preceding rows
# (ordered by gameid); if that mean is not available, fall back to the column mean
# over all train_ft rows that share the row's position.

# Columns that contain at least one null, and the rows that have a null in any of them.
missing_columns = train_ft.columns[train_ft.isnull().sum() > 0]
null_samples = train_ft[train_ft[missing_columns].isnull().any(axis=1)]

for idx, row in null_samples.iterrows():
    # Rows of the same player with a smaller gameid, largest gameid first.
    # Re-read from train_ft on every iteration, so it can contain values that
    # earlier iterations of this loop already imputed.
    # NOTE(review): assumes that ordering by gameid is chronological — TODO confirm
    # (the frame also has a 'date' column).
    player_history = train_ft[
        (train_ft["playername"] == row["playername"])
        & (train_ft["gameid"] < row["gameid"])
    ].sort_values("gameid", ascending=False)

    for col in missing_columns:
        # `row` comes from the null_samples snapshot, so this tests the original value.
        if pd.isnull(row[col]):
            # Mean of the non-null values among the first 5 history rows;
            # NaN when there is no history or all of those values are null.
            prev_5_mean = player_history[col].head(5).mean()
            if pd.notnull(prev_5_mean):
                train_ft.loc[idx, col] = prev_5_mean
            else:
                # Fallback: mean of the column over every train_ft row with the
                # same position (not restricted to earlier gameids).
                position_mean = train_ft[
                    (train_ft["position"] == row["position"])
                ][col].mean()
                train_ft.loc[idx, col] = position_mean

In [14]:
# Total number of null cells left in train_ft after imputation
train_ft.isna().sum().sum()

0

In [16]:
# Impute the missing values in test_ft with the same rule as for train_ft.
# Both the player history and the position fallback are taken from train_ft only;
# other test_ft rows of the same player are not consulted.

# Columns of test_ft that contain at least one null, and the rows that have a null in any of them.
missing_columns = test_ft.columns[test_ft.isnull().sum() > 0]
null_samples =  test_ft[ test_ft[missing_columns].isnull().any(axis=1)]

for idx, row in null_samples.iterrows():
    # train_ft rows of the same player with a smaller gameid, largest gameid first.
    # NOTE(review): assumes that ordering by gameid is chronological — TODO confirm
    # (the frame also has a 'date' column).
    player_history = train_ft[
        (train_ft["playername"] == row["playername"])
        & (train_ft["gameid"] < row["gameid"])
    ].sort_values("gameid", ascending=False)

    for col in missing_columns:
        if pd.isnull(row[col]):
            # Mean of the non-null values among the first 5 history rows;
            # NaN when the player has no usable history in train_ft.
            prev_5_mean = player_history[col].head(5).mean()
            if pd.notnull(prev_5_mean):
                 test_ft.loc[idx, col] = prev_5_mean
            else:
                # Fallback: mean of the column over every train_ft row with the same position.
                position_mean = train_ft[
                    (train_ft["position"] == row["position"])
                ][col].mean()
                test_ft.loc[idx, col] = position_mean

In [17]:
# Total number of null cells left in test_ft after imputation
test_ft.isna().sum().sum()

0

# Feature engineering

In [21]:
# KDA of players per champion
# KDA calculation
def calculate_kda(row):
    """Return the KDA ratio, (kills + assists) / deaths, for a single row.

    A row with zero deaths is divided by 1 instead, so its KDA equals
    kills + assists.

    Args:
        row: Mapping-like object with 'kills', 'deaths' and 'assists' entries.
    """
    takedowns = row['kills'] + row['assists']
    # Zero deaths would divide by zero; use 1 as the divisor in that case.
    divisor = 1 if row['deaths'] == 0 else row['deaths']
    return takedowns / divisor

# Compute the KDA column and rank rows by it.
# Vectorised equivalent of applying calculate_kda to every row: rows with zero
# deaths use a divisor of 1, all other rows divide by their own deaths value.
kda_divisor = train_ft['deaths'].mask(train_ft['deaths'] == 0, 1)
train_ft['kda'] = (train_ft['kills'] + train_ft['assists']) / kda_divisor
kda_sorted = train_ft.sort_values('kda', ascending=False)

# Show the result (player, champion, KDA)
print(kda_sorted[['playername', 'champion', 'kda']])

      playername  champion   kda
48264       Orca      Hwei  33.0
48261        665  Xin Zhao  30.0
2304     Kaiwing     Yuumi  29.0
17389      scuro     Yuumi  28.0
46166    Disamis      Rell  28.0
...          ...       ...   ...
3862     Keaiduo    Viktor   0.0
7520      Hasmed      Ornn   0.0
43255    Odoamne      Gwen   0.0
3860        Huni      Gwen   0.0
18460       Dove    Aatrox   0.0

[50000 rows x 3 columns]


In [25]:
# Mean KDA per player over all of that player's rows in train_ft
player_kda_mean = train_ft.groupby('playername')['kda'].agg('mean')

player_kda_mean

Unnamed: 0_level_0,kda
playername,Unnamed: 1_level_1
1116,6.874500
113,4.121457
1Jiang,4.743561
1ssue,3.504762
2T,3.108931
...,...
xiaotu,1.766387
xyno,1.616071
yjy,2.204630
zay,5.840799


In [53]:
# Exploratory only: tried while looking at the KDA data; not used as a feature.
def get_champion_kda(df, player_name):
    """Return one player's mean KDA for each champion they appear with.

    Args:
        df: Frame with 'playername', 'champion' and 'kda' columns.
        player_name: Value of 'playername' to select.

    Returns:
        Series of mean 'kda' values indexed by 'champion'; empty when the
        player has no rows in ``df``.
    """
    is_player = df['playername'] == player_name
    return df.loc[is_player].groupby('champion')['kda'].mean()

# Example: per-champion mean KDA for the player named 'Zeus'
player_champion_kda = get_champion_kda(train_ft, 'Zeus')
player_champion_kda

Unnamed: 0_level_0,kda
champion,Unnamed: 1_level_1
Aatrox,5.05463
Akali,14.0
Camille,3.502083
Fiora,6.357143
Gangplank,5.380952
Gnar,5.022917
Gragas,5.996212
Graves,0.857143
Gwen,4.608974
Irelia,4.625


In [56]:
def calculate_win_rate_by_kda(df):
    """Summarise each player by mean KDA and win rate.

    Args:
        df: Frame with 'playername', 'kda' and 'result' columns.

    Returns:
        DataFrame indexed by 'playername' with columns 'kda' (mean of 'kda')
        and 'win_rate' (mean of 'result').
    """
    # Named aggregation yields the final column names directly.
    return df.groupby('playername').agg(
        kda=('kda', 'mean'),
        win_rate=('result', 'mean'),
    )

# Per-player mean KDA and win rate computed over the whole training frame
player_win_rates = calculate_win_rate_by_kda(train_ft)
player_win_rates

Unnamed: 0_level_0,kda,win_rate
playername,Unnamed: 1_level_1,Unnamed: 2_level_1
1116,6.874500,0.600000
113,4.121457,0.404762
1Jiang,4.743561,0.590909
1ssue,3.504762,0.285714
2T,3.108931,0.352273
...,...,...
xiaotu,1.766387,0.102564
xyno,1.616071,0.125000
yjy,2.204630,0.166667
zay,5.840799,0.468750
