In [1]:
from google.colab import drive
drive.mount('/content/drive')

DATA_PATH = "/content/drive/MyDrive/datathon/data/"

SEED = 42

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
import torch
from tqdm.auto import tqdm
import random
import os




def reset_seeds(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

In [3]:
train_tr = pd.read_csv(f"/content/drive/MyDrive/datathon/data/players_train.csv") # 학습용 승부예측 데이터
train_target = pd.read_csv(f"/content/drive/MyDrive/datathon/data/players_train_target.csv") # 학습용 정답 데이터
test_tr = pd.read_csv(f"/content/drive/MyDrive/datathon/data/players_test.csv") # 테스트용 승부예측 데이터
test_target = pd.read_csv(f"/content/drive/MyDrive/datathon/data/players_test_target.csv") # 테스트용 정답 데이터

In [4]:
train_tr.shape, test_tr.shape

((50000, 87), (11670, 87))

In [5]:
train_ft = train_tr.copy()
test_ft = test_tr.copy()

In [6]:
train_ft.columns

Index(['gameid', 'league', 'split', 'playoffs', 'date', 'game', 'patch',
       'side', 'position', 'playername', 'teamname', 'champion', 'ban1',
       'ban2', 'ban3', 'ban4', 'ban5', 'gamelength', 'result', 'kills',
       'deaths', 'assists', 'doublekills', 'triplekills', 'quadrakills',
       'pentakills', 'firstblood', 'firstbloodkill', 'firstbloodassist',
       'firstbloodvictim', 'team kpm', 'ckpm', 'barons', 'opp_barons',
       'inhibitors', 'opp_inhibitors', 'damagetochampions', 'dpm',
       'damageshare', 'damagetakenperminute', 'damagemitigatedperminute',
       'wardsplaced', 'wpm', 'wardskilled', 'wcpm', 'controlwardsbought',
       'visionscore', 'vspm', 'totalgold', 'earnedgold', 'earned gpm',
       'earnedgoldshare', 'goldspent', 'total cs', 'minionkills',
       'monsterkills', 'cspm', 'goldat10', 'xpat10', 'csat10', 'opp_goldat10',
       'opp_xpat10', 'opp_csat10', 'golddiffat10', 'xpdiffat10', 'csdiffat10',
       'killsat10', 'assistsat10', 'deathsat10', 'opp_k

In [7]:
fill_unknown_cols = [
    "split",
    "ban1",
    "ban2",
    "ban3",
    "ban4",
    "ban5",
]

train_ft[fill_unknown_cols] = train_ft[fill_unknown_cols].fillna("unknown")
test_ft[fill_unknown_cols] = test_ft[fill_unknown_cols].fillna("unknown")

In [8]:
train_ft.shape, test_ft.shape

((50000, 87), (11670, 87))

In [9]:
missing_columns = train_ft.columns[train_ft.isnull().sum() > 0]
null_samples = train_ft[train_ft[missing_columns].isnull().any(axis=1)]

for idx, row in null_samples.iterrows():
    player_history = train_ft[
        (train_ft["playername"] == row["playername"])
        & (train_ft["gameid"] < row["gameid"])
    ].sort_values("gameid", ascending=False)

    for col in missing_columns:
        if pd.isnull(row[col]):
            prev_5_mean = player_history[col].head(5).mean()
            if pd.notnull(prev_5_mean):
                train_ft.loc[idx, col] = prev_5_mean
            else:
                position_mean = train_ft[
                    (train_ft["position"] == row["position"])
                ][col].mean()
                train_ft.loc[idx, col] = position_mean

In [10]:
train_ft.isna().sum().sum()

0

In [11]:
missing_columns = test_ft.columns[test_ft.isnull().sum() > 0]
null_samples =  test_ft[ test_ft[missing_columns].isnull().any(axis=1)]

for idx, row in null_samples.iterrows():
    player_history = train_ft[
        (train_ft["playername"] == row["playername"])
        & (train_ft["gameid"] < row["gameid"])
    ].sort_values("gameid", ascending=False)

    for col in missing_columns:
        if pd.isnull(row[col]):
            prev_5_mean = player_history[col].head(5).mean()
            if pd.notnull(prev_5_mean):
                 test_ft.loc[idx, col] = prev_5_mean
            else:
                position_mean = train_ft[
                    (train_ft["position"] == row["position"])
                ][col].mean()
                test_ft.loc[idx, col] = position_mean

In [12]:
test_ft.isna().sum().sum()

0

In [13]:
train_data = pd.concat([train_ft, test_ft], ignore_index=True)  # 데이터프레임 병합

In [None]:
# def select_key_player(data):
#     key_players = data.groupby('teamname').apply(
#         lambda group: group.loc[group['kills'].idxmax()] if 'kills' in group.columns else None
#     )
#     return key_players

# key_players = select_key_player(train_data)  # key player 선정

# print(key_players)

                                   gameid league    split  playoffs  \
teamname                                                              
100 Thieves         ESPORTSTMNT03_2653320    LCS   Spring         0   
AS Esports          ESPORTSTMNT03_2884725    VCS   Summer         0   
Antic Esports             LOLTMNT04_32399    PCS   Spring         1   
Astralis            ESPORTSTMNT06_2769531    LEC   Winter         0   
BNK FEARX           ESPORTSTMNT04_2450994    LCK   Summer         0   
...                                   ...    ...      ...       ...   
Vivo Keyd Stars     ESPORTSTMNT01_3327386  CBLOL  Split 1         0   
Weibo Gaming           10413-10413_game_4   WLDs  unknown         0   
West Point Esports  ESPORTSTMNT03_3198513    PCS   Summer         0   
paiN Gaming         ESPORTSTMNT01_3368322  CBLOL  Split 2         0   
İstanbul Wildcats   ESPORTSTMNT02_3042003   WLDs  unknown         0   

                                date  game  patch  side position   playernam

  key_players = data.groupby('teamname').apply(


In [14]:
# LCK 리그 데이터만 필터링
lck_data = train_data[train_data['league'] == 'LCK']

In [15]:
def select_key_player(data):
    def get_key_player(group):
        # 비교할 지표
        metrics = ['dpm', 'vspm', 'earned gpm', 'goldat10', 'xpat10', 'xpat15', 'goldat15']
        # 포지션별로 성과 지표 합계로 비교하여 key player 선정
        key_player_by_position = group.groupby('position').apply(
            lambda position_group: position_group.loc[
                position_group[metrics].sum(axis=1).idxmax()
            ]
        )
        return key_player_by_position

    key_players = data.groupby('teamname').apply(get_key_player).reset_index(drop=True)
    return key_players

key_players = select_key_player(lck_data)  # key player 선정

# 특정 teamname의 key player를 출력하는 함수 추가
def get_team_key_players(teamname, key_players):
    return key_players[key_players['teamname'] == teamname]

# 특정 팀의 key player 예시 출력
target_team = 'T1'  # 확인하고 싶은 팀 이름
team_key_players = get_team_key_players(target_team, key_players)

# 특정 팀의 key player 출력
def print_team_key_players(teamname, team_key_players):
    print(f"Key players for team {teamname}:")
    print(team_key_players)

print_team_key_players(target_team, team_key_players)


Key players for team T1:
                   gameid league   split  playoffs              date  game  \
50  ESPORTSTMNT02_2557691    LCK  Spring         0   2022-02-09 9:03     2   
51  ESPORTSTMNT04_2411536    LCK  Summer         0  2022-07-10 11:58     2   
52       LOLTMNT03_128221    LCK  Summer         0   2024-07-18 8:56     2   
53       LOLTMNT01_125566    LCK  Summer         0  2024-06-14 11:50     2   
54  ESPORTSTMNT04_2210474    LCK  Spring         0   2022-03-13 8:27     2   

    patch  side position playername  ... opp_csat15 golddiffat15 xpdiffat15  \
50  12.02  Blue      bot   Gumayusi  ...      115.0       4441.0     1813.0   
51  12.12   Red      jng       Oner  ...       80.0       2388.0     2352.0   
52  14.13  Blue      mid      Faker  ...      134.0       2293.0     1431.0   
53  14.11  Blue      sup      Keria  ...       24.0       2221.0     3790.0   
54  12.04   Red      top       Zeus  ...       99.0       4358.0     3637.0   

   csdiffat15 killsat15 assists

  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_players = data.groupby('teamname').apply(get_key_player).reset_index(drop=True)


In [16]:
def select_key_player(data):
    def get_key_player(group):
        # 비교할 지표
        metrics = ['dpm', 'vspm', 'earned gpm', 'goldat10', 'xpat10', 'xpat15', 'goldat15']
        # 포지션별로 성과 지표 합계로 비교하여 key player 선정
        key_player_by_position = group.groupby('position').apply(
            lambda position_group: position_group.loc[
                position_group[metrics].sum(axis=1).idxmax()
            ]
        )
        return key_player_by_position

    key_players = data.groupby('teamname').apply(get_key_player).reset_index(drop=True)
    return key_players

# 주요 선수 선정
key_players = select_key_player(lck_data)

# 연도별 주요 선수를 확인하는 함수
def get_team_key_players_by_year(teamname, key_players):
    # 특정 팀 필터링 및 연도 추가
    team_key_players = key_players[key_players['teamname'] == teamname].copy()
    team_key_players['year'] = team_key_players['date'].str[:4].astype(int)  # 연도 추출
    # 연도별 정렬
    return team_key_players.sort_values(by='year')

# 팀 이름 입력
target_team = 'T1'  # 확인하고 싶은 팀 이름
team_key_players_by_year = get_team_key_players_by_year(target_team, key_players)

# 연도별 주요 선수 출력 함수
def print_team_key_players_by_year(teamname, team_key_players_by_year):
    print(f"Key players for team {teamname} by year:")
    for year, group in team_key_players_by_year.groupby('year'):
        print(f"Year {year}:")
        print(group[['position', 'playername']])  # 포지션과 선수 이름만 출력

# 연도별 주요 선수 출력
print_team_key_players_by_year(target_team, team_key_players_by_year)


  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(


Key players for team T1 by year:
Year 2022:
   position playername
50      bot   Gumayusi
51      jng       Oner
54      top       Zeus
Year 2024:
   position playername
52      mid      Faker
53      sup      Keria


  key_player_by_position = group.groupby('position').apply(
  key_players = data.groupby('teamname').apply(get_key_player).reset_index(drop=True)


In [17]:
# 2023년 데이터 확인
train_data['year'] = train_data['date'].str[:4].astype(int)
data_2023 = train_data[train_data['year'] == 2023]

print(f"2023년 데이터 개수: {data_2023.shape[0]}")
print(data_2023['teamname'].unique())  # 2023년에 포함된 팀 이름 확인


2023년 데이터 개수: 21135
['Dplus KIA' 'DRX' 'T1' 'Gen.G' 'Kwangdong Freecs' 'Hanwha Life Esports'
 'Nongshim RedForce' 'OKSavingsBank Brion' 'KT Rolster' 'BNK FEARX' 'LOUD'
 'paiN Gaming' 'GiantX' 'G2 Esports' 'Liberty' 'KaBuM! Esports' 'Rogue'
 'Team BDS' 'Los Grandes' 'Fluxo' 'RED Canids' 'INTZ' 'Astralis'
 'Team Heretics' 'Vivo Keyd Stars' 'FURIA' 'MAD Lions KOI' 'SK Gaming'
 'Team Vitality' 'Fnatic' 'Cloud9' '100 Thieves' 'Evil Geniuses'
 'Golden Guardians' 'Team Liquid' 'FlyQuest' 'NRG Kia' 'Dignitas'
 'Immortals' 'TSM' 'CTBC Flying Oyster' 'Beyond Gaming' 'PSG Talon'
 'West Point Esports' 'Dewish Team' 'Impunity' 'J Team'
 'Deep Cross Gaming' 'Frank Esports' 'HELL PIGS' 'MGN Vikings Esports'
 'Team Secret' 'Team Whales' 'MGN Blue Esports' 'Team Flash' 'GAM Esports'
 'SBTC Esports' 'CERBERUS Esports' 'Chiefs Esports Club' 'Team Bliss'
 'DetonatioN FocusMe' 'Bilibili Gaming' 'Movistar R7' 'JD Gaming'
 'OKSavingsBank BRION' 'LNG Esports' 'EDward Gaming' 'Top Esports'
 'Weibo Gaming' 'PSG

In [18]:
# T1의 2023년 데이터 확인
data_t1_2023 = data_2023[data_2023['teamname'] == 'T1']
print(data_t1_2023)


                      gameid league    split  playoffs              date  \
20685  ESPORTSTMNT04_2660040    LCK   Spring         0  2023-01-18 11:00   
20686  ESPORTSTMNT04_2660040    LCK   Spring         0  2023-01-18 11:00   
20687  ESPORTSTMNT04_2660040    LCK   Spring         0  2023-01-18 11:00   
20688  ESPORTSTMNT04_2660040    LCK   Spring         0  2023-01-18 11:00   
20689  ESPORTSTMNT04_2660040    LCK   Spring         0  2023-01-18 11:00   
...                      ...    ...      ...       ...               ...   
41795  ESPORTSTMNT03_3227847   WLDs  unknown         0  2023-11-19 10:44   
41796  ESPORTSTMNT03_3227847   WLDs  unknown         0  2023-11-19 10:44   
41797  ESPORTSTMNT03_3227847   WLDs  unknown         0  2023-11-19 10:44   
41798  ESPORTSTMNT03_3227847   WLDs  unknown         0  2023-11-19 10:44   
41799  ESPORTSTMNT03_3227847   WLDs  unknown         0  2023-11-19 10:44   

       game  patch  side position playername  ... golddiffat15 xpdiffat15  \
20685     

In [19]:
# 2023년 T1 데이터에서 결측값 확인
missing_columns = data_t1_2023.columns[data_t1_2023.isnull().any()]
print(f"결측값이 있는 컬럼: {missing_columns}")
print(data_t1_2023[missing_columns].isnull().sum())


결측값이 있는 컬럼: Index([], dtype='object')
Series([], dtype: float64)


In [20]:
# 주요 선수 선정 과정에서 2023년 데이터 확인
selected_key_players = select_key_player(data_2023)
print(selected_key_players[selected_key_players['teamname'] == 'T1'])


  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby

                    gameid league    split  playoffs              date  game  \
280  ESPORTSTMNT04_2673879    MSI  unknown         0  2023-05-10 13:52     3   
281  ESPORTSTMNT01_3328186    LCK   Spring         0   2023-03-19 6:51     2   
282  ESPORTSTMNT01_3372716    LCK   Summer         0   2023-06-15 9:10     2   
283  ESPORTSTMNT01_3372716    LCK   Summer         0   2023-06-15 9:10     2   
284  ESPORTSTMNT02_3162355    LCK   Spring         0  2023-02-23 12:33     2   

     patch side position playername  ... golddiffat15 xpdiffat15 csdiffat15  \
280  13.08  Red      bot   Gumayusi  ...       3657.0        3.0       43.0   
281  13.05  Red      jng       Oner  ...       2973.0      914.0       12.0   
282  13.11  Red      mid      Faker  ...        348.0      610.0        7.0   
283  13.11  Red      sup      Keria  ...        936.0     1865.0      -17.0   
284  13.03  Red      top       Zeus  ...       1668.0      738.0       46.0   

    killsat15 assistsat15 deathsat15 opp_kil

  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_players = data.groupby('teamname').apply(get_key_player).reset_index(drop=True)


In [21]:
def select_key_player(data):
    def get_key_player(group):
        metrics = ['dpm', 'vspm', 'earned gpm', 'goldat10', 'xpat10', 'xpat15', 'goldat15']
        group = group.fillna(0)  # 결측값 처리
        key_player_by_position = group.groupby('position', group_keys=False).apply(
            lambda position_group: position_group.loc[
                position_group[metrics].sum(axis=1).idxmax()
            ]
        )
        return key_player_by_position

    key_players = data.groupby('teamname', group_keys=False).apply(get_key_player).reset_index(drop=True)
    return key_players


In [22]:
def select_key_player(data):
    def get_key_player(group):
        # 비교할 지표
        metrics = ['dpm', 'vspm', 'earned gpm', 'goldat10', 'xpat10', 'xpat15', 'goldat15']
        # 포지션별로 성과 지표 합계로 비교하여 key player 선정
        key_player_by_position = group.groupby('position').apply(
            lambda position_group: position_group.loc[
                position_group[metrics].sum(axis=1).idxmax()
            ]
        )
        return key_player_by_position

    key_players = data.groupby('teamname').apply(get_key_player).reset_index(drop=True)
    return key_players

# 주요 선수 선정
key_players = select_key_player(lck_data)

# 연도별 주요 선수를 확인하는 함수
def get_team_key_players_by_year(teamname, key_players):
    # 특정 팀 필터링 및 연도 추가
    team_key_players = key_players[key_players['teamname'] == teamname].copy()
    team_key_players['year'] = team_key_players['date'].str[:4].astype(int)  # 연도 추출
    # 연도별 정렬
    return team_key_players.sort_values(by='year')

# 팀 이름 입력
target_team = 'T1'  # 확인하고 싶은 팀 이름
team_key_players_by_year = get_team_key_players_by_year(target_team, key_players)

# 연도별 주요 선수 출력 함수
def print_team_key_players_by_year(teamname, team_key_players_by_year):
    print(f"Key players for team {teamname} by year:")
    for year, group in team_key_players_by_year.groupby('year'):
        print(f"Year {year}:")
        print(group[['position', 'playername']])  # 포지션과 선수 이름만 출력

# 연도별 주요 선수 출력
print_team_key_players_by_year(target_team, team_key_players_by_year)


Key players for team T1 by year:
Year 2022:
   position playername
50      bot   Gumayusi
51      jng       Oner
54      top       Zeus
Year 2024:
   position playername
52      mid      Faker
53      sup      Keria


  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_player_by_position = group.groupby('position').apply(
  key_players = data.groupby('teamname').apply(get_key_player).reset_index(drop=True)


In [23]:
def select_key_player_by_patch(data):
    def get_key_player(group):
        # 비교할 지표
        metrics = ['dpm', 'vspm', 'earned gpm', 'goldat10', 'xpat10', 'xpat15', 'goldat15']
        # 포지션별로 성과 지표 합계로 비교하여 key player 선정
        key_player_by_position = group.groupby('position', group_keys=False).apply(
            lambda position_group: position_group.loc[
                position_group[metrics].sum(axis=1).idxmax()
            ]
        )
        return key_player_by_position

    # patch별 주요 선수 선정
    key_players_by_patch = data.groupby(['teamname', 'patch'], group_keys=False).apply(get_key_player).reset_index(drop=True)
    return key_players_by_patch

# 주요 선수 선정 실행
key_players_by_patch = select_key_player_by_patch(lck_data)

# 특정 팀의 patch별 주요 선수 확인
def get_team_key_players_by_patch(teamname, key_players_by_patch):
    # 특정 팀 필터링
    team_key_players = key_players_by_patch[key_players_by_patch['teamname'] == teamname]
    # patch별 정렬
    return team_key_players.sort_values(by='patch')

# 팀 이름 입력
target_team = 'T1'  # 확인하고 싶은 팀 이름
team_key_players_by_patch = get_team_key_players_by_patch(target_team, key_players_by_patch)

# Patch별 주요 선수 출력
def print_team_key_players_by_patch(teamname, team_key_players_by_patch):
    print(f"Key players for team {teamname} by patch:")
    for patch, group in team_key_players_by_patch.groupby('patch'):
        print(f"Patch {patch}:")
        print(group[['position', 'playername']])  # 포지션과 선수 이름만 출력

# 결과 출력
print_team_key_players_by_patch(target_team, team_key_players_by_patch)


  key_player_by_position = group.groupby('position', group_keys=False).apply(
  key_player_by_position = group.groupby('position', group_keys=False).apply(
  key_player_by_position = group.groupby('position', group_keys=False).apply(
  key_player_by_position = group.groupby('position', group_keys=False).apply(
  key_player_by_position = group.groupby('position', group_keys=False).apply(
  key_player_by_position = group.groupby('position', group_keys=False).apply(
  key_player_by_position = group.groupby('position', group_keys=False).apply(
  key_player_by_position = group.groupby('position', group_keys=False).apply(
  key_player_by_position = group.groupby('position', group_keys=False).apply(
  key_player_by_position = group.groupby('position', group_keys=False).apply(
  key_player_by_position = group.groupby('position', group_keys=False).apply(
  key_player_by_position = group.groupby('position', group_keys=False).apply(
  key_player_by_position = group.groupby('position', group_keys=

Key players for team T1 by patch:
Patch 12.01:
     position playername
1355      bot   Gumayusi
1356      jng       Oner
1357      mid      Faker
1358      sup      Keria
1359      top       Zeus
Patch 12.02:
     position playername
1364      top       Zeus
1362      mid      Faker
1363      sup      Keria
1360      bot   Gumayusi
1361      jng       Oner
Patch 12.03:
     position playername
1365      bot   Gumayusi
1366      jng       Oner
1367      mid      Faker
1368      sup      Keria
1369      top       Zeus
Patch 12.04:
     position playername
1374      top       Zeus
1372      mid      Faker
1373      sup      Keria
1370      bot   Gumayusi
1371      jng       Oner
Patch 12.05:
     position playername
1375      bot   Gumayusi
1376      jng       Oner
1377      mid      Faker
1378      sup      Keria
1379      top       Zeus
Patch 12.1:
     position playername
1382      mid      Faker
1384      top       Zeus
1383      sup      Keria
1381      jng       Oner
1380      bot 

  key_players_by_patch = data.groupby(['teamname', 'patch'], group_keys=False).apply(get_key_player).reset_index(drop=True)
