In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np

DATA_PATH = "/content/drive/MyDrive/Datathon_leeya/data/"
SEED = 42

In [None]:
teams_train = pd.read_csv(f"{DATA_PATH}teams_train.csv")
teams_test = pd.read_csv(f"{DATA_PATH}teams_test.csv")
players_train = pd.read_csv(f"{DATA_PATH}players_train.csv")
players_test = pd.read_csv(f"{DATA_PATH}players_test.csv")

In [None]:
pd.set_option('display.max_columns', None)

In [None]:
from datetime import datetime
import pytz

In [None]:
league_locations = {
    "LCK": "Asia/Seoul",
    "LEC": "Europe/Berlin",
    "LCS": "America/Los_Angeles",
    "CBLOL": "America/Sao_Paulo",
    "PCS": "Asia/Taipei",
    "VCS": "Asia/Ho_Chi_Minh",
    "MSI": {
        2022: "Asia/Seoul",
        2023: "Europe/London",
        2024: "Asia/Shanghai"
    },
    "WLDs": {
        2022: "America/Los_Angeles",
        2023: "Asia/Seoul",
        2024: "Europe/Berlin"
    }
}


- local time

In [None]:
for i in range(len(players_train)):
    date_str = players_train.loc[i, 'date']

    if len(date_str.split(' ')[1].split(':')[0]) == 1:
        date_str = date_str.replace(' ', ' 0', 1)

    if len(date_str.split(':')) == 2:
        date_str += ':00'

    players_train.loc[i, 'date'] = date_str

In [None]:
utc = pytz.timezone('UTC')

# 결과 저장할 리스트
local_times = []

# 각 행에 대해 처리
for i in range(len(players_train)):
    utc_time_str = players_train.loc[i, 'date']  # UTC 시간 문자열
    league = players_train.loc[i, 'league']  # 리그 이름

    # UTC 시간을 datetime 객체로 변환
    utc_time = datetime.strptime(utc_time_str, '%Y-%m-%d %H:%M:%S')
    utc_time = utc.localize(utc_time)

    # 연도 추출
    year = utc_time.year

    # 리그의 시간대 가져오기
    if league in league_locations:
        if isinstance(league_locations[league], dict):  # MSI나 WLDs처럼 연도별로 다른 경우
            local_tz = pytz.timezone(league_locations[league].get(year, "UTC"))  # 연도 없으면 기본 UTC
        else:
            local_tz = pytz.timezone(league_locations[league])  # 일반 리그의 시간대
        local_time = utc_time.astimezone(local_tz)  # 로컬 시간대로 변환
        local_time_str = local_time.strftime('%Y-%m-%d %H:%M:%S')  # 문자열로 변환
    else:
        local_time_str = utc_time_str  # 시간대가 없으면 기본 UTC 사용

    # 결과 저장
    local_times.append(local_time_str)

# 변환된 시간 추가
players_train['local_time'] = local_times


In [None]:
for i in range(len(players_test)):
    date_str = players_test.loc[i, 'date']

    if len(date_str.split(' ')[1].split(':')[0]) == 1:
        date_str = date_str.replace(' ', ' 0', 1)

    if len(date_str.split(':')) == 2:
        date_str += ':00'

    players_test.loc[i, 'date'] = date_str

utc = pytz.timezone('UTC')

# 결과 저장할 리스트
local_times = []

# 각 행에 대해 처리
for i in range(len(players_test)):
    utc_time_str = players_test.loc[i, 'date']  # UTC 시간 문자열
    league = players_test.loc[i, 'league']  # 리그 이름

    # UTC 시간을 datetime 객체로 변환
    utc_time = datetime.strptime(utc_time_str, '%Y-%m-%d %H:%M:%S')
    utc_time = utc.localize(utc_time)

    # 연도 추출
    year = utc_time.year

    # 리그의 시간대 가져오기
    if league in league_locations:
        if isinstance(league_locations[league], dict):  # MSI나 WLDs처럼 연도별로 다른 경우
            local_tz = pytz.timezone(league_locations[league].get(year, "UTC"))  # 연도 없으면 기본 UTC
        else:
            local_tz = pytz.timezone(league_locations[league])  # 일반 리그의 시간대
        local_time = utc_time.astimezone(local_tz)  # 로컬 시간대로 변환
        local_time_str = local_time.strftime('%Y-%m-%d %H:%M:%S')  # 문자열로 변환
    else:
        local_time_str = utc_time_str  # 시간대가 없으면 기본 UTC 사용

    # 결과 저장
    local_times.append(local_time_str)

# 변환된 시간 추가
players_test['local_time'] = local_times


In [None]:
for i in range(len(teams_test)):
    date_str = teams_test.loc[i, 'date']

    if len(date_str.split(' ')[1].split(':')[0]) == 1:
        date_str = date_str.replace(' ', ' 0', 1)

    if len(date_str.split(':')) == 2:
        date_str += ':00'

    teams_test.loc[i, 'date'] = date_str

utc = pytz.timezone('UTC')

# 결과 저장할 리스트
local_times = []

# 각 행에 대해 처리
for i in range(len(teams_test)):
    utc_time_str = teams_test.loc[i, 'date']  # UTC 시간 문자열
    league = teams_test.loc[i, 'league']  # 리그 이름

    # UTC 시간을 datetime 객체로 변환
    utc_time = datetime.strptime(utc_time_str, '%Y-%m-%d %H:%M:%S')
    utc_time = utc.localize(utc_time)

    # 연도 추출
    year = utc_time.year

    # 리그의 시간대 가져오기
    if league in league_locations:
        if isinstance(league_locations[league], dict):  # MSI나 WLDs처럼 연도별로 다른 경우
            local_tz = pytz.timezone(league_locations[league].get(year, "UTC"))  # 연도 없으면 기본 UTC
        else:
            local_tz = pytz.timezone(league_locations[league])  # 일반 리그의 시간대
        local_time = utc_time.astimezone(local_tz)  # 로컬 시간대로 변환
        local_time_str = local_time.strftime('%Y-%m-%d %H:%M:%S')  # 문자열로 변환
    else:
        local_time_str = utc_time_str  # 시간대가 없으면 기본 UTC 사용

    # 결과 저장
    local_times.append(local_time_str)

# 변환된 시간 추가
teams_test['local_time'] = local_times


In [None]:
for i in range(len(teams_train)):
    date_str = teams_train.loc[i, 'date']

    if len(date_str.split(' ')[1].split(':')[0]) == 1:
        date_str = date_str.replace(' ', ' 0', 1)

    if len(date_str.split(':')) == 2:
        date_str += ':00'

    teams_train.loc[i, 'date'] = date_str

utc = pytz.timezone('UTC')

# 결과 저장할 리스트
local_times = []

# 각 행에 대해 처리
for i in range(len(teams_train)):
    utc_time_str = teams_train.loc[i, 'date']  # UTC 시간 문자열
    league = teams_train.loc[i, 'league']  # 리그 이름

    # UTC 시간을 datetime 객체로 변환
    utc_time = datetime.strptime(utc_time_str, '%Y-%m-%d %H:%M:%S')
    utc_time = utc.localize(utc_time)

    # 연도 추출
    year = utc_time.year

    # 리그의 시간대 가져오기
    if league in league_locations:
        if isinstance(league_locations[league], dict):  # MSI나 WLDs처럼 연도별로 다른 경우
            local_tz = pytz.timezone(league_locations[league].get(year, "UTC"))  # 연도 없으면 기본 UTC
        else:
            local_tz = pytz.timezone(league_locations[league])  # 일반 리그의 시간대
        local_time = utc_time.astimezone(local_tz)  # 로컬 시간대로 변환
        local_time_str = local_time.strftime('%Y-%m-%d %H:%M:%S')  # 문자열로 변환
    else:
        local_time_str = utc_time_str  # 시간대가 없으면 기본 UTC 사용

    # 결과 저장
    local_times.append(local_time_str)

# 변환된 시간 추가
teams_train['local_time'] = local_times


In [None]:
players_train.head(2)

Unnamed: 0,gameid,league,split,playoffs,date,game,patch,side,position,playername,teamname,champion,ban1,ban2,ban3,ban4,ban5,gamelength,result,kills,deaths,assists,doublekills,triplekills,quadrakills,pentakills,firstblood,firstbloodkill,firstbloodassist,firstbloodvictim,team kpm,ckpm,barons,opp_barons,inhibitors,opp_inhibitors,damagetochampions,dpm,damageshare,damagetakenperminute,damagemitigatedperminute,wardsplaced,wpm,wardskilled,wcpm,controlwardsbought,visionscore,vspm,totalgold,earnedgold,earned gpm,earnedgoldshare,goldspent,total cs,minionkills,monsterkills,cspm,goldat10,xpat10,csat10,opp_goldat10,opp_xpat10,opp_csat10,golddiffat10,xpdiffat10,csdiffat10,killsat10,assistsat10,deathsat10,opp_killsat10,opp_assistsat10,opp_deathsat10,goldat15,xpat15,csat15,opp_goldat15,opp_xpat15,opp_csat15,golddiffat15,xpdiffat15,csdiffat15,killsat15,assistsat15,deathsat15,opp_killsat15,opp_assistsat15,opp_deathsat15
0,ESPORTSTMNT01_2700815,LCK,Spring,0,2022-01-12 6:20,1,12.01,Blue,top,Kingen,DRX,Graves,Diana,Caitlyn,Twisted Fate,LeBlanc,Viktor,2195,0,0,5,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1367,0.5194,0.0,0.0,0.0,1.0,18483,505.2301,0.369379,474.5877,645.1298,11,0.3007,5,0.1367,9,35,0.9567,13189,8435,230.5695,0.210996,12875,315.0,270.0,45,8.6105,3170.0,4619.0,83.0,3223.0,4640.0,89.0,-53.0,-21.0,-6.0,0.0,0.0,0.0,0.0,0.0,0.0,4763.0,6943.0,127.0,4965.0,7658.0,135.0,-202.0,-715.0,-8.0,0.0,0.0,1.0,0.0,1.0,0.0
1,ESPORTSTMNT01_2700815,LCK,Spring,0,2022-01-12 6:20,1,12.01,Blue,jng,Pyosik,DRX,Viego,Diana,Caitlyn,Twisted Fate,LeBlanc,Viktor,2195,0,1,3,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1367,0.5194,0.0,2.0,0.0,1.0,3799,103.8451,0.075922,640.0456,568.0182,13,0.3554,15,0.41,13,58,1.5854,10792,6038,165.0478,0.151037,10375,178.0,27.0,151,4.8656,3075.0,3131.0,63.0,3036.0,3180.0,64.0,39.0,-49.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,5107.0,5355.0,89.0,4867.0,5626.0,104.0,240.0,-271.0,-15.0,1.0,2.0,0.0,1.0,0.0,1.0


- start time / end time

In [None]:
#players_train['gamelength'] 얘 60초 단위

players_train['start_time'] = players_train['local_time']
players_train = players_train.drop(columns=['local_time'])


In [None]:
players_train.head(2)

In [None]:
players_train['gamelength'] = pd.to_numeric(players_train['gamelength'])
players_train['start_time'] = pd.to_datetime(players_train['start_time'])

players_train['end_time'] = players_train['start_time'] + pd.to_timedelta(players_train['gamelength'], unit='s')

In [None]:
players_train.head(2)

- 시간 정규화 > cos sin

In [None]:
# 시간을  0~23 범위로 정규화 > 시간 주기성
players_train['hour'] = players_train['start_time'].dt.hour
players_train['hour_sin'] = np.sin(2 * np.pi * players_train['hour'] / 24)
players_train['hour_cos'] = np.cos(2 * np.pi * players_train['hour'] / 24)


- 4시간 단위 그룹으로 승률 계산 > 흠 근데 이거 숫자가 다 0.5로 나와서 통계분석 ㄱㄱ했는데 무의미하게 나옴 ㅜㅜ 다시 검토 할 예정

In [None]:
# 4시간 단위 그룹으로 승률 계산
hour_group_winrate = (
    players_train.groupby(players_train['hour'] // 4)['result']
    .mean()
    .reset_index()
    .rename(columns={'result': 'win_rate', 'hour': 'hour_group'})
)

players_train['win_rate'] = players_train['hour'].apply(
    lambda x: hour_group_winrate.loc[hour_group_winrate['hour_group'] == x // 4, 'win_rate'].values[0]
)


In [None]:
from scipy.stats import f_oneway

# 통계분석
hourly_groups = [group['result'].values for _, group in players_train.groupby(players_train['hour'] // 4)]

# ANOVA 검정
f_stat, p_value = f_oneway(*hourly_groups)
print(f"F-statistic: {f_stat}, P-value: {p_value}")


In [None]:
players_train['start_time'] = pd.to_datetime(players_train['start_time'])
players_train['date'] = players_train['start_time'].dt.date

# 같은 날 경기 수
players_train['games_per_day'] = players_train.groupby(['playername', 'date'])['start_time'].transform('count')

# 연속 경기 시간 간격
players_train['prev_game_time'] = players_train.groupby('playername')['start_time'].shift(1)
players_train['time_since_last_game'] = (players_train['start_time'] - players_train['prev_game_time']).dt.total_seconds() / 3600

# 연속 경기 여부 (6시간 이내면 연속 경기로 ㄱㄱ)
players_train['is_back_to_back'] = (players_train['time_since_last_game'] <= 6).astype(int)

In [None]:
players_train['time_since_last_game'] = players_train['time_since_last_game'].fillna(0)
players_train.head(5)

Unnamed: 0,gameid,league,split,playoffs,date,game,patch,side,position,playername,teamname,champion,ban1,ban2,ban3,ban4,ban5,gamelength,result,kills,deaths,assists,doublekills,triplekills,quadrakills,pentakills,firstblood,firstbloodkill,firstbloodassist,firstbloodvictim,team kpm,ckpm,barons,opp_barons,inhibitors,opp_inhibitors,damagetochampions,dpm,damageshare,damagetakenperminute,damagemitigatedperminute,wardsplaced,wpm,wardskilled,wcpm,controlwardsbought,visionscore,vspm,totalgold,earnedgold,earned gpm,earnedgoldshare,goldspent,total cs,minionkills,monsterkills,cspm,goldat10,xpat10,csat10,opp_goldat10,opp_xpat10,opp_csat10,golddiffat10,xpdiffat10,csdiffat10,killsat10,assistsat10,deathsat10,opp_killsat10,opp_assistsat10,opp_deathsat10,goldat15,xpat15,csat15,opp_goldat15,opp_xpat15,opp_csat15,golddiffat15,xpdiffat15,csdiffat15,killsat15,assistsat15,deathsat15,opp_killsat15,opp_assistsat15,opp_deathsat15,start_time,end_time,hour,hour_sin,hour_cos,games_per_day,prev_game_time,time_since_last_game,is_back_to_back
0,ESPORTSTMNT01_2700815,LCK,Spring,0,2022-01-12,1,12.01,Blue,top,Kingen,DRX,Graves,Diana,Caitlyn,Twisted Fate,LeBlanc,Viktor,2195,0,0,5,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1367,0.5194,0.0,0.0,0.0,1.0,18483,505.2301,0.369379,474.5877,645.1298,11,0.3007,5,0.1367,9,35,0.9567,13189,8435,230.5695,0.210996,12875,315.0,270.0,45,8.6105,3170.0,4619.0,83.0,3223.0,4640.0,89.0,-53.0,-21.0,-6.0,0.0,0.0,0.0,0.0,0.0,0.0,4763.0,6943.0,127.0,4965.0,7658.0,135.0,-202.0,-715.0,-8.0,0.0,0.0,1.0,0.0,1.0,0.0,2022-01-12 15:20:00,2022-01-12 15:56:35,15,-0.707107,-0.707107,2,NaT,0.0,0
1,ESPORTSTMNT01_2700815,LCK,Spring,0,2022-01-12,1,12.01,Blue,jng,Pyosik,DRX,Viego,Diana,Caitlyn,Twisted Fate,LeBlanc,Viktor,2195,0,1,3,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1367,0.5194,0.0,2.0,0.0,1.0,3799,103.8451,0.075922,640.0456,568.0182,13,0.3554,15,0.41,13,58,1.5854,10792,6038,165.0478,0.151037,10375,178.0,27.0,151,4.8656,3075.0,3131.0,63.0,3036.0,3180.0,64.0,39.0,-49.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,5107.0,5355.0,89.0,4867.0,5626.0,104.0,240.0,-271.0,-15.0,1.0,2.0,0.0,1.0,0.0,1.0,2022-01-12 15:20:00,2022-01-12 15:56:35,15,-0.707107,-0.707107,2,NaT,0.0,0
2,ESPORTSTMNT01_2700815,LCK,Spring,0,2022-01-12,1,12.01,Blue,mid,Zeka,DRX,Ryze,Diana,Caitlyn,Twisted Fate,LeBlanc,Viktor,2195,0,0,2,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1367,0.5194,0.0,0.0,0.0,0.0,7658,209.3303,0.153044,490.6879,271.6811,19,0.5194,5,0.1367,7,39,1.0661,14226,9472,258.9157,0.236936,14075,351.0,304.0,47,9.5945,3361.0,4914.0,92.0,3326.0,4770.0,91.0,35.0,144.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,5589.0,7592.0,144.0,5151.0,7796.0,151.0,438.0,-204.0,-7.0,0.0,0.0,0.0,0.0,0.0,0.0,2022-01-12 15:20:00,2022-01-12 15:56:35,15,-0.707107,-0.707107,2,NaT,0.0,0
3,ESPORTSTMNT01_2700815,LCK,Spring,0,2022-01-12,1,12.01,Blue,bot,Deft,DRX,Aphelios,Diana,Caitlyn,Twisted Fate,LeBlanc,Viktor,2195,0,3,2,2,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.1367,0.5194,0.0,0.0,0.0,0.0,15003,410.1048,0.299832,302.7062,462.9431,20,0.5467,8,0.2187,5,58,1.5854,16249,11495,314.2141,0.287539,14825,352.0,314.0,38,9.6219,3259.0,3186.0,86.0,3092.0,3109.0,80.0,167.0,77.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,7171.0,5691.0,140.0,4322.0,4431.0,116.0,2849.0,1260.0,24.0,2.0,2.0,0.0,0.0,0.0,2.0,2022-01-12 15:20:00,2022-01-12 15:56:35,15,-0.707107,-0.707107,2,NaT,0.0,0
4,ESPORTSTMNT01_2700815,LCK,Spring,0,2022-01-12,1,12.01,Blue,sup,BeryL,DRX,Sona,Diana,Caitlyn,Twisted Fate,LeBlanc,Viktor,2195,0,1,2,4,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.1367,0.5194,0.0,0.0,0.0,0.0,5095,139.2711,0.101823,319.4897,231.0888,64,1.7494,24,0.656,21,115,3.1435,9291,4537,124.0182,0.113491,8375,36.0,30.0,6,0.9841,2256.0,2720.0,6.0,2163.0,2467.0,0.0,93.0,253.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,4568.0,4744.0,11.0,3136.0,3274.0,4.0,1432.0,1470.0,7.0,1.0,3.0,0.0,0.0,0.0,1.0,2022-01-12 15:20:00,2022-01-12 15:56:35,15,-0.707107,-0.707107,2,NaT,0.0,0


In [None]:
players_train = players_train.drop(columns=['prev_game_time'])

In [None]:
a = players_train['start_time'].dt.time
a.min(), a.max()

In [None]:
players_train_target = pd.read_csv(f"{DATA_PATH}players_train_target.csv")

In [88]:
players_train_target.head(1)

Unnamed: 0,gameid,playername,result
0,ESPORTSTMNT01_2700815,Kingen,0


- 오전 / 낮 / 오후 기준 승률 (아직 진행중인 코드)

In [87]:
players_train_target['Morning'] = (players_train_target['hour'] < 12).astype(int)
players_train_target['Afternoon'] = ((players_train_target['hour'] >= 12) & (players_train_target['hour'] < 18)).astype(int)
players_train_target['Evening'] = (players_train_target['hour'] >= 18).astype(int)

morning_winrate = players_train_target.loc[players_train_target['Morning'] == 1, 'result'].mean()
afternoon_winrate = players_train_target.loc[players_train_target['Afternoon'] == 1, 'result'].mean()
evening_winrate = players_train_target.loc[players_train_target['Evening'] == 1, 'result'].mean()

# 승률 결과 저장
time_period_winrate = pd.DataFrame({
    'time_period': ['Morning', 'Afternoon', 'Evening'],
    'win_rate': [morning_winrate, afternoon_winrate, evening_winrate]
})


KeyError: 'hour'