In [None]:
# Mount Google Drive at /content/drive so the files referenced through
# DATA_PATH are reachable. The google.colab import exists only inside Colab.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
import torch
from tqdm.auto import tqdm
import random
import os

def reset_seeds(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU plus every CUDA device), and configures cuDNN for deterministic
    behaviour.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    # Hash randomisation of the running interpreter is fixed at start-up, so
    # this value is only picked up by processes launched afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Cover all visible GPUs, not just the current one; harmless without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Autotuning may select different kernels between runs, so switch it off.
    torch.backends.cudnn.benchmark = False

In [None]:
# Folder on the mounted Drive that every read_csv / to_csv call in this
# notebook is prefixed with (note the trailing slash).
DATA_PATH = "/content/drive/MyDrive/LoL dataset/"
# Shared random seed for the notebook.
SEED = 42

# device = 'cuda' if torch.cuda.is_available() else 'cpu'
# device

In [None]:
# Load the raw 2024 match export.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, so a column cannot end up holding a mix of parsed
# types (the situation pandas reports with a DtypeWarning).
data2024 = pd.read_csv(
    f'{DATA_PATH}2024_LoL_esports_match_data_from_OraclesElixir.csv',
    low_memory=False,
)

  data2024 = pd.read_csv(f'{DATA_PATH}2024_LoL_esports_match_data_from_OraclesElixir.csv')


In [None]:
# Print the index range, column count, dtype summary and memory usage of the raw 2024 frame.
data2024.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 117552 entries, 0 to 117551
Columns: 161 entries, gameid to opp_deathsat25
dtypes: float64(118), int64(20), object(23)
memory usage: 144.4+ MB


In [None]:
# Lift pandas' display truncation so every column and every row is rendered.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

In [None]:
# Preview the first five rows of the 2024 frame.
data2024.head(5)

In [None]:
# Columns removed from the 2024 frame: 'datacompleteness', 'url' and every
# stat recorded at the 20- and 25-minute marks.
columns_to_drop = [
    'datacompleteness', 'url',
    # 20-minute checkpoint
    'goldat20', 'xpat20', 'csat20',
    'opp_goldat20', 'opp_xpat20', 'opp_csat20',
    'golddiffat20', 'xpdiffat20', 'csdiffat20',
    'killsat20', 'assistsat20', 'deathsat20',
    'opp_killsat20', 'opp_assistsat20', 'opp_deathsat20',
    # 25-minute checkpoint
    'goldat25', 'xpat25', 'csat25',
    'opp_goldat25', 'opp_xpat25', 'opp_csat25',
    'golddiffat25', 'xpdiffat25', 'csdiffat25',
    'killsat25', 'assistsat25', 'deathsat25',
    'opp_killsat25', 'opp_assistsat25', 'opp_deathsat25',
]
# Rebinds data2024 to a copy without those columns; raises KeyError if any is absent.
data2024 = data2024.drop(labels=columns_to_drop, axis='columns')

In [None]:
# Columns whose missing values are replaced with 0.
columns_to_zero = [
    'doublekills', 'triplekills', 'quadrakills', 'pentakills',
    'firstblood', 'firstbloodkill', 'firstbloodassist', 'firstbloodvictim',
]

# Numeric columns whose missing values are replaced with a per-player mean.
# The grouping comments below are only a reading aid; list order is unchanged.
columns_to_mean = [
    # dragons
    'firstdragon', 'dragons', 'opp_dragons', 'elementaldrakes', 'opp_elementaldrakes',
    'infernals', 'mountains', 'clouds', 'oceans', 'chemtechs', 'hextechs',
    'dragons (type unknown)', 'elders', 'opp_elders',
    # heralds, void grubs, barons
    'firstherald', 'heralds', 'opp_heralds', 'void_grubs', 'opp_void_grubs',
    'firstbaron', 'barons', 'opp_barons',
    # towers, plates, inhibitors
    'firsttower', 'towers', 'opp_towers', 'firstmidtower', 'firsttothreetowers',
    'turretplates', 'opp_turretplates', 'inhibitors', 'opp_inhibitors',
    # shares and farming
    'damageshare', 'damagemitigatedperminute', 'earnedgoldshare', 'gspd', 'gpr',
    'total cs', 'minionkills', 'monsterkillsownjungle', 'monsterkillsenemyjungle', 'cspm',
    # 10-minute checkpoint
    'goldat10', 'xpat10', 'csat10',
    'opp_goldat10', 'opp_xpat10', 'opp_csat10',
    'golddiffat10', 'xpdiffat10', 'csdiffat10',
    'killsat10', 'assistsat10', 'deathsat10',
    'opp_killsat10', 'opp_assistsat10', 'opp_deathsat10',
    # 15-minute checkpoint
    'goldat15', 'xpat15', 'csat15',
    'opp_goldat15', 'opp_xpat15', 'opp_csat15',
    'golddiffat15', 'xpdiffat15', 'csdiffat15',
    'killsat15', 'assistsat15', 'deathsat15',
    'opp_killsat15', 'opp_assistsat15', 'opp_deathsat15',
]

# Columns whose missing values are replaced with the literal 'Unknown'.
columns_to_unknown = [
    'split', 'patch',
    'playername', 'playerid', 'teamid', 'champion',
    'ban1', 'ban2', 'ban3', 'ban4', 'ban5',
    'pick1', 'pick2', 'pick3', 'pick4', 'pick5',
]

In [None]:
# Replace missing values in each column listed in columns_to_unknown with the
# literal string 'Unknown'.
data2024 = data2024.assign(
    **{column: data2024[column].fillna('Unknown') for column in columns_to_unknown}
)

In [None]:
# Replace missing values in each column listed in columns_to_zero with 0.
zero_filled = {column: data2024[column].fillna(0) for column in columns_to_zero}
data2024 = data2024.assign(**zero_filled)

In [None]:
# Impute the remaining numeric gaps per player, touching only missing cells.
#   * rows with a real playerid: NaN -> that player's mean of the column
#   * rows with the 'Unknown' placeholder id: NaN -> 0 (no pooled mean)
#   * anything still missing (e.g. a player with no observed value): NaN -> 0
# The earlier version assigned 0 to *every* value in the 'Unknown' rows, which
# also erased values that were actually present in those rows.
unknown_player = data2024['playerid'] == 'Unknown'
# One groupby for all columns instead of repeating it inside the loop.
player_means = data2024.groupby('playerid')[columns_to_mean].transform('mean')
for col in columns_to_mean:
    fill_values = player_means[col].mask(unknown_player, 0)
    data2024[col] = data2024[col].fillna(fill_values).fillna(0)


In [None]:
# Inspect the first 50 rows of the 2024 frame after the fill steps.
data2024.head(50)

In [None]:
# Total number of missing cells left in the 2024 frame (summed over all columns).
data2024.isnull().sum().sum()

In [None]:
# Columns removed from every pre-2024 frame.
columns_to_drop_from2017 = ['datacompleteness', 'url']


def _load_legacy_year(year):
    """Read one yearly export and drop the columns in ``columns_to_drop_from2017``."""
    # NOTE(review): assumes the older exports follow the same file-name pattern
    # as the 2024 file — confirm against the actual files under DATA_PATH.
    raw = pd.read_csv(
        f'{DATA_PATH}{year}_LoL_esports_match_data_from_OraclesElixir.csv',
        low_memory=False,
    )
    return raw.drop(columns=columns_to_drop_from2017)


# data2017..data2023 used to be referenced here without being loaded anywhere
# in the notebook, so this cell raised NameError on a fresh kernel and KeyError
# when re-run. Loading and trimming in one step makes the cell self-contained
# and repeatable.
data2023 = _load_legacy_year(2023)
data2022 = _load_legacy_year(2022)
data2021 = _load_legacy_year(2021)
data2020 = _load_legacy_year(2020)
data2019 = _load_legacy_year(2019)
data2018 = _load_legacy_year(2018)
data2017 = _load_legacy_year(2017)


In [None]:
# Total number of missing cells in the 2023 frame.
data2023.isnull().sum().sum()

In [None]:
# Total number of missing cells in the 2022 frame.
data2022.isnull().sum().sum()

In [None]:
# Missing-value count for each column of the 2022 frame.
data2022.isnull().sum()

In [None]:
# Missing-cell totals for the 2021 down to 2017 frames, displayed as one tuple.
# (A stray leading comma used to make this cell a SyntaxError.)
(
    data2021.isnull().sum().sum(),
    data2020.isnull().sum().sum(),
    data2019.isnull().sum().sum(),
    data2018.isnull().sum().sum(),
    data2017.isnull().sum().sum(),
)

In [None]:
# Preview the first 20 rows of the 2021 frame.
data2021.head(20)

In [None]:
# Numeric columns mean-imputed in the pre-2024 frames. This rebinds
# columns_to_mean: the first 71 names match the earlier list and 15 more
# columns are appended. Grouping comments are only a reading aid; list order
# is unchanged.
columns_to_mean = [
    # dragons
    'firstdragon', 'dragons', 'opp_dragons', 'elementaldrakes', 'opp_elementaldrakes',
    'infernals', 'mountains', 'clouds', 'oceans', 'chemtechs', 'hextechs',
    'dragons (type unknown)', 'elders', 'opp_elders',
    # heralds, void grubs, barons
    'firstherald', 'heralds', 'opp_heralds', 'void_grubs', 'opp_void_grubs',
    'firstbaron', 'barons', 'opp_barons',
    # towers, plates, inhibitors
    'firsttower', 'towers', 'opp_towers', 'firstmidtower', 'firsttothreetowers',
    'turretplates', 'opp_turretplates', 'inhibitors', 'opp_inhibitors',
    # shares and farming
    'damageshare', 'damagemitigatedperminute', 'earnedgoldshare', 'gspd', 'gpr',
    'total cs', 'minionkills', 'monsterkillsownjungle', 'monsterkillsenemyjungle', 'cspm',
    # 10-minute checkpoint
    'goldat10', 'xpat10', 'csat10',
    'opp_goldat10', 'opp_xpat10', 'opp_csat10',
    'golddiffat10', 'xpdiffat10', 'csdiffat10',
    'killsat10', 'assistsat10', 'deathsat10',
    'opp_killsat10', 'opp_assistsat10', 'opp_deathsat10',
    # 15-minute checkpoint
    'goldat15', 'xpat15', 'csat15',
    'opp_goldat15', 'opp_xpat15', 'opp_csat15',
    'golddiffat15', 'xpdiffat15', 'csdiffat15',
    'killsat15', 'assistsat15', 'deathsat15',
    'opp_killsat15', 'opp_assistsat15', 'opp_deathsat15',
    # additional columns: damage, vision and gold totals
    'damagetochampions', 'dpm', 'damagetakenperminute',
    'wardsplaced', 'wpm', 'wardskilled', 'wcpm', 'controlwardsbought',
    'visionscore', 'vspm',
    'earnedgold', 'earned gpm', 'goldspent', 'monsterkills', 'totalgold',
]

In [None]:
# Columns filled with 'Unknown' in the pre-2024 frames. This rebinds
# columns_to_unknown: the earlier 16 names plus 'gameid', 'game' and 'teamname'.
columns_to_unknown = [
    'split', 'patch',
    'playername', 'playerid', 'teamid', 'champion',
    'ban1', 'ban2', 'ban3', 'ban4', 'ban5',
    'pick1', 'pick2', 'pick3', 'pick4', 'pick5',
    'gameid', 'game', 'teamname',
]

In [None]:
# Mean-impute the numeric gaps of every pre-2024 frame, not only 2017: all of
# them are concatenated into final_data further down, so each one needs the
# same treatment on a straight top-to-bottom run.
# Only missing cells are touched:
#   * rows with a real playerid: NaN -> that player's mean of the column
#   * rows with the 'Unknown' placeholder id: NaN -> 0 (no pooled mean)
#   * anything still missing (including rows whose playerid is NaN): NaN -> 0
# Because observed values are never overwritten, re-running the cell is a no-op.
for season in (data2017, data2018, data2019, data2020, data2021, data2022, data2023):
    unknown_player = season['playerid'] == 'Unknown'
    # One groupby per frame instead of one per column.
    player_means = season.groupby('playerid')[columns_to_mean].transform('mean')
    for col in columns_to_mean:
        fill_values = player_means[col].mask(unknown_player, 0)
        season[col] = season[col].fillna(fill_values).fillna(0)

In [None]:
# Fill the gaps of the columns in columns_to_unknown with 'Unknown' in every
# pre-2024 frame, not only 2017, so that a straight top-to-bottom run leaves no
# frame untreated before they are saved and concatenated.
# NOTE(review): 'game' and 'patch' later appear among the object-typed columns;
# if they are numeric in the raw files, writing the string 'Unknown' into them
# is what converts them — confirm that is intended.
for season in (data2017, data2018, data2019, data2020, data2021, data2022, data2023):
    for column in columns_to_unknown:
        season[column] = season[column].fillna('Unknown')

In [None]:
# Total number of missing cells left in the 2017 frame after the fill steps.
data2017.isnull().sum().sum()

In [None]:
# Total number of missing cells left in the 2024 frame.
data2024.isnull().sum().sum()

In [None]:
# Total number of missing cells left in the 2023 frame.
data2023.isnull().sum().sum()

In [None]:
# Write the 2024 and 2023 frames to CSV under DATA_PATH without the index
# column. The Korean file-name prefix means "missing values removed".
for frame, destination in (
    (data2024, f'{DATA_PATH}결측치제거_data2024.csv'),
    (data2023, f'{DATA_PATH}결측치제거_data2023.csv'),
):
    frame.to_csv(destination, index=False)


In [None]:
# Write the 2021 and 2022 frames to CSV under DATA_PATH without the index
# column. The Korean file-name prefix means "missing values removed".
for frame, destination in (
    (data2021, f'{DATA_PATH}결측치제거_data2021.csv'),
    (data2022, f'{DATA_PATH}결측치제거_data2022.csv'),
):
    frame.to_csv(destination, index=False)

In [None]:
# Write the 2020 and 2019 frames to CSV under DATA_PATH without the index
# column. The Korean file-name prefix means "missing values removed".
for frame, destination in (
    (data2020, f'{DATA_PATH}결측치제거_data2020.csv'),
    (data2019, f'{DATA_PATH}결측치제거_data2019.csv'),
):
    frame.to_csv(destination, index=False)

In [None]:
# Write the 2018 and 2017 frames to CSV under DATA_PATH without the index
# column. The Korean file-name prefix means "missing values removed".
for frame, destination in (
    (data2018, f'{DATA_PATH}결측치제거_data2018.csv'),
    (data2017, f'{DATA_PATH}결측치제거_data2017.csv'),
):
    frame.to_csv(destination, index=False)

In [None]:
# Reload the cleaned per-year CSVs written by the cells above.
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk. With chunked inference a column that holds both
# numbers and the 'Unknown' placeholder can come back with a mix of numeric and
# string values, which would later be treated as distinct categories.
data2024 = pd.read_csv(f'{DATA_PATH}결측치제거_data2024.csv', low_memory=False)
data2023 = pd.read_csv(f'{DATA_PATH}결측치제거_data2023.csv', low_memory=False)
data2022 = pd.read_csv(f'{DATA_PATH}결측치제거_data2022.csv', low_memory=False)
data2021 = pd.read_csv(f'{DATA_PATH}결측치제거_data2021.csv', low_memory=False)
data2020 = pd.read_csv(f'{DATA_PATH}결측치제거_data2020.csv', low_memory=False)
data2019 = pd.read_csv(f'{DATA_PATH}결측치제거_data2019.csv', low_memory=False)
data2018 = pd.read_csv(f'{DATA_PATH}결측치제거_data2018.csv', low_memory=False)
data2017 = pd.read_csv(f'{DATA_PATH}결측치제거_data2017.csv', low_memory=False)

  data2024 = pd.read_csv(f'{DATA_PATH}결측치제거_data2024.csv')
  data2023 = pd.read_csv(f'{DATA_PATH}결측치제거_data2023.csv')
  data2022 = pd.read_csv(f'{DATA_PATH}결측치제거_data2022.csv')
  data2021 = pd.read_csv(f'{DATA_PATH}결측치제거_data2021.csv')
  data2017 = pd.read_csv(f'{DATA_PATH}결측치제거_data2017.csv')


In [None]:
# Column counts of the 2024 through 2019 frames, shown side by side as a tuple
# (2018 and 2017 are not included in this check).
len(data2024.columns),len(data2023.columns),len(data2022.columns),len(data2021.columns),len(data2020.columns),len(data2019.columns)

In [None]:
# Stack the eight yearly frames (2024 first, 2017 last) row-wise into a single
# table with a fresh 0..n-1 index, then write it to final_data.csv.
data_list = [data2024, data2023, data2022, data2021,
             data2020, data2019, data2018, data2017]

final_data = pd.concat(data_list, axis='index', ignore_index=True)
final_path = f'{DATA_PATH}final_data.csv'
final_data.to_csv(final_path, index=False)


In [None]:
# Reload the combined table. low_memory=False makes pandas infer each column's
# dtype from the whole file instead of chunk by chunk, so a column cannot come
# back holding a mix of numeric and string values.
final_data = pd.read_csv(f'{DATA_PATH}final_data.csv', low_memory=False)

  final_data = pd.read_csv(f'{DATA_PATH}final_data.csv')


In [None]:
# (rows, columns) of the combined table.
final_data.shape

(905124, 129)

In [None]:
# Total number of missing cells in the combined table.
final_data.isnull().sum().sum()

0

In [None]:
from sklearn.model_selection import train_test_split

# Split the combined table into train / test sets (80% / 20%).
# The shuffle is driven by the notebook-wide SEED constant instead of a
# separately hard-coded number, so there is a single place to change it.
# NOTE(review): rows are assigned independently; if several rows share a
# gameid they can land on both sides of the split — consider a group-aware
# splitter if that would leak the outcome.
train_data, test_data = train_test_split(final_data, test_size=0.2, random_state=SEED)

train_data.shape, test_data.shape


((724099, 129), (181025, 129))

In [None]:
# Preview the first rows of the combined table.
final_data.head()

In [None]:
# target 분리
# Separate the 'result' label from the remaining feature columns for both splits.
train_target = train_data['result']
train = train_data.drop(labels=['result'], axis='columns')

test_target = test_data['result']
test = test_data.drop(labels=['result'], axis='columns')

# Shapes in the order: train features, train target, test features, test target.
print(train.shape, train_target.shape, test.shape, test_target.shape)


(724099, 128) (724099,) (181025, 128) (181025,)


In [None]:
# Train Features와 Target 저장
# Persist features and targets of both splits as index-free CSVs under DATA_PATH
# (train features, train target, test features, test target, in that order).
split_outputs = (
    (train, f'{DATA_PATH}train_features.csv'),
    (train_target, f'{DATA_PATH}train_target.csv'),
    (test, f'{DATA_PATH}test_features.csv'),
    (test_target, f'{DATA_PATH}test_target.csv'),
)
for frame, destination in split_outputs:
    frame.to_csv(destination, index=False)

# The message reads "Train/Test data has been saved!".
print("Train/Test 데이터가 저장되었습니다!")


Train/Test 데이터가 저장되었습니다!


In [None]:
# Names of the object-typed columns in the training features, as a plain list.
object_frame = train.select_dtypes(include="object")
cols = list(object_frame.columns)

In [None]:
# Display the list of object-typed column names.
cols

['gameid',
 'league',
 'split',
 'date',
 'game',
 'patch',
 'side',
 'position',
 'playername',
 'playerid',
 'teamname',
 'teamid',
 'champion',
 'ban1',
 'ban2',
 'ban3',
 'ban4',
 'ban5',
 'pick1',
 'pick2',
 'pick3',
 'pick4',
 'pick5']

In [None]:
%pip install category_encoders


Collecting category_encoders
  Downloading category_encoders-2.6.4-py2.py3-none-any.whl.metadata (8.0 kB)
Downloading category_encoders-2.6.4-py2.py3-none-any.whl (82 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.0/82.0 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: category_encoders
Successfully installed category_encoders-2.6.4


In [None]:
import category_encoders as ce

In [None]:
# One-hot encode the object-typed columns. The encoder is fitted on the training
# split only and then applied unchanged to the test split; the encoded columns
# are appended to the right of the existing feature columns.
# NOTE(review): the original string columns remain in train_ft / test_ft next to
# their encoded copies, and cols includes identifier-like fields such as gameid,
# date and playername — confirm the resulting width and memory use are intended.
enc = ce.OneHotEncoder()
train_ft = pd.concat([train, enc.fit_transform(train[cols])], axis=1)
test_ft = pd.concat([test, enc.transform(test[cols])], axis=1)

train_ft.shape, test_ft.shape

