In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
# NOTE(review): the output saved with this cell is "ValueError: mount failed",
# and later cells read CSV files from under /content/drive; re-run and
# re-authorise this cell before continuing.
drive.mount('/content/drive')

ValueError: mount failed

In [None]:
# Folder on the mounted Drive; the CSV files read later in this notebook live
# under this same directory.
DATA_PATH = "/content/drive/MyDrive/data/"
# Bare expression so the notebook echoes the configured path.
DATA_PATH

'/content/drive/MyDrive/data/'

In [None]:
import pandas as pd
import numpy as np
import torch
from tqdm.auto import tqdm
import random
import os
from IPython.display import display

def reset_seeds(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch (CPU and
    every CUDA device), and configures cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Only inherited by subprocesses started afterwards; the hash seed of the
    # already-running interpreter cannot be changed from inside the process.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Seed all GPUs, not just the current one. Safe to call without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # cuDNN auto-tuning can select different kernels from run to run, so it is
    # disabled explicitly in case something enabled it earlier.
    torch.backends.cudnn.benchmark = False

# Load the feature and target CSVs for both splits. Paths are built from
# DATA_PATH (defined in an earlier cell) so the data folder is configured in
# exactly one place instead of being repeated in every path literal.
test = pd.read_csv(os.path.join(DATA_PATH, 'test_1226.csv'))
train = pd.read_csv(os.path.join(DATA_PATH, 'train_1226.csv'))
train_target = pd.read_csv(os.path.join(DATA_PATH, 'train_target_1226.csv'))
test_target = pd.read_csv(os.path.join(DATA_PATH, 'test_target_1226.csv'))

In [None]:
# Show (rows, columns) for both splits as a quick sanity check after loading.
train.shape, test.shape

((266040, 18), (67380, 18))

In [None]:
import matplotlib
from matplotlib import font_manager

# Install the Nanum font package and refresh the system font cache
# (shell commands executed through the notebook's `!` escape).
!sudo apt-get install -y fonts-nanum
!sudo fc-cache -fv

# Collect the font files found under the Nanum directory.
fontpaths = ["/usr/share/fonts/truetype/nanum/"]
font_files = font_manager.findSystemFonts(fontpaths=fontpaths)

# Register each discovered font file with matplotlib's font manager.
for ff in font_files:
    font_manager.fontManager.addfont(ff)

# Make NanumGothic the default font family (presumably so Korean labels render
# in plots).
matplotlib.rc('font', family="NanumGothic")
# Do not use the Unicode minus glyph on axes.
matplotlib.rcParams['axes.unicode_minus'] = False

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following NEW packages will be installed:
  fonts-nanum
0 upgraded, 1 newly installed, 0 to remove and 49 not upgraded.
Need to get 10.3 MB of archives.
After this operation, 34.1 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 fonts-nanum all 20200506-1 [10.3 MB]
Fetched 10.3 MB in 1s (7,060 kB/s)
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 78, <> line 1.)
debconf: falling back to frontend: Readline
debconf: unable to initialize frontend: Readline
debconf: (This frontend requires a controlling tty.)
debconf: falling back to frontend: Teletype
dpkg-preconfigure: unable to re-open stdin: 
Selecting previously unselected package fonts-nanum.
(Reading database ... 123634 files and dire

# 리그 및 연도

In [None]:
# League and year feature example.
# Encode `league` with ONE shared category list so a given league maps to the
# same integer in both splits. Encoding each frame on its own assigns codes
# from that frame's sorted unique values, which diverge as soon as train and
# test do not contain exactly the same set of leagues.
league_dtype = pd.CategoricalDtype(
    categories=train['league'].astype('category').cat.categories
)
train['league_encoded'] = train['league'].astype(league_dtype).cat.codes
# Leagues that never occur in train (and missing values) are encoded as -1.
test['league_encoded'] = test['league'].astype(league_dtype).cat.codes

# NOTE(review): this labels a row 'Spring' when `year` <= 6. If `year` holds
# calendar years, every row becomes 'Summer'; the threshold looks like it was
# meant for a month value. Confirm which column should be used here.
train['season_category'] = train['year'].apply(lambda x: 'Spring' if x <= 6 else 'Summer')
test['season_category'] = test['year'].apply(lambda x: 'Spring' if x <= 6 else 'Summer')

In [None]:
# Draft-phase column groups used below.
ban_columns = ['ban1', 'ban2', 'ban3', 'ban4', 'ban5']
pick_columns = ['pick1', 'pick2', 'pick3', 'pick4', 'pick5']

# Keep only the rows in which at least one pick column is filled in.
valid_pick_rows_train = train.dropna(subset=pick_columns, how='all')
valid_pick_rows_test = test.dropna(subset=pick_columns, how='all')

# Reshape the train bans and picks into long format (one champion per row,
# patch kept alongside, `type` holding the source column name) and discard
# the empty slots.
champion_data_filtered_train = (
    valid_pick_rows_train
    .melt(
        id_vars=['patch'],
        value_vars=ban_columns + pick_columns,
        var_name='type',
        value_name='champion',
    )
    .dropna(subset=['champion'])
)

# Number of appearances (bans and picks together) per patch and champion.
champion_counts_filtered_train = (
    champion_data_filtered_train
    .groupby(['patch', 'champion'])
    .size()
    .reset_index(name='count')
)

# Order by patch ascending, then by count descending within each patch.
top_champions_by_patch_filtered_train = champion_counts_filtered_train.sort_values(
    by=['patch', 'count'],
    ascending=[True, False],
)

display(top_champions_by_patch_filtered_train)


Unnamed: 0,patch,champion,count
123,12.01,Xin Zhao,464
11,12.01,Caitlyn,421
38,12.01,Jinx,411
15,12.01,Corki,393
103,12.01,Thresh,377
...,...,...,...
5493,14.05,Shaco,1
5503,14.05,Taric,1
5508,14.05,Twitch,1
5514,14.05,Vex,1


In [None]:
# Rows of train that have at least one pick recorded.
pick_columns = ['pick1', 'pick2', 'pick3', 'pick4', 'pick5']
valid_pick_rows_train = train.dropna(subset=pick_columns, how='all')

# Long-format picks: one row per (player, patch, pick slot), with the empty
# slots removed. Only pick columns are used here, not bans.
player_champion_data = valid_pick_rows_train.melt(
    id_vars=['playerid', 'patch'],
    value_vars=pick_columns,
    var_name='pick_position',
    value_name='champion',
)
player_champion_data = player_champion_data.dropna(subset=['champion'])

# Pick frequency per (player, champion).
# NOTE(review): the output saved with this cell shows this table as empty.
# groupby() drops NaN keys by default, so presumably `playerid` is missing on
# every row that carries pick data -- confirm against the raw data.
player_champion_counts = (
    player_champion_data
    .groupby(['playerid', 'champion'])
    .size()
    .reset_index(name='pick_count')
)

# Pick frequency per (patch, champion).
patch_champion_counts = (
    player_champion_data
    .groupby(['patch', 'champion'])
    .size()
    .reset_index(name='pick_count')
)

# Simple proficiency score: each pick count divided by the largest pick count
# in the whole table.
highest_pick_count = player_champion_counts['pick_count'].max()
player_champion_counts['proficiency_score'] = (
    player_champion_counts['pick_count'] / highest_pick_count
)

# Show both tables.
print("Player Champion Pick Counts:")
print(player_champion_counts)

print("\nPatch Champion Pick Counts:")
print(patch_champion_counts)


Player Champion Pick Counts:
Empty DataFrame
Columns: [playerid, champion, pick_count, proficiency_score]
Index: []

Patch Champion Pick Counts:
      patch  champion  pick_count
0     12.01    Aatrox          22
1     12.01     Akali          99
2     12.01    Akshan          23
3     12.01   Alistar          30
4     12.01  Aphelios         211
...     ...       ...         ...
5273  14.05       Zac           5
5274  14.05      Zeri         156
5275  14.05     Ziggs          19
5276  14.05    Zilean           3
5277  14.05      Zyra           1

[5278 rows x 3 columns]


In [None]:
from itertools import combinations
import pandas as pd

# 1. Build champion pairs from the picks of one row
def generate_champion_combinations(row):
    """Return every unordered pair of champions picked in ``row``.

    Picks are sorted before pairing so that the same two champions always
    produce the same tuple regardless of the slot they were picked in;
    without sorting, ('A', 'B') and ('B', 'A') are counted as different pairs.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with keys
            'pick1' .. 'pick5'; missing picks are skipped.

    Returns:
        List of 2-tuples, each tuple in ascending order. Empty when fewer
        than two picks are present.
    """
    picks = sorted(
        row[f'pick{i}'] for i in range(1, 6) if pd.notna(row[f'pick{i}'])
    )
    return list(combinations(picks, 2))

# Build the list of champion pairs for every row of train.
train['champion_combinations'] = train.apply(generate_champion_combinations, axis=1)

# 2. Count how often each pair occurs.
# explode() turns an empty pair list into a single NaN row. Those rows are
# dropped before the string conversion; otherwise astype(str) turns them into
# the literal 'nan', which is then counted as if it were a champion pair.
synergy_data = (
    train
    .explode('champion_combinations')
    .dropna(subset=['champion_combinations'])
    # Tuples are stored as their string form, e.g. "('A', 'B')".
    .assign(champion_combinations=lambda frame: frame['champion_combinations'].astype(str))
)
combination_counts = (
    synergy_data['champion_combinations']
    .value_counts()
    .rename_axis('champion_combinations')
    .reset_index(name='pick_count')
)

# 3. Team synergy score
def _pair_count_lookup(combination_counts):
    """Return a ``{pair string: pick count}`` mapping for ``combination_counts``.

    A DataFrame with 'champion_combinations' and 'pick_count' columns is
    converted to a dict; anything else is assumed to already be such a mapping
    and is returned unchanged.
    """
    if isinstance(combination_counts, pd.DataFrame):
        # Keep the first row per pair, which is what the `.values[0]` lookup
        # on the filtered frame used to return.
        unique_rows = combination_counts.drop_duplicates(subset='champion_combinations')
        return dict(zip(unique_rows['champion_combinations'], unique_rows['pick_count']))
    return combination_counts


def calculate_team_synergy(combinations, combination_counts):
    """Average the pick counts of the champion pairs of one team.

    Args:
        combinations: Iterable of champion pairs (tuples). Note that this
            parameter name shadows ``itertools.combinations`` inside the
            function; it is kept for backward compatibility.
        combination_counts: Either the counts DataFrame (columns
            'champion_combinations' and 'pick_count') or a prebuilt
            ``{str(pair): count}`` mapping. Pass the mapping when calling
            this once per row so the table is not re-scanned for every pair.

    Returns:
        Mean pick count over the pairs found in the table, or 0 when none of
        the pairs is present (including when ``combinations`` is empty).
    """
    lookup = _pair_count_lookup(combination_counts)
    # Pairs are keyed by their string form, matching how the counts were built.
    pair_keys = [str(combo) for combo in combinations]
    synergy_scores = [lookup[key] for key in pair_keys if key in lookup]
    return sum(synergy_scores) / len(synergy_scores) if synergy_scores else 0


# Compute the synergy score of every row. The lookup is built once here so
# each pair costs a dict access instead of two scans over the counts table.
pair_pick_counts = _pair_count_lookup(combination_counts)
train['team_synergy'] = train['champion_combinations'].apply(
    lambda x: calculate_team_synergy(x, pair_pick_counts)
)

# 4. Show the result (rows without any pair get a score of 0).
print(train[['team_synergy']].head())


   team_synergy
0           0.0
1           0.0
2           0.0
3           0.0
4           0.0
