In [1]:
import pandas as pd
import numpy as np
from utils import *
from data_processing import *

# Load the per-region CSV exports of the Champions Tour 2024 Kickoff events.
#
# Paths are built with forward slashes. The previous literals used backslashes
# inside ordinary (non-raw) strings, which (a) only resolve as directory
# separators on Windows and (b) rely on invalid escape sequences such as
# '\g', '\p' and '\e', which newer Python versions warn about.
def _read_kickoff_csv(kind, region):
    """Read one Kickoff CSV export into a DataFrame.

    Parameters
    ----------
    kind : str
        File family prefix: 'general', 'performance', 'economy' or 'pick_ban'.
    region : str
        Region slug used in the event name: 'pacific', 'americas' or 'emea'.

    Returns
    -------
    pandas.DataFrame
        Contents of '<event>_data/<kind>_data_<event>.csv', resolved relative
        to the current working directory.
    """
    event = f'champions-tour-2024-{region}-kickoff'
    return pd.read_csv(f'{event}_data/{kind}_data_{event}.csv')


# load general data
general_pacific = _read_kickoff_csv('general', 'pacific')
general_americas = _read_kickoff_csv('general', 'americas')
general_emea = _read_kickoff_csv('general', 'emea')

# performance data
performance_pacific = _read_kickoff_csv('performance', 'pacific')
performance_americas = _read_kickoff_csv('performance', 'americas')
performance_emea = _read_kickoff_csv('performance', 'emea')

# economic data
economy_pacific = _read_kickoff_csv('economy', 'pacific')
economy_americas = _read_kickoff_csv('economy', 'americas')
economy_emea = _read_kickoff_csv('economy', 'emea')

# picks and bans
pick_ban_pacific = _read_kickoff_csv('pick_ban', 'pacific')
pick_ban_americas = _read_kickoff_csv('pick_ban', 'americas')
pick_ban_emea = _read_kickoff_csv('pick_ban', 'emea')



<h1>Individual Data</h1>
<h2>General Data</h2>

In [2]:
# Derive the team-level general features for each region with the project helper.
# The calls run in the order Pacific, Americas, EMEA.
df_pacific_general, df_americas_general, df_emea_general = (
    general_feature_creation_for_teams(region_frame)
    for region_frame in (general_pacific, general_americas, general_emea)
)

In [3]:
df_general = create_dataframe(df_emea_general, df_americas_general, df_pacific_general)

# Features: every column except the last one.
# Target: the values of index level 0 (used as the region label).
# NOTE(review): the target is read from the index, not from the last column, so
# the slice below leaves the final column out of the features - confirm that
# this is intended.
X_general = df_general.iloc[:, :-1]
y_general = df_general.index.get_level_values(0)

<h3>Feature Selection & Visualization: General Data</h3>

In [4]:
# Run the project's selectKbest helper on the general features and region labels.
# No k is passed, so the helper's default applies (the recorded output lists ten
# column names).
# NOTE(review): presumably wraps scikit-learn's SelectKBest - confirm in the helper module.
k_best_feature_general = selectKbest(X_general, y_general)

Selected Features:
Index(['avrg_acs_per_team_atk', 'std_d_per_team', 'std_d_per_team_atk',
       'std_d_per_team_dfs', 'std_adr_per_team', 'std_hs%_per_team',
       'std_fk_per_team', 'avrg_fk_per_team_atk', 'std_fk_per_team_atk',
       'avrg_fk_per_team_dfs'],
      dtype='object')


In [5]:
# Pass the combined general frame and the selection result to the project's
# plotting helper; the recorded output reports that the plots were saved as PNG files.
# NOTE(review): the output location is decided inside the helper - confirm where the files land.
visualize_mean_feature_for_each_region(df_general,k_best_feature_general)

Plots saved as PNG files.


<h2>Performance Data</h2>

In [6]:
# Build the team-level performance features for each region. Every call pairs a
# region's performance export with the economy export of the same region.
# The calls run in the order EMEA, Americas, Pacific.
performance_feature_emea, performance_feature_americas, performance_feature_pacific = (
    performance_feature_creation_for_teams(performance_frame, economy_frame)
    for performance_frame, economy_frame in (
        (performance_emea, economy_emea),
        (performance_americas, economy_americas),
        (performance_pacific, economy_pacific),
    )
)

In [8]:
df_performance = create_dataframe(
    performance_feature_emea,
    performance_feature_americas,
    performance_feature_pacific,
)

# Features: every column except the last one.
# Target: the values of index level 0 (used as the region label).
X_performance = df_performance.iloc[:, :-1]
y_performance = df_performance.index.get_level_values(0)

In [13]:
# Run the project's selectKbest helper on the performance features, asking for
# k=14 (the recorded output lists fourteen column names), then hand the result to
# the plotting helper, which reports saving the plots as PNG files.
# NOTE(review): why 14 was chosen is not visible here - consider recording the rationale.
k_best_feature_performance = selectKbest(X_performance, y_performance, k=14)
visualize_mean_feature_for_each_region(df_performance,k_best_feature_performance)

Selected Features:
Index(['2k_mean', '2k_std', '3k_std', '4k_mean', '5k_mean', '5k_std',
       '1v1_std', '1v2_mean', '1v2_std', '1v3_std', '1v5_mean', '1v5_std',
       'econ_std', 'pl_mean'],
      dtype='object')
Plots saved as PNG files.


In [64]:
from sklearn.feature_selection import RFECV
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC

# Stack the three regional performance frames. The `keys` become the outer index
# level, which is read back below as the region label of every row.
df_concatenated = pd.concat(
    [performance_feature_emea, performance_feature_americas, performance_feature_pacific],
    keys=['EMEA', 'Americas', 'Pacific'],
)

# Features: every column except the last one. Target: region key from index level 0.
# NOTE(review): the labels come from the index, so the slice leaves the final
# column out of the features - confirm that this is intended.
X = df_concatenated.iloc[:, :-1]
y = df_concatenated.index.get_level_values(0)

# Recursive feature elimination with cross-validation: a linear-kernel SVC is the
# estimator, scored by accuracy over 5 stratified folds.
estimator = SVC(kernel="linear")
rfecv = RFECV(estimator=estimator, cv=StratifiedKFold(5), scoring='accuracy').fit(X, y)

# `support_` is used to index the feature columns, yielding the retained names.
selected_indices = rfecv.support_
selected_features = X.columns[selected_indices]

# Report the outcome.
print("Selected Features:")
print(selected_features)
print(f"Optimal number of features: {rfecv.n_features_}")

Selected Features:
Index(['econ_mean', 'econ_std', 'pl_mean', 'pl_std', 'de_mean'], dtype='object')
Optimal number of features: 5
