In [6]:
# Initialize a new Git repository
!git init



Initialized empty Git repository in C:/Users/Nic/.git/


In [5]:
import os
print("Full path to the notebook: ", os.path.join(os.getcwd(), "MarchePlz.ipynb"))


Full path to the notebook:  C:\Users\Nic\MarchePlz.ipynb


In [40]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.tree import DecisionTreeClassifier, export_graphviz
import graphviz
from sklearn import tree
import warnings

# Ignore warnings
warnings.filterwarnings('ignore')

In [41]:

# Load the dataset
url = 'https://raw.githubusercontent.com/nic01as1/HltvData/main/OMiT%20vs.%20Strife%20at%20ESL%20Challenger%20League%20Season%2047%20North%20America%20Relegation%20_%20HLTV.org.xlsx'
raw_data = pd.read_excel(url, engine='openpyxl')

# Drop rows with missing scores
raw_data = raw_data.dropna(subset=['Score Team1', 'Score Team2'])


In [44]:

raw_data['Score Team1'] = pd.to_numeric(raw_data['Score Team1'], errors='coerce')
raw_data['Score Team2'] = pd.to_numeric(raw_data['Score Team2'], errors='coerce')

# Create the 'Winning Team' and 'Losing Team' columns
raw_data['Winning Team'] = np.where(raw_data['Score Team1'] > raw_data['Score Team2'], raw_data['Team1'],
                                    np.where(raw_data['Score Team1'] < raw_data['Score Team2'], raw_data['Team2'], 'Draw'))
raw_data['Losing Team'] = np.where(raw_data['Score Team1'] < raw_data['Score Team2'], raw_data['Team1'],
                                   np.where(raw_data['Score Team1'] > raw_data['Score Team2'], raw_data['Team2'], 'Draw'))

# Remove rows where the game was a draw
raw_data = raw_data[raw_data['Winning Team'] != 'Draw'].copy()

# Calculate win ratios
win_counts = raw_data.groupby(['Map', 'Winning Team']).size().reset_index(name='Wins')
team_games = pd.melt(raw_data, id_vars=['Map'], value_vars=['Team1', 'Team2'], var_name='Team', value_name='TeamName')
total_games = team_games.groupby(['Map', 'TeamName']).size().reset_index(name='Total Games')
win_ratios = pd.merge(win_counts, total_games, left_on=['Map', 'Winning Team'], right_on=['Map', 'TeamName'])
win_ratios['Win Ratio'] = (win_ratios['Wins'] / win_ratios['Total Games']) * 100
win_ratios.rename(columns={'Winning Team': 'Team', 'Win Ratio': 'Win_Ratio'}, inplace=True)
raw_data = pd.merge(raw_data, win_ratios[['Map', 'Team', 'Win_Ratio']], 
                    left_on=['Map', 'Winning Team'], 
                    right_on=['Map', 'Team'],
                    how='left').rename(columns={'Win_Ratio': 'Win_Ratio_Winning'})
raw_data = pd.merge(raw_data, win_ratios[['Map', 'Team', 'Win_Ratio']], 
                    left_on=['Map', 'Losing Team'], 
                    right_on=['Map', 'Team'],
                    how='left').rename(columns={'Win_Ratio': 'Win_Ratio_Losing'})
raw_data.drop(columns=['Team_x', 'Team_y'], inplace=True)

# Calculate the difference in rounds
raw_data['Round_Difference'] = abs(raw_data['Score Team1'] - raw_data['Score Team2']).fillna(0)
raw_data['Above_13_Rounds'] = np.where((raw_data['Score Team1'] >= 14) | (raw_data['Score Team2'] >= 14), 1, 0)



In [45]:
# Encode categorical variables using OneHotEncoder
encoder = OneHotEncoder(sparse_output=False)
encoded_teams = encoder.fit_transform(raw_data[['Team1', 'Team2', 'Map']])
encoded_columns = encoder.get_feature_names_out(['Team1', 'Team2', 'Map'])
encoded_df = pd.DataFrame(encoded_teams, columns=encoded_columns)

# Combine encoded features with the original dataset
raw_data = pd.concat([raw_data.reset_index(drop=True), encoded_df], axis=1)

# Ensure numerical features are included in the final dataset
features = list(encoded_columns) + ['Round_Difference', 'Win_Ratio_Winning', 'Win_Ratio_Losing']
X = raw_data[features]
y = raw_data['Winning Team']

In [46]:
print(raw_data.head(50))

        Map              Team1  Score Team1              Team2  Score Team2  \
0   Vertigo               OMiT         13.0             Strife          2.0   
1    Mirage               OMiT          8.0             Strife         13.0   
2   Inferno               OMiT         13.0             Strife          9.0   
3      Nuke         RED Canids         11.0                KOI         13.0   
4   Vertigo               paiN         10.0  Ninjas in Pyjamas         13.0   
5    Mirage             Legacy         13.0             Akimbo          3.0   
6   Ancient             Legacy         13.0             Akimbo          2.0   
7   Inferno           Imperial         11.0                KOI         13.0   
8   Vertigo              Fluxo         16.0             Sharks         19.0   
9   Inferno              Fluxo         13.0             Sharks          9.0   
10   Mirage              Fluxo         13.0             Sharks          3.0   
11   Mirage         Take Flyte          3.0         

In [48]:
# Encode the target variable
le_team = LabelEncoder()
y = le_team.fit_transform(y)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Train the Decision Tree model
dt = DecisionTreeClassifier(criterion='entropy', max_depth=2, random_state=1)
dt.fit(X_train, y_train)


In [65]:

# Calculate and display feature importances
final_fi = pd.DataFrame()
for i, column in enumerate(X.columns):
    print('Importance of feature {}:, {:.3f}'.format(column, dt.feature_importances_[i]))
    
    fi = pd.DataFrame({'Variable': [column], 'Feature Importance Score': [dt.feature_importances_[i]]})
    
    try:
        final_fi = pd.concat([final_fi, fi], ignore_index=True)
    except NameError:
        final_fi = fi

# Ordering the data
final_fi = final_fi.sort_values('Feature Importance Score', ascending=False).reset_index(drop=True)

# Display the final feature importance DataFrame
print(final_fi)

Importance of feature Team1_-72c:, 0.000
Importance of feature Team1_00NATION:, 0.000
Importance of feature Team1_15 Average:, 0.000
Importance of feature Team1_1WIN:, 0.000
Importance of feature Team1_1WIN Academy:, 0.000
Importance of feature Team1_1WIN Gang:, 0.000
Importance of feature Team1_2 Dollar Spaghetti:, 0.000
Importance of feature Team1_2GAME:, 0.000
Importance of feature Team1_2ez:, 0.000
Importance of feature Team1_30 Seconds To Win:, 0.000
Importance of feature Team1_3DMAX:, 0.000
Importance of feature Team1_4Point5:, 0.000
Importance of feature Team1_500:, 0.000
Importance of feature Team1_5W:, 0.000
Importance of feature Team1_777:, 0.000
Importance of feature Team1_7AM:, 0.000
Importance of feature Team1_9 Pandas:, 0.000
Importance of feature Team1_9INE:, 0.000
Importance of feature Team1_9z:, 0.000
Importance of feature Team1_ABT:, 0.000
Importance of feature Team1_ACME:, 0.000
Importance of feature Team1_ADEPTS:, 0.000
Importance of feature Team1_AIRLYA:, 0.000
Imp