In [7]:
# Add the notebook file to the staging area
!git add MarchePlz.ipynb

# Commit the file with a message
!git commit -m "Add Jupyter Notebook"




[master (root-commit) dffd937] Add Jupyter Notebook
 1 file changed, 1800 insertions(+)
 create mode 100644 MarchePlz.ipynb


In [8]:
# Add the remote repository URL
!git remote add origin https://github.com/nic01as1/HltvData.git

# Push the changes to GitHub
!git push -u origin master



branch 'master' set up to track 'origin/master'.


remote: 
remote: Create a pull request for 'master' on GitHub by visiting:        
remote:      https://github.com/nic01as1/HltvData/pull/new/master        
remote: 
To https://github.com/nic01as1/HltvData.git
 * [new branch]      master -> master


In [5]:
import os
print("Full path to the notebook: ", os.path.join(os.getcwd(), "MarchePlz.ipynb"))


Full path to the notebook:  C:\Users\Nic\MarchePlz.ipynb


In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
import warnings

In [2]:
# Ignore warnings
warnings.filterwarnings('ignore')

# Print the full path to the notebook
print("Full path to the notebook:", os.path.join(os.getcwd(), "MarchePlz.ipynb"))

# Load the dataset
url = 'https://raw.githubusercontent.com/nic01as1/HltvData/main/OMiT%20vs.%20Strife%20at%20ESL%20Challenger%20League%20Season%2047%20North%20America%20Relegation%20_%20HLTV.org.xlsx'
raw_data = pd.read_excel(url, engine='openpyxl')

# Drop rows with missing scores
raw_data = raw_data.dropna(subset=['Score Team1', 'Score Team2'])


Full path to the notebook: C:\Users\Nic\MarchePlz.ipynb


In [10]:

# Convert scores to numeric
raw_data['Score Team1'] = pd.to_numeric(raw_data['Score Team1'], errors='coerce')
raw_data['Score Team2'] = pd.to_numeric(raw_data['Score Team2'], errors='coerce')

# Create the 'Winning Team' column
raw_data['Winning Team'] = np.where(raw_data['Score Team1'] > raw_data['Score Team2'], raw_data['Team1'], raw_data['Team2'])

# Remove rows where scores are tied (draws)
raw_data = raw_data[raw_data['Score Team1'] != raw_data['Score Team2']]

# Add 'Round Difference'
raw_data['Round Difference'] = np.abs(raw_data['Score Team1'] - raw_data['Score Team2'])

# Calculate historical win rates
win_counts = {}
game_counts = {}

for _, row in raw_data.iterrows():
    team1, team2, winning_team = row['Team1'], row['Team2'], row['Winning Team']

    # Initialize dictionaries if needed
    if team1 not in win_counts:
        win_counts[team1] = {}
        game_counts[team1] = {}
    if team2 not in win_counts:
        win_counts[team2] = {}
        game_counts[team2] = {}

    # Update game counts
    game_counts[team1][team2] = game_counts[team1].get(team2, 0) + 1
    game_counts[team2][team1] = game_counts[team2].get(team1, 0) + 1

    # Update win counts
    if winning_team == team1:
        win_counts[team1][team2] = win_counts[team1].get(team2, 0) + 1
    else:
        win_counts[team2][team1] = win_counts[team2].get(team1, 0) + 1

# Add historical win rates to the dataset
raw_data['Historical_Win_Rate_Team1_vs_Team2'] = raw_data.apply(
    lambda row: win_counts[row['Team1']].get(row['Team2'], 0) / game_counts[row['Team1']].get(row['Team2'], 1),
    axis=1
)
raw_data['Historical_Win_Rate_Team2_vs_Team1'] = raw_data.apply(
    lambda row: win_counts[row['Team2']].get(row['Team1'], 0) / game_counts[row['Team2']].get(row['Team1'], 1),
    axis=1
)

# Calculate overall team win rates
team_wins = raw_data['Winning Team'].value_counts()
total_games = raw_data[['Team1', 'Team2']].melt(value_name='Team').value_counts('Team')
overall_win_rates = (team_wins / total_games).fillna(0).to_dict()
raw_data['Overall_Win_Rate_Team1'] = raw_data['Team1'].map(overall_win_rates)
raw_data['Overall_Win_Rate_Team2'] = raw_data['Team2'].map(overall_win_rates)

# Calculate win rates on the specific map
map_wins = raw_data.groupby(['Map', 'Winning Team']).size().unstack(fill_value=0)
melted_teams = raw_data.melt(id_vars=['Map'], value_vars=['Team1', 'Team2'], var_name='Team_Type', value_name='Team')
map_games = melted_teams.groupby(['Map', 'Team']).size().unstack(fill_value=0)
map_win_rates = (map_wins / map_games).fillna(0).stack().to_dict()
raw_data['Win_Rate_Team1_on_Map'] = raw_data.apply(lambda row: map_win_rates.get((row['Map'], row['Team1']), 0), axis=1)
raw_data['Win_Rate_Team2_on_Map'] = raw_data.apply(lambda row: map_win_rates.get((row['Map'], row['Team2']), 0), axis=1)


In [11]:
# One-hot encode the 'Map' column
encoder = OneHotEncoder(sparse_output=False)
encoded_maps = encoder.fit_transform(raw_data[['Map']])
encoded_columns = encoder.get_feature_names_out(['Map'])
encoded_df = pd.DataFrame(encoded_maps, columns=encoded_columns)

# Combine encoded maps with the main dataset
raw_data = pd.concat([raw_data.reset_index(drop=True), encoded_df], axis=1)

# Define features and target
features = list(encoded_columns) + [
    'Round Difference',
    'Historical_Win_Rate_Team1_vs_Team2',
    'Historical_Win_Rate_Team2_vs_Team1',
    'Overall_Win_Rate_Team1',
    'Overall_Win_Rate_Team2',
    'Win_Rate_Team1_on_Map',
    'Win_Rate_Team2_on_Map'
]
X = raw_data[features]
y = raw_data['Winning Team']


In [16]:

# Encode the target variable (Winning Team)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=1)

# Impute missing values in features
imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

# Train the Logistic Regression model
log_reg = LogisticRegression(random_state=1, max_iter=1000)
log_reg.fit(X_train_imputed, y_train)

# Predict probabilities for the test set
y_pred_probs = log_reg.predict_proba(X_test_imputed)


# Evaluate the model
accuracy = accuracy_score(y_test, log_reg.predict(X_test_imputed))
print(f"Accuracy: {accuracy:.2f}")

# Example input section
team_a = "Vitality"
team_b = "Astralis"
map_name = "Vertigo"

# Prepare input for prediction
example_input = pd.DataFrame(columns=X_train.columns)  # Ensure exact column match
example_input.loc[0] = 0  # Initialize all features to 0

# Fill in known values
example_input['Historical_Win_Rate_Team1_vs_Team2'] = win_counts.get(team_a, {}).get(team_b, 0) / game_counts.get(team_a, {}).get(team_b, 1)
example_input['Historical_Win_Rate_Team2_vs_Team1'] = win_counts.get(team_b, {}).get(team_a, 0) / game_counts.get(team_b, {}).get(team_a, 1)
example_input['Overall_Win_Rate_Team1'] = overall_win_rates.get(team_a, 0)
example_input['Overall_Win_Rate_Team2'] = overall_win_rates.get(team_b, 0)
example_input['Win_Rate_Team1_on_Map'] = map_win_rates.get((map_name, team_a), 0)
example_input['Win_Rate_Team2_on_Map'] = map_win_rates.get((map_name, team_b), 0)
example_input[f'Map_{map_name}'] = 1  # Set the selected map to 1

# Impute missing values in the input
example_input_imputed = imputer.transform(example_input)

# Predict probabilities
example_probabilities = log_reg.predict_proba(example_input_imputed)

# Extract probabilities for the two teams
team_a_index = label_encoder.transform([team_a])[0]
team_b_index = label_encoder.transform([team_b])[0]
team_a_prob = example_probabilities[0, team_a_index] * 100  # Convert to %
team_b_prob = example_probabilities[0, team_b_index] * 100  # Convert to %

print(f"Probability of {team_a} winning: {team_a_prob:.2f}%")
print(f"Probability of {team_b} winning: {team_b_prob:.2f}%")


Accuracy: 0.02
Probability of Vitality winning: 0.09%
Probability of Astralis winning: 0.12%
