Each row of the dataset is a single game with the following features (in the order in the vector):

- Team won the game (1 or -1)
- Cluster ID (related to location)
- Game mode (eg All Pick)
- Game type (eg. Ranked)
- till end: Each element is an indicator for a hero.
Value of 1 indicates that a player from team '1' played as that hero and '-1' for the other team.
Hero can be selected by only one player each game. This means that each row has five '1' and five '-1' values.

We don't need these columns in futher development, so we dropped them:
- Cluster ID – represents the region of the game.
- Game mode – indicates the mode (e.g., All Pick, Captains Mode).
- Game type – ranked or unranked.


In [1]:
import xgboost as xgb
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
import joblib
from tabulate import tabulate
import json

In [8]:

# Load the dataset (replace with your actual file path)
dataset_path = "./dota2Train.csv"
df = pd.read_csv(dataset_path)

with open("./heroes.json", "r") as f:
    hero_data = json.load(f)
    hero_id_to_name = {str(hero["id"]): hero["localized_name"] for hero in hero_data["heroes"]}
    
# Define column names

non_hero_columns = ['winner', 'cluster_id', 'game_mode', 'game_type']
num_heroes = df.shape[1] - len(non_hero_columns)
hero_ids = list(range(1, num_heroes + 1))
hero_names = [hero_id_to_name.get(str(hero_id), f"Unknown_Hero_{hero_id}") for hero_id in hero_ids]
hero_columns = [f'hero_{i}' for i in range(num_heroes)]
df.columns = ['winner', 'cluster_id', 'game_mode', 'game_type'] + hero_names

# Drop unnecessary columns
df = df.drop(columns=['cluster_id', 'game_mode', 'game_type'])

# Display dataset with better formatting
print("\n===== Dota 2 Training Dataset Overview =====\n")
print(tabulate(df.head(), headers='keys', tablefmt='fancy_grid'))


===== Dota 2 Training Dataset Overview =====

╒════╤══════════╤═════════════╤═══════╤════════╤═══════════════╤══════════════════╤═══════════════╤═══════════════╤══════════════╤══════════╤═════════════╤════════════════╤══════════════════╤════════╤═════════╤═════════╤═════════════╤════════════════╤════════╤════════╤═══════════════════╤══════════════╤════════╤══════════╤═══════════════════╤════════╤════════╤═════════════════╤═══════════╤══════════════╤════════════════╤════════╤════════╤══════════╤══════════╤══════════╤═════════════╤═══════════╤═══════════════╤═════════════════╤══════════════╤═════════════════╤═════════════════╤═════════════════╤════════════════════╤═════════╤════════════════════╤═════════╤════════╤═════════════════╤══════════╤═════════════╤═══════════╤════════════════════╤═══════════════╤═════════════╤══════════╤══════════════╤═══════════════╤══════════╤═════════════════╤═══════════════╤═════════════════╤══════════╤══════════╤════════════╤════════╤═══════════╤═══════════

In [None]:
# **Ensure valid drafts (5 heroes per team)**
def filter_valid_drafts(df):
    team_1_heroes = df.iloc[:, 1:].apply(lambda row: (row == 1).sum(), axis=1)
    team_2_heroes = df.iloc[:, 1:].apply(lambda row: (row == -1).sum(), axis=1)
    return df[(team_1_heroes == 5) & (team_2_heroes == 5)]

df = filter_valid_drafts(df)

# **Generate Training Data for Draft Stage**
draft_samples = []
labels = []

for _, row in df.iterrows():
    ally_picks = []
    enemy_picks = []

    for hero in hero_columns:
        if row[hero] == 1:
            ally_picks.append(hero)
        elif row[hero] == -1:
            enemy_picks.append(hero)

    for i in range(len(ally_picks)):  # Create different draft states
        current_state = {hero: 0 for hero in hero_columns}
        for picked_hero in ally_picks[:i]:
            current_state[picked_hero] = 1
        for picked_hero in enemy_picks:
            current_state[picked_hero] = -1
        draft_samples.append(list(current_state.values()))
        labels.append(hero_columns.index(ally_picks[i]))  # Next hero to pick

# **Convert to DataFrame**
X = pd.DataFrame(draft_samples, columns=hero_columns)
y = np.array(labels)

# **Encode hero labels**
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# **Split data**
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# **Train XGBoost Model**
model = xgb.XGBClassifier(
    objective='multi:softprob',
    num_class=len(label_encoder.classes_),
    eval_metric='mlogloss',
    tree_method="hist"  # Faster training
)
model.fit(X_train, y_train)

# **Evaluate Model**
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Model Accuracy: {accuracy:.4f}')

# **Save Model & Label Encoder**
joblib.dump(model, "xgboost_dota_draft_model.pkl")
joblib.dump(label_encoder, "label_encoder.pkl")
print("✅ Model training completed and saved!")


In [9]:
# **Load Model & Label Encoder**
model = joblib.load("xgboost_dota_draft_model.pkl")
label_encoder = joblib.load("label_encoder.pkl")

# **Function to Recommend Top 3 Heroes**
def recommend_next_heroes(current_picks, enemy_picks, top_n=3):
    """
    Given the current draft state (ally picks) and enemy picks,
    predict the best next heroes considering counter picks.
    """
    if len(current_picks) >= 5:  # Only check ally picks limit
        return "Draft complete: No more heroes can be picked."
    
    draft_state = {hero: 0 for hero in hero_columns}
    for hero in current_picks:
        hero_col = f'hero_{hero}'
        if hero_col in draft_state:
            draft_state[hero_col] = 1
    for hero in enemy_picks:
        hero_col = f'hero_{hero}'
        if hero_col in draft_state:
            draft_state[hero_col] = -1
    
    draft_array = np.array([list(draft_state.values())])
    hero_probs = model.predict_proba(draft_array)[0]
    sorted_heroes = np.argsort(hero_probs)[::-1]  # Sort heroes by probability
    
    recommended_heroes = []
    for recommended_hero in sorted_heroes:
        real_hero = label_encoder.inverse_transform([recommended_hero])[0]  # Convert back to original hero ID
        if real_hero not in current_picks and real_hero not in enemy_picks:
            hero_name = hero_id_to_name.get(str(real_hero), f"Hero_{real_hero}")
            recommended_heroes.append(hero_name)
            if len(recommended_heroes) == top_n:
                break

    if recommended_heroes:
        return f"Recommended Heroes: {recommended_heroes}"
    return "No available heroes can be recommended."




In [10]:
# **Example Usage**
sample_ally_picks = [45, 8, 5, 9]  # Example ally picks
sample_enemy_picks = [4, 34, 67, 13]  # Example enemy picks

print("Testing Hero Recommendation with Ally and Enemy Picks...")
print("Ally Picks:", [hero_id_to_name.get(str(h), f"Hero_{h}") for h in sample_ally_picks])
print("Enemy Picks:", [hero_id_to_name.get(str(h), f"Hero_{h}") for h in sample_enemy_picks])
print(recommend_next_heroes(sample_ally_picks, sample_enemy_picks))

Testing Hero Recommendation with Ally and Enemy Picks...
Ally Picks: ['Pugna', 'Juggernaut', 'Crystal Maiden', 'Mirana']
Enemy Picks: ['Bloodseeker', 'Tinker', 'Spectre', 'Puck']
Recommended Heroes: ['Broodmother', 'Gyrocopter', "Nature's Prophet"]
