In [6]:
# loading and preparing the data
import json
import pandas as pd
from typing import List

json_path = 'dataset.json'
# JSON text is UTF-8; pass the encoding explicitly so the load does not depend
# on the platform's default locale encoding.
with open(json_path, 'r', encoding='utf-8') as file:
    data: List = json.load(file)

# remove 'hero_id' keys from every player on both teams
# pop() with a default (instead of `del`) tolerates a player record that has no
# 'hero_id' and keeps this step safe to repeat on already-cleaned records.
for d in data:
    for side in ('radiant', 'dire'):
        for p in d[side]:
            p.pop('hero_id', None)

# flatten then create a dataframe
# One row per match. Columns are '<side>_player_<n>_<field>' for every radiant
# player, then every dire player (n starts at 1), followed by the match-level
# fields. The column order matters: positional outputs such as
# `feature_importances_` follow it.
flattened_data = [
    {
        **{
            f'{side}_player_{position}_{key}': player[key]
            for side in ('radiant', 'dire')
            for position, player in enumerate(match[side], start=1)
            for key in player
        },
        'radiant_lineup': match['radiant_lineup'],
        'dire_lineup': match['dire_lineup'],
        'duration': match['duration'],
        'win': match['win']
    }
    for match in data
]

df = pd.DataFrame(flattened_data)

print(df.shape)

# Features are everything except the target and the two lineup columns.
# `columns=` already selects the axis, so no separate `axis` argument is needed.
# NOTE(review): `duration` stays in the feature set; if it (or any per-player
# field) is only known once a match has finished, the models are being given
# post-match information -- confirm against the dataset schema.
X = df.drop(columns=['win', 'radiant_lineup', 'dire_lineup'])
y = df['win']

(1737, 74)


In [7]:
# random forest
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# build a seeded 100-tree forest and fit it in one step (fit() returns the estimator)
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# mean accuracy on the held-out rows
accuracy = model.score(X_test, y_test)
print(f"Random Forest Accuracy: {accuracy:.2f}")

Random Forest Accuracy: 0.99


In [8]:
# svm
from sklearn import svm
from sklearn.preprocessing import StandardScaler

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# held-out rows influence the mean/variance used to transform the training
# rows, leaking test information into preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Learn the scaling statistics from the training rows only ...
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
# ... and reuse those same statistics for the held-out rows.
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the train-fitted statistics; kept so that any
# code referring to `svm_X` still finds it.
svm_X = scaler.transform(X)

# linear-kernel support vector classifier trained on the scaled training rows
svm_model = svm.SVC(kernel='linear')
svm_model.fit(X_train, y_train)

# mean accuracy on the scaled held-out rows
accuracy = svm_model.score(X_test, y_test)
print(f"SVM Accuracy: {accuracy:.2f}")

SVM Accuracy: 0.99


In [9]:
# decision tree
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics

# re-split the unscaled features: 20% held out, fixed seed for a repeatable split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# seeded tree, fitted in one step (fit() returns the estimator)
clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# predict the held-out rows and compare against the true labels
y_pred = clf.predict(X_test)
accuracy = metrics.accuracy_score(y_test, y_pred)
print(f"Decision Tree Accuracy: {accuracy:.2f}")

# one importance value per feature column, in the column order of X
print("Feature importances:", clf.feature_importances_)

Decision Tree Accuracy: 0.94
Feature importances: [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 6.30170553e-01 3.55534677e-03 6.52902835e-03
 0.00000000e+00 9.51746063e-04 0.00000000e+00 0.00000000e+00
 3.80542364e-02 0.00000000e+00 0.00000000e+00 2.85571287e-03
 0.00000000e+00 0.00000000e+00 0.00000000e+00 1.18489210e-01
 0.00000000e+00 0.00000000e+00 0.00000000e+00 1.44922365e-02
 0.00000000e+00 0.00000000e+00 3.66203160e-03 0.00000000e+00
 0.00000000e+00 0.00000000e+00 7.91397376e-03 1.93452692e-03
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 5.75148398e-04 1.93452692e-03
 6.65263244e-02 0.00000000e+00 0.00000000e+00 2.75508970e-03
 0.00000000e+00 0.00000000e+00 0.00000000e+00 1.04827177e-02
 0.00000000e+00 6.52902835e-03 0.00000000e+00 2.85175951e-03
 0.00000000e+00 0.00000000e+00 1.49120594e-02 0.00000000e+00
 0.00000000e+00 0.00000000e+00 1.28977166e-02 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00