In [10]:
import pandas as pd
import numpy as np
from spektral.data import Dataset, Graph

# Notebook-wide pandas options
pd.options.display.max_columns = None      # never truncate columns when displaying wide frames
pd.options.mode.chained_assignment = None  # switch off the chained-assignment warning (default='warn')

In [3]:
# Hero feature table, re-indexed by hero_id so rows can be looked up per hero
df_features = pd.read_csv('../data/features.csv').set_index('hero_id')

# Combined match data: df_raw stays as read from disk, df is the working copy
df_raw = pd.read_csv('../data/combined.csv')
df = df_raw.copy()

In [11]:
# Snippet of df: first rows of the combined match data
# (match metadata, radiant_win, heroN_pick and heroN_slot columns)
df.head()

Unnamed: 0,match_id,match_seq_num,radiant_win,start_time,duration,avg_mmr,num_mmr,lobby_type,game_mode,avg_rank_tier,num_rank_tier,cluster,hero0_pick,hero1_pick,hero2_pick,hero3_pick,hero4_pick,hero5_pick,hero6_pick,hero7_pick,hero8_pick,hero9_pick,hero0_slot,hero1_slot,hero2_slot,hero3_slot,hero4_slot,hero5_slot,hero6_slot,hero7_slot,hero8_slot,hero9_slot
0,6447015200,5388383445,True,1645660804,2502,3667.0,3.0,7,22,47,6,172,94.0,87.0,40.0,71.0,22.0,128.0,48.0,57.0,4.0,137.0,0.0,1.0,2.0,3.0,4.0,128.0,129.0,130.0,131.0,132.0
1,6447015219,5388383380,False,1645660804,2487,2360.0,1.0,7,22,32,6,236,137.0,136.0,63.0,75.0,12.0,36.0,94.0,76.0,14.0,5.0,0.0,1.0,2.0,3.0,4.0,128.0,129.0,130.0,131.0,132.0
2,6447015314,5388392814,False,1645660809,3611,3992.0,2.0,7,22,52,5,182,137.0,32.0,7.0,79.0,74.0,1.0,47.0,22.0,14.0,8.0,0.0,1.0,2.0,3.0,4.0,128.0,129.0,130.0,131.0,132.0
3,6447015315,5388383693,True,1645660809,2511,2698.0,1.0,7,22,33,5,184,113.0,23.0,50.0,59.0,119.0,11.0,84.0,137.0,19.0,53.0,0.0,1.0,2.0,3.0,4.0,128.0,129.0,130.0,131.0,132.0
4,6447015300,5388382266,False,1645660809,2431,3300.0,1.0,0,4,23,3,273,25.0,30.0,70.0,4.0,7.0,88.0,1.0,68.0,2.0,96.0,0.0,1.0,2.0,3.0,4.0,128.0,129.0,130.0,131.0,132.0


In [74]:
# Snippet of df_features: first rows of the hero feature table, indexed by hero_id
df_features.head()

Unnamed: 0_level_0,hero,attack_type,primary_attribute,strength,strength_rate,agility,agility_rate,intelligence,intelligence_rate,movement_speed,armor,damage_min,damage_max,range,attack_speed,base_attack_time,attack_point,attack_backswing,vision_day,vision_night,turn_rate,collision_size,health_regen
hero_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
102,Abaddon,Melee,Strength,22,2.8,23,1.5,18,2.0,325,2.83,50,60,150,120,1.7,0.56,0.41,1800,800,0.6,24,3.2
73,Alchemist,Melee,Strength,25,2.9,22,1.5,25,1.8,305,2.67,52,58,150,100,1.7,0.35,0.65,1800,800,0.6,24,2.75
68,Ancient Apparition,Ranged,Intelligence,20,1.9,20,2.2,23,3.4,285,2.33,44,54,675,100,1.7,0.45,0.3,1800,800,0.6,24,2.25
1,Anti-Mage,Melee,Agilitiy,23,1.6,24,2.8,12,1.8,310,4.0,53,57,150,100,1.4,0.3,0.6,1800,800,0.6,24,2.55
113,Arc Warden,Ranged,Agilitiy,22,2.6,20,2.5,24,2.6,285,2.33,47,57,625,100,1.7,0.3,0.7,1800,800,0.7,24,2.45


In [115]:
class DotaDataset(Dataset):
    '''Dataset of team graphs built from match rows.

    Every match row contributes two graphs: one for the radiant team and one
    for the dire team. All radiant graphs come first, followed by all dire
    graphs, each group in the row order of the input dataframe.
    '''
    def __init__(self, df_combined: pd.DataFrame, features: pd.DataFrame,  **kwargs):
        '''Generates a list of Graph objects
        df_combined: match result (radiant_win), hero picks (hero0_pick ...), hero slots (hero0_slot...) [dataframe]
        features: all hero ids (hero_id), desired features (feature1,2,3,etc., name not important) [dataframe]
        kwargs: forwarded unchanged to the parent Dataset constructor
        '''
        # One pass per team keeps the original ordering (radiant block, then dire block)
        # without repeating the comprehension for each team.
        self.graphs = [
            self.get_graph(index, match, features, team)
            for team in ('radiant', 'dire')
            for index, match in df_combined.iterrows()
        ]
        # NOTE(review): graphs are assigned before calling the parent constructor,
        # presumably because the parent calls read() during init - confirm against spektral.
        super().__init__(**kwargs)

    def get_graph(self, index, match, features, team):
        '''Generates a single graph based on a single match

        index: row label of the match; used only for the progress message and
               must support `index + 1` (i.e. be numeric)
        match: one row of the combined dataframe [series]
        features: hero feature table indexed by hero id [dataframe]
        team: 'radiant' or 'dire'

        Returns a Graph with
          x: one row of features per hero of the team (float),
          a: all-ones adjacency matrix (fully connected, including self loops),
             sized to the number of rows in x,
          y: 1.0 if the selected team won the match, else 0.0.
        Raises ValueError if team is neither 'radiant' nor 'dire'.
        '''
        # Status
        if (index+1)%1000==0:
            print(f'Graph {team} {index+1}')

        # Reduce match row to the entries hero0_slot, ..., hero9_slot
        slots = match[[f'hero{i}_slot' for i in range(0,10)]]

        # Based on team: filter the slots and derive the label from radiant_win
        if team=='radiant':
            slots = slots[slots<5] # radiant slots 0,1,2,3,4
            y = float(match['radiant_win'])
        elif team=='dire':
            slots = slots[slots>127] # dire slots 128,129,130,131,132
            y = 1-float(match['radiant_win'])
        else:
            raise ValueError('Incorrect team specified in "get_graph" matchod. Use "radiant" or "dire"')

        # Map each selected slot column to its pick column (heroN_slot -> heroN_pick).
        # Replacing the suffix (rather than slicing a fixed number of characters)
        # also works for column names with more than one digit.
        pick_columns = [slot_column.replace('_slot', '_pick') for slot_column in slots.index]
        heroes = match[pick_columns].values

        # Create feature matrix: look up each hero and drop the first three feature columns
        # (in the features preview of this notebook these are hero, attack_type, primary_attribute)
        x = features.loc[heroes].iloc[:,3:]
        x = x.to_numpy(dtype='float')

        # Adjacency matrix: sized from x so both always describe the same number of nodes,
        # even when a match row yields a number of heroes other than five
        n_nodes = x.shape[0]
        a = np.ones([n_nodes, n_nodes], dtype='float32')

        g = Graph(x=x, a=a, y=y)
        return g

    def read(self):
        '''Returns the list of graphs built in __init__'''
        return self.graphs

In [None]:
# Generate DotaDataset for 100000 matches at a time
count = 0
total = len(df)
step = 100000

# NOTE(review): the original cell ended mid-statement (unclosed range(...), no loop body).
# The body below reconstructs the intent stated in the comment above - confirm before a full run,
# since it builds graphs for every match in df.
datasets = []
# int(...) is required: np.ceil returns a float and range() only accepts integers
for i in range(0, int(np.ceil(total/step))):
    df_chunk = df.iloc[i*step:(i+1)*step]
    datasets.append(DotaDataset(df_chunk, df_features))
    count += len(df_chunk)  # number of matches processed so far

In [116]:
# Small working sample: an independent copy of the first 2000 match rows
df_test = df.iloc[0:2000].copy()

In [117]:
# Build the dataset from the 2000-match sample; get_graph prints a status line
# every 1000 graphs, first for the radiant pass and then for the dire pass
d = DotaDataset(df_test, df_features)

Graph radiant 1000
Graph radiant 2000
Graph dire 1000
Graph dire 2000


In [118]:
# Feature matrix of the graph at position 2000. With 2000 matches the graph list holds
# radiant graphs first, so this is presumably the first dire graph - confirm that
# indexing the dataset maps directly onto that list.
d[2000].x

array([[2.00e+01, 3.50e+00, 1.60e+01, 1.90e+00, 2.10e+01, 2.20e+00,
        3.00e+02, 3.67e+00, 4.60e+01, 5.20e+01, 5.00e+02, 1.00e+02,
        1.60e+00, 3.50e-01, 1.23e+00, 1.80e+03, 8.00e+02, 6.00e-01,
        2.40e+01, 2.75e+00],
       [2.10e+01, 2.20e+00, 2.40e+01, 3.40e+00, 2.30e+01, 1.90e+00,
        3.25e+02, 6.00e+00, 5.00e+01, 5.60e+01, 3.30e+02, 1.00e+02,
        1.70e+00, 3.50e-01, 5.40e-01, 1.80e+03, 8.00e+02, 6.00e-01,
        2.40e+01, 2.60e+00],
       [2.30e+01, 3.10e+00, 1.50e+01, 2.00e+00, 1.50e+01, 1.80e+00,
        3.15e+02, 4.50e+00, 5.40e+01, 6.40e+01, 1.50e+02, 1.00e+02,
        1.70e+00, 4.33e-01, 5.67e-01, 1.80e+03, 8.00e+02, 6.00e-01,
        2.40e+01, 2.55e+00],
       [2.40e+01, 2.70e+00, 2.20e+01, 3.10e+00, 1.70e+01, 2.00e+00,
        3.00e+02, 5.67e+00, 5.70e+01, 6.30e+01, 1.50e+02, 1.00e+02,
        1.70e+00, 4.30e-01, 7.40e-01, 1.80e+03, 8.00e+02, 6.00e-01,
        2.40e+01, 2.65e+00],
       [2.60e+01, 4.30e+00, 1.50e+01, 1.00e+00, 1.60e+01, 1.40e+00,


In [114]:
# First match row, to inspect the heroN_pick / heroN_slot layout
df.iloc[0]

match_id         6447015200
match_seq_num    5388383445
radiant_win            True
start_time       1645660804
duration               2502
avg_mmr              3667.0
num_mmr                 3.0
lobby_type                7
game_mode                22
avg_rank_tier            47
num_rank_tier             6
cluster                 172
hero0_pick             94.0
hero1_pick             87.0
hero2_pick             40.0
hero3_pick             71.0
hero4_pick             22.0
hero5_pick            128.0
hero6_pick             48.0
hero7_pick             57.0
hero8_pick              4.0
hero9_pick            137.0
hero0_slot              0.0
hero1_slot              1.0
hero2_slot              2.0
hero3_slot              3.0
hero4_slot              4.0
hero5_slot            128.0
hero6_slot            129.0
hero7_slot            130.0
hero8_slot            131.0
hero9_slot            132.0
Name: 0, dtype: object

In [71]:
# Scratch re-run of the hero selection logic for a single match row
team = 'dire'
match = df.iloc[0]

# Keep only the ten slot entries hero0_slot ... hero9_slot of the row
slots = match.loc[['hero' + str(i) + '_slot' for i in range(10)]]

# Reject unknown teams first, then filter the slots and derive the label
if team not in ('radiant', 'dire'):
    raise ValueError('Incorrect team specified in "get_graph" matchod. Use "radiant" or "dire"')
elif team == 'dire':
    slots = slots[slots > 127]  # dire slots 128,129,130,131,132
    y = 1 - float(match['radiant_win'])
else:
    slots = slots[slots < 5]  # radiant slots 0,1,2,3,4
    y = float(match['radiant_win'])

# Translate the remaining slot columns into their pick columns and read the hero ids
pick_columns = [slot_name[:5] + '_pick' for slot_name in slots.index]
heroes = match[pick_columns].to_numpy()

In [72]:
# Show the hero ids selected by the previous cell.
# NOTE(review): the saved output lists the hero0..hero4 picks of the first match although
# the previous cell sets team = 'dire' - the output looks stale; re-run to confirm.
heroes

array([94.0, 87.0, 40.0, 71.0, 22.0], dtype=object)

In [83]:
# Look up the feature rows of the selected heroes and drop the first four feature columns.
# NOTE(review): DotaDataset.get_graph slices with 3: (keeps `strength`), this cell uses 4:
# (the output below starts at `strength_rate`) - confirm which offset is intended.
x = df_features.loc[heroes].iloc[:,4:]

In [84]:
# Display the selected feature rows (one row per hero id in `heroes`)
x

Unnamed: 0_level_0,strength_rate,agility,agility_rate,intelligence,intelligence_rate,movement_speed,armor,damage_min,damage_max,range,attack_speed,base_attack_time,attack_point,attack_backswing,vision_day,vision_night,turn_rate,collision_size,health_regen
hero_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
94,1.5,22,3.4,23,3.4,275,2.67,48,54,600,100,1.7,0.5,0.6,1800,800,0.6,24,1.95
87,2.4,15,1.4,20,2.9,295,2.5,49,53,625,100,1.7,0.4,0.5,1800,800,0.6,24,2.35
40,2.1,24,2.8,19,1.8,280,4.0,42,45,450,115,1.7,0.3,0.7,1800,800,0.6,24,2.15
71,3.0,17,1.7,14,1.8,295,4.83,59,69,150,100,1.9,0.6,0.3,1800,800,0.6,24,4.05
22,2.1,11,1.2,22,3.3,315,3.83,55,63,380,100,1.7,0.35,0.55,1800,800,0.6,24,2.15


In [49]:
# Slot entries (hero0_slot ... hero9_slot) of the first match row.
# Built directly from df so the cell runs on a fresh kernel and can be re-run safely;
# the saved output (Name: 0) matches this row.
test = df.iloc[0][[f'hero{i}_slot' for i in range(0,10)]]
test

hero0_slot      0.0
hero1_slot      1.0
hero2_slot      2.0
hero3_slot      3.0
hero4_slot      4.0
hero5_slot    128.0
hero6_slot    129.0
hero7_slot    130.0
hero8_slot    131.0
hero9_slot    132.0
Name: 0, dtype: object

In [50]:
# Boolean mask over the slot entries: True where the slot value is above 127
# (hero5_slot ... hero9_slot in the output below)
test>127

hero0_slot    False
hero1_slot    False
hero2_slot    False
hero3_slot    False
hero4_slot    False
hero5_slot     True
hero6_slot     True
hero7_slot     True
hero8_slot     True
hero9_slot     True
Name: 0, dtype: bool

In [33]:
# Keep only the entries with slot value below 5 (hero0_slot ... hero4_slot in the output below).
# Overwrites `test`, so later cells see the filtered series.
test = test[test<5]
test

hero0_slot    0.0
hero1_slot    1.0
hero2_slot    2.0
hero3_slot    3.0
hero4_slot    4.0
Name: 0, dtype: object

In [45]:
# Turn each remaining slot label into its pick column name:
# the first five characters ('heroN') plus the '_pick' suffix
placeholders = [f'{placeholder[:5]}_pick' for placeholder in test.index]
placeholders

['hero0_pick', 'hero1_pick', 'hero2_pick', 'hero3_pick', 'hero4_pick']

In [44]:
# First three entries of `placeholders`.
# NOTE(review): the saved output shows names without the '_pick' suffix, which does not match
# the `placeholders` cell above; this cell has a lower execution count (In [44] vs In [45]),
# so the output presumably comes from an earlier definition - re-run to confirm.
temp_p = placeholders[0:3]
temp_p

['hero0', 'hero1', 'hero2']