In [1]:
import os
import pickle
import sys

import numpy as np
import pandas as pd
from spektral.data import Dataset, Graph

In [2]:
# Match-level table: keep the frame as read from disk in df_raw and work on a copy
df_raw = pd.read_csv('../data/combined.csv')
df = df_raw.copy()

# Hero feature table, re-indexed by hero_id so rows can be looked up by hero id
df_features = pd.read_csv('../data/features.csv').set_index('hero_id')

In [22]:
class DotaDataset(Dataset):
    '''Graph dataset with two graphs per match: one from the radiant side, one from the dire side.

    Each graph has one node per picked hero of that team (node features taken from the
    hero feature table), an all-ones adjacency matrix, and a label telling whether
    that team won the match.
    '''

    def __init__(self, df_combined: pd.DataFrame, features: pd.DataFrame,  **kwargs):
        '''Generates a list of Graph objects
        df_combined: match result (radiant_win), hero picks (hero0_pick ...), hero slots (hero0_slot...) [dataframe]
        features: all hero ids (hero_id), desired features (feature1,2,3,etc., name not important) [dataframe]
        kwargs: forwarded unchanged to the base Dataset constructor

        The resulting list holds all radiant graphs first (in row order), followed by all dire graphs.
        '''
        # One pass over the rows per team; 'radiant' first so the ordering is
        # [all radiant graphs] + [all dire graphs].
        self.graphs = [
            self.get_graph(index, match, features, team)
            for team in ('radiant', 'dire')
            for index, match in df_combined.iterrows()
        ]
        # self.graphs must exist before the base constructor runs
        # (presumably Dataset.__init__ calls read() - confirm against the Spektral docs).
        super().__init__(**kwargs)

    def get_graph(self, index, match, features, team):
        '''Generates a single graph based on a single match

        index: row label of the match; used only for the progress printout (must support index+1)
        match: one row of the combined dataframe (radiant_win, heroN_slot, heroN_pick) [series]
        features: hero feature dataframe indexed by hero id [dataframe]
        team: 'radiant' or 'dire' - the perspective the graph is built from

        Returns a Graph with
            x: feature rows of the team's heroes (hero id 0 is skipped)
            a: all-ones adjacency matrix sized to the number of rows in x
            y: 1.0 if the selected team won, else 0.0 (given radiant_win is 0/1 or boolean)

        Raises ValueError if team is neither 'radiant' nor 'dire'.
        '''
        # Status
        if (index+1)%1000==0:
            print(f'Graph {team} {index+1}')

        # Reduce match to the entries hero0_slot, ..., hero9_slot
        slots = match[[f'hero{i}_slot' for i in range(0,10)]]

        # Based on team: filter the slots and assign the match result
        if team=='radiant':
            slots = slots[slots<5] # radiant slots 0,1,2,3,4
            y = float(match['radiant_win'])
        elif team=='dire':
            slots = slots[slots>127] # dire slots 128,129,130,131,132
            y = 1-float(match['radiant_win']) # dire wins exactly when radiant loses
        else:
            raise ValueError('Incorrect team specified in "get_graph" matchod. Use "radiant" or "dire"')

        # Map the surviving 'heroN_slot' labels to their 'heroN_pick' columns
        # (the first five characters of the label are 'heroN')
        pick_columns = [f'{herox[:5]}_pick' for herox in slots.index]
        heroes = match[pick_columns].values
        heroes = [hero for hero in heroes if hero!=0] # remove hero id 0 (these are invalid)

        # Create feature matrix: one row per hero, skipping the first three feature columns
        # NOTE(review): presumably those three columns are non-feature metadata - confirm
        x = features.loc[heroes].iloc[:,3:]
        x = x.to_numpy(dtype='float')

        # Adjacency matrix: fully connected, self-loops included.
        # Sized from x rather than hard-coded to 5x5, because dropping hero id 0
        # above can leave fewer than five nodes and `a` must match the node count.
        n_nodes = x.shape[0]
        a = np.ones([n_nodes, n_nodes], dtype='float32')

        g = Graph(x=x, a=a, y=y)
        return g

    def read(self):
        '''Returns the list of graphs built in __init__'''
        return self.graphs

In [28]:
# Load graph dataset 50000 matches at a time and concatenate the chunks into `graphs`.
# Chunk files are named graphs_v1_{start}-{end}.pkl, with inclusive match indices.
# NOTE(review): pickle.load can execute arbitrary code - only load files produced by this project.
dir = '../data/graphs_v1/'  # NOTE(review): shadows the builtin `dir`; name kept in case other cells read it
count = 0  # not used in this cell; kept in case other cells read it
total = len(df)
step = 50000

for i in range(0,int(np.ceil(total/step))):
    start = i*step
    # The last chunk may be shorter than `step`: clamp the inclusive end index to the final row
    end = min(start+step, total)-1
    path = dir+f'graphs_v1_{start}-{end}.pkl'
    print(path)
    # Context manager closes each file right after it is read (previously every handle stayed open)
    with open(path,'rb') as file:
        if i==0:
            graphs = pickle.load(file)
        else:
            # NOTE(review): `+` re-copies the accumulated collection on every chunk; if the
            # pickles hold plain lists, graphs.extend(...) would avoid that - confirm the type
            graphs = graphs + pickle.load(file)

../data/graphs_v1/graphs_v1_0-49999.pkl
../data/graphs_v1/graphs_v1_50000-99999.pkl
../data/graphs_v1/graphs_v1_100000-149999.pkl
../data/graphs_v1/graphs_v1_150000-199999.pkl
../data/graphs_v1/graphs_v1_200000-249999.pkl
../data/graphs_v1/graphs_v1_250000-299999.pkl
../data/graphs_v1/graphs_v1_300000-349999.pkl
../data/graphs_v1/graphs_v1_350000-399999.pkl
../data/graphs_v1/graphs_v1_400000-449999.pkl
../data/graphs_v1/graphs_v1_450000-499999.pkl
../data/graphs_v1/graphs_v1_500000-549999.pkl
../data/graphs_v1/graphs_v1_550000-599999.pkl
../data/graphs_v1/graphs_v1_600000-649999.pkl
../data/graphs_v1/graphs_v1_650000-699999.pkl
../data/graphs_v1/graphs_v1_700000-749999.pkl
../data/graphs_v1/graphs_v1_750000-799999.pkl
../data/graphs_v1/graphs_v1_800000-849999.pkl
../data/graphs_v1/graphs_v1_850000-899999.pkl
../data/graphs_v1/graphs_v1_900000-949999.pkl
../data/graphs_v1/graphs_v1_950000-999999.pkl
../data/graphs_v1/graphs_v1_1000000-1049999.pkl
../data/graphs_v1/graphs_v1_1050000-1099

In [30]:
# Half the number of loaded graphs - presumably the match count, if every match
# contributed one radiant and one dire graph (TODO confirm against how the pickles were written)
len(graphs) / 2.0

5600752.0

In [36]:
# Size in bytes of the first graph's label object as reported by sys.getsizeof.
# This is the shallow size only: objects the label refers to are not counted.
sys.getsizeof(graphs[0].y)

24