In [4]:
import pandas as pd
import numpy as np
from spektral.data import Dataset, Graph
import pickle
from importlib import reload
import dataset
reload(dataset)

# Notebook-wide pandas display/behaviour options.
pd.options.display.max_columns = None       # None removes the column limit when displaying frames
pd.options.mode.chained_assignment = None   # pandas default is 'warn'; None disables the warning

In [2]:
# Combined match data: keep the frame exactly as read in `df_raw`
# and work on an independent copy in `df`.
df_raw = pd.read_csv('../data/combined.csv')
df = df_raw.copy()

# Hero feature table, indexed by `hero_id`.
df_features = pd.read_csv('../data/features.csv').set_index('hero_id')

### Graph Data Creation

#### DotaV1

In [None]:
# Generate the DotaV1 dataset in chunks of `step` matches (50000 at a time),
# pickling each chunk to its own file under ../data/graphs_v1/.
# File names carry the inclusive row range covered: graphs_v1_{start}-{end-1}.pkl
total = len(df)
step = 50000

for start in range(0, total, step):
    end = min(start + step, total)  # exclusive bound; the final chunk may be shorter
    df_current = df.iloc[start:end]
    print(f'Start: {start} End: {end-1}')
    graphs_current = dataset.DotaV1(df_current, df_features)

    # The context manager flushes and closes the file after every chunk, even if
    # pickling raises; previously each handle was opened and never closed.
    with open(f'../data/graphs_v1/graphs_v1_{start}-{end-1}.pkl','wb') as filehandler:
        pickle.dump(graphs_current, filehandler)

#### DotaV2

In [None]:
# Generate the DotaV2 dataset in chunks of `step` matches (50000 at a time),
# pickling each chunk to its own file under ../data/graphs_v2/.
# File names carry the inclusive row range covered: graphs_v2_{start}-{end-1}.pkl
total = len(df)
step = 50000

for start in range(0, total, step):
    end = min(start + step, total)  # exclusive bound; the final chunk may be shorter
    df_current = df.iloc[start:end]
    print(f'Start: {start} End: {end-1}')
    graphs_current = dataset.DotaV2(df_current, df_features)

    # The context manager flushes and closes the file after every chunk, even if
    # pickling raises; previously each handle was opened and never closed.
    with open(f'../data/graphs_v2/graphs_v2_{start}-{end-1}.pkl','wb') as filehandler:
        pickle.dump(graphs_current, filehandler)

### Feature Scaling

#### DotaV1

In [None]:
# Load the DotaV1 graph chunks from ../data/graphs_v1/ and combine them into `graphs`.
# `total` and `step` must match the values used when the chunk files were written,
# because the file names are rebuilt from them.
graphs_dir = '../data/graphs_v1/'  # renamed from `dir`, which shadowed the builtin
total = len(df)
step = 50000

for start in range(0, total, step):
    end = min(start + step, total) - 1  # inclusive end index, as used in the file names
    path = graphs_dir + f'graphs_v1_{start}-{end}.pkl'
    print(path)
    # NOTE: pickle.load can execute arbitrary code; only load files produced by this project.
    # The context manager closes each file as soon as it has been read.
    with open(path, 'rb') as file:
        chunk = pickle.load(file)
    # The first chunk seeds `graphs`; every later chunk is appended with `+`.
    graphs = chunk if start == 0 else graphs + chunk

print('DotaV1 graph dataset loaded')

In [None]:
# Fit a MinMaxScaler on the hero feature table (columns from position 3 onward)
# so that every feature is normalised to the 0-1 range.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler().fit(df_features.iloc[:,3:].to_numpy())

# Replace each graph's feature matrix `x` with its scaled version.
# NOTE(review): an earlier comment mentioned dropping the attack_backswing feature,
# but nothing is dropped in this cell - confirm whether that happens elsewhere.
# NOTE: the graphs are modified in place, so running this cell twice scales twice.
for idx in range(len(graphs)):
    if idx % 100000 == 0:
        print(idx)  # coarse progress indicator
    graph = graphs[idx]
    graph.x = scaler.transform(graph.x)

In [None]:
# Save the scaled DotaV1 graphs back to disk, one pickle per chunk of `step` matches.
# Relies on `graphs`, `total` and `step` from the DotaV1 loading/scaling cells.
for start in range(0, total, step):
    end = min(start + step, total)  # exclusive bound in match rows
    print(f'Start: {start} End: {end-1}')
    # 2* because it needs 0-100000 to include radiant 50000 and dire 50000
    graphs_current = graphs[2*start:2*end]

    # The context manager flushes and closes the file after every chunk, even if
    # pickling raises; previously each handle was opened and never closed.
    with open(f'../data/graphs_v1_scaled/graphs_v1_scaled_{start}-{end-1}.pkl','wb') as filehandler:
        pickle.dump(graphs_current, filehandler)

#### DotaV2

In [None]:
# Load the DotaV2 graph chunks from ../data/graphs_v2/ and combine them into `graphs`.
# `total` and `step` must match the values used when the chunk files were written,
# because the file names are rebuilt from them.
graphs_dir = '../data/graphs_v2/'  # renamed from `dir`, which shadowed the builtin
total = len(df)
step = 50000

for start in range(0, total, step):
    end = min(start + step, total) - 1  # inclusive end index, as used in the file names
    path = graphs_dir + f'graphs_v2_{start}-{end}.pkl'
    print(path)
    # NOTE: pickle.load can execute arbitrary code; only load files produced by this project.
    # The context manager closes each file as soon as it has been read.
    with open(path, 'rb') as file:
        chunk = pickle.load(file)
    # The first chunk seeds `graphs`; every later chunk is appended with `+`.
    graphs = chunk if start == 0 else graphs + chunk

print('DotaV2 graph dataset loaded')

In [None]:
# Fit a MinMaxScaler on the hero feature table (columns from position 3 onward)
# so that every feature is normalised to the 0-1 range.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler().fit(df_features.iloc[:,3:].to_numpy())

# Replace each graph's feature matrix `x` with its scaled version.
# NOTE(review): an earlier comment mentioned dropping the attack_backswing feature,
# but nothing is dropped in this cell - confirm whether that happens elsewhere.
# NOTE: the graphs are modified in place, so running this cell twice scales twice.
for idx in range(len(graphs)):
    if idx % 100000 == 0:
        print(idx)  # coarse progress indicator
    graph = graphs[idx]
    graph.x = scaler.transform(graph.x)

In [None]:
# Save the scaled DotaV2 graphs back to disk, one pickle per chunk of `step` entries.
# Relies on `graphs`, `total` and `step` from the DotaV2 loading/scaling cells.
for start in range(0, total, step):
    end = min(start + step, total)  # exclusive bound; the final chunk may be shorter
    print(f'Start: {start} End: {end-1}')
    graphs_current = graphs[start:end]

    # The context manager flushes and closes the file after every chunk, even if
    # pickling raises; previously each handle was opened and never closed.
    with open(f'../data/graphs_v2_scaled/graphs_v2_scaled_{start}-{end-1}.pkl','wb') as filehandler:
        pickle.dump(graphs_current, filehandler)