# Modelling 1

### Library and data imports

In [1]:
from importlib import reload
import os
import pickle
import random

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from spektral.data import Dataset, Graph
from spektral.data import BatchLoader
from spektral.layers import GATConv, GCNConv, GlobalAvgPool, GlobalMaxPool, GlobalSumPool, GlobalAttentionPool
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import binary_accuracy
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.config import list_physical_devices
from tensorflow.python.client import device_lib

import dataset
reload(dataset)

In [2]:
# Match-level data: `df_raw` stays exactly as read from disk, `df` is the working copy
df_raw = pd.read_csv('../data/combined.csv')
df = df_raw.copy()

# Hero feature table, indexed by `hero_id`
df_features = pd.read_csv('../data/features.csv').set_index('hero_id')

# Pre-computed match filters (the `filt_std` column drives the standard filtering further down)
df_filters = pd.read_csv('../models/filters.csv')

In [3]:
# Load the pickled graph dataset in chunks of `step` matches.
# Each file name encodes the inclusive match range it covers: graphs_v1_scaled_{start}-{end}.pkl
# NOTE(review): `dir` and `file` shadow Python builtins and `count` is never used in this cell;
# the names are left unchanged in case other cells refer to them.
dir = '../data/graphs_v1_scaled/'
count = 0
total = len(df)
step = 50000

for i in range(0,int(np.ceil(total/step))):
    start = i*step
    end = min(start+step, total)-1  # the last chunk is clipped to the final match index
    path = dir+f'graphs_v1_scaled_{start}-{end}.pkl'
    print(path)
    # The context manager closes every file as soon as it has been read.
    # NOTE(review): pickle.load can execute arbitrary code from the file - only load trusted,
    # locally produced pickles here.
    with open(path,'rb') as file:
        chunk = pickle.load(file)
    # The first chunk starts the dataset; later chunks are concatenated onto it.
    graphs = chunk if i==0 else graphs + chunk

../data/graphs_v1_scaled/graphs_v1_scaled_0-49999.pkl
../data/graphs_v1_scaled/graphs_v1_scaled_50000-99999.pkl
../data/graphs_v1_scaled/graphs_v1_scaled_100000-149999.pkl
../data/graphs_v1_scaled/graphs_v1_scaled_150000-199999.pkl
../data/graphs_v1_scaled/graphs_v1_scaled_200000-249999.pkl
../data/graphs_v1_scaled/graphs_v1_scaled_250000-299999.pkl
../data/graphs_v1_scaled/graphs_v1_scaled_300000-349999.pkl
../data/graphs_v1_scaled/graphs_v1_scaled_350000-399999.pkl
../data/graphs_v1_scaled/graphs_v1_scaled_400000-449999.pkl
../data/graphs_v1_scaled/graphs_v1_scaled_450000-499999.pkl
../data/graphs_v1_scaled/graphs_v1_scaled_500000-549999.pkl
../data/graphs_v1_scaled/graphs_v1_scaled_550000-599999.pkl
../data/graphs_v1_scaled/graphs_v1_scaled_600000-649999.pkl
../data/graphs_v1_scaled/graphs_v1_scaled_650000-699999.pkl
../data/graphs_v1_scaled/graphs_v1_scaled_700000-749999.pkl
../data/graphs_v1_scaled/graphs_v1_scaled_750000-799999.pkl
../data/graphs_v1_scaled/graphs_v1_scaled_800000

In [4]:
def get_filt_idx(filt, step=50000):
    '''Map a per-match filter onto positions in the DotaV1 graph dataset.

    The dataset stores two graphs per match, laid out chunk by chunk
    (0-49999 radiant, 0-49999 dire, 50000-99999 radiant, ...). Every chunk of
    `step` filter values is therefore repeated twice, back to back, before the
    positions of the truthy entries are collected.

    Example: with a single chunk, [True, False, True] expands to
    [True, False, True, True, False, True] and the result is [0, 2, 3, 5].

    Parameters
    ----------
    filt : sequence or array of bool
        One entry per match; truthy entries are kept.
    step : int, optional
        Number of matches per chunk in the dataset layout (default 50000).

    Returns
    -------
    list of int
        Positions (ascending) of the graphs belonging to the selected matches.

    Raises
    ------
    ValueError
        If `step` is not a positive integer.
    '''
    if step <= 0:
        raise ValueError('step must be a positive integer')

    filt = np.asarray(filt)
    n_chunks = int(np.ceil(len(filt)/step))
    if n_chunks == 0:
        # Empty filter: nothing to select (np.concatenate rejects an empty list)
        return []

    # Collect each chunk twice (radiant block, then dire block) and concatenate once,
    # instead of re-allocating the growing array on every np.append call.
    pieces = []
    for i in range(n_chunks):
        chunk = filt[i*step:(i+1)*step]
        pieces.extend((chunk, chunk))

    # Positions of the truthy entries, returned as plain Python ints
    return np.flatnonzero(np.concatenate(pieces)).tolist()

### Standard Filtering

In [5]:
# Scaling - data now pre-scaled
# # MinMax Scaler model to normalise features from 0-1
# from sklearn.preprocessing import MinMaxScaler
# scaler = MinMaxScaler()
# scaler.fit(df_features.iloc[:,3:].to_numpy())

# # Loop through each graph and scale feature matrix and drop attack_backswing feature
# print('Scaling graph dataset feature matrices:')
# for i in range(0,len(graphs_filt)):
#     if(i%100000==0):
#         print(i)
#     graphs_filt[i].x = scaler.transform(graphs_filt[i].x) # scale feature matrix
#     graphs_filt[i].x = np.delete(graphs_filt[i].x, 14, 1) # remove attack_backswing as a feature

In [6]:
# Filter graph dataset
# `filt_std` is the standard-filter column of ../models/filters.csv (presumably boolean, one value per match)
filt = df_filters['filt_std'].values
# Expand the per-match filter into positions within `graphs` (see get_filt_idx)
filt_idx = get_filt_idx(filt)
# Index the dataset with the list of positions to keep
graphs_filt = graphs[filt_idx]
print('Standard filtering complete')

Standard filtering complete


In [7]:
# Drop the attack_backswing feature (column 14) from every graph's node-feature matrix.
# Columns 0-13 and 15-19 are kept and each graph's `x` is overwritten with the selection.
attack_backswing_col = 14
kept_cols = [col for col in range(20) if col != attack_backswing_col]
for i in range(len(graphs_filt)):
    graph = graphs_filt[i]
    graph.x = graph.x[:, kept_cols]
print('Attack backswing feature removed')

Attack backswing feature removed


In [8]:
# Split the filtered graphs into 64% training, 16% validation and 20% test
d = graphs_filt  # graph dataset being split

np.random.seed(10)  # seed NumPy's global RNG before drawing the permutation
n_graphs = len(d)
idxs = np.random.permutation(n_graphs)
split_va = int(0.64 * n_graphs)  # position where the validation slice starts
split_te = int(0.8 * n_graphs)  # position where the test slice starts
idx_tr, idx_va, idx_te = np.split(idxs, [split_va, split_te])
data_tr, data_va, data_te = d[idx_tr], d[idx_va], d[idx_te]

# Report the share of graphs that ended up in each subset
print(f'Training data: {np.round(len(data_tr)/len(graphs_filt),2)*100}%')
print(f'Validation data: {np.round(len(data_va)/len(graphs_filt),2)*100}%')
print(f'Test data: {np.round(len(data_te)/len(graphs_filt),2)*100}%')

Training data: 64.0%
Validation data: 16.0%
Test data: 20.0%


In [1]:
# Confirm GPUs are being identified (requires tf environment, not the pipenv dotaprediction)
# print("Num GPUs Available: ", list_physical_devices('GPU'))
# print(device_lib.list_local_devices())

### Model 1.0 - Baseline

In [99]:
# Hyperparameters for model 1.0 (baseline)
learning_rate = 0.001  # Adam learning rate
epochs = 50  # number of training epochs passed to the loader and to fit()
es_patience = 10  # early-stopping patience. NOTE(review): not referenced again in this cell, so fit() runs without early stopping
batch_size = 256  # graphs per batch

# Batch loaders for the three splits (loader_te is built here but not used in this cell)
loader_tr = BatchLoader(data_tr, batch_size=batch_size, epochs=epochs)
loader_va = BatchLoader(data_va, batch_size=batch_size)
loader_te = BatchLoader(data_te, batch_size=batch_size)


class Net_1_0(Model):
    """Baseline network: GCNConv (19 channels, ReLU) -> Flatten -> sigmoid Dense head."""

    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(19, activation='relu')
        self.flatten = Flatten()
        # Output width is read from the global dataset `d`
        self.dense = Dense(d.n_labels, activation="sigmoid")

    def call(self, inputs):
        """Run one batch of ``(x, a)`` - node features and adjacency - through the network."""
        # Shape notes carried over from the original author (not checked here):
        # x is [n_batches, 5, n_features], a is [n_batches, 5, 5]
        node_features, adjacency = inputs

        # conv1 is built with 19 channels -> presumably [n_batches, 5, 19]
        node_embeddings = self.conv1([node_features, adjacency])

        # Flatten the per-node embeddings into one vector per graph: [n_batches, 5*channels]
        flattened = self.flatten(node_embeddings)

        # One sigmoid output per label
        return self.dense(flattened)


# Instantiate, compile with Adam + binary cross-entropy, then train
model_1_0 = Net_1_0()
optimizer = Adam(learning_rate=learning_rate)
loss_fn = BinaryCrossentropy()
model_1_0.compile(
    optimizer=optimizer,
    loss=loss_fn,
    metrics=['binary_accuracy'],
)

fit_log_1_0 = model_1_0.fit(
    loader_tr.load(),
    steps_per_epoch=loader_tr.steps_per_epoch,
    epochs=epochs,
    validation_data=loader_va.load(),
    validation_steps=loader_va.steps_per_epoch,
)


you are shuffling a 'DotaV1' object which is not a subclass of 'Sequence'; `shuffle` is not guaranteed to behave correctly. E.g., non-numpy array/tensor objects with view semantics may contain duplicates after shuffling.



Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [100]:
# Accuracy curves for model 1.0: plot training vs validation accuracy per epoch,
# then export the figure (PNG) and the underlying numbers (CSV).
history = fit_log_1_0.history
training_accuracy = history['binary_accuracy']
validation_accuracy = history['val_binary_accuracy']
# x-axis values 1..N. Deliberately not named `epochs`: that name holds the integer
# epoch count used by the training cells.
epoch_axis = list(range(1, len(training_accuracy) + 1))

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=epoch_axis,
    y=training_accuracy,
    mode='lines',
    name='Training'))
fig.add_trace(go.Scatter(
    x=epoch_axis,
    y=validation_accuracy,
    mode='lines',
    name='Validation'))
fig.update_layout(
    template='none',
    xaxis_title='Epochs',
    yaxis_title="Accuracy")
fig.update_xaxes(tick0=0, dtick=1)
# fig.update_yaxes(range=[0.5,1])
fig.show()
fig.write_image('../images/model_1_0_accuracy.png', scale=5)
pd.DataFrame({
    'epoch': epoch_axis,
    'training_accuracy': training_accuracy,
    'validation_accuracy': validation_accuracy,
}).to_csv('../images/model_1_0_accuracy.csv', index=False)

In [109]:
# Persist the training history and the fitted model for model 1.0.
# Context managers make sure each file is flushed and closed once the dump finishes.
with open('../models/fit_log_1_0.pkl','wb') as filehandler:
    pickle.dump(fit_log_1_0, filehandler)
with open('../models/model_1_0.pkl','wb') as filehandler:
    pickle.dump(model_1_0, filehandler)

INFO:tensorflow:Assets written to: ram://639f241c-8626-491f-b1e7-68ee9353ea29/assets
INFO:tensorflow:Assets written to: ram://d5d9e920-b04a-42b1-9c2f-a6666cc80e66/assets


### Model 1.1 - GATConv

In [101]:
# Hyperparameters for model 1.1 (GATConv)
learning_rate = 0.001  # Adam learning rate
epochs = 50  # number of training epochs passed to the loader and to fit()
es_patience = 10  # early-stopping patience. NOTE(review): not referenced again in this cell, so fit() runs without early stopping
batch_size = 256  # graphs per batch

# Batch loaders for the three splits (loader_te is built here but not used in this cell)
loader_tr = BatchLoader(data_tr, batch_size=batch_size, epochs=epochs)
loader_va = BatchLoader(data_va, batch_size=batch_size)
loader_te = BatchLoader(data_te, batch_size=batch_size)


class Net_1_1(Model):
    """Attention variant of the baseline: GATConv (19 channels, ReLU) -> Flatten -> sigmoid Dense head."""

    def __init__(self):
        super().__init__()
        self.conv1 = GATConv(19, activation='relu')
        self.flatten = Flatten()
        # Output width is read from the global dataset `d`
        self.dense = Dense(d.n_labels, activation="sigmoid")

    def call(self, inputs):
        """Run one batch of ``(x, a)`` - node features and adjacency - through the network."""
        # Shape notes carried over from the original author (not checked here):
        # x is [n_batches, 5, n_features], a is [n_batches, 5, 5]
        node_features, adjacency = inputs

        # conv1 is built with 19 channels -> presumably [n_batches, 5, 19]
        node_embeddings = self.conv1([node_features, adjacency])

        # Flatten the per-node embeddings into one vector per graph: [n_batches, 5*channels]
        flattened = self.flatten(node_embeddings)

        # One sigmoid output per label
        return self.dense(flattened)


# Instantiate, compile with Adam + binary cross-entropy, then train
model_1_1 = Net_1_1()
optimizer = Adam(learning_rate=learning_rate)
loss_fn = BinaryCrossentropy()
model_1_1.compile(
    optimizer=optimizer,
    loss=loss_fn,
    metrics=['binary_accuracy'],
)

fit_log_1_1 = model_1_1.fit(
    loader_tr.load(),
    steps_per_epoch=loader_tr.steps_per_epoch,
    epochs=epochs,
    validation_data=loader_va.load(),
    validation_steps=loader_va.steps_per_epoch,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [102]:
# Accuracy curves for model 1.1: plot training vs validation accuracy per epoch,
# then export the figure (PNG) and the underlying numbers (CSV).
history = fit_log_1_1.history
training_accuracy = history['binary_accuracy']
validation_accuracy = history['val_binary_accuracy']
# x-axis values 1..N. Deliberately not named `epochs`: that name holds the integer
# epoch count used by the training cells.
epoch_axis = list(range(1, len(training_accuracy) + 1))

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=epoch_axis,
    y=training_accuracy,
    mode='lines',
    name='Training'))
fig.add_trace(go.Scatter(
    x=epoch_axis,
    y=validation_accuracy,
    mode='lines',
    name='Validation'))
fig.update_layout(
    template='none',
    xaxis_title='Epochs',
    yaxis_title="Accuracy")
fig.update_xaxes(tick0=0, dtick=1)
# fig.update_yaxes(range=[0.5,1])
fig.show()
fig.write_image('../images/model_1_1_accuracy.png', scale=5)
pd.DataFrame({
    'epoch': epoch_axis,
    'training_accuracy': training_accuracy,
    'validation_accuracy': validation_accuracy,
}).to_csv('../images/model_1_1_accuracy.csv', index=False)

In [110]:
# Persist the training history and the fitted model for model 1.1.
# Context managers make sure each file is flushed and closed once the dump finishes.
with open('../models/fit_log_1_1.pkl','wb') as filehandler:
    pickle.dump(fit_log_1_1, filehandler)
with open('../models/model_1_1.pkl','wb') as filehandler:
    pickle.dump(model_1_1, filehandler)



INFO:tensorflow:Assets written to: ram://956608be-fc57-4333-aef7-11622328db6f/assets


INFO:tensorflow:Assets written to: ram://956608be-fc57-4333-aef7-11622328db6f/assets


INFO:tensorflow:Assets written to: ram://597eca29-d923-4855-b598-ef1201bbb3d0/assets


INFO:tensorflow:Assets written to: ram://597eca29-d923-4855-b598-ef1201bbb3d0/assets


### Model 1.2 - Average Pooling

In [103]:
# Hyperparameters for model 1.2 (average pooling)
learning_rate = 0.001  # Adam learning rate
epochs = 50  # number of training epochs passed to the loader and to fit()
es_patience = 10  # early-stopping patience. NOTE(review): not referenced again in this cell, so fit() runs without early stopping
batch_size = 256  # graphs per batch

# Batch loaders for the three splits (loader_te is built here but not used in this cell)
loader_tr = BatchLoader(data_tr, batch_size=batch_size, epochs=epochs)
loader_va = BatchLoader(data_va, batch_size=batch_size)
loader_te = BatchLoader(data_te, batch_size=batch_size)

# Build model
class Net_1_2(Model):
    """GCNConv (19 channels, ReLU) -> GlobalAvgPool -> sigmoid Dense head.

    The pooling layer takes the place of the Flatten step used by models 1.0/1.1,
    so no Flatten layer is created here.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(19, activation='relu')
        self.pool1 = GlobalAvgPool()
        # Output width is read from the global dataset `d`
        self.dense = Dense(d.n_labels, activation="sigmoid")

    def call(self, inputs):
        """Run one batch of ``(x, a)`` - node features and adjacency - through the network."""
        x, a = inputs
        # Shape notes carried over from the original author (not checked here):
        # x.shape is [n_batches, 5, n_features]
        # a.shape is [n_batches, 5, 5]

        x = self.conv1([x, a])
        # conv1 is built with 19 channels -> presumably [n_batches, 5, 19]

        x = self.pool1(x)
        # pooled over the nodes of each graph: [n_batches, channels]

        x = self.dense(x)
        # one sigmoid output per label

        return x

# Instantiate, compile with Adam + binary cross-entropy, then train
model_1_2 = Net_1_2()
optimizer = Adam(learning_rate=learning_rate)
loss_fn = BinaryCrossentropy()
model_1_2.compile(optimizer=optimizer, loss=loss_fn, metrics=['binary_accuracy'])

fit_log_1_2 = model_1_2.fit(loader_tr.load(), steps_per_epoch=loader_tr.steps_per_epoch, epochs=epochs, validation_data=loader_va.load(), validation_steps=loader_va.steps_per_epoch)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [104]:
# Accuracy curves for model 1.2: plot training vs validation accuracy per epoch,
# then export the figure (PNG) and the underlying numbers (CSV).
history = fit_log_1_2.history
training_accuracy = history['binary_accuracy']
validation_accuracy = history['val_binary_accuracy']
# x-axis values 1..N. Deliberately not named `epochs`: that name holds the integer
# epoch count used by the training cells.
epoch_axis = list(range(1, len(training_accuracy) + 1))

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=epoch_axis,
    y=training_accuracy,
    mode='lines',
    name='Training'))
fig.add_trace(go.Scatter(
    x=epoch_axis,
    y=validation_accuracy,
    mode='lines',
    name='Validation'))
fig.update_layout(
    template='none',
    xaxis_title='Epochs',
    yaxis_title="Accuracy")
fig.update_xaxes(tick0=0, dtick=1)
# fig.update_yaxes(range=[0.5,1])
fig.show()
fig.write_image('../images/model_1_2_accuracy.png', scale=5)
pd.DataFrame({
    'epoch': epoch_axis,
    'training_accuracy': training_accuracy,
    'validation_accuracy': validation_accuracy,
}).to_csv('../images/model_1_2_accuracy.csv', index=False)

In [111]:
# Persist the training history and the fitted model for model 1.2.
# Context managers make sure each file is flushed and closed once the dump finishes.
# NOTE(review): the recorded output of this cell warns that custom mask layers need
# get_config / custom_objects when loading - verify that the pickled model can be reloaded.
with open('../models/fit_log_1_2.pkl','wb') as filehandler:
    pickle.dump(fit_log_1_2, filehandler)
with open('../models/model_1_2.pkl','wb') as filehandler:
    pickle.dump(model_1_2, filehandler)





INFO:tensorflow:Assets written to: ram://29ccb6c4-b7a8-4122-a360-a938ff6d948a/assets


INFO:tensorflow:Assets written to: ram://29ccb6c4-b7a8-4122-a360-a938ff6d948a/assets

Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.







INFO:tensorflow:Assets written to: ram://59ee2602-97ad-4795-9064-2876f2390e32/assets


INFO:tensorflow:Assets written to: ram://59ee2602-97ad-4795-9064-2876f2390e32/assets

Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.



### Model 1.3 - Max Pooling

In [105]:
# Hyperparameters for model 1.3 (max pooling)
learning_rate = 0.001  # Adam learning rate
epochs = 50  # number of training epochs passed to the loader and to fit()
es_patience = 10  # early-stopping patience. NOTE(review): not referenced again in this cell, so fit() runs without early stopping
batch_size = 256  # graphs per batch

# Batch loaders for the three splits (loader_te is built here but not used in this cell)
loader_tr = BatchLoader(data_tr, batch_size=batch_size, epochs=epochs)
loader_va = BatchLoader(data_va, batch_size=batch_size)
loader_te = BatchLoader(data_te, batch_size=batch_size)


class Net_1_3(Model):
    """GCNConv (19 channels, ReLU) -> GlobalMaxPool -> sigmoid Dense head."""

    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(19, activation='relu')
        self.pool1 = GlobalMaxPool()
        # Output width is read from the global dataset `d`
        self.dense = Dense(d.n_labels, activation="sigmoid")

    def call(self, inputs):
        """Run one batch of ``(x, a)`` - node features and adjacency - through the network."""
        # Shape notes carried over from the original author (not checked here):
        # x is [n_batches, 5, n_features], a is [n_batches, 5, 5]
        node_features, adjacency = inputs

        # conv1 is built with 19 channels -> presumably [n_batches, 5, 19]
        node_embeddings = self.conv1([node_features, adjacency])

        # Pool over the nodes of each graph: [n_batches, channels]
        graph_embedding = self.pool1(node_embeddings)

        # One sigmoid output per label
        return self.dense(graph_embedding)


# Instantiate, compile with Adam + binary cross-entropy, then train
model_1_3 = Net_1_3()
optimizer = Adam(learning_rate=learning_rate)
loss_fn = BinaryCrossentropy()
model_1_3.compile(
    optimizer=optimizer,
    loss=loss_fn,
    metrics=['binary_accuracy'],
)

fit_log_1_3 = model_1_3.fit(
    loader_tr.load(),
    steps_per_epoch=loader_tr.steps_per_epoch,
    epochs=epochs,
    validation_data=loader_va.load(),
    validation_steps=loader_va.steps_per_epoch,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [106]:
# Accuracy curves for model 1.3: plot training vs validation accuracy per epoch,
# then export the figure (PNG) and the underlying numbers (CSV).
history = fit_log_1_3.history
training_accuracy = history['binary_accuracy']
validation_accuracy = history['val_binary_accuracy']
# x-axis values 1..N. Deliberately not named `epochs`: that name holds the integer
# epoch count used by the training cells.
epoch_axis = list(range(1, len(training_accuracy) + 1))

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=epoch_axis,
    y=training_accuracy,
    mode='lines',
    name='Training'))
fig.add_trace(go.Scatter(
    x=epoch_axis,
    y=validation_accuracy,
    mode='lines',
    name='Validation'))
fig.update_layout(
    template='none',
    xaxis_title='Epochs',
    yaxis_title="Accuracy")
fig.update_xaxes(tick0=0, dtick=1)
# fig.update_yaxes(range=[0.5,1])
fig.show()
fig.write_image('../images/model_1_3_accuracy.png', scale=5)
pd.DataFrame({
    'epoch': epoch_axis,
    'training_accuracy': training_accuracy,
    'validation_accuracy': validation_accuracy,
}).to_csv('../images/model_1_3_accuracy.csv', index=False)

In [112]:
# Persist the training history and the fitted model for model 1.3.
# Context managers make sure each file is flushed and closed once the dump finishes.
# NOTE(review): the recorded output of this cell warns that custom mask layers need
# get_config / custom_objects when loading - verify that the pickled model can be reloaded.
with open('../models/fit_log_1_3.pkl','wb') as filehandler:
    pickle.dump(fit_log_1_3, filehandler)
with open('../models/model_1_3.pkl','wb') as filehandler:
    pickle.dump(model_1_3, filehandler)

INFO:tensorflow:Assets written to: ram://e9df102b-d0e4-4da4-8204-d62605a8d8ac/assets


INFO:tensorflow:Assets written to: ram://e9df102b-d0e4-4da4-8204-d62605a8d8ac/assets

Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.



INFO:tensorflow:Assets written to: ram://76b5acdc-4d22-4a01-9499-96668dc37710/assets


INFO:tensorflow:Assets written to: ram://76b5acdc-4d22-4a01-9499-96668dc37710/assets

Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.



### Model 1.4 - Sum Pooling

In [10]:
# Hyperparameters for model 1.4 (sum pooling)
learning_rate = 0.001  # Adam learning rate
epochs = 50  # number of training epochs passed to the loader and to fit()
es_patience = 10  # early-stopping patience. NOTE(review): not referenced again in this cell, so fit() runs without early stopping
batch_size = 256  # graphs per batch

# Batch loaders for the three splits (loader_te is built here but not used in this cell)
loader_tr = BatchLoader(data_tr, batch_size=batch_size, epochs=epochs)
loader_va = BatchLoader(data_va, batch_size=batch_size)
loader_te = BatchLoader(data_te, batch_size=batch_size)


class Net_1_4(Model):
    """GCNConv (19 channels, ReLU) -> GlobalSumPool -> sigmoid Dense head."""

    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(19, activation='relu')
        self.pool1 = GlobalSumPool()
        # Output width is read from the global dataset `d`
        self.dense = Dense(d.n_labels, activation="sigmoid")

    def call(self, inputs):
        """Run one batch of ``(x, a)`` - node features and adjacency - through the network."""
        # Shape notes carried over from the original author (not checked here):
        # x is [n_batches, 5, n_features], a is [n_batches, 5, 5]
        node_features, adjacency = inputs

        # conv1 is built with 19 channels -> presumably [n_batches, 5, 19]
        node_embeddings = self.conv1([node_features, adjacency])

        # Pool over the nodes of each graph: [n_batches, channels]
        graph_embedding = self.pool1(node_embeddings)

        # One sigmoid output per label
        return self.dense(graph_embedding)


# Instantiate, compile with Adam + binary cross-entropy, then train
model_1_4 = Net_1_4()
optimizer = Adam(learning_rate=learning_rate)
loss_fn = BinaryCrossentropy()
model_1_4.compile(
    optimizer=optimizer,
    loss=loss_fn,
    metrics=['binary_accuracy'],
)

fit_log_1_4 = model_1_4.fit(
    loader_tr.load(),
    steps_per_epoch=loader_tr.steps_per_epoch,
    epochs=epochs,
    validation_data=loader_va.load(),
    validation_steps=loader_va.steps_per_epoch,
)

  np.random.shuffle(a)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [11]:
# Accuracy curves for model 1.4: plot training vs validation accuracy per epoch,
# then export the figure (PNG) and the underlying numbers (CSV).
history = fit_log_1_4.history
training_accuracy = history['binary_accuracy']
validation_accuracy = history['val_binary_accuracy']
# x-axis values 1..N. Deliberately not named `epochs`: that name holds the integer
# epoch count used by the training cells.
epoch_axis = list(range(1, len(training_accuracy) + 1))

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=epoch_axis,
    y=training_accuracy,
    mode='lines',
    name='Training'))
fig.add_trace(go.Scatter(
    x=epoch_axis,
    y=validation_accuracy,
    mode='lines',
    name='Validation'))
fig.update_layout(
    template='none',
    xaxis_title='Epochs',
    yaxis_title="Accuracy")
fig.update_xaxes(tick0=0, dtick=1)
# fig.update_yaxes(range=[0.5,1])
fig.show()
fig.write_image('../images/model_1_4_accuracy.png', scale=5)
pd.DataFrame({
    'epoch': epoch_axis,
    'training_accuracy': training_accuracy,
    'validation_accuracy': validation_accuracy,
}).to_csv('../images/model_1_4_accuracy.csv', index=False)

In [12]:
# Persist the training history and the fitted model for model 1.4.
# Context managers make sure each file is flushed and closed once the dump finishes.
# NOTE(review): the recorded output of this cell warns that custom mask layers need
# get_config / custom_objects when loading - verify that the pickled model can be reloaded.
with open('../models/fit_log_1_4.pkl','wb') as filehandler:
    pickle.dump(fit_log_1_4, filehandler)
with open('../models/model_1_4.pkl','wb') as filehandler:
    pickle.dump(model_1_4, filehandler)

INFO:tensorflow:Assets written to: ram://7b05f3e4-e107-475b-a0a1-0b31bdb9debb/assets



Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.



INFO:tensorflow:Assets written to: ram://7d89233f-612d-4f45-b1d6-15caf5b93542/assets



Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.



### Model 1.5 - Attention Pooling

In [13]:
# Hyperparameters for model 1.5 (attention pooling)
learning_rate = 0.001  # Adam learning rate
epochs = 50  # number of training epochs passed to the loader and to fit()
es_patience = 10  # early-stopping patience. NOTE(review): not referenced again in this cell, so fit() runs without early stopping
batch_size = 256  # graphs per batch

# Batch loaders for the three splits (loader_te is built here but not used in this cell)
loader_tr = BatchLoader(data_tr, batch_size=batch_size, epochs=epochs)
loader_va = BatchLoader(data_va, batch_size=batch_size)
loader_te = BatchLoader(data_te, batch_size=batch_size)


class Net_1_5(Model):
    """GCNConv (19 channels, ReLU) -> GlobalAttentionPool(19) -> sigmoid Dense head."""

    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(19, activation='relu')
        self.pool1 = GlobalAttentionPool(19)
        # Output width is read from the global dataset `d`
        self.dense = Dense(d.n_labels, activation="sigmoid")

    def call(self, inputs):
        """Run one batch of ``(x, a)`` - node features and adjacency - through the network."""
        # Shape notes carried over from the original author (not checked here):
        # x is [n_batches, 5, n_features], a is [n_batches, 5, 5]
        node_features, adjacency = inputs

        # conv1 is built with 19 channels -> presumably [n_batches, 5, 19]
        node_embeddings = self.conv1([node_features, adjacency])

        # Pool over the nodes of each graph: [n_batches, channels]
        graph_embedding = self.pool1(node_embeddings)

        # One sigmoid output per label
        return self.dense(graph_embedding)


# Instantiate, compile with Adam + binary cross-entropy, then train
model_1_5 = Net_1_5()
optimizer = Adam(learning_rate=learning_rate)
loss_fn = BinaryCrossentropy()
model_1_5.compile(
    optimizer=optimizer,
    loss=loss_fn,
    metrics=['binary_accuracy'],
)

fit_log_1_5 = model_1_5.fit(
    loader_tr.load(),
    steps_per_epoch=loader_tr.steps_per_epoch,
    epochs=epochs,
    validation_data=loader_va.load(),
    validation_steps=loader_va.steps_per_epoch,
)


you are shuffling a 'DotaV1' object which is not a subclass of 'Sequence'; `shuffle` is not guaranteed to behave correctly. E.g., non-numpy array/tensor objects with view semantics may contain duplicates after shuffling.



Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [14]:
# Accuracy curves for model 1.5: plot training vs validation accuracy per epoch,
# then export the figure (PNG) and the underlying numbers (CSV).
history = fit_log_1_5.history
training_accuracy = history['binary_accuracy']
validation_accuracy = history['val_binary_accuracy']
# x-axis values 1..N. Deliberately not named `epochs`: that name holds the integer
# epoch count used by the training cells.
epoch_axis = list(range(1, len(training_accuracy) + 1))

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=epoch_axis,
    y=training_accuracy,
    mode='lines',
    name='Training'))
fig.add_trace(go.Scatter(
    x=epoch_axis,
    y=validation_accuracy,
    mode='lines',
    name='Validation'))
fig.update_layout(
    template='none',
    xaxis_title='Epochs',
    yaxis_title="Accuracy")
fig.update_xaxes(tick0=0, dtick=1)
# fig.update_yaxes(range=[0.5,1])
fig.show()
fig.write_image('../images/model_1_5_accuracy.png', scale=5)
pd.DataFrame({
    'epoch': epoch_axis,
    'training_accuracy': training_accuracy,
    'validation_accuracy': validation_accuracy,
}).to_csv('../images/model_1_5_accuracy.csv', index=False)

In [15]:
# Persist the training history and the fitted model for model 1.5.
# Context managers make sure each file is flushed and closed once the dump finishes.
with open('../models/fit_log_1_5.pkl','wb') as filehandler:
    pickle.dump(fit_log_1_5, filehandler)
with open('../models/model_1_5.pkl','wb') as filehandler:
    pickle.dump(model_1_5, filehandler)



INFO:tensorflow:Assets written to: ram://ee72e0e3-707c-4145-8268-8824588a1440/assets


INFO:tensorflow:Assets written to: ram://ee72e0e3-707c-4145-8268-8824588a1440/assets


INFO:tensorflow:Assets written to: ram://ad7d20c4-5181-4e3a-abca-786392bddfb6/assets


INFO:tensorflow:Assets written to: ram://ad7d20c4-5181-4e3a-abca-786392bddfb6/assets


### Model 1.6 - GATConv + Average Pooling

In [16]:
# Hyperparameters for model 1.6 (GATConv + average pooling)
learning_rate = 0.001  # Adam learning rate
epochs = 50  # number of training epochs passed to the loader and to fit()
es_patience = 10  # early-stopping patience. NOTE(review): not referenced again in this cell, so fit() runs without early stopping
batch_size = 256  # graphs per batch

# Batch loaders for the three splits (loader_te is built here but not used in this cell)
loader_tr = BatchLoader(data_tr, batch_size=batch_size, epochs=epochs)
loader_va = BatchLoader(data_va, batch_size=batch_size)
loader_te = BatchLoader(data_te, batch_size=batch_size)


class Net_1_6(Model):
    """GATConv (19 channels, ReLU) -> GlobalAvgPool -> sigmoid Dense head."""

    def __init__(self):
        super().__init__()
        self.conv1 = GATConv(19, activation='relu')
        self.pool1 = GlobalAvgPool()
        # Output width is read from the global dataset `d`
        self.dense = Dense(d.n_labels, activation="sigmoid")

    def call(self, inputs):
        """Run one batch of ``(x, a)`` - node features and adjacency - through the network."""
        # Shape notes carried over from the original author (not checked here):
        # x is [n_batches, 5, n_features], a is [n_batches, 5, 5]
        node_features, adjacency = inputs

        # conv1 is built with 19 channels -> presumably [n_batches, 5, 19]
        node_embeddings = self.conv1([node_features, adjacency])

        # Pool over the nodes of each graph: [n_batches, channels]
        graph_embedding = self.pool1(node_embeddings)

        # One sigmoid output per label
        return self.dense(graph_embedding)


# Instantiate, compile with Adam + binary cross-entropy, then train
model_1_6 = Net_1_6()
optimizer = Adam(learning_rate=learning_rate)
loss_fn = BinaryCrossentropy()
model_1_6.compile(
    optimizer=optimizer,
    loss=loss_fn,
    metrics=['binary_accuracy'],
)

fit_log_1_6 = model_1_6.fit(
    loader_tr.load(),
    steps_per_epoch=loader_tr.steps_per_epoch,
    epochs=epochs,
    validation_data=loader_va.load(),
    validation_steps=loader_va.steps_per_epoch,
)


you are shuffling a 'DotaV1' object which is not a subclass of 'Sequence'; `shuffle` is not guaranteed to behave correctly. E.g., non-numpy array/tensor objects with view semantics may contain duplicates after shuffling.



Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [19]:
# Accuracy curves for model 1.6: plot training vs validation accuracy per epoch,
# then export the figure (PNG) and the underlying numbers (CSV).
history = fit_log_1_6.history
training_accuracy = history['binary_accuracy']
validation_accuracy = history['val_binary_accuracy']
# x-axis values 1..N. Deliberately not named `epochs`: that name holds the integer
# epoch count used by the training cells.
epoch_axis = list(range(1, len(training_accuracy) + 1))

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=epoch_axis,
    y=training_accuracy,
    mode='lines',
    name='Training'))
fig.add_trace(go.Scatter(
    x=epoch_axis,
    y=validation_accuracy,
    mode='lines',
    name='Validation'))
fig.update_layout(
    template='none',
    xaxis_title='Epochs',
    yaxis_title="Accuracy")
fig.update_xaxes(tick0=0, dtick=1)
# fig.update_yaxes(range=[0.5,1])
fig.show()
fig.write_image('../images/model_1_6_accuracy.png', scale=5)
pd.DataFrame({
    'epoch': epoch_axis,
    'training_accuracy': training_accuracy,
    'validation_accuracy': validation_accuracy,
}).to_csv('../images/model_1_6_accuracy.csv', index=False)

In [18]:
# Persist the training history and the fitted model for model 1.6.
# Context managers make sure each file is flushed and closed once the dump finishes.
# NOTE(review): the recorded output of this cell warns that custom mask layers need
# get_config / custom_objects when loading - verify that the pickled model can be reloaded.
with open('../models/fit_log_1_6.pkl','wb') as filehandler:
    pickle.dump(fit_log_1_6, filehandler)
with open('../models/model_1_6.pkl','wb') as filehandler:
    pickle.dump(model_1_6, filehandler)



INFO:tensorflow:Assets written to: ram://8742329e-711a-454c-b91d-20d06fba93f4/assets


INFO:tensorflow:Assets written to: ram://8742329e-711a-454c-b91d-20d06fba93f4/assets

Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.



INFO:tensorflow:Assets written to: ram://abbef555-889d-4fe7-9229-fe444bf287db/assets


INFO:tensorflow:Assets written to: ram://abbef555-889d-4fe7-9229-fe444bf287db/assets

Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.



### Model 1.7 - Feature selection

In [18]:
# Feature selection for Model 1.7: loop through each graph and keep columns 0-11 and 18 of its
# feature matrix, dropping the following features (col numbers are after removing attack_backswing (14) earlier)
# NOTE(review): no scaling is performed in this cell.

# base_attack_time (12)
# attack_point (13)
# vision_day (14)
# vision_night (15)
# turn_rate (16)
# collision_size (17)

# NOTE(review): `x` is overwritten in place on the graphs in `graphs_filt`. If data_tr / data_va / data_te
# share these Graph objects (not verifiable from this cell), the earlier 19-feature models cannot be
# re-run after this cell without reloading the data - confirm.
print('Selecting specific columns:')
for i in range(0,len(graphs_filt)): # NOTE(review): an earlier remark said match 0 had only 15 features and was skipped, but the loop starts at 0, so no graph is skipped
    if(i%100000==0):
        print(i)  # progress marker every 100000 graphs

    graphs_filt[i].x = graphs_filt[i].x[:,[0,1,2,3,4,5,6,7,8,9,10,11,18]]

Selecting specific columns:
0
100000
200000
300000
400000
500000
600000
700000
800000
900000
1000000
1100000
1200000
1300000
1400000
1500000
1600000
1700000
1800000
1900000
2000000
2100000
2200000
2300000
2400000
2500000
2600000
2700000
2800000
2900000
3000000
3100000
3200000
3300000
3400000
3500000
3600000
3700000
3800000
3900000
4000000
4100000
4200000
4300000
4400000
4500000
4600000
4700000
4800000
4900000
5000000
5100000
5200000
5300000
5400000
5500000
5600000
5700000
5800000
5900000
6000000
6100000
6200000
6300000
6400000
6500000
6600000
6700000
6800000
6900000
7000000
7100000
7200000
7300000
7400000
7500000
7600000
7700000
7800000
7900000
8000000
8100000
8200000
8300000
8400000
8500000
8600000
8700000
8800000
8900000
9000000
9100000
9200000
9300000
9400000
9500000
9600000
9700000
9800000
9900000
10000000
10100000


In [19]:
# Train/valid/test split for the feature-selected dataset.
# The index arrays idx_tr / idx_va / idx_te from the earlier 64% / 16% / 20% split are reused.
# No new permutation is drawn here: an unseeded one would not correspond to the reused
# indices and would only overwrite `idxs` with unrelated values.
d_fs = graphs_filt # Graph data

# Guard against stale kernel state: the reused index arrays must add up to the dataset size.
assert len(idx_tr) + len(idx_va) + len(idx_te) == len(d_fs), \
    'idx_tr/idx_va/idx_te do not match the current dataset - re-run the train/valid/test split cell'

data_tr_fs = d_fs[idx_tr]
data_va_fs = d_fs[idx_va]
data_te_fs = d_fs[idx_te]

# Report the share of graphs that ended up in each subset
print(f'Training data: {np.round(len(data_tr_fs)/len(graphs_filt),2)*100}%')
print(f'Validation data: {np.round(len(data_va_fs)/len(graphs_filt),2)*100}%')
print(f'Test data: {np.round(len(data_te_fs)/len(graphs_filt),2)*100}%')

Training data: 64.0%
Validation data: 16.0%
Test data: 20.0%


In [20]:
# Configuration
learning_rate = 0.001  # Learning rate
epochs = 50  # Number of training epochs
es_patience = 10  # Patience for early stopping
batch_size = 256  # Batch size

# Data loaders
loader_tr = BatchLoader(data_tr_fs, batch_size=batch_size, epochs=epochs)
loader_va = BatchLoader(data_va_fs, batch_size=batch_size)
loader_te = BatchLoader(data_te_fs, batch_size=batch_size)

# Build model
class Net_1_7(Model):
    """Single-layer graph attention network with a sigmoid output head.

    Pipeline: GATConv (13 channels, ReLU) -> global average pooling ->
    Dense with sigmoid activation. The Dense width is read from the
    notebook-level dataset `d` (`d.n_labels`), so `d` must exist when the
    model is instantiated.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = GATConv(13, activation='relu')
        self.pool1 = GlobalAvgPool()
        self.dense = Dense(d.n_labels, activation="sigmoid")

    def call(self, inputs):
        """Run a forward pass on a `(node features, adjacency)` pair.

        Shapes below follow the original author's notes and are not
        checked here:
          node features: [n_batches, 5, n_features]
          adjacency:     [n_batches, 5, 5]
        """
        node_feats, adjacency = inputs

        # Attention convolution -> presumably [n_batches, 5, 13]
        conv_out = self.conv1([node_feats, adjacency])

        # Average over nodes -> presumably [n_batches, 13]
        pooled = self.pool1(conv_out)

        # Sigmoid head -> presumably [n_batches, d.n_labels]
        return self.dense(pooled)

# Instantiate, compile and train model 1.7
model_1_7 = Net_1_7()
optimizer = Adam(learning_rate=learning_rate)
loss_fn = BinaryCrossentropy()
model_1_7.compile(
    optimizer=optimizer,
    loss=loss_fn,
    metrics=['binary_accuracy'],
)

# Step counts come from the loaders themselves.
fit_log_1_7 = model_1_7.fit(
    loader_tr.load(),
    steps_per_epoch=loader_tr.steps_per_epoch,
    epochs=epochs,
    validation_data=loader_va.load(),
    validation_steps=loader_va.steps_per_epoch,
)

  np.random.shuffle(a)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
import plotly.graph_objects as go

# Pull the per-epoch accuracy curves out of the model 1.7 training log.
training_accuracy = fit_log_1_7.history['binary_accuracy']
validation_accuracy = fit_log_1_7.history['val_binary_accuracy']
# NOTE: this rebinds `epochs` (an integer in the configuration cell) to the
# list of 1-based epoch numbers used as the x axis.
epochs = list(range(1, len(training_accuracy) + 1))

# One line per curve, training first, then validation.
fig = go.Figure(data=[
    go.Scatter(x=epochs, y=training_accuracy, mode='lines', name='Training'),
    go.Scatter(x=epochs, y=validation_accuracy, mode='lines', name='Validation'),
])
fig.update_layout(template='none', xaxis_title='Epochs', yaxis_title="Accuracy")
fig.update_xaxes(tick0=0, dtick=1)
# fig.update_yaxes(range=[0.5,1])
# fig.show()

# Persist the figure and the underlying numbers side by side.
fig.write_image('../images/model_1_7_accuracy.png', scale=5)
accuracy_table = pd.DataFrame({
    'epoch': epochs,
    'training_accuracy': training_accuracy,
    'validation_accuracy': validation_accuracy,
})
accuracy_table.to_csv('../images/model_1_7_accuracy.csv', index=False)

### Model 1.8 Hyperparameter Sweep

In [11]:
# Hyperparameters for the model 1.8 sweep
batch_size = 256  # Batch size
epochs = 40  # Number of training epochs
learning_rate = 0.001  # Learning rate
es_patience = 10  # Patience for early stopping (not referenced by the training call in this cell)

# Batch loaders over the `data_tr` / `data_va` / `data_te` splits from an
# earlier cell; only the training loader is given an epoch count.
loader_tr = BatchLoader(
    data_tr,
    batch_size=batch_size,
    epochs=epochs,
)
loader_va = BatchLoader(
    data_va,
    batch_size=batch_size,
)
loader_te = BatchLoader(
    data_te,
    batch_size=batch_size,
)

# Build model
class Net_1_8(Model):
    """Single-layer graph attention network with a configurable width.

    Pipeline: GATConv (`channels` outputs, ReLU) -> global average pooling
    -> Dense with sigmoid activation. The Dense width is read from the
    notebook-level dataset `d` (`d.n_labels`), so `d` must exist when the
    model is instantiated.

    Args:
        channels: number of output channels passed to the GATConv layer.
    """

    def __init__(self, channels):
        super().__init__()
        self.conv1 = GATConv(channels, activation='relu')
        self.pool1 = GlobalAvgPool()
        self.dense = Dense(d.n_labels, activation="sigmoid")

    def call(self, inputs):
        """Run a forward pass on a `(node features, adjacency)` pair.

        Shapes below follow the original author's notes and are not
        checked here:
          node features: [n_batches, 5, n_features]
          adjacency:     [n_batches, 5, 5]
        """
        node_feats, adjacency = inputs

        # Attention convolution -> presumably [n_batches, 5, channels]
        conv_out = self.conv1([node_feats, adjacency])

        # Average over nodes -> presumably [n_batches, channels]
        pooled = self.pool1(conv_out)

        # Sigmoid head -> presumably [n_batches, d.n_labels]
        return self.dense(pooled)

# Sweep the number of GATConv output channels: train one Net_1_8 per value,
# export its accuracy curves to CSV and pickle the model and training log.
channels = [5,10,15,19,25]
for i, channel in enumerate(channels):
    # Build a fresh training loader for every model: the loader is created
    # with a finite `epochs`, so a loader shared across iterations would be
    # used up by the first model's training run.
    loader_tr = BatchLoader(data_tr, batch_size=batch_size, epochs=epochs)

    # Compile and train model
    model_1_8 = Net_1_8(channel)
    optimizer = Adam(learning_rate=learning_rate)
    loss_fn = BinaryCrossentropy()
    model_1_8.compile(optimizer=optimizer, loss=loss_fn, metrics=['binary_accuracy'])
    fit_log_1_8 = model_1_8.fit(
        loader_tr.load(),
        steps_per_epoch=loader_tr.steps_per_epoch,
        epochs=epochs,
        validation_data=loader_va.load(),
        validation_steps=loader_va.steps_per_epoch,
    )

    # Save training and validation results.
    # The x axis gets its own name so the integer `epochs` hyperparameter is
    # not overwritten with a list (which broke every iteration after the first).
    training_accuracy = fit_log_1_8.history['binary_accuracy']
    validation_accuracy = fit_log_1_8.history['val_binary_accuracy']
    epoch_numbers = list(range(1, len(training_accuracy)+1))
    pd.DataFrame({'epoch': epoch_numbers, 'training_accuracy':training_accuracy, 'validation_accuracy':validation_accuracy}).to_csv(f'../images/model_1_8_{i+1}_accuracy.csv', index=False)

    # Pickle model and training+validation log.
    # NOTE(review): the CSV above is numbered from 1 (i+1) while the pickles
    # are numbered from 0 (i); file names are kept as they were.
    # NOTE(review): pickling Keras objects depends on the installed
    # TensorFlow version — confirm these files can be loaded back.
    with open(f'../models/fit_log_1_8_{i}.pkl','wb') as filehandler:
        pickle.dump(fit_log_1_8, filehandler)
    with open(f'../models/model_1_8_{i}.pkl','wb') as filehandler:
        pickle.dump(model_1_8, filehandler)

  np.random.shuffle(a)


Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40

### Model 1.9 MMR ranges

In [None]:
# REMEMBER to choose optimal channels!

# Hyperparameters for the model 1.9 runs
batch_size = 256  # Batch size
epochs = 50  # Number of training epochs
learning_rate = 0.001  # Learning rate
es_patience = 10  # Patience for early stopping (not referenced by the training call in this cell)

# Build model
class Net_1_9(Model):
    """Single-layer graph attention network with a sigmoid output head.

    Pipeline: GATConv (19 channels, ReLU) -> global average pooling ->
    Dense with sigmoid activation. The Dense width is read from the
    notebook-level dataset `d` (`d.n_labels`), so `d` must exist when the
    model is instantiated.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = GATConv(19, activation='relu')
        self.pool1 = GlobalAvgPool()
        self.dense = Dense(d.n_labels, activation="sigmoid")

    def call(self, inputs):
        """Run a forward pass on a `(node features, adjacency)` pair.

        Shapes below follow the original author's notes and are not
        checked here:
          node features: [n_batches, 5, n_features]
          adjacency:     [n_batches, 5, 5]
        """
        node_feats, adjacency = inputs

        # Attention convolution -> presumably [n_batches, 5, 19]
        conv_out = self.conv1([node_feats, adjacency])

        # Average over nodes -> presumably [n_batches, 19]
        pooled = self.pool1(conv_out)

        # Sigmoid head -> presumably [n_batches, d.n_labels]
        return self.dense(pooled)

# Train one Net_1_9 per MMR group: filter the graphs for the group, split
# 70/30 into train/validation, fit, then export accuracy curves and pickles.
mmrs = [1,2,3,4,5,6]

for group in mmrs:
    # Filter data for current mmr group
    filt = df_filters['filt_std'].values & df_filters[f'filt_mmr_{group}'].values
    step = 50000
    filt_vals = []
    for chunk in range(0,int(np.ceil(len(filt)/step))):
        start = chunk*step
        end = start+step
        # Each chunk of the filter is appended twice.
        # NOTE(review): presumably every 50000-row chunk of matches maps to two
        # consecutive runs of graphs in `graphs` — confirm against the cell
        # that built the graph files.
        filt_vals = np.append(filt_vals, filt[start:end])
        filt_vals = np.append(filt_vals, filt[start:end])

    # Get indices of True values in filters (vectorised; plain list of ints)
    filt_idx = np.flatnonzero(filt_vals).tolist()

    # Select subset of graphs
    graphs_filt_mmr = graphs[filt_idx]

    # Train/valid/test split (the test part is empty by construction)
    d = graphs_filt_mmr
    np.random.seed(10)
    idxs = np.random.permutation(len(d))
    split_va, split_te = int(0.7 * len(d)), int(len(d)) #70% training, 30% validation
    idx_tr, idx_va, idx_te = np.split(idxs, [split_va, split_te])
    data_tr = d[idx_tr]
    data_va = d[idx_va]

    # Data loaders
    loader_tr = BatchLoader(data_tr, batch_size=batch_size, epochs=epochs)
    loader_va = BatchLoader(data_va, batch_size=batch_size)

    # Compile and train model
    model_1_9 = Net_1_9()
    optimizer = Adam(learning_rate=learning_rate)
    loss_fn = BinaryCrossentropy()
    model_1_9.compile(optimizer=optimizer, loss=loss_fn, metrics=['binary_accuracy'])
    fit_log_1_9 = model_1_9.fit(
        loader_tr.load(),
        steps_per_epoch=loader_tr.steps_per_epoch,
        epochs=epochs,
        validation_data=loader_va.load(),
        validation_steps=loader_va.steps_per_epoch,
    )

    # Save training and validation results.
    # The x axis gets its own name so the integer `epochs` hyperparameter is
    # not overwritten with a list (which broke every group after the first).
    training_accuracy = fit_log_1_9.history['binary_accuracy']
    validation_accuracy = fit_log_1_9.history['val_binary_accuracy']
    epoch_numbers = list(range(1, len(training_accuracy)+1))
    pd.DataFrame({'epoch': epoch_numbers, 'training_accuracy':training_accuracy, 'validation_accuracy':validation_accuracy}).to_csv(f'../images/model_1_9_{group}_accuracy.csv', index=False)

    # Pickle model and training+validation log, keyed by MMR group. The
    # previous `{i}` was the leftover chunk index from the filter loop, so
    # every group wrote to the same two files.
    # NOTE(review): pickling Keras objects depends on the installed
    # TensorFlow version — confirm these files can be loaded back.
    with open(f'../models/fit_log_1_9_{group}.pkl','wb') as filehandler:
        pickle.dump(fit_log_1_9, filehandler)
    with open(f'../models/model_1_9_{group}.pkl','wb') as filehandler:
        pickle.dump(model_1_9, filehandler)

### Extra useful code

In [117]:
# Evaluate on the test loader. `model` is not defined in the surrounding
# cells, so it must already be bound to a compiled model with an accuracy
# metric; `loss` holds [loss, accuracy].
loss = model.evaluate(loader_te.load(), steps=loader_te.steps_per_epoch)
# Scale to a percentage before rounding so float noise (e.g.
# 57.99999999999999) is not printed.
print(f"Test accuracy: {np.round(loss[1]*100,2)}%")

Test accuracy: 50.9%


  np.random.shuffle(a)


In [141]:
# Evaluate on the validation split with a dedicated loader. `model` is not
# defined in the surrounding cells, so it must already be bound to a
# compiled model with an accuracy metric; `loss` holds [loss, accuracy].
loader_single = BatchLoader(data_va, batch_size=128)
loss = model.evaluate(loader_single.load(), steps=loader_single.steps_per_epoch)
# Scale to a percentage before rounding so float noise (e.g.
# 57.99999999999999) is not printed.
print(f"Validation accuracy: {np.round(loss[1]*100,2)}%")

Test accuracy: 57.99999999999999%


  np.random.shuffle(a)
