# Modelling sample

In [1]:
from importlib import reload
import pandas as pd
import numpy as np
from spektral.data import Dataset, Graph, BatchLoader
import dataset
reload(dataset)
import pickle
import os

from spektral.layers import GATConv, GCNConv, GlobalAvgPool, GlobalMaxPool, GlobalSumPool, GlobalAttentionPool
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import binary_accuracy
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import keras
import random

In [2]:
# Load graph dataset
path = '../data/graphs_v1_scaled/graphs_v1_scaled_0-49999.pkl'
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load dataset files that come from a trusted source.
# The `with` block closes the handle as soon as the dataset is read, instead
# of leaving it open for the lifetime of the kernel.
with open(path, 'rb') as file:
    graphs = pickle.load(file)
    

In [None]:
# Remove attack_backswing feature
# Keep node-feature columns 0-13 and 15-19; column 14 (attack_backswing) is dropped.
kept_columns = list(range(14)) + list(range(15, 20))
for graph_idx in range(len(graphs)):
    graphs[graph_idx].x = graphs[graph_idx].x[:, kept_columns]
print('Attack backswing feature removed')

In [None]:
# Train/valid/test split
# Seed NumPy's global RNG before drawing the permutation of graph indices.
np.random.seed(10)
idxs = np.random.permutation(len(graphs))

# Cut points in the shuffled index array:
# first 64% -> training, next 16% -> validation, remaining 20% -> test.
split_va = int(0.64 * len(graphs))
split_te = int(0.8 * len(graphs))
idx_tr, idx_va, idx_te = np.split(idxs, [split_va, split_te])
data_tr, data_va, data_te = graphs[idx_tr], graphs[idx_va], graphs[idx_te]

# Report the share of the dataset that ended up in each subset.
print(f'Training data: {np.round(len(data_tr)/len(graphs),2)*100}%')
print(f'Validation data: {np.round(len(data_va)/len(graphs),2)*100}%')
print(f'Test data: {np.round(len(data_te)/len(graphs),2)*100}%')

In [None]:
# Configuration
# Learning rate passed to the optimizer
learning_rate = 0.001
# Number of training epochs
epochs = 50
# Number of graphs per batch
batch_size = 256

# Data loaders
# Only the training loader is given an epoch count; the validation and test
# loaders are created without one.
loader_tr = BatchLoader(data_tr, batch_size=batch_size, epochs=epochs)
loader_va, loader_te = (
    BatchLoader(subset, batch_size=batch_size) for subset in (data_va, data_te)
)

# Build model
class Net_sample(Model):
    """Sample graph classifier: GCNConv -> Flatten -> sigmoid Dense.

    The convolution uses 19 channels with ReLU activation. The width of the
    output layer is read from the module-level ``graphs`` object
    (``graphs.n_labels``) at construction time.
    """

    def __init__(self):
        """Create the three layers used by ``call``."""
        super().__init__()
        self.conv1 = GCNConv(19, activation='relu')
        self.flatten = Flatten()
        self.dense = Dense(graphs.n_labels, activation="sigmoid")

    def call(self, inputs):
        """Run the forward pass.

        ``inputs`` is unpacked into two items that are passed together to the
        convolution layer -- presumably node features and adjacency matrix;
        TODO confirm against the loader output.
        """
        node_features, adjacency = inputs
        hidden = self.conv1([node_features, adjacency])
        return self.dense(self.flatten(hidden))

# Train model
model_sample = Net_sample()
optimizer = Adam(learning_rate=learning_rate)
loss_fn = BinaryCrossentropy()
model_sample.compile(optimizer=optimizer, loss=loss_fn, metrics=['binary_accuracy'])
fit_log_sample = model_sample.fit(
    loader_tr.load(),
    steps_per_epoch=loader_tr.steps_per_epoch,
    epochs=epochs,
    validation_data=loader_va.load(),
    validation_steps=loader_va.steps_per_epoch,
)

# Save training record
# One row per epoch: epoch number, training accuracy, validation accuracy.
training_accuracy = fit_log_sample.history['binary_accuracy']
validation_accuracy = fit_log_sample.history['val_binary_accuracy']
# NOTE: from here on `epochs` is the list of 1-based epoch numbers, no longer
# the integer epoch count set in the configuration section.
epochs = list(range(1, len(training_accuracy) + 1))
pd.DataFrame(
    {
        'epoch': epochs,
        'training_accuracy': training_accuracy,
        'validation_accuracy': validation_accuracy,
    }
).to_csv('../models/fit_records/model_sample_accuracy.csv', index=False)

In [None]:
# Optional: save model
# Writes the trained model to ../models/model_sample.tf using the 'tf' save format.
model_sample.save(
    '../models/model_sample.tf',
    save_format='tf',
)