In [20]:
# Install the needed packages
%pip install pathlib pandas numpy tensorflow tensorboard

Note: you may need to restart the kernel to use updated packages.


In [21]:
# Import packages.
import os
import shutil
import signal
import subprocess
import tempfile
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorboard as tb

In [22]:
# Define parameters.

# --- Experiment identity and file locations ---
TITLE = 'activation_with_teams'
LOGS_PATH = '../logs'
DATA_PATH = '../data/own_data_preparation.csv'

# --- Data set layout ---
RESULT_COLUMN = 'result_team1'
DUMMY_COLUMNS = ['team1', 'team2']
# The (misspelled) constant name is kept because other cells reference it.
# NOTE(review): 'squard' presumably mirrors the CSV column header - confirm before renaming.
NORMAILZE_COLUMNS = [
    'points',
    'squard',
    'average_age',
    'average_market_value_in_euro',
    'total_market_value_in_euro',
    'rank_last_season',
    'points_last_season_all',
    'points_last_season',
]
WITH_TEAMS = True
VALIDATION_SIZE = 0.2

# --- Model and training ---
ACTIVATION_FUNCTIONS = [
    'sigmoid',
    'tanh',
    'relu',
    'relu6',
    'softsign',
    'linear',
]
LOSS_FUNCTION = 'sparse_categorical_crossentropy'
OPTIMIZER_FUNCTION = 'Adam'
METRICS = 'sparse_categorical_accuracy'
EPOCHS = 300
BATCH_SIZE = 32

# --- TensorBoard server ---
TENSORBOARD_PROCESS = 'tensorboard'
TENSORBOARD_SERVER = 'localhost'
TENSORBOARD_PORT = 6008

# Every experiment writes into its own sub folder of the log root.
LOGS_PATH = os.path.join(LOGS_PATH, TITLE)

In [23]:
# Set up tensorboard.
%load_ext tensorboard
logs = Path(LOGS_PATH)
logs.mkdir(mode=0o777, parents=True, exist_ok=True)

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [24]:
# Load the data set.
data = pd.read_csv(DATA_PATH, delimiter=',', decimal='.')

# Create the difference between team1 and team2 for each feature.
for column in NORMAILZE_COLUMNS:
    data[column] = data[column + '_team1'] - data[column + '_team2']

# Normalize each difference column by its maximum value (vectorized, column-wise).
# NOTE(review): the differences are signed, so dividing by max() does not bound
# the negative side to -1; dividing by the max absolute value may be what is
# intended - confirm before changing, as it alters the model inputs.
data[NORMAILZE_COLUMNS] = data[NORMAILZE_COLUMNS] / data[NORMAILZE_COLUMNS].max()

# Get the wanted columns and create dummy columns for the teams.
if WITH_TEAMS:
    data = data.loc[:, DUMMY_COLUMNS + NORMAILZE_COLUMNS + [RESULT_COLUMN]]
    # Request a numeric dtype explicitly: pandas >= 2.0 defaults to bool dummy
    # columns, which turn the mixed frame into an object array on to_numpy()
    # and cannot be fed to Keras.
    data = pd.get_dummies(data, columns=DUMMY_COLUMNS, dtype='uint8')
else:
    data = data.loc[:, NORMAILZE_COLUMNS + [RESULT_COLUMN]]

# Drop rows with nan values.
data = data.dropna()

# Shuffle, then split the data set: the first VALIDATION_SIZE share of the
# shuffled rows is used for validation, the rest for training.
split_index = int(len(data) * VALIDATION_SIZE)
data = data.sample(frac=1)
data_train = data[split_index:]
data_valid = data[:split_index]

# Separate the features (par_*) from the target column (res_*).
# Each frame is reduced using its own columns (the validation frame was
# previously masked with the training frame's columns).
par_train = data_train.drop(columns=[RESULT_COLUMN])
res_train = data_train.loc[:, [RESULT_COLUMN]]
par_valid = data_valid.drop(columns=[RESULT_COLUMN])
res_valid = data_valid.loc[:, [RESULT_COLUMN]]

# Names of the one-hot encoded team columns; empty when teams are not used so
# the name is always defined.
dummies = [key for key in par_train.keys() if 'team' in key] if WITH_TEAMS else []

data.head()

Unnamed: 0,points,squard,average_age,average_market_value_in_euro,total_market_value_in_euro,rank_last_season,points_last_season_all,points_last_season,result_team1,team1_1. FC Nürnberg,...,team2_SC Freiburg,team2_SC Paderborn,team2_SV Darmstadt 98,team2_Schalke 04,team2_SpVgg Greuther Fürth,team2_TSG Hoffenheim,team2_VfB Stuttgart,team2_VfL Bochum,team2_VfL Wolfsburg,team2_Werder Bremen
16,-0.017857,0.333333,-0.046512,-0.003854,0.010333,-0.45,0.147727,0.106557,1.0,0,...,0,0,0,0,0,1,0,0,0,0
705,-0.142857,-0.111111,0.418605,-0.067116,-0.087855,0.45,-0.488636,-0.064083,2.0,0,...,0,0,0,0,0,0,0,0,0,0
160,-0.160714,0.444444,-0.209302,-0.262042,-0.265767,0.2,-0.318182,-0.025501,1.0,0,...,0,0,0,0,0,0,0,0,0,0
465,-0.196429,-0.166667,0.348837,-0.033719,-0.050782,0.2,-0.193182,-0.015483,1.0,0,...,0,0,0,0,0,1,0,0,0,0
1575,-0.267857,0.277778,-0.325581,-1.0,-0.907312,0.95,-0.886364,-0.213115,0.0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
# Convert the target data frames to numpy arrays.
# np.asarray accepts both a DataFrame and an ndarray, so this cell can be
# re-run safely; calling .to_numpy() a second time would fail because the
# names are already bound to ndarrays.
res_train = np.asarray(res_train)
res_valid = np.asarray(res_valid)

# Sanity check: features and targets must have matching row counts.
print(par_train.shape)
print(res_train.shape)
print(par_valid.shape)
print(res_valid.shape)

(1347, 62)
(1347, 1)
(336, 62)
(336, 1)


In [26]:
def defineModel(input_dim, activation, dropout_rate=0.1, num_classes=3):
    """Build and compile a single-hidden-layer softmax classifier.

    The hidden layer has ``input_dim * 2 - 1`` units and is followed by a
    dropout layer and a softmax output layer.

    Args:
        input_dim: Number of input features per sample.
        activation: Activation function of the hidden layer (any identifier
            accepted by ``tf.keras.layers.Dense``).
        dropout_rate: Dropout rate applied after the hidden layer.
        num_classes: Number of output units of the softmax layer.

    Returns:
        The compiled ``tf.keras`` Sequential model, using the module-level
        LOSS_FUNCTION, OPTIMIZER_FUNCTION and METRICS settings.
    """
    # Define the model. The input shape is declared with an explicit Input
    # object instead of the deprecated `input_dim` layer argument.
    nodes = input_dim * 2 - 1
    model = tf.keras.models.Sequential([
        tf.keras.Input(shape=(input_dim,)),
        tf.keras.layers.Dense(nodes, activation=activation),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(num_classes, activation='softmax')
    ])

    # Compile the model.
    model.compile(loss=LOSS_FUNCTION, optimizer=OPTIMIZER_FUNCTION, metrics=[METRICS])

    return model


In [27]:
def runModel(model, x_train, x_valid, y_train, y_valid, title):
    """Train ``model`` and log the run for TensorBoard.

    Args:
        model: Compiled Keras model to train.
        x_train: Training features.
        x_valid: Validation features.
        y_train: Training targets.
        y_valid: Validation targets.
        title: Name of the sub folder below LOGS_PATH that receives the logs
            of this run.

    Returns:
        The value returned by ``model.fit`` for this training run.
    """
    # Each run logs into its own directory so runs show up separately.
    run_log_dir = os.path.join(LOGS_PATH, title)
    callbacks = [
        tf.keras.callbacks.TensorBoard(log_dir=run_log_dir, histogram_freq=1),
    ]

    # Training settings come from the module-level constants.
    fit_options = dict(
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_data=(x_valid, y_valid),
        callbacks=callbacks,
        verbose=0,
    )

    return model.fit(x=x_train, y=y_train, **fit_options)

In [28]:
# Convert the feature frames to float arrays. Forcing the dtype guards against
# an object array when the frame mixes float columns with bool/uint8 dummy
# columns, which Keras cannot convert to a tensor.
sel_train = par_train.to_numpy(dtype=float)
sel_valid = par_valid.to_numpy(dtype=float)

# Train one freshly built model per activation function; each run is logged
# under the activation's name.
for activation in ACTIVATION_FUNCTIONS:
    model = defineModel(sel_train.shape[1], activation)
    runModel(model, sel_train, sel_valid, res_train, res_valid, activation)

In [29]:
# Kill the existing tensorboard process and delete the tensorflow temp folder. After this start a new tensorboard process.
try:    
    # Iterating through each instance of the process.
    for line in os.popen("ps ax | grep " + TENSORBOARD_PROCESS + " | grep -v grep"):
        fields = line.split()
            
        # Extracting Process ID from the output.
        pid = fields[0]
            
        # Terminating process.
        os.kill(int(pid), signal.SIGKILL)

    # Delete tensorboard temp folder.
    tb_temp_folder = os.path.join(tempfile.gettempdir(), '.tensorboard-info')
    os.system("rm -rf "+tb_temp_folder)
    print("Process Successfully terminated") 
except Exception as e:
    print(e)

%tensorboard --logdir $LOGS_PATH --host $TENSORBOARD_SERVER --port $TENSORBOARD_PORT

Process Successfully terminated
