In [None]:
# Install the needed packages
%pip install pathlib pandas numpy tensorflow tensorboard

In [None]:
# Import packages.
import os
import shutil
import signal
import tempfile
from itertools import combinations
from pathlib import Path

import numpy as np
import pandas as pd
import tensorboard as tb
import tensorflow as tf

In [None]:
# Define parameters.

# Name of this experiment; it is also the sub-folder of the log directory used for its runs.
TITLE = 'column_selection'

# Input file and log directory.
DATA_PATH = '../data/own_data_preparation.csv'
LOGS_PATH = os.path.join('../logs', TITLE)

# Column layout of the data set.
RESULT_COLUMN = 'result_team1'
DUMMY_COLUMNS = ['team1', 'team2']
# Feature base names; the data cell reads `<name>_team1` and `<name>_team2` for each of them.
# NOTE: the constant name is misspelled ("NORMAILZE") but is kept because other cells use it.
NORMAILZE_COLUMNS = [
    'points',
    'squard',
    'average_age',
    'average_market_value_in_euro',
    'total_market_value_in_euro',
    'rank_last_season',
    'points_last_season_all',
    'points_last_season',
]

# Fraction of the rows that is held out for validation.
VALIDATION_SIZE = 0.2

# Training configuration.
LOSS_FUNCTION = 'sparse_categorical_crossentropy'
OPTIMIZER_FUNCTION = 'Adam'
METRICS = 'sparse_categorical_accuracy'
EPOCHS = 100
BATCH_SIZE = 32

# Smallest number of feature columns used when building column combinations.
MIN_COLUMNS = 1

# TensorBoard process name (used to find running instances) and server address.
TENSORBOARD_PROCESS = 'tensorboard'
TENSORBOARD_SERVER = 'localhost'
TENSORBOARD_PORT = 6008

In [None]:
# Set up tensorboard.
%load_ext tensorboard
logs = Path(LOGS_PATH)
logs.mkdir(mode=0o777, parents=True, exist_ok=True)

In [None]:
# Load the data set.
data = pd.read_csv(DATA_PATH, delimiter=',', decimal='.')

# Create the difference between team1 and team2 for each feature.
for column in NORMAILZE_COLUMNS:
    data[column] = data[column + '_team1'] - data[column + '_team2']

# Normalize each difference column by its maximum value (vectorized division).
# NOTE(review): the columns hold signed differences, so dividing by the maximum does not bound
# negative values to -1 -- confirm whether scaling by the absolute maximum is wanted instead.
for column in NORMAILZE_COLUMNS:
    max_value = data[column].max()
    # A zero or missing maximum would only produce inf/NaN values, so such a column is left as is.
    if pd.notna(max_value) and max_value != 0:
        data[column] = data[column] / max_value

# Get the wanted columns and create dummy columns for the teams.
# dtype=float keeps the dummies numeric: pandas >= 2.0 creates bool dummies by default, and a frame
# that mixes bool and float columns converts to an object array which Keras cannot use as input.
data = data.loc[:, DUMMY_COLUMNS + NORMAILZE_COLUMNS + [RESULT_COLUMN]]
data = pd.get_dummies(data, columns=DUMMY_COLUMNS, dtype=float)

# Drop rows with nan values.
data = data.dropna()

# Shuffle with a fixed seed so that the train/validation split is identical on every run.
SPLIT_SEED = 42
split_index = int(len(data) * VALIDATION_SIZE)
data = data.sample(frac=1, random_state=SPLIT_SEED)
data_train = data[split_index:]
data_valid = data[:split_index]

# Separate the features (par_*) from the result column (res_*); each mask is built from its own frame.
par_train = data_train.loc[:, data_train.columns != RESULT_COLUMN]
res_train = data_train.loc[:, [RESULT_COLUMN]]
par_valid = data_valid.loc[:, data_valid.columns != RESULT_COLUMN]
res_valid = data_valid.loc[:, [RESULT_COLUMN]]

# Names of the dummy columns: get_dummies names them '<source column>_<value>'.
dummy_prefixes = tuple(column + '_' for column in DUMMY_COLUMNS)
dummies = [key for key in par_train.keys() if key.startswith(dummy_prefixes)]

data.head()

In [None]:
# Convert the result data frames to numpy arrays.
# np.asarray returns an existing array unchanged, so this cell can be re-run safely; calling
# `.to_numpy()` a second time would fail because the names then already refer to arrays.
res_train = np.asarray(res_train)
res_valid = np.asarray(res_valid)

# Show the shapes of the feature frames and result arrays as a quick sanity check.
print(par_train.shape)
print(res_train.shape)
print(par_valid.shape)
print(res_valid.shape)

In [None]:
def defineModel(input_dim):
    """Build and compile a small feed-forward classifier.

    The network has one tanh hidden layer with ``2 * input_dim - 1`` units, a dropout of 0.1 and a
    3-unit softmax output. It is compiled with LOSS_FUNCTION, OPTIMIZER_FUNCTION and METRICS.

    Args:
        input_dim: Number of input features per sample.

    Returns:
        The compiled ``tf.keras`` Sequential model.
    """
    # This function is called once per column combination. Keras keeps global state for every
    # model it builds, so that state is released first to stop memory from growing over the runs.
    tf.keras.backend.clear_session()

    # Define the model; the input shape is declared with an explicit Input object instead of the
    # deprecated `input_dim` layer argument.
    nodes = input_dim * 2 - 1
    model = tf.keras.models.Sequential([
        tf.keras.Input(shape=(input_dim,)),
        tf.keras.layers.Dense(nodes, activation='tanh'),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Dense(3, activation='softmax')
    ])

    # Compile the model.
    model.compile(loss=LOSS_FUNCTION, optimizer=OPTIMIZER_FUNCTION, metrics=[METRICS])

    return model


In [None]:
def runModel(model, x_train, x_valid, y_train, y_valid, title):
    """Train `model` and write TensorBoard logs to the folder LOGS_PATH/<title>.

    Args:
        model: Compiled Keras model to train.
        x_train: Training features.
        x_valid: Validation features.
        y_train: Training results.
        y_valid: Validation results.
        title: Name of the run; used as the log sub-folder.

    Returns:
        The object returned by ``model.fit``.
    """
    # TensorBoard callback writing to this run's own folder, with histograms every epoch.
    run_log_dir = os.path.join(LOGS_PATH, title)
    callbacks = [tf.keras.callbacks.TensorBoard(log_dir=run_log_dir, histogram_freq=1)]

    # Train silently for the configured number of epochs and hand back the result of fit().
    return model.fit(
        x=x_train,
        y=y_train,
        validation_data=(x_valid, y_valid),
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        verbose=0,
        callbacks=callbacks,
    )

In [None]:
# Train one model per combination of feature columns, without the team columns.
# The combination sizes run from MIN_COLUMNS up to one less than the number of feature columns.
list_combinations = [
    subset
    for size in range(MIN_COLUMNS, len(NORMAILZE_COLUMNS))
    for subset in combinations(NORMAILZE_COLUMNS, size)
]

for columns in map(list, list_combinations):
    # Select the columns of this combination and convert them to arrays.
    sel_train = par_train.loc[:, columns].to_numpy()
    sel_valid = par_valid.loc[:, columns].to_numpy()

    # Run name: number of columns, the column names and a suffix.
    name = f"{len(columns)}_{'--'.join(columns)}_without_teams"
    runModel(defineModel(sel_train.shape[1]), sel_train, sel_valid, res_train, res_valid, name)

In [None]:
# Train one model per combination of feature columns, each time together with the team columns.
# The combination sizes run from MIN_COLUMNS up to one less than the number of feature columns.
list_combinations = []
for i in range(MIN_COLUMNS, len(NORMAILZE_COLUMNS)):
    list_combinations += combinations(NORMAILZE_COLUMNS, i)

for columns in list_combinations:
    columns = list(columns)
    sel_train = par_train.loc[:, dummies + columns]
    sel_valid = par_valid.loc[:, dummies + columns]

    # Convert explicitly to float: a frame mixing bool dummy columns (the pandas >= 2.0 default)
    # with float columns would otherwise become an object array that Keras cannot use.
    sel_train = sel_train.to_numpy(dtype=float)
    sel_valid = sel_valid.to_numpy(dtype=float)

    name = str(len(columns))+'_'+'--'.join(columns)+'_with_teams'
    runModel(defineModel(sel_train.shape[1]), sel_train, sel_valid, res_train, res_valid, name)

In [None]:
# Train a model on the team dummy columns only.
sel_train = par_train.loc[:, dummies].to_numpy()
sel_valid = par_valid.loc[:, dummies].to_numpy()

runModel(
    defineModel(sel_train.shape[1]),
    sel_train,
    sel_valid,
    res_train,
    res_valid,
    '0_only_teams',
)

In [None]:
# Train a model on all feature columns, leaving out the team dummy columns.
sel_train = par_train.loc[:, NORMAILZE_COLUMNS].to_numpy()
sel_valid = par_valid.loc[:, NORMAILZE_COLUMNS].to_numpy()

runModel(
    defineModel(sel_train.shape[1]),
    sel_train,
    sel_valid,
    res_train,
    res_valid,
    '0_only_not_teams',
)

In [None]:
# All columns (team dummies and feature columns).
# Convert explicitly to float: a frame mixing bool dummy columns (the pandas >= 2.0 default) with
# float columns would otherwise become an object array that Keras cannot use.
sel_train = par_train.to_numpy(dtype=float)
sel_valid = par_valid.to_numpy(dtype=float)

runModel(defineModel(sel_train.shape[1]), sel_train, sel_valid, res_train, res_valid, '0_all_columns')

In [None]:
# Kill the existing tensorboard processes and delete the tensorboard temp folder.
# After this a new tensorboard process is started.
try:
    # List the matching processes; the context manager closes the pipe once it has been read.
    with os.popen("ps ax | grep " + TENSORBOARD_PROCESS + " | grep -v grep") as process_list:
        process_lines = process_list.readlines()

    own_pid = os.getpid()
    killed = 0
    for line in process_lines:
        fields = line.split()

        # The process ID is the first field of each `ps ax` line; skip anything else.
        if not fields or not fields[0].isdigit():
            continue
        pid = int(fields[0])

        # Never kill the process this notebook runs in, even if its command line matches.
        if pid == own_pid:
            continue

        # Handle each process on its own so one failure does not stop the rest of the cleanup.
        try:
            os.kill(pid, signal.SIGKILL)
            killed += 1
        except ProcessLookupError:
            # The process ended between listing and killing; nothing left to do.
            pass
        except PermissionError as e:
            print(e)

    # Delete tensorboard temp folder without going through the shell; a missing folder is fine.
    tb_temp_folder = os.path.join(tempfile.gettempdir(), '.tensorboard-info')
    shutil.rmtree(tb_temp_folder, ignore_errors=True)
    print("Terminated {} tensorboard process(es)".format(killed))
except OSError as e:
    # Best effort: report the problem and still let the notebook start tensorboard.
    print(e)

%tensorboard --logdir $LOGS_PATH --host $TENSORBOARD_SERVER --port $TENSORBOARD_PORT