<a href="https://colab.research.google.com/github/DirkStulgies/mlwtSportsPrediction/blob/main/playground/own_data_column_selection.ipynb"
 target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import packages.
import os
import shutil
import signal
import subprocess
import tempfile
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorboard as tb

2022-01-08 21:17:36.795656: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-01-08 21:17:36.795806: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
# Define parameters.

# Data set: location, target column and the columns that get pre-processed.
DATA_PATH = '../data/own_data_preparation_dirk.csv'
RESULT_COLUMN = 'result_team1'
DUMMY_COLUMNS = ['team1', 'team2']
NORMAILZE_COLUMNS = [
    'points',
    'squard',
    'average_age',
    'average_market_value_in_euro',
    'total_market_value_in_euro',
    'rank_last_season',
    'points_last_season_all',
    'points_last_season',
]
VALIDATION_SIZE = 0.2

# Training: loss, optimizer, metric and the fit schedule.
LOSS_FUNCTION = 'sparse_categorical_crossentropy'
OPTIMIZER_FUNCTION = 'Adam'
METRICS = 'sparse_categorical_accuracy'
EPOCHS = 500
BATCH_SIZE = 32
TITLE = 'net'

# TensorBoard: process name used for clean-up and the address it is served on.
TENSORBOARD_PROCESS = 'tensorboard'
TENSORBOARD_SERVER = 'localhost'
TENSORBOARD_PORT = 6008

# All runs of this notebook log into a sub-folder of '../logs' named after TITLE.
LOGS_PATH = os.path.join('../logs', TITLE)

In [3]:
# Set up tensorboard.
%load_ext tensorboard
logs = Path(LOGS_PATH)
logs.mkdir(mode=0o777, parents=True, exist_ok=True)

In [4]:
# Load the data set and drop rows with nan values.
data = pd.read_csv(DATA_PATH, delimiter=',', decimal='.')
data = data.dropna()

# For each feature, build the difference between team1 and team2 and scale it
# by the maximum of that difference (vectorized, no per-element lambda).
# NOTE(review): the differences can be negative, so dividing by the maximum does
# not guarantee values in [-1, 1]; a maximum of 0 yields inf/NaN. Confirm whether
# scaling by the absolute maximum is what is actually wanted.
for column in NORMAILZE_COLUMNS:
    difference = data[column + '_team1'] - data[column + '_team2']
    data[column] = difference / difference.max()

# Get the wanted columns and create dummy columns for the teams.
data = data.loc[:, DUMMY_COLUMNS + NORMAILZE_COLUMNS + [RESULT_COLUMN]]
data = pd.get_dummies(data, columns=DUMMY_COLUMNS)

# Shuffle with a fixed seed so that Restart & Run All reproduces the same split,
# then cut off the first VALIDATION_SIZE share of the rows for validation.
SHUFFLE_SEED = 42
data = data.sample(frac=1, random_state=SHUFFLE_SEED)
split_index = int(len(data) * VALIDATION_SIZE)
data_valid = data.iloc[:split_index]
data_train = data.iloc[split_index:]

# Separate features (par_*) from the label column (res_*). Each frame drops the
# label from its own columns instead of borrowing the mask of the other frame.
par_train = data_train.drop(columns=[RESULT_COLUMN])
res_train = data_train.loc[:, [RESULT_COLUMN]]
par_valid = data_valid.drop(columns=[RESULT_COLUMN])
res_valid = data_valid.loc[:, [RESULT_COLUMN]]

# Names of the feature columns that contain 'team' (the dummy columns created above).
dummies = [key for key in par_train.keys() if 'team' in key]

data.head()

Unnamed: 0,points,squard,average_age,average_market_value_in_euro,total_market_value_in_euro,rank_last_season,points_last_season_all,points_last_season,result_team1,team1_1. FC Union Berlin,...,team2_Hertha Berlin,team2_Mainz,team2_RB Leipzig,team2_SC Freiburg,team2_SV Darmstadt 98,team2_Schalke 04,team2_TSG Hoffenheim,team2_VfB Stuttgart,team2_VfL Wolfsburg,team2_Werder Bremen
1340,-0.035714,0.058824,0.147059,-0.016263,-0.016432,0.6,-0.230769,-0.04,2,0,...,1,0,0,0,0,0,0,0,0,0
2442,0.107143,-1.0,0.117647,0.113176,0.06374,0.066667,-0.038462,-0.008,2,0,...,0,0,0,0,0,0,0,0,0,0
5201,0.25,-0.294118,0.411765,-0.177564,-0.26027,0.066667,0.0,0.0,1,1,...,1,0,0,0,0,0,0,0,0,0
1032,0.035714,-0.235294,-0.558824,0.09459,0.106401,-0.533333,0.326923,0.010625,1,0,...,0,0,0,0,0,0,0,0,0,0
1467,0.071429,-0.117647,0.117647,0.024892,0.024525,-0.533333,0.211538,0.022,2,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Convert the label data frames to numpy arrays.
# np.asarray accepts a DataFrame as well as an ndarray, so this cell can be
# re-run after the conversion already happened (DataFrame.to_numpy would raise
# AttributeError on the second run because an ndarray has no such method).
res_train = np.asarray(res_train)
res_valid = np.asarray(res_valid)

# Show the shapes of features and labels for both splits.
print(par_train.shape)
print(res_train.shape)
print(par_valid.shape)
print(res_valid.shape)

(4777, 54)
(4777, 1)
(1194, 54)
(1194, 1)


In [6]:
def defineModelDefault(input_dim, optimizer, loss):
    """Build and compile a classifier with one hidden layer.

    Layer stack: Dense(2 * input_dim - 1, relu) -> Dropout(0.2) -> Dense(3, softmax).

    Args:
        input_dim: Number of input features; also determines the hidden layer width.
        optimizer: Optimizer handed to ``model.compile``.
        loss: Loss handed to ``model.compile``.

    Returns:
        The compiled Sequential model; it tracks the module-level ``METRICS`` metric.
    """
    layers = tf.keras.layers
    hidden_units = 2 * input_dim - 1

    model = tf.keras.models.Sequential()
    model.add(layers.Dense(hidden_units, activation='relu', input_dim=input_dim))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(3, activation='softmax'))

    model.compile(loss=loss, optimizer=optimizer, metrics=[METRICS])
    return model


In [7]:
def defineModelDecrease(input_dim, optimizer, loss):
    """Build and compile a classifier with two hidden layers of decreasing width.

    Layer stack: Dense(n1, relu) -> Dropout(0.2) -> Dense(n2, relu) -> Dropout(0.2)
    -> Dense(3, softmax), with ``n1 = 2 * input_dim - 1`` and ``n2 = int((n1 - 3) / 2)``.

    Args:
        input_dim: Number of input features; also determines both hidden widths.
        optimizer: Optimizer handed to ``model.compile``.
        loss: Loss handed to ``model.compile``.

    Returns:
        The compiled Sequential model; it tracks the module-level ``METRICS`` metric.
    """
    layers = tf.keras.layers
    first_width = 2 * input_dim - 1
    second_width = int((first_width - 3) / 2)

    model = tf.keras.models.Sequential()
    model.add(layers.Dense(first_width, activation='relu', input_dim=input_dim))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(second_width, activation='relu'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(3, activation='softmax'))

    model.compile(loss=loss, optimizer=optimizer, metrics=[METRICS])
    return model


In [8]:
def defineModelLstm(input_dim, optimizer, loss):
    """Build and compile a classifier that starts with an LSTM layer.

    Layer stack: LSTM(n, return_sequences=True) -> Dropout(0.2) -> Dense(n, relu)
    -> Dropout(0.2) -> Dense(3, softmax), with ``n = 2 * input_dim - 1``.

    ``input_dim`` is only used to size the layers; no input shape is declared on
    the first layer.

    Args:
        input_dim: Number of input features; determines the layer width ``n``.
        optimizer: Optimizer handed to ``model.compile``.
        loss: Loss handed to ``model.compile``.

    Returns:
        The compiled Sequential model; it tracks the module-level ``METRICS`` metric.
    """
    layers = tf.keras.layers
    width = 2 * input_dim - 1

    model = tf.keras.models.Sequential()
    model.add(layers.LSTM(width, return_sequences=True))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(width, activation='relu'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(3, activation='softmax'))

    model.compile(loss=loss, optimizer=optimizer, metrics=[METRICS])
    return model

In [9]:
def defineModelBiLstm(input_dim, optimizer, loss):
    """Build and compile a classifier that starts with a bidirectional LSTM layer.

    Layer stack: Bidirectional(LSTM(n, return_sequences=True)) -> Dropout(0.2)
    -> Dense(n, relu) -> Dropout(0.2) -> Dense(3, softmax), with ``n = 2 * input_dim - 1``.

    ``input_dim`` is only used to size the layers; no input shape is declared on
    the first layer.

    Args:
        input_dim: Number of input features; determines the layer width ``n``.
        optimizer: Optimizer handed to ``model.compile``.
        loss: Loss handed to ``model.compile``.

    Returns:
        The compiled Sequential model; it tracks the module-level ``METRICS`` metric.
    """
    layers = tf.keras.layers
    width = 2 * input_dim - 1

    model = tf.keras.models.Sequential()
    model.add(layers.Bidirectional(layers.LSTM(width, return_sequences=True)))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(width, activation='relu'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(3, activation='softmax'))

    model.compile(loss=loss, optimizer=optimizer, metrics=[METRICS])
    return model

In [10]:
def defineModelConv(input_dim, optimizer, loss):
    """Build and compile a classifier that starts with a 1D convolution.

    Layer stack: Conv1D(n filters, kernel_size=5, strides=1, causal padding, relu)
    -> Dropout(0.2) -> Dense(n, relu) -> Dropout(0.2) -> Dense(3, softmax),
    with ``n = 2 * input_dim - 1``.

    ``input_dim`` is only used to size the layers; no input shape is declared on
    the first layer.

    Args:
        input_dim: Number of input features; determines the layer width ``n``.
        optimizer: Optimizer handed to ``model.compile``.
        loss: Loss handed to ``model.compile``.

    Returns:
        The compiled Sequential model; it tracks the module-level ``METRICS`` metric.
    """
    layers = tf.keras.layers
    width = 2 * input_dim - 1

    model = tf.keras.models.Sequential()
    model.add(layers.Conv1D(width, kernel_size=5, strides=1, padding='causal', activation='relu'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(width, activation='relu'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(3, activation='softmax'))

    model.compile(loss=loss, optimizer=optimizer, metrics=[METRICS])
    return model

In [11]:
def defineModelMix(input_dim, optimizer, loss):
    """Build and compile a classifier combining convolution, bidirectional LSTM and dense layers.

    Layer stack: Conv1D(n1 filters, kernel_size=5, strides=1, causal padding, relu)
    -> Bidirectional(LSTM(n1, return_sequences=True)) -> Dropout(0.2)
    -> Dense(n1, relu) -> Dropout(0.2) -> Dense(n2, relu) -> Dropout(0.2)
    -> Dense(3, softmax), with ``n1 = 2 * input_dim - 1`` and ``n2 = int((n1 - 3) / 2)``.

    ``input_dim`` is only used to size the layers; no input shape is declared on
    the first layer.

    Args:
        input_dim: Number of input features; determines both layer widths.
        optimizer: Optimizer handed to ``model.compile``.
        loss: Loss handed to ``model.compile``.

    Returns:
        The compiled Sequential model; it tracks the module-level ``METRICS`` metric.
    """
    layers = tf.keras.layers
    first_width = 2 * input_dim - 1
    second_width = int((first_width - 3) / 2)

    model = tf.keras.models.Sequential()
    model.add(layers.Conv1D(first_width, kernel_size=5, strides=1, padding='causal', activation='relu'))
    model.add(layers.Bidirectional(layers.LSTM(first_width, return_sequences=True)))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(first_width, activation='relu'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(second_width, activation='relu'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(3, activation='softmax'))

    model.compile(loss=loss, optimizer=optimizer, metrics=[METRICS])
    return model

In [12]:
def runModel(model, x_train, x_valid, y_train, y_valid, title):
    """Fit ``model`` and write TensorBoard logs for the run.

    Training uses the module-level ``EPOCHS`` and ``BATCH_SIZE``; logs (with
    histograms every epoch) go to the sub-folder ``title`` of ``LOGS_PATH``.

    Args:
        model: Compiled model exposing a Keras-style ``fit`` method.
        x_train: Training inputs.
        x_valid: Validation inputs.
        y_train: Training labels.
        y_valid: Validation labels.
        title: Name of the run; used as the log sub-folder.

    Returns:
        Whatever ``model.fit`` returns (the training history).
    """
    run_log_dir = os.path.join(LOGS_PATH, title)
    board_logger = tf.keras.callbacks.TensorBoard(log_dir=run_log_dir, histogram_freq=1)

    fit_options = {
        'x': x_train,
        'y': y_train,
        'epochs': EPOCHS,
        'batch_size': BATCH_SIZE,
        'validation_data': (x_valid, y_valid),
        'callbacks': [board_logger],
        'verbose': 1,
    }
    return model.fit(**fit_options)

In [13]:
# Try default model: train the single-hidden-layer network on the flat feature matrix.
sel_train, sel_valid = par_train.to_numpy(), par_valid.to_numpy()

runModel(
    defineModelDefault(par_train.shape[1], OPTIMIZER_FUNCTION, LOSS_FUNCTION),
    sel_train,
    sel_valid,
    res_train,
    res_valid,
    'default',
)

2022-01-08 21:17:38.958151: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2022-01-08 21:17:38.958184: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (u50sj3d10xnl): /proc/driver/nvidia/version does not exist
2022-01-08 21:17:38.958714: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

<keras.callbacks.History at 0x7fbfcc3f97c0>

In [14]:
# Try decreasing model: train the two-hidden-layer network on the flat feature matrix.
sel_train, sel_valid = par_train.to_numpy(), par_valid.to_numpy()

runModel(
    defineModelDecrease(par_train.shape[1], OPTIMIZER_FUNCTION, LOSS_FUNCTION),
    sel_train,
    sel_valid,
    res_train,
    res_valid,
    'decreasing',
)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

<keras.callbacks.History at 0x7fbfbc442fa0>

In [15]:
# Try LSTM model: reshape every sample to (1, features), i.e. arrays of shape
# (samples, 1, features), before training.
sel_train = par_train.to_numpy().reshape(len(par_train), 1, par_train.shape[1])
sel_valid = par_valid.to_numpy().reshape(len(par_valid), 1, par_valid.shape[1])

runModel(
    defineModelLstm(par_train.shape[1], OPTIMIZER_FUNCTION, LOSS_FUNCTION),
    sel_train,
    sel_valid,
    res_train,
    res_valid,
    'LSTM',
)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

<keras.callbacks.History at 0x7fbfbc256130>

In [16]:
# Try bidirectional LSTM model: reshape every sample to (1, features), i.e.
# arrays of shape (samples, 1, features), before training.
sel_train = par_train.to_numpy().reshape(len(par_train), 1, par_train.shape[1])
sel_valid = par_valid.to_numpy().reshape(len(par_valid), 1, par_valid.shape[1])

runModel(
    defineModelBiLstm(par_train.shape[1], OPTIMIZER_FUNCTION, LOSS_FUNCTION),
    sel_train,
    sel_valid,
    res_train,
    res_valid,
    'BILSTM',
)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

<keras.callbacks.History at 0x7fbfb4533190>

In [17]:
# Try convolutional model: reshape every sample to (1, features), i.e. arrays
# of shape (samples, 1, features), before training.
sel_train = par_train.to_numpy().reshape(len(par_train), 1, par_train.shape[1])
sel_valid = par_valid.to_numpy().reshape(len(par_valid), 1, par_valid.shape[1])

runModel(
    defineModelConv(par_train.shape[1], OPTIMIZER_FUNCTION, LOSS_FUNCTION),
    sel_train,
    sel_valid,
    res_train,
    res_valid,
    'CONV',
)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

<keras.callbacks.History at 0x7fbfb4439ca0>

In [18]:
# Try mixed model (convolution + bidirectional LSTM + dense): reshape every
# sample to (1, features), i.e. arrays of shape (samples, 1, features), before training.
sel_train = par_train.to_numpy().reshape(len(par_train), 1, par_train.shape[1])
sel_valid = par_valid.to_numpy().reshape(len(par_valid), 1, par_valid.shape[1])

runModel(
    defineModelMix(par_train.shape[1], OPTIMIZER_FUNCTION, LOSS_FUNCTION),
    sel_train,
    sel_valid,
    res_train,
    res_valid,
    'MIX',
)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

<keras.callbacks.History at 0x7fbf941a43a0>

In [19]:
# Kill the existing tensorboard process and delete the tensorflow temp folder. After this start a new tensorboard process.
try:    
    # Iterating through each instance of the process.
    for line in os.popen("ps ax | grep " + TENSORBOARD_PROCESS + " | grep -v grep"):
        fields = line.split()
            
        # Extracting Process ID from the output.
        pid = fields[0]
            
        # Terminating process.
        os.kill(int(pid), signal.SIGKILL)

    # Delete tensorboard temp folder.
    tb_temp_folder = os.path.join(tempfile.gettempdir(), '.tensorboard-info')
    os.system("rm -rf "+tb_temp_folder)
    print("Process Successfully terminated") 
except Exception as e:
    print(e)

%tensorboard --logdir $LOGS_PATH --host $TENSORBOARD_SERVER --port $TENSORBOARD_PORT

Process Successfully terminated
