In [None]:
# Mount Google Drive at /content/gdrive.
# NOTE(review): the leading underscore marks `_mount` as non-public by Python
# convention; presumably `drive.mount` is the supported entry point - confirm
# whether the private call is intentional before changing it.
from google.colab import drive
drive._mount('/content/gdrive')

In [None]:
# Install the needed packages
%pip install pathlib2 pandas numpy tensorflow tensorboard --quiet

# Import packages.
import os
import shutil
import signal
import tempfile

import numpy as np
import pandas as pd
import tensorboard as tb
import tensorflow as tf
from pathlib2 import Path

In [None]:
# ---------------------------------------------------------------------------
# Data files: everything lives in one folder on the mounted drive.
# ---------------------------------------------------------------------------
DATA_FOLDER = '/content/gdrive/MyDrive/data/'


def _data_file(file_name):
    """Return the path of `file_name` inside DATA_FOLDER."""
    return os.path.join(DATA_FOLDER, file_name)


FIVETHIRTYEIGHT_PROJECTS_FILE = _data_file('spi_matches.csv')
MARKET_VALUE_FILE = _data_file('market_value_soccer_german_federal_league.txt')
POINTS_FILE = _data_file('points_soccer_german_federal_league.txt')
PREPARED_DATA_FILE = _data_file('own_data.csv')

# ---------------------------------------------------------------------------
# Column names and values.
# ---------------------------------------------------------------------------
FILTER_COLUMN, FILTER_VALUE = 'league', 'German Bundesliga'

RESULT_COLUMN = 'result_team1'
RESULT_INFO_COLUMNS = ['season', 'date', 'team1', 'team2']
RESULT_HEADERS = ['loss_team1', 'draw', 'win_team1']

VALUE_COLUMNS = [
    'average_age',
    'points_last_season',
    'points_this_season',
    'rank_last_season',
    'squad',
    'total_market_value_in_euro',
]

# Outcome codes: lost = 0, draw = 1, won = 2.
VALUE_HOME_LOST, VALUE_HOME_DRAW, VALUE_HOME_WON = range(3)

# ---------------------------------------------------------------------------
# Parameters for using the neural net.
# ---------------------------------------------------------------------------
EPOCHS = 100
BATCH_SIZE = 32
VALIDATION_SIZE = 0.2
LEARNING_RATE = 0.001
OPTIMIZER_FUNCTION = 'Adam'
LOSS_FUNCTION = 'sparse_categorical_crossentropy'
METRICS = 'sparse_categorical_accuracy'
LOGS_PATH = '/content/logs'

In [None]:
# Load the prepared match data.
data = pd.read_csv(PREPARED_DATA_FILE, delimiter=',', decimal='.')

# Rows without a result are the matches to predict.
has_no_result = data[RESULT_COLUMN].isna()

# Descriptive columns of the matches to predict; used to label the predictions.
result_info = data.loc[has_no_result, RESULT_INFO_COLUMNS]

# Keep only the feature columns plus the result column.
data_cal = data.loc[:, VALUE_COLUMNS + [RESULT_COLUMN]]

# Separate the rows to predict (features only) from the rows usable for
# modeling (no missing value in any kept column).
data_prediction = data_cal.loc[has_no_result, VALUE_COLUMNS]
data_modeling = data_cal.dropna()

# Shuffle with a fixed seed so the train/validation split is the same on every
# run; kept local so this cell does not depend on changes to other cells.
SHUFFLE_SEED = 42
data_modeling = data_modeling.sample(frac=1, random_state=SHUFFLE_SEED)

# Split the data set: the first VALIDATION_SIZE share of the shuffled rows is
# used for validation, the rest for training.
split_index = int(len(data_modeling) * VALIDATION_SIZE)
data_train = data_modeling[split_index:]
data_valid = data_modeling[:split_index]
assert len(data_train) + len(data_valid) == len(data_modeling)

# Features (par_*) and labels (res_*) of each split. The columns are selected
# by name so every frame is sliced by its own columns.
par_train = data_train.loc[:, VALUE_COLUMNS]
res_train = data_train.loc[:, [RESULT_COLUMN]]
par_valid = data_valid.loc[:, VALUE_COLUMNS]
res_valid = data_valid.loc[:, [RESULT_COLUMN]]

# Feature matrices as NumPy arrays for the model.
sel_train = par_train.to_numpy()
sel_valid = par_valid.to_numpy()

In [None]:
# Define the model: one hidden ReLU layer with 2 * dim - 1 nodes, dropout, and
# a softmax output with one unit per entry in RESULT_HEADERS.
dim = sel_train.shape[1]
nodes = dim * 2 - 1
model = tf.keras.models.Sequential([
    # Declare the input shape explicitly instead of passing the deprecated
    # `input_dim` argument to the first Dense layer.
    tf.keras.Input(shape=(dim,)),
    tf.keras.layers.Dense(nodes, activation='relu'),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(len(RESULT_HEADERS), activation='softmax'),
])

# Define the optimizer function.
optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)

# Compile the model.
model.compile(loss=LOSS_FUNCTION, optimizer=optimizer, metrics=[METRICS])

# Define callback function for writing data for TensorBoard.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=LOGS_PATH, histogram_freq=1)

# Run the model. Features and labels are both passed as NumPy arrays so the
# training and validation inputs have a consistent type.
history = model.fit(
    x=sel_train,
    y=res_train.to_numpy(),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(sel_valid, res_valid.to_numpy()),
    callbacks=[tensorboard_callback],
    verbose=0,
)

In [None]:
# Predict the outcome probabilities for all matches that have no result yet.
#
# Row order: `model.predict` returns one row of class probabilities per input
# row, in input order. `result_info` and `data_prediction` were both selected
# from `data` with the same "result is missing" filter, so row i of the
# predictions belongs to row i of `result_info`.
#
# Column order: column k of the softmax output is the probability of class
# label k. RESULT_HEADERS lists loss, draw, win, which matches
# VALUE_HOME_LOST = 0, VALUE_HOME_DRAW = 1, VALUE_HOME_WON = 2.
# TODO: confirm that the prepared data encodes RESULT_COLUMN with exactly
# these values; otherwise the table headers need to be changed.

# Make predictions.
pre_data = data_prediction.to_numpy()
predictions = model.predict(pre_data)
assert len(predictions) == len(result_info), "one prediction row per match expected"

# Show the predictions as table. `result_info` is not rebound, so this cell can
# be re-run; its index is reset so both frames align by position.
result = pd.concat(
    [
        result_info.reset_index(drop=True),
        pd.DataFrame(predictions, columns=RESULT_HEADERS),
    ],
    axis=1,
)
result

In [None]:
# Kill the existing tensorboard process and delete the tensorflow temp folder. After this start a new tensorboard process.
try:    
    # Iterating through each instance of the process.
    for line in os.popen("ps ax | grep " + TENSORBOARD_PROCESS + " | grep -v grep"):
        fields = line.split()
            
        # Extracting Process ID from the output.
        pid = fields[0]
            
        # Terminating process.
        os.kill(int(pid), signal.SIGKILL)

    # Delete tensorboard temp folder.
    tb_temp_folder = os.path.join(tempfile.gettempdir(), '.tensorboard-info')
    os.system("rm -rf "+tb_temp_folder)
    print("Process Successfully terminated") 
except Exception as e:
    print(e)

%load_ext tensorboard
%tensorboard --logdir $LOGS_PATH --host localhost --port 6008