# Score predicts
### Predicting specific scores of one week of english league games using RNN model created earlier

In [1]:
# Obsługa środowisk Python 2 i Python 3
from __future__ import division, print_function, unicode_literals

# Importowanie popularnych modułów
import numpy as np
import os

# W celu zachowania powtarzalności wyników w kolejnych przebiegach
np.random.seed(42)

# Generowanie ładnych wykresów
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

# Lokacja, w której będą zapisywane rysunki
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "preparing_dataset"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "pictures", CHAPTER_ID)

def image_path(fig_id):
    return os.path.join(PROJECT_ROOT_DIR, "pictures", CHAPTER_ID, fig_id)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("Saving an image", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)

In [2]:
import os
import pandas as pd

FOOTBALL_PATH_SP = os.path.join("datasets", "spain")
football_path_sp = FOOTBALL_PATH_SP

FOOTBALL_PATH_EN = os.path.join("datasets", "england")
football_path_en = FOOTBALL_PATH_EN

FOOTBALL_PATH_FR = os.path.join("datasets", "france")
football_path_fr = FOOTBALL_PATH_FR

FOOTBALL_PATH_GE = os.path.join("datasets", "germany")
football_path_ge = FOOTBALL_PATH_GE

FOOTBALL_PATH_IT = os.path.join("datasets", "italy")
football_path_it = FOOTBALL_PATH_IT

def load_football_data(football_path, file):
    csv_path = os.path.join(football_path, file)
    return pd.read_csv(csv_path, error_bad_lines=False)

In [3]:
football_sp = load_football_data(FOOTBALL_PATH_SP, "spain.csv")
football_en = load_football_data(FOOTBALL_PATH_EN, "england.csv")
football_fr = load_football_data(FOOTBALL_PATH_FR, "france.csv")
football_ge = load_football_data(FOOTBALL_PATH_GE, "germany.csv")
football_it = load_football_data(FOOTBALL_PATH_IT, "italy.csv")

In [4]:
football = football_en.copy()

In [5]:
from sklearn.preprocessing import LabelEncoder

homeTeamList = football["HomeTeam"].tolist() 
awayTeamList = football["AwayTeam"].tolist()
fTRList = football["FTR"].tolist()
hTRList = football["HTR"].tolist()
divList = football["Div"].tolist()

labelEncoder = LabelEncoder()

labelEncoder.fit(homeTeamList)
label = labelEncoder.transform(homeTeamList)
football['homeTeam']=pd.Series(label)

labelEncoder.fit(awayTeamList)
label = labelEncoder.transform(awayTeamList)
football['awayTeam']=pd.Series(label)

labelEncoder.fit(hTRList)
label = labelEncoder.transform(hTRList)
football['hTR']=pd.Series(label)

labelEncoder.fit(fTRList)
label = labelEncoder.transform(fTRList)
football['fTR']=pd.Series(label)

labelEncoder.fit(divList)
label = labelEncoder.transform(divList)
football['div']=pd.Series(label)

In [6]:
import datetime

dates = pd.Series(football['Date'])
dates = pd.to_datetime(dates, format = '%d/%m/%y')
days = []
years = []

for i in dates:
    d = i.dayofyear
    days.append(d)
    y = i.year
    years.append(y)
    
x = pd.Series(days)
y = pd.Series(years)
football["DayOfTheYear"] = x
football["Year"] = y

In [7]:
football = football.drop(columns = ['div', 'Div','Date', 'HomeTeam', 'AwayTeam', 'HTR', 'FTR', 'HTAG', 'HTHG', 'hTR', 'fTR', 'HST', 'AST', 'HS', 'AS', 'HF', 'AF', 'HY', 'AY', 'HR', 'AR', ], axis = 1)

In [8]:
football.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5410 entries, 0 to 5409
Data columns (total 12 columns):
FTHG            5410 non-null float64
FTAG            5410 non-null float64
B365H           5410 non-null float64
B365D           5410 non-null float64
B365A           5410 non-null float64
BWH             5409 non-null float64
BWD             5409 non-null float64
BWA             5409 non-null float64
homeTeam        5410 non-null int64
awayTeam        5410 non-null int64
DayOfTheYear    5410 non-null int64
Year            5410 non-null int64
dtypes: float64(8), int64(4)
memory usage: 507.3 KB


In [9]:
y = np.array(football[['FTHG', 'FTAG']])
football = football.drop(columns = ['FTHG', 'FTAG'], axis = 1)
x = np.array(football)

x = x.astype(float)
y = y.astype(float)

x = np.reshape(x, (x.shape[0], 1, x.shape[1]))

x_train = x[:-10]
x_test = x[-10:]
y_train = y[:-10]
y_test = y[-10:]

In [None]:
import tensorflow as tf

model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(128, input_shape=(1, 10), activation='relu', return_sequences=True))
model.add(tf.keras.layers.LSTM(64, activation='relu', return_sequences=True))
model.add(tf.keras.layers.LSTM(16, activation='relu'))
model.add(tf.keras.layers.Dense(2, activation='softmax'))
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=25, batch_size=20, verbose=1)

In [30]:
y_pred = model.predict(x_test, batch_size=20).round()
val_loss, val_acc = model.evaluate(x_test, y_test, verbose=0)
print(val_acc, val_loss)

0.8 0.9887989163398743


In [25]:
for actual, predicted in zip(y_test, y_pred):
    print(actual, predicted)

[1. 0.] [1. 0.]
[0. 3.] [0. 1.]
[1. 1.] [1. 0.]
[2. 0.] [1. 0.]
[1. 0.] [0. 1.]
[2. 0.] [0. 1.]
[3. 1.] [1. 0.]
[0. 1.] [0. 1.]
[1. 1.] [1. 0.]
[1. 0.] [1. 0.]
