In [4]:
# Python ≥3.5 is required; the assert stops the notebook early on older interpreters
import sys
assert sys.version_info >= (3, 5)

# Disable warnings: with no category or module argument this filter ignores
# every warning raised from this point on, including those emitted by the
# libraries imported below
import warnings
warnings.filterwarnings('ignore')

# Scikit-Learn ≥0.20 is required
import re
import sklearn
# Compare the leading numeric version components as integers. A plain string
# comparison is lexicographic, so e.g. "0.3" >= "0.20" would wrongly pass.
assert tuple(int(part) for part in re.findall(r"\d+", sklearn.__version__)[:2]) >= (0, 20)

# Common imports
import copy
import pandas as pd
import numpy as np
import os
PRJ_ROOT_DIR = os.path.abspath(os.pardir)  # parent of the current working directory

# Seed NumPy's global random generator so repeated runs give the same results
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
# Default font sizes: 14 for axis labels, 12 for x/y tick labels
mpl.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})
import seaborn as sns

In [5]:
# Figure output location: <current working directory>/images/<chapter id>
CHAPTER_ID = "04_modelling"
NOTE_ROOT_DIR = os.path.abspath(os.curdir)
IMAGES_PATH = os.path.join(NOTE_ROOT_DIR, "images", CHAPTER_ID)

# Create the folder up front; an already existing one is left untouched
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current pyplot figure under ``IMAGES_PATH``.

    Parameters
    ----------
    fig_id : str
        Base file name (without extension); also echoed in the progress message.
    tight_layout : bool, default True
        When true, ``plt.tight_layout()`` is applied before saving.
    fig_extension : str, default "png"
        Used both as the file extension and as the ``format`` passed to
        ``plt.savefig``.
    resolution : int, default 300
        Passed to ``plt.savefig`` as ``dpi``.
    """
    file_name = ".".join((fig_id, fig_extension))
    target = os.path.join(IMAGES_PATH, file_name)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, dpi=resolution, format=fig_extension)

## 1 - Load the Data

In [8]:
# Read the integrated match table from <project root>/data/tabular/integrate
# and preview its first rows
matches_csv = os.path.join(PRJ_ROOT_DIR, "data", "tabular", "integrate", "matches.csv")
df_match = pd.read_csv(matches_csv)
df_match.head()

Unnamed: 0,home_result,season,defence/clean_sheets_0,defence/goals_conceded_0,defence/tackles_0,defence/tackle_success_%_0,defence/last_man_tackles_0,defence/blocked_shots_0,defence/interceptions_0,defence/clearances_0,...,defence/duels_lost,teamplay/passes,teamplay/crosses,teamplay/accurate_long_balls,defence,attack,defence/clean_sheets_26,defence/goals_conceded_26,defence/tackles_26,defence/tackle_success_%_26
0,lose,2019/20,5.0,34.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1097.0,88.0,0.0,4.277778,15.0275,138.36,0.0,6.5,103025
1,win,2019/20,6.0,21.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1
2,win,2019/20,13.0,23.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1
3,win,2019/20,11.0,37.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,248.0,13.0,0.0,1.888889,7.594167,30.696667,0.0,6.75,200439
4,win,2019/20,13.0,36.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1
