# Set up

In [1]:
from footbot.data import utils
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import Ridge

  return f(*args, **kwds)
  return f(*args, **kwds)
  return f(*args, **kwds)


In [2]:
# Widen pandas' display limits so wide feature frames and long cell values
# are shown in full instead of being truncated in notebook output.
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = 1000

# Getting data

In [3]:
# Build the client used below to run queries (`client.query(...)`), via the
# project helper `footbot.data.utils.set_up_bigquery`.
# The service-account key is read from a path relative to the notebook's
# working directory, so the notebook must be run from its own folder.
# NOTE(review): presumably returns a google-cloud-bigquery Client — confirm in utils.
client = utils.set_up_bigquery(secrets_path='../secrets/service_account.json')

In [6]:
# SQL text for the modelling dataset, executed later via `client.query(sql)`.
# It selects the target column (`total_points`) followed by the feature
# columns from the features table named in the FROM clause, and the WHERE
# clause keeps only rows with rolling_avg_total_points_element >= 2.
# NOTE(review): the threshold 2 is a magic number — presumably meant to
# restrict the data to players with meaningful recent returns; confirm intent.
# The literal starts on the line after the continuation backslash, so the
# string itself begins with a newline; no comments can be placed inside it
# without changing the query text.
sql = \
'''
SELECT
  total_points,
  element_type,
  team,
  value,
  total_points_per_minute_previous_season,
  goals_scored_per_minute_previous_season,
  assists_per_minute_previous_season,
  clean_sheets_per_minute_previous_season,
  goals_conceded_per_minute_previous_season,
  own_goals_per_minute_previous_season,
  penalties_saved_per_minute_previous_season,
  penalties_missed_per_minute_previous_season,
  yellow_cards_per_minute_previous_season,
  red_cards_per_minute_previous_season,
  saves_per_minute_previous_season,
  bonus_per_minute_previous_season,
  bps_per_minute_previous_season,
  opponent_team,
  was_home,
  was_sunday,
  was_weekday,
  was_late,
  was_early,
  rolling_avg_total_points_element,
  rolling_avg_minutes_element,
  rolling_avg_goals_scored_element,
  rolling_avg_assists_element,
  rolling_avg_clean_sheets_element,
  rolling_avg_goals_conceded_element,
  rolling_avg_own_goals_element,
  rolling_avg_penalties_saved_element,
  rolling_avg_penalties_missed_element,
  rolling_avg_yellow_cards_element,
  rolling_avg_red_cards_element,
  rolling_avg_saves_element,
  rolling_avg_bonus_element,
  rolling_avg_bps_element,
  rolling_avg_total_points_element_p1,
  rolling_avg_total_points_element_p2,
  rolling_avg_total_points_element_p3,
  rolling_avg_total_points_element_p4,
  rolling_avg_total_points_element_p5,
  rolling_avg_total_points_element_p10,
  rolling_avg_goals_scored_element_p1,
  rolling_avg_goals_scored_element_p2,
  rolling_avg_goals_scored_element_p3,
  rolling_avg_goals_scored_element_p4,
  rolling_avg_goals_scored_element_p5,
  rolling_avg_goals_scored_element_p10,
  rolling_avg_assists_element_p1,
  rolling_avg_assists_element_p2,
  rolling_avg_assists_element_p3,
  rolling_avg_assists_element_p4,
  rolling_avg_assists_element_p5,
  rolling_avg_assists_element_p10,
  rolling_avg_clean_sheets_element_p1,
  rolling_avg_clean_sheets_element_p2,
  rolling_avg_clean_sheets_element_p3,
  rolling_avg_clean_sheets_element_p4,
  rolling_avg_clean_sheets_element_p5,
  rolling_avg_clean_sheets_element_p10,
  rolling_avg_goals_conceded_element_p1,
  rolling_avg_goals_conceded_element_p2,
  rolling_avg_goals_conceded_element_p3,
  rolling_avg_goals_conceded_element_p4,
  rolling_avg_goals_conceded_element_p5,
  rolling_avg_goals_conceded_element_p10,
  rolling_avg_saves_element_p1,
  rolling_avg_saves_element_p2,
  rolling_avg_saves_element_p3,
  rolling_avg_saves_element_p4,
  rolling_avg_saves_element_p5,
  rolling_avg_saves_element_p10,
  rolling_avg_minutes_element_p1,
  rolling_avg_minutes_element_p2,
  rolling_avg_minutes_element_p3,
  rolling_avg_minutes_element_p4,
  rolling_avg_minutes_element_p5,
  rolling_avg_minutes_element_p10,
  rolling_avg_total_points_against_opponent_team_element_type,
  rolling_avg_minutes_against_opponent_team_element_type,
  rolling_avg_goals_scored_against_opponent_team_element_type,
  rolling_avg_assists_against_opponent_team_element_type,
  rolling_avg_clean_sheets_against_opponent_team_element_type,
  rolling_avg_goals_conceded_against_opponent_team_element_type,
  rolling_avg_own_goals_against_opponent_team_element_type,
  rolling_avg_penalties_saved_against_opponent_team_element_type,
  rolling_avg_penalties_missed_against_opponent_team_element_type,
  rolling_avg_yellow_cards_against_opponent_team_element_type,
  rolling_avg_red_cards_against_opponent_team_element_type,
  rolling_avg_saves_against_opponent_team_element_type,
  rolling_avg_bonus_against_opponent_team_element_type,
  rolling_avg_bps_against_opponent_team_element_type,
  rolling_avg_total_points_element_type,
  rolling_avg_minutes_element_type,
  rolling_avg_goals_scored_element_type,
  rolling_avg_assists_element_type,
  rolling_avg_clean_sheets_element_type,
  rolling_avg_goals_conceded_element_type,
  rolling_avg_own_goals_element_type,
  rolling_avg_penalties_saved_element_type,
  rolling_avg_penalties_missed_element_type,
  rolling_avg_yellow_cards_element_type,
  rolling_avg_red_cards_element_type,
  rolling_avg_saves_element_type,
  rolling_avg_bonus_element_type,
  rolling_avg_bps_element_type,
  expected_total_points_against_opponent_team_element_type,
  expected_minutes_against_opponent_team_element_type,
  expected_goals_scored_against_opponent_team_element_type,
  expected_assists_against_opponent_team_element_type,
  expected_clean_sheets_against_opponent_team_element_type,
  expected_goals_conceded_against_opponent_team_element_type,
  expected_own_goals_against_opponent_team_element_type,
  expected_penalties_saved_against_opponent_team_element_type,
  expected_penalties_missed_against_opponent_team_element_type,
  expected_yellow_cards_against_opponent_team_element_type,
  expected_red_cards_against_opponent_team_element_type,
  expected_saves_against_opponent_team_element_type,
  expected_bonus_against_opponent_team_element_type,
  expected_bps_against_opponent_team_element_type,
  rolling_avg_squad,
  rolling_avg_first_team,
  rolling_avg_vice_or_captain,
  rolling_avg_squad_p1,
  rolling_avg_first_team_p1,
  rolling_avg_vice_or_captain_p1
FROM
  `footbot-001.fpl.element_gameweeks_features_1920_v01`
WHERE
  rolling_avg_total_points_element >= 2
'''

In [7]:
# Run the query and materialise the whole result set as a pandas DataFrame.
# `df_all` holds the untouched query result; `df` is an independent copy used
# as the working frame, so later cells can change `df` without another
# round-trip to BigQuery.
df_all = client.query(sql).to_dataframe()
df = df_all.copy()

# Pre-processing data

In [18]:
def impute_by_element_type(frame, element_types=range(1, 5)):
    """Fill missing values with the per-``element_type`` column mean.

    For each value in ``element_types`` the matching rows are selected, every
    NaN is replaced by that subset's column mean, and anything still missing
    (e.g. a column that is entirely NaN within the subset) is set to 0.
    The per-type pieces are then stacked in ``element_types`` order, keeping
    the original index labels.

    Parameters
    ----------
    frame : pandas.DataFrame
        Input data; must contain an ``element_type`` column. Not modified.
    element_types : iterable, default ``range(1, 5)``
        ``element_type`` values to keep. Rows whose ``element_type`` is not
        listed (including NaN) are dropped from the result.

    Returns
    -------
    pandas.DataFrame
        Imputed rows, grouped by ``element_type``.
    """
    imputed_parts = []
    for element_type in element_types:
        # Select the subset once and reuse it for both the mean and the fill.
        subset = frame[frame['element_type'] == element_type]
        # numeric_only=True keeps this working on pandas >= 2.0, where
        # DataFrame.mean() raises TypeError if a non-numeric column is present.
        column_means = subset.mean(numeric_only=True)
        imputed_parts.append(subset.fillna(column_means).fillna(0))
    return pd.concat(imputed_parts)


# Always derive `df` from the untouched query result so that re-running this
# cell (or running cells out of order) gives the same frame every time.
# NOTE(review): every column is imputed, including the target `total_points`
# and id-like columns such as `team` / `opponent_team`; a mean is not a
# meaningful fill for those — confirm they never contain NaN.
df = impute_by_element_type(df_all)

In [12]:
# Columns handled as categories (one-hot encoded further down).
categorical_features = [
    'element_type', 'team', 'opponent_team',
    'was_home', 'was_sunday', 'was_weekday', 'was_late', 'was_early',
]

# Every remaining column except the target is handled as a numeric feature,
# in the same order as it appears in `df`.
numeric_features = [
    column
    for column in df.columns
    if column != 'total_points' and column not in categorical_features
]

In [19]:
# Pre-processing pipeline: standardise the numeric columns and one-hot encode
# the categorical ones. Columns not listed in either group (here, the target)
# are dropped by ColumnTransformer's default `remainder` setting.
ct = ColumnTransformer(
    transformers=[
        ("scale", StandardScaler(), numeric_features),
        ("onehot", OneHotEncoder(), categorical_features),
    ]
)

In [20]:
# Fit the scaler/encoder on `df` and build the design matrix in one step.
# NOTE(review): the transformers are fitted on every row of `df`; no
# train/test split is visible in this part of the notebook.
X = ct.fit_transform(df)

In [25]:
# Regression target, taken from the same frame (and row order) as `X`.
y = df['total_points'].values

In [26]:
# Ridge (L2-regularised linear) regression; `alpha` is the penalty strength.
# NOTE(review): how alpha=300 was chosen is not shown in this notebook —
# document the source (e.g. a cross-validation run) or tune it here.
model = Ridge(alpha=300)

In [27]:
# Fit the model on the full design matrix; as the last expression of the cell,
# the fitted estimator's repr is displayed as the cell output.
model.fit(X, y)

Ridge(alpha=300, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)