# Set up

In [1]:
# update path with data dir
import sys
sys.path.append('../data/')

In [2]:
import player_data as player
import pandas as pd
from datetime import datetime
import statsmodels.api as sm

  from pandas.core import datetools


# Get data

In [3]:
# Cache path for the gameweek data, stamped with today's date (YYYY-MM-DD).
today_stamp = datetime.today().strftime('%Y-%m-%d')
file_name = 'csvs/element_gameweek_' + today_stamp + '.csv'

In [4]:
# Reuse today's cached CSV when it exists; otherwise fetch the data once and
# write the cache. Only a missing file triggers the fetch, so unrelated
# failures (a corrupt CSV, an interrupted kernel, a typo) surface instead of
# being silently swallowed by a bare `except`.
# NOTE(review): to_csv writes the index by default while read_csv does not
# restore it, so a cached frame may differ from a freshly fetched one --
# confirm the downstream helpers handle both.
try:
    element_gameweek_df = pd.read_csv(file_name)
except FileNotFoundError:
    element_gameweek_df = player.get_element_gameweek_df()
    element_gameweek_df.to_csv(file_name)

In [5]:
# Analysis parameters, consumed by the row filter in the next cell.
threshold_minutes = 85     # keep appearances with at least this many minutes
element_types = [2, 3, 4]  # element_type codes to keep -- presumably outfield positions; TODO confirm
current_event = 26         # keep gameweeks up to and including this event

In [6]:
# Keep rows up to the current gameweek, for the selected element types, where
# the player reached the minutes threshold.
# The explicit .copy() makes the result an independent frame: the next cell
# modifies it in place, which on a bare boolean-mask slice triggers pandas'
# SettingWithCopyWarning.
element_gameweek_df = element_gameweek_df.loc[
    (element_gameweek_df['event'] <= current_event)
    & (element_gameweek_df['element_type'].isin(element_types))
    & (element_gameweek_df['minutes'] >= threshold_minutes)
].copy()

In [7]:
# The return value is discarded, so this relies on the helper modifying the
# frame in place -- presumably it encodes 'was_home' numerically; confirm in player_data.
player.add_home_categorical_variable(element_gameweek_df)

# Models

## Simple Poisson model

### Preparing data

In [31]:
# Work on a copy so this model's preparation does not alter the shared frame.
spm_df = element_gameweek_df.copy()

In [32]:
# Columns removed before fitting the baseline model. 'was_home' is listed here
# too, so no predictor remains apart from the intercept.
spm_redundant_columns = [
    'row_id',
    'assists',
    'attempted_passes',
    'big_chances_created',
    'big_chances_missed',
    'bonus',
    'bps',
    'clean_sheets',
    'clearances_blocks_interceptions',
    'completed_passes',
    'creativity',
    'dribbles',
    'ea_index',
    'element',
    'errors_leading_to_goal',
    'errors_leading_to_goal_attempt',
    'fixture',
    'fouls',
    'goals_conceded',
    'ict_index',
    'id',
    'influence',
    'key_passes',
    'kickoff_time',
    'kickoff_time_formatted',
    'loaned_in',
    'loaned_out',
    'minutes',
    'offside',
    'open_play_crosses',
    'opponent_team',
    'own_goals',
    'penalties_conceded',
    'penalties_missed',
    'penalties_saved',
    'recoveries',
    'red_cards',
    'round',
    'saves',
    'selected',
    'tackled',
    'tackles',
    'target_missed',
    'team_a_score',
    'team_h_score',
    'threat',
    'total_points',
    'transfers_balance',
    'transfers_in',
    'transfers_out',
    'value',
    'winning_goals',
    'yellow_cards',
    'event',
    'team_a',
    'team_a_difficulty',
    'team_h',
    'team_h_difficulty',
    'element_type',
    'team',
    'web_name',
    'safe_web_name',
    'own_team',
    'opposition_team',
    'was_home',
]

In [33]:
# Remove the columns listed above (helper lives in player_data; presumably a plain column drop -- confirm).
spm_df = player.remove_redundant_columns(spm_df, spm_redundant_columns)

In [34]:
# Split into the response ('goals_scored') and the explanatory frame passed to the GLM below.
spm_response_df, spm_explanatory_df = player.get_response_explanatory_dfs(spm_df, 'goals_scored')

In [35]:
# Sanity check: list the explanatory columns that remain for this model.
spm_explanatory_df.columns

Index(['const'], dtype='object')

In [36]:
# Peek at the first few response values.
spm_response_df.head()

row_id
70    0
74    0
75    0
76    1
78    1
Name: goals_scored, dtype: int64

### Fitting model

In [37]:
# Poisson GLM of goals scored on the explanatory frame; rows with missing
# values are dropped before fitting.
spm_model = sm.GLM(
    endog=spm_response_df,
    exog=spm_explanatory_df,
    family=sm.families.Poisson(),
    missing='drop',
)
spm_results = spm_model.fit()

In [38]:
# Print the fitted model's summary table.
print(spm_results.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:           goals_scored   No. Observations:                 3952
Model:                            GLM   Df Residuals:                     3951
Model Family:                 Poisson   Df Model:                            0
Link Function:                    log   Scale:                             1.0
Method:                          IRLS   Log-Likelihood:                -1538.3
Date:                Sat, 23 Feb 2019   Deviance:                       2182.8
Time:                        13:14:30   Pearson chi2:                 4.38e+03
No. Iterations:                     6                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -2.0999      0.045    -46.198      0.000      -2.189      -2.011


In [39]:
# Show only the coefficients whose p-value is below 0.2.
spm_pvalues = pd.Series(spm_results.pvalues)
spm_pvalues[spm_pvalues < 0.2]

const    0.0
dtype: float64

## Simple Poisson teams model

### Preparing data

In [15]:
# Work on a copy so this model's preparation does not alter the shared frame.
sptm_df = element_gameweek_df.copy()

In [16]:
# Add dummy-encoded columns for position and for both teams, one source
# column at a time and in this order.
for categorical_column in ('element_type', 'opposition_team', 'own_team'):
    sptm_df = player.add_categorical_variables(sptm_df, categorical_column)

In [17]:
# Columns removed before fitting the teams model. The raw 'element_type',
# 'own_team' and 'opposition_team' columns are dropped; their dummy-encoded
# counterparts added above remain. 'was_home' is not listed, so it is kept.
sptm_redundant_columns = [
    'row_id',
    'assists',
    'attempted_passes',
    'big_chances_created',
    'big_chances_missed',
    'bonus',
    'bps',
    'clean_sheets',
    'clearances_blocks_interceptions',
    'completed_passes',
    'creativity',
    'dribbles',
    'ea_index',
    'element',
    'errors_leading_to_goal',
    'errors_leading_to_goal_attempt',
    'fixture',
    'fouls',
    'goals_conceded',
    'ict_index',
    'id',
    'influence',
    'key_passes',
    'kickoff_time',
    'kickoff_time_formatted',
    'loaned_in',
    'loaned_out',
    'minutes',
    'offside',
    'open_play_crosses',
    'opponent_team',
    'own_goals',
    'penalties_conceded',
    'penalties_missed',
    'penalties_saved',
    'recoveries',
    'red_cards',
    'round',
    'saves',
    'selected',
    'tackled',
    'tackles',
    'target_missed',
    'team_a_score',
    'team_h_score',
    'threat',
    'total_points',
    'transfers_balance',
    'transfers_in',
    'transfers_out',
    'value',
    'winning_goals',
    'yellow_cards',
    'event',
    'team_a',
    'team_a_difficulty',
    'team_h',
    'team_h_difficulty',
    'element_type',
    'team',
    'web_name',
    'safe_web_name',
    'own_team',
    'opposition_team',
]

In [18]:
# Remove the columns listed above (helper lives in player_data; presumably a plain column drop -- confirm).
sptm_df = player.remove_redundant_columns(sptm_df, sptm_redundant_columns)

In [19]:
# Split into the response ('goals_scored') and the explanatory frame passed to the GLM below.
sptm_response_df, sptm_explanatory_df = player.get_response_explanatory_dfs(sptm_df, 'goals_scored')

In [20]:
# Sanity check: list the explanatory columns that remain for this model.
sptm_explanatory_df.columns

Index(['const', 'was_home', 'element_type_3', 'element_type_4',
       'opposition_team_2', 'opposition_team_3', 'opposition_team_4',
       'opposition_team_5', 'opposition_team_6', 'opposition_team_7',
       'opposition_team_8', 'opposition_team_9', 'opposition_team_10',
       'opposition_team_11', 'opposition_team_12', 'opposition_team_13',
       'opposition_team_14', 'opposition_team_15', 'opposition_team_16',
       'opposition_team_17', 'opposition_team_18', 'opposition_team_19',
       'opposition_team_20', 'own_team_2', 'own_team_3', 'own_team_4',
       'own_team_5', 'own_team_6', 'own_team_7', 'own_team_8', 'own_team_9',
       'own_team_10', 'own_team_11', 'own_team_12', 'own_team_13',
       'own_team_14', 'own_team_15', 'own_team_16', 'own_team_17',
       'own_team_18', 'own_team_19', 'own_team_20'],
      dtype='object')

In [21]:
# Peek at the first few response values.
sptm_response_df.head()

row_id
70    0
74    0
75    0
76    1
78    1
Name: goals_scored, dtype: int64

### Fitting model

In [22]:
# Poisson GLM of goals scored on home flag, position and team dummies; rows
# with missing values are dropped before fitting.
sptm_model = sm.GLM(
    endog=sptm_response_df,
    exog=sptm_explanatory_df,
    family=sm.families.Poisson(),
    missing='drop',
)
sptm_results = sptm_model.fit()

In [23]:
# Print the fitted model's summary table.
print(sptm_results.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:           goals_scored   No. Observations:                 3952
Model:                            GLM   Df Residuals:                     3910
Model Family:                 Poisson   Df Model:                           41
Link Function:                    log   Scale:                             1.0
Method:                          IRLS   Log-Likelihood:                -1355.2
Date:                Sat, 23 Feb 2019   Deviance:                       1816.6
Time:                        13:08:53   Pearson chi2:                 4.20e+03
No. Iterations:                     6                                         
                         coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------
const                 -3.3049      0.326    -10.140      0.000      -3.944      -2.666
was_home               0.156

In [25]:
# Show only the coefficients whose p-value is below 0.2.
sptm_pvalues = pd.Series(sptm_results.pvalues)
sptm_pvalues[sptm_pvalues < 0.2]

const                 3.671265e-24
was_home              8.982414e-02
element_type_3        6.882327e-23
element_type_4        9.561406e-57
opposition_team_2     8.549882e-02
opposition_team_3     6.654039e-02
opposition_team_4     3.888057e-02
opposition_team_5     3.803621e-02
opposition_team_9     3.814878e-02
opposition_team_10    1.296185e-01
opposition_team_16    5.009482e-02
own_team_4            1.631534e-01
own_team_7            1.560780e-01
own_team_9            8.073039e-02
own_team_10           5.585457e-03
own_team_11           1.785552e-01
own_team_12           1.945707e-01
own_team_13           1.168910e-01
own_team_15           4.964303e-03
own_team_16           9.939768e-02
dtype: float64

## Simple Poisson player model

In [None]:
# Count the retained rows (appearances) for each 'element' value.
element_appearences = element_gameweek_df.groupby('element').size()

In [None]:
# 'element' values that have more than one retained appearance.
element_several_appearences = list(element_appearences[element_appearences > 1].index)

In [None]:
# Keep only rows whose 'element' appears more than once.
# NOTE(review): this rebinds the shared element_gameweek_df, so every later
# cell that copies it (including the Gaussian section) sees the filtered data,
# and re-running an earlier section afterwards changes its input.
# NOTE(review): this cell has no execution count, and the player model output
# below reports the same 3952 observations as the earlier models, so the saved
# outputs appear not to reflect this filter -- confirm.
element_gameweek_df = element_gameweek_df[element_gameweek_df['element'].isin(element_several_appearences)]

### Preparing data

In [40]:
# Work on a copy so this model's preparation does not alter the shared frame.
sppm_df = element_gameweek_df.copy()

In [41]:
# Add dummy-encoded columns for position, opposition team and the individual
# element, one source column at a time and in this order.
for categorical_column in ('element_type', 'opposition_team', 'element'):
    sppm_df = player.add_categorical_variables(sppm_df, categorical_column)

In [42]:
# Columns removed before fitting the player model. The raw 'element',
# 'element_type' and 'opposition_team' columns are dropped; their dummy-encoded
# counterparts added above remain. 'was_home' is not listed, so it is kept.
sppm_redundant_columns = [
    'row_id',
    'assists',
    'attempted_passes',
    'big_chances_created',
    'big_chances_missed',
    'bonus',
    'bps',
    'clean_sheets',
    'clearances_blocks_interceptions',
    'completed_passes',
    'creativity',
    'dribbles',
    'ea_index',
    'element',
    'errors_leading_to_goal',
    'errors_leading_to_goal_attempt',
    'fixture',
    'fouls',
    'goals_conceded',
    'ict_index',
    'id',
    'influence',
    'key_passes',
    'kickoff_time',
    'kickoff_time_formatted',
    'loaned_in',
    'loaned_out',
    'minutes',
    'offside',
    'open_play_crosses',
    'opponent_team',
    'own_goals',
    'penalties_conceded',
    'penalties_missed',
    'penalties_saved',
    'recoveries',
    'red_cards',
    'round',
    'saves',
    'selected',
    'tackled',
    'tackles',
    'target_missed',
    'team_a_score',
    'team_h_score',
    'threat',
    'total_points',
    'transfers_balance',
    'transfers_in',
    'transfers_out',
    'value',
    'winning_goals',
    'yellow_cards',
    'event',
    'team_a',
    'team_a_difficulty',
    'team_h',
    'team_h_difficulty',
    'element_type',
    'team',
    'web_name',
    'safe_web_name',
    'own_team',
    'opposition_team',
]

In [43]:
# Remove the columns listed above (helper lives in player_data; presumably a plain column drop -- confirm).
sppm_df = player.remove_redundant_columns(sppm_df, sppm_redundant_columns)

In [44]:
# Split into the response ('goals_scored') and the explanatory frame passed to the GLM below.
sppm_response_df, sppm_explanatory_df = player.get_response_explanatory_dfs(sppm_df, 'goals_scored')

In [45]:
# Sanity check: list the explanatory columns that remain for this model.
sppm_explanatory_df.columns

Index(['const', 'was_home', 'element_type_3', 'element_type_4',
       'opposition_team_2', 'opposition_team_3', 'opposition_team_4',
       'opposition_team_5', 'opposition_team_6', 'opposition_team_7',
       ...
       'element_535', 'element_539', 'element_549', 'element_556',
       'element_565', 'element_570', 'element_571', 'element_575',
       'element_579', 'element_591'],
      dtype='object', length=392)

In [46]:
# Peek at the first few response values.
sppm_response_df.head()

row_id
70    0
74    0
75    0
76    1
78    1
Name: goals_scored, dtype: int64

### Fitting model

In [47]:
# Poisson GLM of goals scored on home flag, position, opposition and
# per-element dummies; rows with missing values are dropped before fitting.
sppm_model = sm.GLM(
    endog=sppm_response_df,
    exog=sppm_explanatory_df,
    family=sm.families.Poisson(),
    missing='drop',
)
sppm_results = sppm_model.fit()

In [48]:
# Print the fitted model's summary table.
print(sppm_results.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:           goals_scored   No. Observations:                 3952
Model:                            GLM   Df Residuals:                     3562
Model Family:                 Poisson   Df Model:                          389
Link Function:                    log   Scale:                             1.0
Method:                          IRLS   Log-Likelihood:                -1110.8
Date:                Sat, 23 Feb 2019   Deviance:                       1327.9
Time:                        13:16:11   Pearson chi2:                 2.20e+03
No. Iterations:                    25                                         
                         coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------
const                 -1.3799      0.755     -1.827      0.068      -2.860       0.101
was_home               0.123

In [49]:
# Show only the coefficients whose p-value is below 0.2.
sppm_pvalues = pd.Series(sppm_results.pvalues)
sppm_pvalues[sppm_pvalues < 0.2]

const                 0.067716
was_home              0.194132
opposition_team_3     0.076961
opposition_team_4     0.070622
opposition_team_5     0.125236
opposition_team_9     0.174585
opposition_team_10    0.154554
opposition_team_16    0.087555
element_9             0.179440
element_27            0.084745
element_31            0.165023
element_48            0.197462
element_115           0.096619
element_116           0.152386
element_118           0.090819
element_138           0.117012
element_142           0.084809
element_157           0.127573
element_162           0.086557
element_192           0.093207
element_215           0.192808
element_245           0.172051
element_246           0.085634
element_265           0.123467
element_268           0.144444
element_286           0.125115
element_292           0.125072
element_293           0.138536
element_310           0.137491
element_357           0.130991
element_406           0.101152
element_427           0.091189
element_

## Simple Gaussian model

### Preparing data

In [None]:
# Work on a copy so this model's preparation does not alter the shared frame.
sgmt_df = element_gameweek_df.copy()

In [None]:
# Columns removed before fitting the Gaussian model. 'was_home' is not
# listed, so it is kept.
sgmt_redundant_columns = [
    'row_id',
    'assists',
    'attempted_passes',
    'big_chances_created',
    'big_chances_missed',
    'bonus',
    'bps',
    'clean_sheets',
    'clearances_blocks_interceptions',
    'completed_passes',
    'creativity',
    'dribbles',
    'ea_index',
    'element',
    'errors_leading_to_goal',
    'errors_leading_to_goal_attempt',
    'fixture',
    'fouls',
    'goals_conceded',
    'ict_index',
    'id',
    'influence',
    'key_passes',
    'kickoff_time',
    'kickoff_time_formatted',
    'loaned_in',
    'loaned_out',
    'minutes',
    'offside',
    'open_play_crosses',
    'opponent_team',
    'own_goals',
    'penalties_conceded',
    'penalties_missed',
    'penalties_saved',
    'recoveries',
    'red_cards',
    'round',
    'saves',
    'selected',
    'tackled',
    'tackles',
    'target_missed',
    'team_a_score',
    'team_h_score',
    'threat',
    'total_points',
    'transfers_balance',
    'transfers_in',
    'transfers_out',
    'value',
    'winning_goals',
    'yellow_cards',
    'event',
    'team_a',
    'team_a_difficulty',
    'team_h',
    'team_h_difficulty',
    'element_type',
    'team',
    'web_name',
    'safe_web_name',
    'own_team',
    'opposition_team',
]

In [None]:
# Remove the columns listed above (helper lives in player_data; presumably a plain column drop -- confirm).
sgmt_df = player.remove_redundant_columns(sgmt_df, sgmt_redundant_columns)

In [None]:
# Split into the response ('goals_scored') and the explanatory frame passed to the GLM below.
sgmt_response_df, sgmt_explanatory_df = player.get_response_explanatory_dfs(sgmt_df, 'goals_scored')

### Fitting model

In [None]:
# Gaussian-family GLM of goals scored on the explanatory frame; rows with
# missing values are dropped before fitting.
sgmt_model = sm.GLM(
    endog=sgmt_response_df,
    exog=sgmt_explanatory_df,
    family=sm.families.Gaussian(),
    missing='drop',
)
sgmt_results = sgmt_model.fit()

In [None]:
# Print the fitted model's summary table.
print(sgmt_results.summary())

In [None]:
# Show only the coefficients whose p-value is below 0.1.
sgmt_pvalues = sgmt_results.pvalues
sgmt_pvalues[sgmt_pvalues < 0.1]