In [218]:
library(tidyverse)
library(rpart)
library(pROC)
library(rpart.plot)
library(caret)
library(nnet)
library(randomForest)
library(Matrix)
library(xgboost)

In [219]:
df <- read.csv('epl_data_w_features.csv')

In [322]:
df <- df %>% select(-c_ability_3)

In [323]:
df_play <- df %>%
    filter(data_type == 'hist') %>%
    na.omit %>%
    select(-data_type, -team_1_score, -team_2_score, -id)

### Use the last 5% of rows for 'hold-out'

In [324]:
holdout <- df_play[round(df_play %>% nrow * 0.95):(df_play %>% nrow),]
df_model <- df_play[1:round(df_play %>% nrow * 0.95),]

In [325]:
index <- caret::createDataPartition(y = df_model$result, p = 0.8, list = F)

In [326]:
train <- df_model[index,]
test <- df_model[-index,]

# Classification Models

## Linear Models

### Multinom

In [327]:
fit.mult <- train %>%
                multinom(formula = result ~ .)

# weights:  339 (224 variable)
initial  value 3856.129133 
iter  10 value 3444.284188
iter  20 value 3385.442892
iter  30 value 3341.695076
iter  40 value 3294.117657
iter  50 value 3280.204725
iter  60 value 3273.122916
iter  70 value 3269.953816
iter  80 value 3268.858662
iter  90 value 3267.643583
iter 100 value 3266.948937
final  value 3266.948937 
stopped after 100 iterations


In [328]:
fit.mult %>% summary

"NaNs produced"

Call:
multinom(formula = result ~ ., data = .)

Coefficients:
           (Intercept) is_february is_november d_ability_1 d_ability_3
team_2_win    4.033818 -0.09617404 -0.04268949   -8.093651    7.687734
tie           1.857866 -0.01106152  0.03737004   -3.163904    8.462804
           d_ability_4    d_form_4    d_h2h_2 team_1_nameAston Villa
team_2_win  -0.2706663  0.01294139 -0.5023426              2.3453274
tie         -5.6950433 -0.08489720 -0.0890849              0.7844435
           team_1_nameBirmingham team_1_nameBlackburn team_1_nameBlackpool
team_2_win             1.3158453            1.5676319             3.501682
tie                    0.2237564            0.2079519             1.570668
           team_1_nameBolton team_1_nameBournemouth team_1_nameBrighton
team_2_win         1.4147930              2.2734065                   0
tie                0.1265607              0.8492698                   0
           team_1_nameBurnley team_1_nameCardiff team_1_nameCharlton
team_2_w

In [329]:
preds <- predict(fit.mult, newdata = test, type="probs")

In [330]:
test$team_tie <- as.numeric(test$result == 'tie')
test$team_1_win <- as.numeric(test$result == 'team_1_win')
test$team_2_win <- as.numeric(test$result == 'team_2_win')

test$team_tie_prob_mult_nom <- (preds %>% as.data.frame)$tie
test$team_1_prob_mult_nom <- (preds %>% as.data.frame)$team_1_win
test$team_2_prob_mult_nom <- (preds %>% as.data.frame)$team_2_win

In [331]:
pROC::roc(response = test$team_tie, predictor = test$team_tie_prob_mult_nom)


Call:
roc.default(response = test$team_tie, predictor = test$team_tie_prob_mult_nom)

Data: test$team_tie_prob_mult_nom in 654 controls (test$team_tie 0) < 223 cases (test$team_tie 1).
Area under the curve: 0.5895

In [332]:
pROC::roc(response = test$team_1_win, predictor = test$team_1_prob_mult_nom)


Call:
roc.default(response = test$team_1_win, predictor = test$team_1_prob_mult_nom)

Data: test$team_1_prob_mult_nom in 468 controls (test$team_1_win 0) < 409 cases (test$team_1_win 1).
Area under the curve: 0.7023

In [333]:
pROC::roc(response = test$team_2_win, predictor = test$team_2_prob_mult_nom)


Call:
roc.default(response = test$team_2_win, predictor = test$team_2_prob_mult_nom)

Data: test$team_2_prob_mult_nom in 632 controls (test$team_2_win 0) < 245 cases (test$team_2_win 1).
Area under the curve: 0.7159

### pairwise logreg

In [334]:
train$team_tie <- as.numeric(train$result == 'tie')
train$team_1_win <- as.numeric(train$result == 'team_1_win')
train$team_2_win <- as.numeric(train$result == 'team_2_win')


fit.log_tie <- train %>%
                select(-team_1_win,-team_2_win, -result) %>%
                glm(formula = team_tie ~ ., family="binomial")

fit.log_team_1 <- train %>%
                select(-team_tie,-team_2_win, -result) %>%
                glm(formula = team_1_win ~ ., family="binomial")

fit.log_team_2 <- train %>%
                select(-team_1_win,-team_tie, -result) %>%
                glm(formula = team_2_win ~ ., family="binomial")

In [335]:
fit.log_tie %>% summary


Call:
glm(formula = team_tie ~ ., family = "binomial", data = .)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.4356  -0.8018  -0.6726   1.1940   2.2434  

Coefficients:
                               Estimate Std. Error z value Pr(>|z|)    
(Intercept)                   6.195e-01  2.391e+00   0.259 0.795540    
is_february                   2.617e-02  4.216e-02   0.621 0.534795    
is_november                   5.178e-02  4.326e-02   1.197 0.231361    
d_ability_1                   6.660e-04  1.336e+00   0.000 0.999602    
d_ability_3                   7.093e+00  4.053e+00   1.750 0.080133 .  
d_ability_4                  -7.233e+00  3.735e+00  -1.937 0.052771 .  
d_form_4                     -8.911e-02  1.478e-01  -0.603 0.546655    
d_h2h_2                       1.084e-01  6.581e-02   1.648 0.099424 .  
team_1_nameAston Villa        7.613e-02  3.871e-01   0.197 0.844087    
team_1_nameBirmingham         4.331e-02  4.695e-01   0.092 0.926507    
team_1_nameBl

In [336]:
pred_tie_log <- predict(fit.log_tie, newdata = test, type="response")
pred_team_1_log <- predict(fit.log_team_1, newdata = test, type="response")
pred_team_2_log <- predict(fit.log_team_2, newdata = test, type="response")

In [337]:
test$pred_tie_log <- pred_tie_log
test$pred_team_1_log <- pred_team_1_log
test$pred_team_2_log <- pred_team_2_log

In [338]:
pROC::roc(response = test$team_tie, predictor = test$pred_tie_log)


Call:
roc.default(response = test$team_tie, predictor = test$pred_tie_log)

Data: test$pred_tie_log in 654 controls (test$team_tie 0) < 223 cases (test$team_tie 1).
Area under the curve: 0.5682

In [339]:
pROC::roc(response = test$team_1_win, predictor = test$pred_team_1_log)


Call:
roc.default(response = test$team_1_win, predictor = test$pred_team_1_log)

Data: test$pred_team_1_log in 468 controls (test$team_1_win 0) < 409 cases (test$team_1_win 1).
Area under the curve: 0.7056

In [340]:
pROC::roc(response = test$team_2_win, predictor = test$pred_team_2_log)


Call:
roc.default(response = test$team_2_win, predictor = test$pred_team_2_log)

Data: test$pred_team_2_log in 632 controls (test$team_2_win 0) < 245 cases (test$team_2_win 1).
Area under the curve: 0.718

## Non Linear Models

### D Tree (excl. team name factors)

In [341]:
train <- df_model[index,]
test <- df_model[-index,]

fit.tree <- train %>%
                select(-team_1_name, -team_2_name) %>% # rpart isn't very efficient with 'large' factors
                rpart(formula = result ~ .
                      , parms =  list(split = "information")
                      , control = rpart.control(
                                minsplit = 20
                              , cp = 0.001
                              #, maxcompete = 4
                              #, maxsurrogate = 5
                              #, usesurrogate = 2
                              #, xval = 10
                              #, surrogatestyle = 0
                              #, maxdepth = 30
                      )
                     )

In [342]:
#fit.tree %>% summary

In [343]:
#rpart.plot(fit.tree)

In [344]:
preds <- predict(fit.tree, newdata = test, type="prob") %>% as.data.frame

In [345]:
test$team_tie <- as.numeric(test$result == 'tie')
test$team_1_win <- as.numeric(test$result == 'team_1_win')
test$team_2_win <- as.numeric(test$result == 'team_2_win')

test$team_tie_prob_dtree <- (preds %>% as.data.frame)$tie
test$team_1_prob_dtree <- (preds %>% as.data.frame)$team_1_win
test$team_2_prob_dtree <- (preds %>% as.data.frame)$team_2_win

In [346]:
pROC::roc(response = test$team_tie, predictor = test$team_tie_prob_dtree)


Call:
roc.default(response = test$team_tie, predictor = test$team_tie_prob_dtree)

Data: test$team_tie_prob_dtree in 654 controls (test$team_tie 0) < 223 cases (test$team_tie 1).
Area under the curve: 0.5607

In [347]:
pROC::roc(response = test$team_1_win, predictor = test$team_1_prob_dtree)


Call:
roc.default(response = test$team_1_win, predictor = test$team_1_prob_dtree)

Data: test$team_1_prob_dtree in 468 controls (test$team_1_win 0) < 409 cases (test$team_1_win 1).
Area under the curve: 0.6434

In [348]:
pROC::roc(response = test$team_2_win, predictor = test$team_2_prob_dtree)


Call:
roc.default(response = test$team_2_win, predictor = test$team_2_prob_dtree)

Data: test$team_2_prob_dtree in 632 controls (test$team_2_win 0) < 245 cases (test$team_2_win 1).
Area under the curve: 0.6061

### Random Forest

In [349]:
# train <- df_model[index,]
# test <- df_model[-index,]

# fit.forest <- train %>%
#                 select(-team_1_name, -team_2_name) %>% # rpart isn't very efficient with 'large' factors
#                 randomForest(formula = result ~ .
#                      )

In [350]:
# fit.forest %>% summary

### Xgboost

In [351]:
sparse_matrix <- sparse.model.matrix(result ~ .-1, data = df_model)

sparse_matrix_train <- sparse_matrix[as.vector(index), ,]
sparse_matrix_test <- sparse_matrix[-as.vector(index), ,]

In [352]:
label <- as.numeric(df_model$result)-1

In [353]:
df_model$result[1:15]

In [354]:
label[1:15]

In [355]:
train_label <- label[index]
test_label <- label[-index]

In [356]:
xgb <- xgboost(data = data.matrix(sparse_matrix_train), label = train_label,
            booster = 'gbtree',
             eta = 0.1,
             gamma = 0.01,
             max_depth = 13, 
             subsample = 0.5,
             colsample_bytree = 0.5,
             seed = 1,
             eval_metric = "mlogloss",
             #num_parallel_tree,
             num_class = 3,
             nthread = 8,
             nround=100,
             #feval
              objective = "multi:softprob")

[1]	train-mlogloss:1.054484 
[2]	train-mlogloss:1.014985 
[3]	train-mlogloss:0.974090 
[4]	train-mlogloss:0.936342 
[5]	train-mlogloss:0.905022 
[6]	train-mlogloss:0.874824 
[7]	train-mlogloss:0.846290 
[8]	train-mlogloss:0.816756 
[9]	train-mlogloss:0.788367 
[10]	train-mlogloss:0.762583 
[11]	train-mlogloss:0.739644 
[12]	train-mlogloss:0.715316 
[13]	train-mlogloss:0.694908 
[14]	train-mlogloss:0.673172 
[15]	train-mlogloss:0.651578 
[16]	train-mlogloss:0.631341 
[17]	train-mlogloss:0.613895 
[18]	train-mlogloss:0.595569 
[19]	train-mlogloss:0.576375 
[20]	train-mlogloss:0.558628 
[21]	train-mlogloss:0.543467 
[22]	train-mlogloss:0.530412 
[23]	train-mlogloss:0.514335 
[24]	train-mlogloss:0.500144 
[25]	train-mlogloss:0.487722 
[26]	train-mlogloss:0.478307 
[27]	train-mlogloss:0.465854 
[28]	train-mlogloss:0.455731 
[29]	train-mlogloss:0.443521 
[30]	train-mlogloss:0.434945 
[31]	train-mlogloss:0.428245 
[32]	train-mlogloss:0.417627 
[33]	train-mlogloss:0.409425 
[34]	train-mlogloss

In [357]:
pred <- predict(xgb, data.matrix(sparse_matrix_test), type="probs")

In [358]:
pred_team_tie <- pred[seq(3,length(pred)+2,3)]
pred_team_1 <- pred[seq(1,length(pred),3)]
pred_team_2 <- pred[seq(2,length(pred)+1,3)]

In [359]:
test$team_tie <- as.numeric(test$result == 'tie')
test$team_1_win <- as.numeric(test$result == 'team_1_win')
test$team_2_win <- as.numeric(test$result == 'team_2_win')

test$team_tie_prob_xgb_tree <- pred_team_tie
test$team_1_prob_xgb_tree <- pred_team_1
test$team_2_prob_xgb_tree <- pred_team_2

In [360]:
pROC::roc(response = test$team_tie, predictor = test$team_tie_prob_xgb_tree)


Call:
roc.default(response = test$team_tie, predictor = test$team_tie_prob_xgb_tree)

Data: test$team_tie_prob_xgb_tree in 654 controls (test$team_tie 0) < 223 cases (test$team_tie 1).
Area under the curve: 0.5803

In [361]:
pROC::roc(response = test$team_1_win, predictor = test$team_1_prob_xgb_tree)


Call:
roc.default(response = test$team_1_win, predictor = test$team_1_prob_xgb_tree)

Data: test$team_1_prob_xgb_tree in 468 controls (test$team_1_win 0) < 409 cases (test$team_1_win 1).
Area under the curve: 0.677

In [362]:
pROC::roc(response = test$team_2_win, predictor = test$team_2_prob_xgb_tree)


Call:
roc.default(response = test$team_2_win, predictor = test$team_2_prob_xgb_tree)

Data: test$team_2_prob_xgb_tree in 632 controls (test$team_2_win 0) < 245 cases (test$team_2_win 1).
Area under the curve: 0.706

### Results are within the norm. Try Linear booster

In [363]:
xgb_lin <- xgboost(data = data.matrix(sparse_matrix_train), label = train_label,
            booster = 'gblinear',
            lambda = 0.1,
            lambda_bias = 0.1,
            alpha = 0.1,
             seed = 1,
             eval_metric = "mlogloss",
             #num_parallel_tree,
             num_class = 3,
             nthread = 8,
             nround=50,
             #feval
              objective = "multi:softprob")

[1]	train-mlogloss:0.962955 
[2]	train-mlogloss:0.950577 
[3]	train-mlogloss:0.947382 
[4]	train-mlogloss:0.946421 
[5]	train-mlogloss:0.945864 
[6]	train-mlogloss:0.945456 
[7]	train-mlogloss:0.945121 
[8]	train-mlogloss:0.944846 
[9]	train-mlogloss:0.944583 
[10]	train-mlogloss:0.944343 
[11]	train-mlogloss:0.944100 
[12]	train-mlogloss:0.943889 
[13]	train-mlogloss:0.943709 
[14]	train-mlogloss:0.943530 
[15]	train-mlogloss:0.943361 
[16]	train-mlogloss:0.943201 
[17]	train-mlogloss:0.943059 
[18]	train-mlogloss:0.942911 
[19]	train-mlogloss:0.942765 
[20]	train-mlogloss:0.942621 
[21]	train-mlogloss:0.942491 
[22]	train-mlogloss:0.942367 
[23]	train-mlogloss:0.942230 
[24]	train-mlogloss:0.942098 
[25]	train-mlogloss:0.941997 
[26]	train-mlogloss:0.941871 
[27]	train-mlogloss:0.941754 
[28]	train-mlogloss:0.941631 
[29]	train-mlogloss:0.941514 
[30]	train-mlogloss:0.941403 
[31]	train-mlogloss:0.941294 
[32]	train-mlogloss:0.941195 
[33]	train-mlogloss:0.941092 
[34]	train-mlogloss

In [364]:
pred_lin <- predict(xgb_lin, data.matrix(sparse_matrix_test), type="probs")

In [365]:
pred_lin_team_tie <- pred_lin[seq(3,length(pred_lin)+2,3)]
pred_lin_team_1 <- pred_lin[seq(1,length(pred_lin),3)]
pred_lin_team_2 <- pred_lin[seq(2,length(pred_lin)+1,3)]

In [366]:
test$team_tie <- as.numeric(test$result == 'tie')
test$team_1_win <- as.numeric(test$result == 'team_1_win')
test$team_2_win <- as.numeric(test$result == 'team_2_win')

test$team_tie_prob_xgb_lin <- pred_lin_team_tie
test$team_1_prob_xgb_lin <- pred_lin_team_1
test$team_2_prob_xgb_lin <- pred_lin_team_2

In [367]:
pROC::roc(response = test$team_tie, predictor = test$team_tie_prob_xgb_lin)


Call:
roc.default(response = test$team_tie, predictor = test$team_tie_prob_xgb_lin)

Data: test$team_tie_prob_xgb_lin in 654 controls (test$team_tie 0) < 223 cases (test$team_tie 1).
Area under the curve: 0.5802

In [368]:
pROC::roc(response = test$team_1_win, predictor = test$team_1_prob_xgb_lin)


Call:
roc.default(response = test$team_1_win, predictor = test$team_1_prob_xgb_lin)

Data: test$team_1_prob_xgb_lin in 468 controls (test$team_1_win 0) < 409 cases (test$team_1_win 1).
Area under the curve: 0.701

In [369]:
pROC::roc(response = test$team_2_win, predictor = test$team_2_prob_xgb_lin)


Call:
roc.default(response = test$team_2_win, predictor = test$team_2_prob_xgb_lin)

Data: test$team_2_prob_xgb_lin in 632 controls (test$team_2_win 0) < 245 cases (test$team_2_win 1).
Area under the curve: 0.7153

# Test on holdout

In [370]:
sparse_matrix_holdout <- sparse.model.matrix(result ~ .-1, data = holdout)

In [371]:
label <- as.numeric(holdout$result)-1

In [372]:
holdout$result[1:15]

In [373]:
label[1:15]

In [374]:
pred_holdout <- predict(xgb, data.matrix(sparse_matrix_holdout), type="probs")

In [375]:
holdout_pred_team_tie <- pred_holdout[seq(3,length(pred_holdout)+2,3)]
holdout_pred_team_1 <- pred_holdout[seq(1,length(pred_holdout),3)]
holdout_pred_team_2 <- pred_holdout[seq(2,length(pred_holdout)+1,3)]

In [376]:
holdout$team_tie <- as.numeric(holdout$result == 'tie')
holdout$team_1_win <- as.numeric(holdout$result == 'team_1_win')
holdout$team_2_win <- as.numeric(holdout$result == 'team_2_win')

holdout$team_tie_prob_xgb_tree <- holdout_pred_team_tie
holdout$team_1_prob_xgb_tree <- holdout_pred_team_1
holdout$team_2_prob_xgb_tree <- holdout_pred_team_2

In [377]:
pROC::roc(response = holdout$team_tie, predictor = holdout$team_tie_prob_xgb_tree)


Call:
roc.default(response = holdout$team_tie, predictor = holdout$team_tie_prob_xgb_tree)

Data: holdout$team_tie_prob_xgb_tree in 180 controls (holdout$team_tie 0) < 52 cases (holdout$team_tie 1).
Area under the curve: 0.6189

In [378]:
pROC::roc(response = holdout$team_1_win, predictor = holdout$team_1_prob_xgb_tree)


Call:
roc.default(response = holdout$team_1_win, predictor = holdout$team_1_prob_xgb_tree)

Data: holdout$team_1_prob_xgb_tree in 126 controls (holdout$team_1_win 0) < 106 cases (holdout$team_1_win 1).
Area under the curve: 0.7537

In [379]:
pROC::roc(response = holdout$team_2_win, predictor = holdout$team_2_prob_xgb_tree)


Call:
roc.default(response = holdout$team_2_win, predictor = holdout$team_2_prob_xgb_tree)

Data: holdout$team_2_prob_xgb_tree in 158 controls (holdout$team_2_win 0) < 74 cases (holdout$team_2_win 1).
Area under the curve: 0.7873

In [384]:
df_comp <- df %>%
    filter(data_type == 'comp') %>%
    #na.omit %>%
    select(-data_type, -team_1_score, -team_2_score, -id)

In [397]:
df_comp

result,is_february,is_november,d_ability_1,d_ability_3,d_ability_4,d_form_4,d_h2h_2,team_1_name,team_2_name,...,team_2_team_score_ma_10,team_2_team_win_index,team_2_team_loss_index,team_2_tie_index,x_year,y_year,x_week,y_week,x_day,y_day
tie,-0.3178843,-0.3385736,-2.20500635,-2.2454484,-2.2445428,-0.94887132,-1.525527,West Ham,Chelsea,...,1.7,0.8953355,1.459592,0.9748441,0.9291414,-0.3697245,0.2225209,-0.9749279,-1.0,1.224647e-16
tie,-0.3178843,-0.3385736,-0.88925776,-0.8380912,-0.8429574,0.03982277,-1.525527,Huddersfield,Brighton,...,1.0,1.0714286,1.428571,0.4285714,0.9291414,-0.3697245,0.3302791,-0.9438833,1.0,0.0
tie,-0.3178843,-0.3385736,-0.02423344,-0.1066833,-0.1148918,0.7435476,-1.525527,Swansea,West Brom,...,0.8,0.6683673,1.121498,1.1080827,0.9291414,-0.3697245,0.3302791,-0.9438833,1.0,0.0
tie,-0.3178843,-0.3385736,0.3648399,0.3435022,0.3538497,0.71709936,-1.525527,Burnley,Watford,...,1.8,0.8892463,1.235383,0.671875,0.9291414,-0.3697245,0.3302791,-0.9438833,1.0,0.0
tie,-0.3178843,-0.3385736,-0.33941503,-0.427606,-0.4371238,0.28919047,-1.525527,Crystal Palace,Bournemouth,...,1.0,0.8238683,1.107407,1.0111111,0.9291414,-0.3697245,0.3302791,-0.9438833,1.0,0.0
tie,-0.3178843,-0.3385736,2.02210898,2.0842651,2.0996903,0.79409032,-1.525527,Tottenham,Stoke City,...,1.2,0.6071429,1.314685,0.990099,0.9291414,-0.3697245,0.3302791,-0.9438833,1.0,0.0
tie,-0.3178843,-0.3385736,-0.55800277,-0.4606444,-0.441627,0.21156075,-1.525527,Newcastle Utd,Leicester,...,1.3,0.7169811,1.32,0.974359,0.9291414,-0.3697245,0.399892,-0.9165623,-1.0,1.224647e-16
tie,-0.3178843,-0.3385736,-1.16595526,-1.1310483,-1.1045425,-0.73148374,-1.525527,Southampton,Arsenal,...,2.2,0.8211312,1.45675,1.0573135,0.9353679,-0.3536761,0.9009689,-0.4338837,1.0,0.0
tie,-0.3178843,-0.3385736,1.17455248,1.2793569,1.2666328,-0.29539431,-1.525527,Liverpool,Everton,...,1.7,0.6824645,1.235669,1.1948052,0.9353679,-0.3536761,0.9308737,-0.365341,6.123234000000001e-17,1.0
tie,-0.3178843,-0.3385736,-0.56000922,-0.3529263,-0.3716552,-1.48296742,-1.525527,Manchester United,Manchester City,...,3.0,0.7537313,1.323944,1.1785714,0.9353679,-0.3536761,0.9555728,-0.2947552,-1.0,1.224647e-16


# submit

In [399]:
sparse_matrix_comp <- sparse.model.matrix(result ~ .-1, data = df_comp)

In [401]:
pred_comp <- predict(xgb, data.matrix(sparse_matrix_comp), type="probs")

In [405]:
comp_pred_team_tie <- pred_comp[seq(3,length(pred_comp)+2,3)]
comp_pred_team_1 <- pred_comp[seq(1,length(pred_comp),3)]
comp_pred_team_2 <- pred_comp[seq(2,length(pred_comp)+1,3)]

In [406]:
df_comp$team_tie_prob <- comp_pred_team_tie
df_comp$team_1_prob <- comp_pred_team_1
df_comp$team_2_prob <- comp_pred_team_2

In [413]:
names(df)

In [415]:
cbind(df %>%
    filter(data_type == 'comp') %>% select(id, team_1_name, team_2_name), df_comp %>% select(team_1_prob, team_2_prob, team_tie_prob) )

id,team_1_name,team_2_name,team_1_prob,team_2_prob,team_tie_prob
5a19f6719772221c48d2f9ec,West Ham,Chelsea,0.1262285,0.69770116,0.17607033
5a19f6719772221c48d2f9ef,Huddersfield,Brighton,0.2380701,0.32086292,0.44106704
5a19f6719772221c48d2f9f0,Swansea,West Brom,0.3491643,0.38985953,0.2609762
5a19f6719772221c48d2f9ed,Burnley,Watford,0.4990587,0.24181381,0.25912747
5a19f6719772221c48d2f9ee,Crystal Palace,Bournemouth,0.2830107,0.53522688,0.18176244
5a19f6719772221c48d2f9f1,Tottenham,Stoke City,0.9089789,0.03562783,0.05539323
5a19f6719772221c48d2f9f2,Newcastle Utd,Leicester,0.2727764,0.63662666,0.09059697
5a1ac45d4825080ad7e3b9c2,Southampton,Arsenal,0.3058268,0.49828175,0.19589151
5a1ad9014825081aeace6799,Liverpool,Everton,0.7974147,0.02765434,0.17493099
5a1b13b99772220c94eb5151,Manchester United,Manchester City,0.5753282,0.24864997,0.1760219
