# Libraries, Preferences and Data Input

In [47]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import cross_val_score, KFold, GridSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures

In [48]:
%config Completer.use_jedi = False

In [49]:
# read in the training data
players_training_week1 = pd.read_csv("players_training_week1.csv")
players_training_week2 = pd.read_csv("players_training_week2.csv")
# merge training data into one file and set index
players_training = pd.merge(players_training_week1, players_training_week2, how='outer').set_index('player')

# read in the input data for predictions
players_pred_week1 = pd.read_csv("players_test_week1.csv", index_col='player', usecols=[0,1,2,3,4,5])
players_pred_week2 = pd.read_csv("players_test_week2.csv", index_col='player', usecols=[0,1,2,3,4,5])
players_pred_week3 = pd.read_csv("players_test_week3.csv", index_col='player', usecols=[0,1,2,3,4,5])

# read in the output data for model evaluation
players_test_week1 = pd.read_csv("players_test_week1.csv", index_col='player', usecols=[0,6])
players_test_week2 = pd.read_csv("players_test_week2.csv", index_col='player', usecols=[0,6])

# read in the historical and predicted. results for training and prediction
results_week1 = pd.read_csv("results_week1.csv", index_col='country')
results_week2 = pd.read_csv("results_week2.csv", index_col='country')
results_week3 = pd.read_csv("results_week3.csv", index_col='country')

# collate list of possible kickers for each team
kickers = [
    'O. Farrell', 'G. Ford', 'M. Malins', # england
    'M. Jalibert', 'L. Carbonel', # france
    'J. Sexton', 'B. Burns', 'R. Byrne', # ireland
    'P. Garbisi', 'T. Allan', 'C. Canna', # italy
    'F. Russell', 'J. van der Walt' # scotland
    'D. Biggar', 'L. Halfpenny', 'C. Sheedy', 'J. Evans' #wales
]

In [50]:
players_training.head()

Unnamed: 0_level_0,cost,country,position,mins,tackles,d_tackles,t_breaks,m_ball,mom,tries,conversions,penalties,d_goals,yellow,red,started,week,m_played
player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
T. Curry,15.2,England,Back Row,80.0,15.0,1.0,2.0,40.0,,,,,,,,1,1,1
A. Vincent,16.3,France,Centre,80.0,15.0,,5.0,100.0,,1.0,,,,,,1,1,1
E. Daly,15.2,England,Full Back,80.0,1.0,,3.0,85.0,,,,,,,,1,1,1
O. Farrell,14.9,England,Centre,80.0,5.0,,1.0,38.0,,,,2.0,,,,1,1,1
J. Hill,12.1,England,Second Row,80.0,19.0,1.0,,5.0,,,,,,,,1,1,1


# Helper Code

In [4]:
def team_points_calculator(country, mins, week):
    """
    Calculates the number of team points assigned to each player, based on their country, the number of minutes they played, and the result.
    """
    
    if week == 1:
        results = results_week1
    elif week == 2:
        results = results_week2
    elif week == 3:
        results = results_week3
    elif week == 4:
        results = results_week4
    elif week == 5:
        results = results_week5
    
    
    # calculate the the home/away points
    outcome = 0
    if results.loc[country, 'home'] == 1:
        if results.loc[country, 'for'] > results.loc[country, 'against']:
            outcome += 12
        elif results.loc[country, 'for'] == results.loc[country, 'against']:
            outcome += 4
        else:
            outcome += 1
    else:
        if results.loc[country, 'for'] > results.loc[country, 'against']:
            outcome += 18
        elif results.loc[country, 'for'] == results.loc[country, 'against']:
            outcome += 10
        else:
            outcome += 3
    
    # calculate the score difference points
    score_diff = (results.loc[country, 'for'] - results.loc[country, 'against']) * 0.5
    
    # calculate the points assigned to each player
    team_points = round((outcome + score_diff) * (mins / 80), 2)

    return team_points

In [5]:
def ind_points_calculator(tackles, d_tackles, t_breaks, m_ball, mom, tries, conversions, penalties, d_goals, yellow, red):
    """
    Calculates the number of individual points assigned to each play, based on their fixure metrics.
    """
    
    ind_points = round(
        tackles * 1 +
        d_tackles * 2 +
        t_breaks * 2 +
        m_ball * 0.3 +
        mom * 15 +
        tries * 15 +
        conversions * 3 +
        penalties * 3 +
        d_goals * 6 +
        yellow * -5 +
        red * -10
    ,2)
    
    return ind_points

In [6]:
def v_italy_generator(week, country):
    """
    Determines whether each player is playing against Italy or not in a given week.
    """
    
    if week == 1 and country == 'France':
        return 1
    elif week == 2 and country == 'England':
        return 1
    elif week == 3 and country == 'Ireland':
        return 1
    elif week == 4 and country == 'Wales':
        return 1
    elif week == 5 and country == 'Scotland':
        return 1
    else:
        return 0

In [7]:
# create lists of columns that will not be used for modelling purposes
unused_features = ['mom', 'yellow', 'red', 'd_goals', 'week']
unused_scores = ['team_score', 'ind_score']

# create a list of all columns that we will be individually addressing as target features
outputs = ['mins', 'tackles', 'd_tackles', 't_breaks', 'm_ball', 'tries', 'conversions', 'penalties']

# create a list of all columns that we will be using as input features
features = ['cost', 'started', 'England', 'France', 'Ireland', 'Italy', 'Scotland', 'Wales', 'Back Row', 'Centre', 'Fly Half', 'Full Back', 'Hooker', 
            'Prop', 'Scrum Half', 'Second Row', 'Wing', 'v_italy', 'kicker']

# create a list of all feature interactions that we will be evaluating
feature_interactions = [
    'cost_started', 'cost_England', 'cost_France', 'cost_Ireland', 'cost_Italy', 'cost_Scotland', 'cost_Wales', 'cost_Back Row',
    'cost_Centre', 'cost_Fly Half', 'cost_Full Back', 'cost_Hooker', 'cost_Prop', 'cost_Scrum Half', 'cost_Second Row', 'cost_Wing',
    'cost_v_italy', 'cost_kicker', 'Back Row_v_italy', 'Centre_v_italy', 'Fly Half_v_italy', 'Full Back_v_italy',
    'Hooker_v_italy', 'Prop_v_italy', 'Scrum Half_v_italy', 'Second Row_v_italy', 'Wing_v_italy', 'started_England', 'started_France',
    'started_Ireland', 'started_Italy', 'started_Scotland', 'started_Wales', 'started_Back Row', 'started_Centre', 'started_Fly Half',
    'started_Full Back', 'started_Hooker', 'started_Prop', 'started_Scrum Half', 'started_Second Row', 'started_Wing', 'started_kicker',
    'started_v_italy'
]

# Training Data Prep

In [8]:
def training_data_processor(data):
    """
    Prepares the training data for modelling purposes.
    """
    
    # replace any NaN values with zero.
    data = data.fillna(0)
    
    # designate the prospective kickers
    data = data.reset_index()
    data['kicker'] = data['player'].apply(lambda x: 1 if x in kickers else 0)
    data = data.set_index('player')
    
    # designate the players playing against Italy
    data['v_italy'] = data.apply(lambda x: v_italy_generator(x['week'], x['country']), axis=1)
    
    # calculate the team scores based on mins and result
    data['team_score'] = data.apply(lambda x: team_points_calculator(x['country'], x['mins'], x['week']), axis=1)

    # calculate the individual score based on metrics
    data['ind_score'] = data.apply(lambda x: ind_points_calculator(
        x['tackles'], x['d_tackles'], x['t_breaks'], x['m_ball'], x['mom'],
        x['tries'], x['conversions'], x['penalties'], x['d_goals'], x['yellow'], x['red']
    ), axis=1)
    
    # calculate the overall score
    data['overall_score'] = data['team_score'] + data['ind_score']
    
    # convert the categorical columns into dummy variables
    data = pd.get_dummies(data, columns=['country', 'position'], prefix="", prefix_sep="")
    
    # generate feature interactions for all dummy variables with the cost variable
    data['cost_started'] = data['cost'] * data['started'] 
    data['cost_England'] = data['cost'] * data['England'] 
    data['cost_France'] = data['cost'] * data['France'] 
    data['cost_Ireland'] = data['cost'] * data['Ireland'] 
    data['cost_Italy'] = data['cost'] * data['Italy'] 
    data['cost_Scotland'] = data['cost'] * data['Scotland'] 
    data['cost_Wales'] = data['cost'] * data['Wales'] 
    data['cost_Back Row'] = data['cost'] * data['Back Row'] 
    data['cost_Centre'] = data['cost'] * data['Centre'] 
    data['cost_Fly Half'] = data['cost'] * data['Fly Half'] 
    data['cost_Full Back'] = data['cost'] * data['Full Back'] 
    data['cost_Hooker'] = data['cost'] * data['Hooker'] 
    data['cost_Prop'] = data['cost'] * data['Prop'] 
    data['cost_Scrum Half'] = data['cost'] * data['Scrum Half'] 
    data['cost_Second Row'] = data['cost'] * data['Second Row'] 
    data['cost_Wing'] = data['cost'] * data['Wing'] 
    data['cost_v_italy'] = data['cost'] * data['v_italy'] 
    data['cost_kicker'] = data['cost'] * data['kicker'] 
    
    data['Back Row_v_italy'] = data['Back Row'] * data['v_italy']
    data['Centre_v_italy'] = data['Centre'] * data['v_italy']
    data['Fly Half_v_italy'] = data['Fly Half'] * data['v_italy']
    data['Full Back_v_italy'] = data['Full Back'] * data['v_italy']
    data['Hooker_v_italy'] = data['Hooker'] * data['v_italy']
    data['Prop_v_italy'] = data['Prop'] * data['v_italy']
    data['Scrum Half_v_italy'] = data['Scrum Half'] * data['v_italy']
    data['Second Row_v_italy'] = data['Second Row'] * data['v_italy']
    data['Wing_v_italy'] = data['Wing'] * data['v_italy']
    
    data['started_England'] = data['started'] * data['England'] 
    data['started_France'] = data['started'] * data['France'] 
    data['started_Ireland'] = data['started'] * data['Ireland'] 
    data['started_Italy'] = data['started'] * data['Italy'] 
    data['started_Scotland'] = data['started'] * data['Scotland'] 
    data['started_Wales'] = data['started'] * data['Wales'] 
    data['started_Back Row'] = data['started'] * data['Back Row'] 
    data['started_Centre'] = data['started'] * data['Centre'] 
    data['started_Fly Half'] = data['started'] * data['Fly Half'] 
    data['started_Full Back'] = data['started'] * data['Full Back'] 
    data['started_Hooker'] = data['started'] * data['Hooker'] 
    data['started_Prop'] = data['started'] * data['Prop'] 
    data['started_Scrum Half'] = data['started'] * data['Scrum Half'] 
    data['started_Second Row'] = data['started'] * data['Second Row'] 
    data['started_Wing'] = data['started'] * data['Wing'] 
    data['started_v_italy'] = data['started'] * data['v_italy'] 
    data['started_kicker'] = data['started'] * data['kicker'] 
    
    
    # drop the features which won't be used for modelling
    data = data.drop(unused_features + unused_scores, axis=1)
    
    # move the target variable to the end for readibility
    data['overall_score'] = data.pop('overall_score')
    
    return data

In [9]:
# process the training data for modelling
players_training_processed = training_data_processor(players_training)

In [10]:
def feature_coefficients(model):
    """
    Returns a dataframe of all non-zero coefficients for the model under consideration.
    """
    
    features_list = players_training_processed[features + feature_interactions].columns
    coefficients = model.best_estimator_[1].coef_
    
    df = pd.DataFrame({'features': features_list, 'coefficient': coefficients})
    
    return df[df['coefficient'] != 0]

In [11]:
def test_data_predictor_multiple(players):
    """
    
    """

    players = players.reset_index()
    players['kicker'] = players['player'].apply(lambda x: 1 if x in kickers else 0)
    players = players.set_index('player')
    
    players['v_italy'] = players.apply(lambda x: v_italy_generator(x['week'], x['country']), axis=1)

    players_interim = players.drop('week', axis=1).copy()
    players_interim = pd.get_dummies(players_interim, columns=['country', 'position'], prefix="", prefix_sep="")
    players_interim['France'] = 0
    players_interim['Scotland'] = 0

    
    players_interim['cost_started'] = players_interim['cost'] * players_interim['started'] 
    players_interim['cost_England'] = players_interim['cost'] * players_interim['England'] 
    players_interim['cost_France'] = players_interim['cost'] * players_interim['France'] 
    players_interim['cost_Ireland'] = players_interim['cost'] * players_interim['Ireland'] 
    players_interim['cost_Italy'] = players_interim['cost'] * players_interim['Italy'] 
    players_interim['cost_Scotland'] = players_interim['cost'] * players_interim['Scotland'] 
    players_interim['cost_Wales'] = players_interim['cost'] * players_interim['Wales'] 
    players_interim['cost_Back Row'] = players_interim['cost'] * players_interim['Back Row'] 
    players_interim['cost_Centre'] = players_interim['cost'] * players_interim['Centre'] 
    players_interim['cost_Fly Half'] = players_interim['cost'] * players_interim['Fly Half'] 
    players_interim['cost_Full Back'] = players_interim['cost'] * players_interim['Full Back'] 
    players_interim['cost_Hooker'] = players_interim['cost'] * players_interim['Hooker'] 
    players_interim['cost_Prop'] = players_interim['cost'] * players_interim['Prop'] 
    players_interim['cost_Scrum Half'] = players_interim['cost'] * players_interim['Scrum Half'] 
    players_interim['cost_Second Row'] = players_interim['cost'] * players_interim['Second Row'] 
    players_interim['cost_Wing'] = players_interim['cost'] * players_interim['Wing'] 
    players_interim['cost_v_italy'] = players_interim['cost'] * players_interim['v_italy'] 
    players_interim['cost_kicker'] = players_interim['cost'] * players_interim['kicker']
    
    players_interim['Back Row_v_italy'] = players_interim['Back Row'] * players_interim['v_italy']
    players_interim['Centre_v_italy'] = players_interim['Centre'] * players_interim['v_italy']
    players_interim['Fly Half_v_italy'] = players_interim['Fly Half'] * players_interim['v_italy']
    players_interim['Full Back_v_italy'] = players_interim['Full Back'] * players_interim['v_italy']
    players_interim['Hooker_v_italy'] = players_interim['Hooker'] * players_interim['v_italy']
    players_interim['Prop_v_italy'] = players_interim['Prop'] * players_interim['v_italy']
    players_interim['Scrum Half_v_italy'] = players_interim['Scrum Half'] * players_interim['v_italy']
    players_interim['Second Row_v_italy'] = players_interim['Second Row'] * players_interim['v_italy']
    players_interim['Wing_v_italy'] = players_interim['Wing'] * players_interim['v_italy']
    
    players_interim['started_England'] = players_interim['started'] * players_interim['England'] 
    players_interim['started_France'] = players_interim['started'] * players_interim['France'] 
    players_interim['started_Ireland'] = players_interim['started'] * players_interim['Ireland'] 
    players_interim['started_Italy'] = players_interim['started'] * players_interim['Italy'] 
    players_interim['started_Scotland'] = players_interim['started'] * players_interim['Scotland'] 
    players_interim['started_Wales'] = players_interim['started'] * players_interim['Wales'] 
    players_interim['started_Back Row'] = players_interim['started'] * players_interim['Back Row'] 
    players_interim['started_Centre'] = players_interim['started'] * players_interim['Centre'] 
    players_interim['started_Fly Half'] = players_interim['started'] * players_interim['Fly Half'] 
    players_interim['started_Full Back'] = players_interim['started'] * players_interim['Full Back'] 
    players_interim['started_Hooker'] = players_interim['started'] * players_interim['Hooker'] 
    players_interim['started_Prop'] = players_interim['started'] * players_interim['Prop'] 
    players_interim['started_Scrum Half'] = players_interim['started'] * players_interim['Scrum Half'] 
    players_interim['started_Second Row'] = players_interim['started'] * players_interim['Second Row'] 
    players_interim['started_Wing'] = players_interim['started'] * players_interim['Wing'] 
    players_interim['started_v_italy'] = players_interim['started'] * players_interim['v_italy'] 
    players_interim['started_kicker'] = players_interim['started'] * players_interim['kicker']
    
    players['mins'] = model_lasso_interactions_mins.best_estimator_.predict(players_interim).round()
    players['tackles'] = model_lasso_interactions_tackles.best_estimator_.predict(players_interim).round(2)
    players['d_tackles'] = model_lasso_interactions_d_tackles.best_estimator_.predict(players_interim).round(2)
    players['t_breaks'] = model_lasso_interactions_t_breaks.best_estimator_.predict(players_interim).round(2)
    players['m_ball'] = model_lasso_interactions_m_ball.best_estimator_.predict(players_interim).round(2)
    players['tries'] = model_lasso_interactions_tries.best_estimator_.predict(players_interim).round(2)
    players['conversions'] = model_lasso_interactions_conversions.best_estimator_.predict(players_interim).round(2)
    players['penalties'] = model_lasso_interactions_penalties.best_estimator_.predict(players_interim).round(2)
    
    
    players['d_goals'] = 0
    players['mom'] = 0
    players['yellow'] = 0
    players['red'] = 0
    
    players['team_score'] = players.apply(lambda x: team_points_calculator(x['country'], x['mins'], x['week']), axis=1)
    players = players.drop('week', axis=1)
    
    players['ind_score'] = players.apply(lambda x: ind_points_calculator(
        x['tackles'], x['d_tackles'], x['t_breaks'], x['m_ball'], x['mom'],
        x['tries'], x['conversions'], x['penalties'], x['d_goals'], x['yellow'], x['red']
    ), axis=1)

    players['overall_score'] = (players['team_score'] + players['ind_score']).round(2)
    
    return players

# Train Models

In [12]:
def lasso_model_fitter(target):
    """
    
    """
    
    X = players_training_processed[features + feature_interactions]
    y = players_training_processed[target]
    
    model = GridSearchCV(
        estimator = Pipeline([
            ('scaler', StandardScaler()),
            ('lasso', Lasso(max_iter=1000000))
        ]),
        param_grid = {'lasso__alpha': np.linspace(start=0.01, stop=1, num=1000)},
        cv = KFold(n_splits=10, shuffle=True, random_state=101),
        scoring = 'neg_root_mean_squared_error'
    ).fit(X,y)
    
    return model

## mins

In [13]:
model_lasso_interactions_mins = lasso_model_fitter('mins')

print("RMSE: {}".format(-model_lasso_interactions_mins.best_score_.round(2)))
print("alpha: {}".format(model_lasso_interactions_mins.best_params_['lasso__alpha'].round(2)))

RMSE: 10.39
alpha: 0.53


In [14]:
feature_coefficients(model_lasso_interactions_mins)

Unnamed: 0,features,coefficient
0,cost,1.883465
1,started,18.607402
5,Italy,1.332407
19,cost_started,0.713052
25,cost_Wales,0.209713
27,cost_Centre,0.37245
46,started_England,0.16362
50,started_Scotland,0.630701
55,started_Full Back,0.873856
57,started_Prop,-0.118688


## tackles

In [15]:
model_lasso_interactions_tackles = lasso_model_fitter('tackles')

print("RMSE: {}".format(-model_lasso_interactions_tackles.best_score_.round(2)))
print("alpha: {}".format(model_lasso_interactions_tackles.best_params_['lasso__alpha'].round(2)))

RMSE: 3.31
alpha: 0.13


In [16]:
feature_coefficients(model_lasso_interactions_tackles)

Unnamed: 0,features,coefficient
1,started,0.804134
4,Ireland,-0.115902
6,Scotland,-0.300149
11,Full Back,-0.448738
12,Hooker,0.065839
13,Prop,0.418259
14,Scrum Half,-0.152044
17,v_italy,-0.361934
19,cost_started,0.614902
21,cost_France,0.058094


## d_tackles

In [17]:
model_lasso_interactions_d_tackles = lasso_model_fitter('d_tackles')

print("RMSE: {}".format(-model_lasso_interactions_d_tackles.best_score_.round(2)))
print("alpha: {}".format(model_lasso_interactions_d_tackles.best_params_['lasso__alpha'].round(2)))

RMSE: 0.45
alpha: 0.04


In [18]:
feature_coefficients(model_lasso_interactions_d_tackles)

Unnamed: 0,features,coefficient
6,Scotland,-0.005248
19,cost_started,0.010625
25,cost_Wales,0.002786
26,cost_Back Row,0.032923
33,cost_Second Row,0.005946
44,Second Row_v_italy,0.020169
46,started_England,0.013659
47,started_France,0.046698
48,started_Ireland,-0.020203
56,started_Hooker,0.016584


## t_breaks

In [19]:
model_lasso_interactions_t_breaks = lasso_model_fitter('t_breaks')

print("RMSE: {}".format(-model_lasso_interactions_t_breaks.best_score_.round(2)))
print("alpha: {}".format(model_lasso_interactions_t_breaks.best_params_['lasso__alpha'].round(2)))

RMSE: 1.17
alpha: 0.04


In [20]:
feature_coefficients(model_lasso_interactions_t_breaks)

Unnamed: 0,features,coefficient
0,cost,0.422786
5,Italy,0.168006
7,Wales,-0.016668
9,Centre,0.047977
17,v_italy,0.006044
19,cost_started,0.176205
22,cost_Ireland,0.074023
24,cost_Scotland,0.122087
25,cost_Wales,-0.001834
29,cost_Full Back,0.353379


## m_ball

In [21]:
model_lasso_interactions_m_ball = lasso_model_fitter('m_ball')

print("RMSE: {}".format(-model_lasso_interactions_m_ball.best_score_.round(2)))
print("alpha: {}".format(model_lasso_interactions_m_ball.best_params_['lasso__alpha'].round(2)))

RMSE: 21.67
alpha: 0.19


In [22]:
feature_coefficients(model_lasso_interactions_m_ball)

Unnamed: 0,features,coefficient
0,cost,7.433387
1,started,-10.132091
2,England,-0.026299
5,Italy,1.326846
9,Centre,3.783191
10,Fly Half,1.292833
14,Scrum Half,-0.370912
16,Wing,-3.667251
17,v_italy,2.309266
19,cost_started,21.927474


## tries

In [23]:
model_lasso_interactions_tries = lasso_model_fitter('tries')

print("RMSE: {}".format(-model_lasso_interactions_tries.best_score_.round(2)))
print("alpha: {}".format(model_lasso_interactions_tries.best_params_['lasso__alpha'].round(2)))

RMSE: 0.31
alpha: 0.04


In [24]:
feature_coefficients(model_lasso_interactions_tries)

Unnamed: 0,features,coefficient
0,cost,0.03012
29,cost_Full Back,0.006898
34,cost_Wing,0.012688
35,cost_v_italy,0.007608
40,Full Back_v_italy,0.025853
45,Wing_v_italy,0.030008
60,started_Wing,0.091424
62,started_v_italy,0.024064


## conversions

In [25]:
model_lasso_interactions_conversions = lasso_model_fitter('conversions')

print("RMSE: {}".format(-model_lasso_interactions_conversions.best_score_.round(2)))
print("alpha: {}".format(model_lasso_interactions_conversions.best_params_['lasso__alpha'].round(2)))

RMSE: 0.24
alpha: 0.16


In [26]:
feature_coefficients(model_lasso_interactions_conversions)

Unnamed: 0,features,coefficient
61,started_kicker,0.089315


## penalties

In [27]:
model_lasso_interactions_penalties = lasso_model_fitter('penalties')

print("RMSE: {}".format(-model_lasso_interactions_penalties.best_score_.round(2)))
print("alpha: {}".format(model_lasso_interactions_penalties.best_params_['lasso__alpha'].round(2)))

RMSE: 0.21
alpha: 0.02


In [28]:
feature_coefficients(model_lasso_interactions_penalties)

Unnamed: 0,features,coefficient
10,Fly Half,-0.008114
23,cost_Italy,-0.008026
36,cost_kicker,0.151741
38,Centre_v_italy,0.002844
39,Fly Half_v_italy,-0.092911
51,started_Wales,0.013587
54,started_Fly Half,-0.055412
55,started_Full Back,0.023018
61,started_kicker,0.360097


# Test Models

## Week 1

In [29]:
mean_squared_error(players_test_week1, test_data_predictor_multiple(players_pred_week1)['overall_score'])**0.5

11.47661649123802

## Week 2

In [30]:
mean_squared_error(players_test_week2, test_data_predictor_multiple(players_pred_week2)['overall_score'])**0.5

11.496808000610368

# Make Predictions

In [38]:
def view_predictions(players_pred):
    """
    
    """
    
    df = players_pred.copy()
    df['points'] = test_data_predictor_multiple(df)['overall_score']
    df = df.sort_values('points', ascending=False)
    
    return df

In [39]:
week1_preds = view_predictions(players_pred_week1)

from IPython.display import display
with pd.option_context('display.max_rows', 150, 'display.max_columns', 20):
    display(week1_preds)

Unnamed: 0_level_0,country,position,cost,started,week,points
player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
T. Thomas,France,Wing,17.5,1,1,98.23
G. Villiere,France,Wing,12.5,1,1,85.33
B. Dulin,France,Full Back,15.5,1,1,81.6
G. Alldritt,France,Back Row,16.6,1,1,67.53
S. Hogg,Scotland,Full Back,17.3,1,1,63.13
C. Ollivon,France,Back Row,15.1,1,1,63.07
A. Dupont,France,Scrum Half,17.8,1,1,62.52
A. Vincent,France,Centre,16.3,1,1,61.27
P. Willemse,France,Second Row,15.1,1,1,60.39
G. North,Wales,Wing,17.0,1,1,60.29


In [40]:
week2_preds = view_predictions(players_pred_week2)

from IPython.display import display
with pd.option_context('display.max_rows', 150, 'display.max_columns', 20):
    display(week2_preds)

Unnamed: 0_level_0,country,position,cost,started,week,points
player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
J. May,England,Wing,14.1,1,2,74.36
A. Watson,England,Wing,13.3,1,2,72.53
E. Daly,England,Full Back,15.2,1,2,65.53
B. Dulin,France,Full Back,15.5,1,2,56.22
L. Halfpenny,Wales,Full Back,13.8,1,2,55.59
L. Williams,Wales,Wing,13.7,1,2,54.9
L. Rees-Zammit,Wales,Wing,13.3,1,2,53.83
D. Penaud,France,Wing,14.2,1,2,52.35
J. Tipuric,Wales,Back Row,16.3,1,2,52.26
T. Faletau,Wales,Back Row,16.0,1,2,51.44


In [45]:
week3_preds = view_predictions(players_pred_week3)

from IPython.display import display
with pd.option_context('display.max_rows', 150, 'display.max_columns', 20):
    display(week3_preds)

Unnamed: 0_level_0,cost,country,position,week,started,points
player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
J. Lowe,14.0,Ireland,Wing,3,1,86.89
J. Larmour,13.2,Ireland,Wing,3,1,84.71
H. Keenan,14.1,Ireland,Full Back,3,1,75.26
G. Ringrose,15.6,Ireland,Centre,3,1,68.0
C. Stander,15.9,Ireland,Back Row,3,1,66.73
R. Henshaw,14.8,Ireland,Centre,3,1,65.39
J. Sexton,15.1,Ireland,Fly Half,3,1,63.32
J. Ryan,14.0,Ireland,Second Row,3,1,58.48
T. Furlong,14.2,Ireland,Prop,3,1,57.04
A. Watson,16.8,England,Wing,3,1,54.51


# Export Predictions to Xpress

In [35]:
def export_pred(players_pred, file_name, week):
    """
    
    """
    
    df = players_pred.copy()
    df['points'] = test_data_predictor_multiple(df)['overall_score']
   
    # reformat data for Xpress
    df = df.reset_index()
    df = df.rename({'player': 'player_names'}, axis=1)
    df['position'] = df['position'].apply(lambda x: x.replace(' ', '_'))
    df = pd.get_dummies(df, columns=['country', 'position'], prefix='', prefix_sep='')
    
    # week 3 adjustments
    if week == 3:
        df['France'] = 0
        df['Scotland'] = 0
    
    df = df.drop('week', axis=1)
    
    if week == 3:
        df['index'] = np.arange(1,93)
    else:
        df['index'] = np.arange(1,139)
        
    df.insert(0, 'index', df.pop('index'))
    
    # week 3 adjustments
    if week == 3:
        df = df[['index', 'player_names', 'cost', 'started', 'points', 'England', 'France',
           'Ireland', 'Italy', 'Scotland', 'Wales', 'Back_Row', 'Centre', 'Fly_Half',
           'Full_Back', 'Hooker', 'Prop', 'Scrum_Half', 'Second_Row', 'Wing']]
    
    # export file to csv
    df.to_csv(file_name, index=False)

In [215]:
export_pred(players_pred_week1, "players_pred_week1.csv", week=1)

In [216]:
export_pred(players_pred_week2, "players_pred_week2.csv", week=2)

In [46]:
export_pred(players_pred_week3, "players_pred_week3.csv", week=3)