# Libraries, Preferences and Data Input

In [59]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

In [60]:
%config Completer.use_jedi = False

In [61]:
# load the weekly player training files
players_training_week1, players_training_week2 = (
    pd.read_csv(csv_path)
    for csv_path in ("players_training_week1.csv", "players_training_week2.csv")
)
# combine both weeks into a single frame (outer merge on the shared columns), indexed by player name
df = players_training_week1.merge(players_training_week2, how='outer').set_index('player')

# load the weekly match results, one row per country
results_week1, results_week2 = (
    pd.read_csv(csv_path, index_col='country')
    for csv_path in ("results_week1.csv", "results_week2.csv")
)

In [62]:
# collate list of possible kickers for each team
# NOTE: every entry needs its own trailing comma -- adjacent string literals
# are silently concatenated by Python, which would merge two names into one.
kickers = [
    'O. Farrell', 'G. Ford', 'M. Malins',  # england
    'M. Jalibert', 'L. Carbonel',  # france
    'J. Sexton', 'B. Burns', 'R. Byrne',  # ireland
    'P. Garbisi', 'T. Allan', 'C. Canna',  # italy
    'F. Russell', 'J. van der Walt',  # scotland
    'D. Biggar', 'L. Halfpenny', 'C. Sheedy', 'J. Evans',  # wales
]

# Data Analysis 

In [63]:
# replace NaNs with zero values (reassigned rather than mutated in place)
df = df.fillna(0)
# replace binary values for 'started' for plotting purposes; the result is
# assigned back to the column because an in-place replace on a selected
# column is chained assignment, which does not update df under pandas
# copy-on-write
df['started'] = df['started'].replace({1: True, 0: False})

# flag the potential kickers; the player names are the index, so membership
# is tested on the index directly instead of resetting and restoring it
df['kicker'] = df.index.isin(kickers)

In [64]:
def team_points_calculator(country, mins, week, results=None):
    """
    Calculates the number of team points assigned to a player, based on their
    country, the number of minutes they played, and the result.

    Parameters
    ----------
    country : label of the team's row in the results table.
    mins : minutes played; the team score is scaled by ``mins / 80``.
    week : week number. When ``results`` is not supplied, the module-level
        table named ``results_week<week>`` is used. Integer-like values such
        as ``1``, ``1.0`` or ``'1'`` are accepted.
    results : optional table indexed by country with 'home', 'for' and
        'against' columns. Overrides the lookup by ``week`` when given.

    Returns
    -------
    The player's team points, rounded to 2 decimal places.

    Raises
    ------
    ValueError
        If ``week`` is not a whole number, or no results table has been
        loaded for that week.
    """

    if results is None:
        try:
            week_value = float(week)
        except (TypeError, ValueError):
            raise ValueError(f"week must be a whole number, got {week!r}") from None
        if not week_value.is_integer():
            raise ValueError(f"week must be a whole number, got {week!r}")

        # look the table up by name so that only weeks whose results have
        # actually been loaded are referenced (avoids a NameError for weeks
        # that have no results table yet)
        table_name = f"results_week{int(week_value)}"
        results = globals().get(table_name)
        if results is None:
            raise ValueError(f"no results loaded for week {week!r} (expected '{table_name}')")

    points_for = results.loc[country, 'for']
    points_against = results.loc[country, 'against']

    # calculate the home/away points: (win, draw, loss) values differ by venue
    if results.loc[country, 'home'] == 1:
        win, draw, loss = 12, 4, 1
    else:
        win, draw, loss = 18, 10, 3

    if points_for > points_against:
        outcome = win
    elif points_for == points_against:
        outcome = draw
    else:
        outcome = loss

    # calculate the score difference points (half a point per point of margin)
    score_diff = (points_for - points_against) * 0.5

    # calculate the points assigned to the player, pro-rated by minutes played
    team_points = round((outcome + score_diff) * (mins / 80), 2)

    return team_points

In [65]:
def ind_points_calculator(tackles, d_tackles, t_breaks, m_ball, mom, tries, conversions, penalties, d_goals, yellow, red):
    """
    Calculates the number of individual points assigned to a player, based on their fixture metrics.

    Each metric is multiplied by its weight and the weighted values are
    summed; the total is rounded to 2 decimal places.
    """

    # (metric value, points per unit) pairs, accumulated in this order
    weighted_metrics = (
        (tackles, 1),
        (d_tackles, 2),
        (t_breaks, 2),
        (m_ball, 0.3),
        (mom, 15),
        (tries, 15),
        (conversions, 3),
        (penalties, 3),
        (d_goals, 6),
        (yellow, -5),
        (red, -10),
    )

    running_total = None
    for value, weight in weighted_metrics:
        contribution = value * weight
        if running_total is None:
            running_total = contribution
        else:
            running_total = running_total + contribution

    return round(running_total, 2)

In [66]:
def v_italy_generator(week, country):
    """
    Reports whether the given country is playing against Italy in the given week.

    Returns True when the (week, country) pair matches one of the listed
    fixtures, and False otherwise.
    """

    # (week, country facing Italy that week)
    italy_fixtures = (
        (1, 'France'),
        (2, 'England'),
        (3, 'Ireland'),
        (4, 'Wales'),
        (5, 'Scotland'),
    )

    return any(
        week == fixture_week and country == opponent
        for fixture_week, opponent in italy_fixtures
    )

In [67]:
# flag the rows where the player's country faces Italy that week
df['v_italy'] = df.apply(
    lambda row: v_italy_generator(row['week'], row['country']),
    axis=1,
)

# add each player's share of the team points
df['team_points'] = df.apply(
    lambda row: team_points_calculator(row['country'], row['mins'], row['week']),
    axis=1,
)

# add each player's individual points, passing the metrics in the order
# ind_points_calculator expects them
df['ind_points'] = df.apply(
    lambda row: ind_points_calculator(*(
        row[metric] for metric in (
            'tackles', 'd_tackles', 't_breaks', 'm_ball', 'mom',
            'tries', 'conversions', 'penalties', 'd_goals', 'yellow', 'red',
        )
    )),
    axis=1,
)

# total = team share + individual points
df['total_points'] = df['team_points'].add(df['ind_points'])

# store week as a string; note the week comparisons in the calculators above
# are against integers, so this conversion has to come after those calls
df['week'] = df['week'].astype('string')

In [68]:
df.head()

Unnamed: 0_level_0,cost,country,position,mins,tackles,d_tackles,t_breaks,m_ball,mom,tries,...,yellow,red,started,week,m_played,kicker,v_italy,team_points,ind_points,total_points
player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
T. Curry,15.2,England,Back Row,80.0,15.0,1.0,2.0,40.0,0.0,0.0,...,0.0,0.0,True,1,1,False,False,-1.5,33.0,31.5
A. Vincent,16.3,France,Centre,80.0,15.0,0.0,5.0,100.0,0.0,1.0,...,0.0,0.0,True,1,1,False,True,38.0,70.0,108.0
E. Daly,15.2,England,Full Back,80.0,1.0,0.0,3.0,85.0,0.0,0.0,...,0.0,0.0,True,1,1,False,False,-1.5,32.5,31.0
O. Farrell,14.9,England,Centre,80.0,5.0,0.0,1.0,38.0,0.0,0.0,...,0.0,0.0,True,1,1,True,False,-1.5,24.4,22.9
J. Hill,12.1,England,Second Row,80.0,19.0,1.0,0.0,5.0,0.0,0.0,...,0.0,0.0,True,1,1,False,False,-1.5,22.5,21.0


In [69]:
def plotter(y, color=None, title=""):
    """
    Builds a scatter plot of the column `y` against 'cost' from the module-level `df`.

    Parameters
    ----------
    y : name of the column plotted on the y axis.
    color : optional name of the column used to colour the points. 'country'
        uses a fixed colour sequence and 'v_italy' uses a fixed True/False
        colour map; any other value uses the plotly defaults.
    title : figure title.

    Returns
    -------
    The plotly figure, drawn with an OLS trendline on the dark template.
    """

    # fixed colour sequence, only applied when colouring by country
    country_palette = (
        ['white', 'royalblue', 'green', 'blue', 'deepskyblue', 'red']
        if color == 'country'
        else None
    )

    # explicit colours for the two v_italy values, otherwise no overrides
    value_colours = {True: 'royalblue', False: 'red'} if color == 'v_italy' else {}

    return px.scatter(
        df,
        x='cost',
        y=y,
        color=color,
        color_discrete_sequence=country_palette,
        color_discrete_map=value_colours,
        hover_name=df.index,
        hover_data={'cost': False, y: True},
        trendline='ols',
        template='plotly_dark',
        title=title,
    )

In [70]:
plotter('total_points', 'week')

## Mins

In [71]:
plotter('mins')

In [72]:
plotter('mins', 'started')

In [73]:
plotter('mins', 'position')

In [74]:
plotter('mins', 'country')

In [75]:
plotter('mins', 'v_italy')

## Tackles

In [76]:
plotter('tackles')

In [77]:
plotter('tackles', 'started')

In [78]:
plotter('tackles', 'position')

In [79]:
plotter('tackles', 'country')

In [80]:
plotter('tackles', 'v_italy')

In [81]:
plotter('d_tackles')

In [82]:
plotter('d_tackles', 'started')

In [83]:
plotter('d_tackles', 'position')

In [84]:
plotter('d_tackles', 'country')

In [85]:
plotter('d_tackles', 'v_italy')

In [86]:
plotter('t_breaks')

In [87]:
plotter('t_breaks', 'started')

In [88]:
plotter('t_breaks', 'position')

In [89]:
plotter('t_breaks', 'country')

In [90]:
plotter('t_breaks', 'v_italy')

In [91]:
plotter('m_ball')

In [92]:
plotter('m_ball', 'started')

In [93]:
plotter('m_ball', 'position')

In [94]:
plotter('m_ball', 'country')

In [95]:
plotter('m_ball', 'v_italy')

In [96]:
plotter('tries')

In [97]:
plotter('tries', 'started')

In [98]:
plotter('tries', 'position')

In [99]:
plotter('tries', 'country')

In [100]:
plotter('tries', 'v_italy')

In [101]:
plotter('penalties')

In [102]:
plotter('penalties', 'started')

In [103]:
plotter('penalties', 'position')


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars



In [104]:
plotter('penalties', 'country')

In [105]:
plotter('penalties', 'v_italy')

In [106]:
plotter('penalties', 'kicker')


invalid value encountered in double_scalars



In [107]:
plotter('conversions')

In [108]:
plotter('conversions', 'started')

In [109]:
plotter('conversions', 'position')


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars



In [110]:
plotter('conversions', 'country')

In [111]:
plotter('conversions', 'v_italy')

In [112]:
plotter('conversions', 'kicker')


invalid value encountered in double_scalars

