Load and set up dependencies and data

In [88]:
# 3rd party imports
import pandas as pd

In [89]:
# Read the raw player statistics (downloaded from the Six Nations website) into a DataFrame.
# The path is relative, so the CSV is looked up from the notebook's working directory.
RAW_DATA_PATH = 'raw_data.csv'
raw_data = pd.read_csv(RAW_DATA_PATH)

In [90]:
# Points awarded per unit of each player statistic; the values are taken from the game rules.
# Statistics mapped to 0 score nothing.
# NOTE(review): 'RED CARD' is singular while 'YELLOW CARDS' is plural - confirm that both
# keys match the CSV column headers exactly.
values_dict = {
    # Dummy weight of one (1): 'MINUTES PLAYED' is not a points value, so it is left
    # unchanged when the statistics are multiplied by this dict.
    'MINUTES PLAYED': 1,
    'TRIES': 15,
    'TRY ASSISTS': 9,
    'CONVERSIONS': 2,
    'PENALTY GOALS': 3,
    'DROP GOALS': 3,
    'METRES MADE': 0.1,
    'CARRIES': 0,
    'METRES KICKED': 0,
    'BALL PLAYED BY HAND': 0,
    'PASSES MADE': 0,
    'OFFLOADS': 2,
    'BROKEN TACKLES': 2,
    'KNOCK ONS': 0,
    'TACKLES MADE': 1,
    'MISSED TACKLES': -1,
    'DOMINANT TACKLES': 0,
    'TURNOVERS WON': 4,
    'TURNOVERS WON IN THE TACKLE': 4,
    'TURNOVERS CONCEDED': -1,
    'HANDLING ERRORS': -1,
    'PENS CONCEDED': -1,
    'OFFSIDE PENALTIES': 0,
    'SCRUM PENALTIES': 0,
    'LINEOUTS WON': 1,
    'LINEOUTS STOLEN': 5,
    'YELLOW CARDS': -5,
    'RED CARD': -10,
}

Data wrangling and analysis

In [91]:
# Use the 'PLAYER' column as the row index.
# The guard makes this cell safe to re-run: once 'PLAYER' has become the index it is no
# longer a column, so calling set_index('PLAYER') a second time would raise a KeyError.
# The frame is rebound rather than mutated in place for the same reason.
if raw_data.index.name != 'PLAYER':
    raw_data = raw_data.set_index('PLAYER')

In [92]:
# Scale each statistic column of raw_data by its entry in values_dict (the multiplication
# is applied along the column axis). The resulting DataFrame holds each player's points
# total for each playing aspect.
# NOTE(review): this relies on pandas matching the dict keys to the frame's column
# labels - confirm the CSV headers line up with the keys of values_dict.
points_data = raw_data.mul(values_dict, axis='columns')

In [93]:
# The raw data has excess columns which are not relevant to the game. Keep only the
# columns that contain at least one non-zero value, i.e. drop every all-zero column.
has_nonzero_value = (points_data != 0).any(axis=0)
points_data = points_data.loc[:, has_nonzero_value]

In [94]:
# Calculate total points for each player: the row-wise sum of every column except
# 'MINUTES PLAYED'.
# The derived 'TOTAL' and 'POINTS PER MINUTE' columns are also left out (and ignored when
# they do not exist yet) so that re-running this cell does not add earlier results back
# into the total.
scoring_columns = (
    points_data
    .drop(columns='MINUTES PLAYED')
    .drop(columns=['TOTAL', 'POINTS PER MINUTE'], errors='ignore')
)
points_data['TOTAL'] = scoring_columns.sum(axis=1)

In [95]:
# Calculate points per minute.
# Rows with zero minutes played are masked to NaN before dividing; otherwise a non-zero
# total divided by zero would give inf and rank first in the descending sort below.
minutes_played = points_data['MINUTES PLAYED']
points_data['POINTS PER MINUTE'] = points_data['TOTAL'] / minutes_played.where(minutes_played > 0)

# Sort by points per minute, highest first (sort_values places NaN rows last by default).
# Rebinding instead of sorting in place keeps the cell free of hidden in-place mutation.
points_data = points_data.sort_values(by='POINTS PER MINUTE', ascending=False)

The `points_data` DataFrame can now be assessed by column to establish the typical expected scoring pattern for any player or position.