Load and set up dependencies and data

In [None]:
# 3rd party imports
import pandas as pd

In [None]:
# Read the raw statistics CSV (downloaded from the Six Nations website) into a DataFrame.
raw_data = pd.read_csv(filepath_or_buffer='raw_data.csv')

In [None]:
# Points awarded per unit of each player action, as defined by the game rules.
# NOTE: 'MINUTES PLAYED' is given a neutral weight of one (1) so that the column
# passes through the later multiplication step unchanged.
# Insertion order matters: it determines the column order of the frames built from this dict.
values_dict = dict([
    ('MINUTES PLAYED', 1),
    ('TRIES', 15),
    ('TRY ASSISTS', 7),
    ('CONVERSIONS', 2),
    ('PENALTY GOALS', 5),
    ('DROP GOALS', 7),
    ('METRES MADE', 0.2),
    ('BROKEN TACKLES', 7),
    ('TACKLES MADE', 1),
    ('DOMINANT TACKLES', 7),
    ('TURNOVERS WON', 7),
    ('TURNOVERS WON IN THE TACKLE', 7),
    ('LINEOUTS STOLEN', 7),
    ('YELLOW CARDS', -5),
    ('RED CARD', -10),
])

Data wrangling and analysis

In [None]:
# Set index col to 'PLAYER' so each row is labelled by player.
# The guard plus reassignment (instead of `inplace=True`) makes this cell safe to
# re-run: once 'PLAYER' is already the index it is no longer a column, and calling
# set_index('PLAYER') a second time would raise a KeyError.
# If 'PLAYER' is neither the index nor a column, set_index still raises as before.
if raw_data.index.name != 'PLAYER':
    raw_data = raw_data.set_index('PLAYER')

In [None]:
# The raw export carries columns that play no part in the game's scoring.
# Keep only the columns named in the scoring dictionary, in the dictionary's order.
raw_data = raw_data.loc[:, list(values_dict)]

In [None]:
# Weight every statistic by its points value from values_dict (matched on column name).
# The resulting DataFrame holds each player's points for each aspect of play.
points_data = raw_data.multiply(values_dict, axis='columns')

In [None]:
# Calculate total points for each player (every scoring column except 'MINUTES PLAYED').
# The columns to sum are taken from values_dict rather than "whatever is in the frame
# minus 'MINUTES PLAYED'", so that re-running this cell, or running it after
# 'POINTS PER MINUTE' has been added, does not fold the derived 'TOTAL' /
# 'POINTS PER MINUTE' columns back into the sum.
points_data['TOTAL'] = points_data[
    [column for column in values_dict if column != 'MINUTES PLAYED']
].sum(axis=1)

In [None]:
# Calculate points per minute.
# Zero minutes are masked to NaN before dividing: a division by 0 would otherwise give
# +/-inf for any non-zero total, and +inf would rank that player at the very top.
minutes_played = points_data['MINUTES PLAYED']
points_data['POINTS PER MINUTE'] = points_data['TOTAL'] / minutes_played.mask(minutes_played == 0)

# Sort by points per minute, highest first. Rows whose rate is NaN (no minutes played)
# are placed last by sort_values' default na_position.
# Reassigned rather than sorted in place so the cell reads as a plain transformation.
points_data = points_data.sort_values(by='POINTS PER MINUTE', ascending=False)

The `points_data` DataFrame can now be assessed by column to establish the typical expected scoring pattern for any player/position.