In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
# Notebook-wide pandas display limits: allow up to 999 columns so wide frames
# are shown in full, but cap the number of printed rows at 20.
for option_name, option_value in (
    ('display.max_columns', 999),
    ('display.max_rows', 20),
):
    pd.set_option(option_name, option_value)

In [3]:
# Load the player/match CSV; its first column is used as the row index.
footywire_csv_path = r'inputs/footywire_player_matches.csv'
df_footywire_data = pd.read_csv(footywire_csv_path, index_col=0)
# Create a column for a standard round name e.g. Grand Final should be GF.
#
# round_map_data is keyed by the short code and holds the long round name.
# Rounds 1-24 follow a fixed "R<n>" / "Round <n>" pattern, so they are
# generated; the five finals are listed explicitly. Insertion order is
# R1..R24 followed by SF, GF, PF, QF, EF.
round_map_data = {
    "R{}".format(round_no): "Round {}".format(round_no)
    for round_no in range(1, 25)
}
round_map_data.update({
    "SF": "Semi Final",
    "GF": "Grand Final",
    "PF": "Preliminary Final",
    "QF": "Qualifying Final",
    "EF": "Elimination Final",
})

# Tabular view of the same pairs: one row per round, short code first.
df_round_mapping = pd.DataFrame(
    list(round_map_data.items()),
    columns=['short_round', 'long_round'],
)

# Inverted lookup (long name -> short code), the direction needed to translate
# the long names stored in the data.
round_mapping = {
    long_name: short_code
    for short_code, long_name in round_map_data.items()
}
# Translate each row's long round name into its short code. Series.map with a
# dict yields NaN for any round name that is not a key of round_mapping.
short_round_codes = df_footywire_data['Round'].map(round_mapping)
df_footywire_data['short_round'] = short_round_codes
df_footywire_data

Unnamed: 0,Date,Season,Round,Venue,Player,Team,Opposition,Status,Match_id,CP,UP,ED,DE,CM,GA,MI5,One.Percenters,BO,TOG,K,HB,D,M,G,B,T,HO,GA1,I50,CL,CG,R50,FF,FA,AF,SC,CCL,SCL,SI,MG,TO,ITC,T5,short_round
1,2010-03-25,2010,Round 1,MCG,Daniel Connors,Richmond,Carlton,Home,5089,8,15,16,66.7,0,0,0,1,0,69,14,10,24,3,0,0,1,0,0,2,2,4,6,2,0,77,85,,,,,,,,R1
2,2010-03-25,2010,Round 1,MCG,Daniel Jackson,Richmond,Carlton,Home,5089,11,10,14,60.9,1,0,0,0,0,80,11,12,23,2,0,0,5,0,0,8,5,4,1,2,0,85,89,,,,,,,,R1
3,2010-03-25,2010,Round 1,MCG,Brett Deledio,Richmond,Carlton,Home,5089,7,14,16,76.2,0,0,0,0,0,89,12,9,21,5,1,0,6,0,0,4,3,4,3,1,2,94,93,,,,,,,,R1
4,2010-03-25,2010,Round 1,MCG,Ben Cousins,Richmond,Carlton,Home,5089,9,10,11,57.9,0,1,0,0,0,69,13,6,19,1,1,0,1,0,1,1,2,3,4,1,0,65,70,,,,,,,,R1
5,2010-03-25,2010,Round 1,MCG,Trent Cotchin,Richmond,Carlton,Home,5089,8,10,13,68.4,1,0,0,0,1,77,11,8,19,6,0,0,1,0,0,2,3,3,2,0,2,65,63,,,,,,,,R1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90416,2019-09-28,2019,Grand Final,MCG,Harrison Himmelberg,GWS,Richmond,Away,9927,3,9,7,58.3,1,0,2,4,0,92,9,3,12,7,1,1,1,2,0,2,0,3,0,1,1,65,51,0.0,0.0,3.0,153.0,4.0,2.0,1.0,GF
90417,2019-09-28,2019,Grand Final,MCG,Samuel Reid,GWS,Richmond,Away,9927,5,6,8,66.7,0,0,0,4,0,96,5,7,12,2,0,0,3,0,0,0,1,2,1,0,0,47,55,0.0,1.0,1.0,97.0,2.0,5.0,0.0,GF
90418,2019-09-28,2019,Grand Final,MCG,Adam Kennedy,GWS,Richmond,Away,9927,4,8,9,81.8,0,0,0,1,0,71,5,6,11,4,0,0,4,0,0,2,1,0,0,1,0,56,55,0.0,1.0,1.0,121.0,0.0,5.0,1.0,GF
90419,2019-09-28,2019,Grand Final,MCG,Shane Mumford,GWS,Richmond,Away,9927,8,2,7,70.0,0,0,0,4,0,75,2,8,10,0,0,0,1,15,0,1,2,2,1,1,0,42,62,1.0,1.0,0.0,0.0,1.0,2.0,0.0,GF


In [4]:
# Create columns for standard team names e.g. Saint Kilda should be STK
#
# team_map_data is keyed by the AFL Tables team name; each entry gives the name
# Footywire uses for the same club plus a short abbreviation. Fitzroy and
# University carry an empty footywire_team_name (presumably because they have
# no Footywire equivalent -- confirm against the data source).
team_map_data = {
    "Adelaide": { "footywire_team_name":"Adelaide", "abbrev_team_name":"ADE"},
    "Brisbane Lions": { "footywire_team_name":"Brisbane", "abbrev_team_name":"BRI"},
    "Carlton": { "footywire_team_name":"Carlton", "abbrev_team_name":"CAR"},
    "Collingwood": { "footywire_team_name":"Collingwood", "abbrev_team_name":"COL"},
    "Essendon": { "footywire_team_name":"Essendon", "abbrev_team_name":"ESS"},
    "Fitzroy": { "footywire_team_name":"", "abbrev_team_name":"FIT"},
    "Footscray": { "footywire_team_name":"Western Bulldogs", "abbrev_team_name":"WBD"},
    "Fremantle": { "footywire_team_name":"Fremantle", "abbrev_team_name":"FRE"},
    "Geelong": { "footywire_team_name":"Geelong", "abbrev_team_name":"GEE"},
    "Gold Coast": { "footywire_team_name":"Gold Coast", "abbrev_team_name":"GCS"},
    "GWS": { "footywire_team_name":"GWS", "abbrev_team_name":"GWS"},
    "Hawthorn": { "footywire_team_name":"Hawthorn", "abbrev_team_name":"HAW"},
    "Melbourne": { "footywire_team_name":"Melbourne", "abbrev_team_name":"MEL"},
    "North Melbourne": { "footywire_team_name":"North Melbourne", "abbrev_team_name":"NM"},
    "Port Adelaide": { "footywire_team_name":"Port Adelaide", "abbrev_team_name":"PTA"},
    "Richmond": { "footywire_team_name":"Richmond", "abbrev_team_name":"RIC"},
    "St Kilda": { "footywire_team_name":"St Kilda", "abbrev_team_name":"STK"},
    "Sydney": { "footywire_team_name":"Sydney", "abbrev_team_name":"SYD"},
    "University": { "footywire_team_name":"", "abbrev_team_name":"UNI"},
    "West Coast": { "footywire_team_name":"West Coast", "abbrev_team_name":"WCE"}
}
# One row per club with columns afl_tables_team_name, footywire_team_name and
# abbrev_team_name (all 20 clubs are kept here, including those with no
# Footywire name).
df_team_mapping = pd.DataFrame.from_dict(team_map_data, orient='index')
df_team_mapping = df_team_mapping.reset_index().rename(columns={'index': 'afl_tables_team_name'})
# Footywire name -> abbreviation lookup. Clubs with an empty Footywire name are
# left out: otherwise Fitzroy and University would collide on the key '' (the
# later row silently winning) and the lookup would contain a bogus
# '' -> 'UNI' entry that mislabels any blank team value as University.
has_footywire_name = df_team_mapping['footywire_team_name'] != ''
team_mapping = dict(
    df_team_mapping.loc[
        has_footywire_name, ['footywire_team_name', 'abbrev_team_name']
    ].values
)
# Add an abbreviation column for both sides of each match. The same lookup is
# applied to 'Team' and 'Opposition'; names absent from team_mapping map to NaN.
for source_column, abbrev_column in (
    ('Team', 'Team_abbrev'),
    ('Opposition', 'Opposition_abbrev'),
):
    df_footywire_data[abbrev_column] = df_footywire_data[source_column].map(team_mapping)
df_footywire_data.tail()

Unnamed: 0,Date,Season,Round,Venue,Player,Team,Opposition,Status,Match_id,CP,UP,ED,DE,CM,GA,MI5,One.Percenters,BO,TOG,K,HB,D,M,G,B,T,HO,GA1,I50,CL,CG,R50,FF,FA,AF,SC,CCL,SCL,SI,MG,TO,ITC,T5,short_round,Team_abbrev,Opposition_abbrev
90416,2019-09-28,2019,Grand Final,MCG,Harrison Himmelberg,GWS,Richmond,Away,9927,3,9,7,58.3,1,0,2,4,0,92,9,3,12,7,1,1,1,2,0,2,0,3,0,1,1,65,51,0.0,0.0,3.0,153.0,4.0,2.0,1.0,GF,GWS,RIC
90417,2019-09-28,2019,Grand Final,MCG,Samuel Reid,GWS,Richmond,Away,9927,5,6,8,66.7,0,0,0,4,0,96,5,7,12,2,0,0,3,0,0,0,1,2,1,0,0,47,55,0.0,1.0,1.0,97.0,2.0,5.0,0.0,GF,GWS,RIC
90418,2019-09-28,2019,Grand Final,MCG,Adam Kennedy,GWS,Richmond,Away,9927,4,8,9,81.8,0,0,0,1,0,71,5,6,11,4,0,0,4,0,0,2,1,0,0,1,0,56,55,0.0,1.0,1.0,121.0,0.0,5.0,1.0,GF,GWS,RIC
90419,2019-09-28,2019,Grand Final,MCG,Shane Mumford,GWS,Richmond,Away,9927,8,2,7,70.0,0,0,0,4,0,75,2,8,10,0,0,0,1,15,0,1,2,2,1,1,0,42,62,1.0,1.0,0.0,0.0,1.0,2.0,0.0,GF,GWS,RIC
90420,2019-09-28,2019,Grand Final,MCG,Jeremy Finlayson,GWS,Richmond,Away,9927,1,1,1,100.0,0,0,0,3,0,63,1,0,1,1,0,0,1,1,0,0,1,1,0,0,1,8,7,1.0,0.0,0.0,26.0,1.0,0.0,1.0,GF,GWS,RIC
