# Fanfooty Supercoach Prediction Model
Selecting your Supercoach team is hard. This model is designed as a tool to help Supercoach users select their team from week to week.

This model uses rolling stats, opposition data, and past injury data to provide a predicted score for matches.

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
# Notebook display limits: allow up to 999 columns to be shown for the wide
# frames below, but cap the number of displayed rows at 30.
for _option_name, _option_value in (
    ('display.max_columns', 999),
    ('display.max_rows', 30),
):
    pd.set_option(_option_name, _option_value)

### Get Fanfooty Data
This data was scraped using scrape-fanfooty.ipynb in this directory
- Remove rows with bad SC data and convert the SC column to integer
- Create columns to allow merging with fixture data

In [2]:
# Load the scraped per-player match stats; the first CSV column becomes the index.
# NOTE: `error_bad_lines` was deprecated in pandas 1.3 and removed in pandas 2.0.
# On newer pandas replace it with `on_bad_lines='skip'` to keep skipping malformed rows.
df_fanfooty_player_raw = pd.read_csv("exports/scrape_20200831-093824/fanfooty_match_data_20200831-093824.csv", error_bad_lines=False, index_col=0)

# Drop rows whose SC value is the '-' placeholder. The comparison is done on the
# string form of the column so it stays elementwise even when pandas has parsed SC
# as a numeric dtype (comparing a numeric column to '-' emits numpy's
# "elementwise comparison failed" FutureWarning).
has_sc_score = df_fanfooty_player_raw['SC'].astype(str) != '-'

# Take an explicit copy so the column assignments below write to an independent
# frame rather than to a slice of the frame that was read from disk.
df_fanfooty_player_raw = df_fanfooty_player_raw.loc[has_sc_score].copy()
df_fanfooty_player_raw['SC'] = df_fanfooty_player_raw['SC'].astype('int64')

# Join keys of the form "<match id>_<team code>": one for the player's own team and
# one for the opposition, used later to merge in per-team fixture data.
df_fanfooty_player_raw['match_identifier'] = df_fanfooty_player_raw['Fanfooty Match ID'].astype(str) + '_' + df_fanfooty_player_raw['Team']
df_fanfooty_player_raw['opposition_match_identifier'] = df_fanfooty_player_raw['Fanfooty Match ID'].astype(str) + '_' + df_fanfooty_player_raw['Opposition']
df_fanfooty_player_raw

  interactivity=interactivity, compiler=compiler, result=result)
  res_values = method(rvalues)


Unnamed: 0,Fanfooty Match ID,Fanfooty Match URL,Round,Year,Player ID,First Name,Surname,Team,null,DT,SC,null2,null3,null4,Kicks,Handballs,Marks,Tackles,Hitouts,Frees for,Frees against,Goals,Behinds,Not sure,Tag,Tag Notes,Tag 2,Tag 2 Notes,null5,null6,null7,null8,Position,Jumper Number,null9,null10,null11,DT own %,SC own %,AF own %,null12,AF Breakeven,null13,Contested Possessions,Clearances,Clangers,Disposal efficiency,Time on ground,Metres gained,Injured,Opposition,match_identifier,opposition_match_identifier
0,3425,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,990020.0,Andrew,Embley,WC,30,111,98,144,79,112,20,8,1,6,1,1,0,1,0,Full Time,gun,Dempsey going with him... %s from %O and %T,,,,,,,,,,,,,,,,,,,,,,,,False,ES,3425_WC,3425_ES
1,3425,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,230254.0,Adam,Selwood,WC,50,107,107,143,79,108,10,9,4,11,0,3,2,1,0,Full Time,hot,Tagged by Lonergan... %D and %M with %T plus %s,,,,,,,,,,,,,,,,,,,,,,,,False,ES,3425_WC,3425_ES
2,3425,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,200112.0,Dean,Cox,WC,27,99,118,114,88,106,9,10,2,2,30,4,1,1,1,Full Time,news,%H and %P with %s,,,,,,,,,,,,,,,,,,,,,,,,False,ES,3425_WC,3425_ES
3,3425,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,240016.0,Beau,Waters,WC,26,98,84,130,79,117,15,13,5,6,0,0,4,0,0,Full Time,news,%P and %M with %F... clangers and FA dampening...,,,,,,,,,,,,,,,,,,,,,,,,False,ES,3425_WC,3425_ES
4,3425,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,261911.0,Brad,Ebert,WC,26,94,109,121,70,96,12,9,3,6,0,1,0,1,0,Full Time,news,Matched up on Winderlich... %D and %T,,,,,,,,,,,,,,,,,,,,,,,,False,ES,3425_WC,3425_ES
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93895,7422,http://live.fanfooty.com.au/game/matchcentre.h...,R14,2020,290199.0,Majak,Daw,NM,2,32,0,31,28,38,2,7,1,2,2,2,1,0,0,Full Time,spearhead,%H... also %P and %T... In attack on Collins,,,,,,,Ruck,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,7.0,1.0,4.0,77.0,71.0,-1.0,False,GC,7422_NM,7422_GC
93896,7422,http://live.fanfooty.com.au/game/matchcentre.h...,R14,2020,1001351.0,Lachlan,Hosie,NM,2,27,0,18,23,30,4,3,3,0,0,0,0,0,0,Full Time,chicken,%O and %M... Playing tall forward on Ballard,,,,,,,Forward,46.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,5.0,0.0,0.0,42.0,73.0,49.0,False,GC,7422_NM,7422_GC
93897,7422,http://live.fanfooty.com.au/game/matchcentre.h...,R14,2020,291550.0,Josh,Walker,NM,2,24,0,15,22,30,4,3,3,0,0,0,1,0,0,Full Time,job,%O and %M... In defence on Day,,,,,,,Forward,19.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,5.0,0.0,2.0,57.0,87.0,118.0,False,GC,7422_NM,7422_GC
93898,7422,http://live.fanfooty.com.au/game/matchcentre.h...,R14,2020,1002143.0,Ben,McKay,NM,1,14,0,12,15,21,1,5,1,0,0,1,1,0,0,Full Time,job,%B among %P... In defence on Corbett,,,,,,,Forward,23.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,5.0,0.0,2.0,66.0,88.0,35.0,False,GC,7422_NM,7422_GC


### Get Fanfooty fixture data
This data was scraped using scrape-fanfooty.ipynb in this directory

- Only pull in games that have SC total figures

In [3]:
# Read the scraped fixture list, keep only games where both the home and away
# Supercoach totals are positive, and store the draw ID as an integer.
df_fixture = (
    pd.read_csv("exports/scrape_20200831-093824/fanfooty_fixture.csv", index_col=0)
    .loc[lambda fixture: fixture['Home SC total'].gt(0) & fixture['Away SC total'].gt(0)]
    .astype({'FanFooty draw ID': 'int'})
)
df_fixture

Unnamed: 0,FanFooty draw ID,year,competition,round,gametime (AET),day,home team,away team,ground,timeslot,TV coverage,home supergoals,home goals,home behinds,home points,away supergoals,away goals,away behinds,away points,match status,home_team_short,away_team_short,Home SC total,Away SC total
3191,3425,2010,HA,4,2010-04-16 20:40:00,Friday,West Coast,Essendon,Subiaco,N,Seven,0.0,15.0,11.0,101.0,0.0,11.0,12.0,78.0,Full Time,WC,ES,1739.0,1568.0
3192,3426,2010,HA,4,2010-04-17 14:10:00,Saturday,North Melbourne,Sydney,Docklands,D,Fox Sports,0.0,8.0,12.0,60.0,0.0,14.0,16.0,100.0,Full Time,NM,SY,1504.0,1797.0
3193,3427,2010,HA,4,2010-04-17 15:10:00,Saturday,Adelaide,Carlton,Footy Park,D,Ten,0.0,6.0,19.0,55.0,0.0,16.0,7.0,103.0,Full Time,AD,CA,1513.0,1826.0
3194,3428,2010,HA,4,2010-04-17 19:10:00,Saturday,Collingwood,Hawthorn,MCG,N,Ten,0.0,17.0,21.0,123.0,0.0,8.0,11.0,59.0,Full Time,CO,HW,1873.0,1447.0
3195,3429,2010,HA,4,2010-04-17 19:10:00,Saturday,Brisbane Lions,Western Bulldogs,Gabba,N,Fox Sports,0.0,13.0,23.0,101.0,0.0,12.0,7.0,79.0,Full Time,BL,WB,1781.0,1545.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5626,7417,2020,HA,14,2020-08-28 19:50:00,Friday,Western Bulldogs,Geelong,Gold Coast,N,,0.0,9.0,7.0,61.0,0.0,10.0,12.0,72.0,Full Time,WB,GE,1538.0,1762.0
5627,7418,2020,HA,14,2020-08-29 13:45:00,Saturday,Port Adelaide,Sydney,Adelaide,N,,0.0,11.0,7.0,73.0,0.0,7.0,5.0,47.0,Full Time,PA,SY,1688.0,1612.0
5628,7419,2020,HA,14,2020-08-29 16:35:00,Saturday,Fremantle,GWS,Perth,N,,0.0,8.0,5.0,53.0,0.0,14.0,7.0,91.0,Full Time,FR,WS,1447.0,1856.0
5629,7420,2020,HA,14,2020-08-29 19:40:00,Saturday,Melbourne,St Kilda,Alice Springs,D,,0.0,8.0,4.0,52.0,0.0,7.0,7.0,49.0,Full Time,ME,SK,1693.0,1606.0


### Create a function that returns rolling averages for all stats given to it
e.g. I want the average supercoach score for a player for their last 5 games.

In [4]:
# Get rolling averages
def rolling_average(df, window):
    """Return the trailing rolling mean of ``df``, excluding the current row.

    Args:
        df: A pandas Series or DataFrame, in the row order the window should follow.
        window: Maximum number of preceding rows to average over.

    Returns:
        An object of the same shape as ``df``. Row ``i`` holds the mean of up to
        ``window`` rows that come before row ``i``; the first row is NaN because
        nothing precedes it.
    """
    # min_periods=1 lets the early rows average over however many rows exist so far.
    trailing_mean = df.rolling(window=window, min_periods=1).mean()
    # Shift down one row so each value only uses rows before the current one.
    return trailing_mean.shift(1)

### Get the opposition team's 5, 3, and 1 round rolling average Supercoach Score

- Split the fixture columns so there's a row for every team in every game, as opposed to a single row for each game.

In [5]:
# Get rolling previous team SC scores
# Map the home-specific and away-specific fixture columns onto shared names so the
# two halves of each game can be stacked into one row per team per game.
renamed_cols = {
    "home_team_short": "team", 
    "home points": "score",
    "Home SC total": "SC_total", 
    "away_team_short": "team", 
    "away points": "score", 
    "Away SC total": "SC_total", 
}

home_match_details = df_fixture.loc[:, ['year', 'gametime (AET)', 'ground', 'FanFooty draw ID', 'home_team_short', 'home points', 'Home SC total']].rename(columns=renamed_cols)
away_match_details = df_fixture.loc[:, ['year', 'gametime (AET)', 'ground', 'FanFooty draw ID', 'away_team_short', 'away points', 'Away SC total']].rename(columns=renamed_cols)

# Stack home and away rows and order them by the fixture index. The rolling windows
# below follow this row order.
# NOTE(review): this assumes the fixture index is chronological - confirm.
df_matches_by_team = pd.concat([home_match_details, away_match_details]).sort_index().reset_index()

# Output column -> number of previous games averaged for that team.
rolling_sc_windows = {
    'previous SC match score': 1,
    'rolling 3 round SC match average': 3,
    'rolling 5 round SC match average': 5,
}
for rolling_col, window in rolling_sc_windows.items():
    # `transform` returns a result aligned to the frame's own index, so it can be
    # assigned directly as a column. `groupby(...).apply(...)` prepends the group key
    # to the index under pandas >= 2.0 defaults, which breaks this assignment.
    df_matches_by_team[rolling_col] = (
        df_matches_by_team
        .groupby('team')['SC_total']
        .transform(lambda team: rolling_average(team, window))
    )

# Join key of the form "<draw id>_<team code>", matching the keys built on the player data.
df_matches_by_team['match_identifier'] = df_matches_by_team['FanFooty draw ID'].astype(str) + '_' + df_matches_by_team['team']
df_matches_by_team

Unnamed: 0,index,year,gametime (AET),ground,FanFooty draw ID,team,score,SC_total,previous SC match score,rolling 3 round SC match average,rolling 5 round SC match average,match_identifier
0,3191,2010,2010-04-16 20:40:00,Subiaco,3425,WC,101.0,1739.0,,,,3425_WC
1,3191,2010,2010-04-16 20:40:00,Subiaco,3425,ES,78.0,1568.0,,,,3425_ES
2,3192,2010,2010-04-17 14:10:00,Docklands,3426,SY,100.0,1797.0,,,,3426_SY
3,3192,2010,2010-04-17 14:10:00,Docklands,3426,NM,60.0,1504.0,,,,3426_NM
4,3193,2010,2010-04-17 15:10:00,Footy Park,3427,AD,55.0,1513.0,,,,3427_AD
...,...,...,...,...,...,...,...,...,...,...,...,...
4251,5628,2020,2020-08-29 16:35:00,Perth,7419,FR,53.0,1447.0,1771.0,1794.000000,1719.2,7419_FR
4252,5629,2020,2020-08-29 19:40:00,Alice Springs,7420,SK,49.0,1606.0,1685.0,1629.666667,1677.8,7420_SK
4253,5629,2020,2020-08-29 19:40:00,Alice Springs,7420,ME,52.0,1693.0,1503.0,1671.666667,1651.6,7420_ME
4254,5630,2020,2020-08-30 15:35:00,Gabba,7421,CA,48.0,1492.0,1908.0,1680.666667,1637.2,7421_CA


### Feature Building: Merge team + opposition rolling SC scores back into Fanfooty dataset

In [7]:
# Columns taken from the per-team match table. Because the join below is keyed on the
# player's `opposition_match_identifier`, every value describes the OPPOSITION team.
# NOTE(review): `score` and `SC_total` are the opposition's totals for this same match,
# so they would not be known before the game - confirm they are not used as features.
opposition_feature_cols = [
    'match_identifier',
    'score',
    'SC_total',
    'previous SC match score',
    'rolling 3 round SC match average',
    'rolling 5 round SC match average',
]

# Left join keeps every player row; rows with no matching fixture entry get NaN.
# Both frames carry a `match_identifier` column, so pandas suffixes them as
# `match_identifier_x` (player side) and `match_identifier_y` (fixture side).
df_fanfooty_player_raw = df_fanfooty_player_raw.merge(
    df_matches_by_team[opposition_feature_cols],
    how='left',
    left_on='opposition_match_identifier',
    right_on='match_identifier',
)

In [7]:
# Give the player-side key its original name back after a merge suffixed it with `_x`.
df_fanfooty_player_raw = df_fanfooty_player_raw.rename(
    {'match_identifier_x': 'match_identifier'},
    axis='columns',
)
df_fanfooty_player_raw

Unnamed: 0,Fanfooty Match ID,Fanfooty Match URL,Round,Year,Player ID,First Name,Surname,Team,null,DT,SC,null2,null3,null4,Kicks,Handballs,Marks,Tackles,Hitouts,Frees for,Frees against,Goals,Behinds,Not sure,Tag,Tag Notes,Tag 2,Tag 2 Notes,null5,null6,null7,null8,Position,Jumper Number,null9,null10,null11,DT own %,SC own %,AF own %,null12,AF Breakeven,null13,Contested Possessions,Clearances,Clangers,Disposal efficiency,Time on ground,Metres gained,Injured,Opposition,match_identifier_x,opposition_match_identifier,match_identifier_y,score,SC_total,previous SC match score,rolling 3 round SC match average,rolling 5 round SC match average
0,3425,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,990020.0,Andrew,Embley,WC,30,111,98,144,79,112,20,8,1,6,1,1,0,1,0,Full Time,gun,Dempsey going with him... %s from %O and %T,,,,,,,,,,,,,,,,,,,,,,,,False,ES,3425_WC,3425_ES,3425_ES,78.0,1568.0,,,
1,3425,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,230254.0,Adam,Selwood,WC,50,107,107,143,79,108,10,9,4,11,0,3,2,1,0,Full Time,hot,Tagged by Lonergan... %D and %M with %T plus %s,,,,,,,,,,,,,,,,,,,,,,,,False,ES,3425_WC,3425_ES,3425_ES,78.0,1568.0,,,
2,3425,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,200112.0,Dean,Cox,WC,27,99,118,114,88,106,9,10,2,2,30,4,1,1,1,Full Time,news,%H and %P with %s,,,,,,,,,,,,,,,,,,,,,,,,False,ES,3425_WC,3425_ES,3425_ES,78.0,1568.0,,,
3,3425,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,240016.0,Beau,Waters,WC,26,98,84,130,79,117,15,13,5,6,0,0,4,0,0,Full Time,news,%P and %M with %F... clangers and FA dampening...,,,,,,,,,,,,,,,,,,,,,,,,False,ES,3425_WC,3425_ES,3425_ES,78.0,1568.0,,,
4,3425,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,261911.0,Brad,Ebert,WC,26,94,109,121,70,96,12,9,3,6,0,1,0,1,0,Full Time,news,Matched up on Winderlich... %D and %T,,,,,,,,,,,,,,,,,,,,,,,,False,ES,3425_WC,3425_ES,3425_ES,78.0,1568.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93726,7422,http://live.fanfooty.com.au/game/matchcentre.h...,R14,2020,290199.0,Majak,Daw,NM,2,32,0,31,28,38,2,7,1,2,2,2,1,0,0,Full Time,spearhead,%H... also %P and %T... In attack on Collins,,,,,,,Ruck,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,7.0,1.0,4.0,77.0,71.0,-1.0,False,GC,7422_NM,7422_GC,,,,,,
93727,7422,http://live.fanfooty.com.au/game/matchcentre.h...,R14,2020,1001351.0,Lachlan,Hosie,NM,2,27,0,18,23,30,4,3,3,0,0,0,0,0,0,Full Time,chicken,%O and %M... Playing tall forward on Ballard,,,,,,,Forward,46.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,5.0,0.0,0.0,42.0,73.0,49.0,False,GC,7422_NM,7422_GC,,,,,,
93728,7422,http://live.fanfooty.com.au/game/matchcentre.h...,R14,2020,291550.0,Josh,Walker,NM,2,24,0,15,22,30,4,3,3,0,0,0,1,0,0,Full Time,job,%O and %M... In defence on Day,,,,,,,Forward,19.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,5.0,0.0,2.0,57.0,87.0,118.0,False,GC,7422_NM,7422_GC,,,,,,
93729,7422,http://live.fanfooty.com.au/game/matchcentre.h...,R14,2020,1002143.0,Ben,McKay,NM,1,14,0,12,15,21,1,5,1,0,0,1,1,0,0,Full Time,job,%B among %P... In defence on Corbett,,,,,,,Forward,23.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,5.0,0.0,2.0,66.0,88.0,35.0,False,GC,7422_NM,7422_GC,,,,,,


In [8]:
# df_fanfooty_player_raw = pd.merge(
#     df_fanfooty_player_raw,
#     df_matches_by_team,
#     how='left',
#     left_on='opposition_match_identifier', 
#     right_on='match_identifier', 
# #     suffixes=('_player', '_opposition')
# )

In [9]:
# df_fanfooty_player_raw