In [165]:
import pandas as pd
import numpy as np
from dateutil.relativedelta import *
from pandas import DataFrame
import datetime


In [166]:
## Folder holding both input files
DATA_DIR = r'C:\Users\anoble\Git\Outside_Work\AFL Fantasy\Data\Inputs'

## Read in match dataset.
## low_memory=False lets pandas infer each column's dtype from the whole file
## rather than chunk by chunk, which avoids the mixed-type DtypeWarning on load.
df = pd.read_csv(DATA_DIR + r'\fryziggafl.csv', low_memory=False)
df['year'] = pd.DatetimeIndex(df['match_date']).year

## Filter to only games from 2010 onwards. Remove shortened season stats (2019 preds and 2020 stats)
df = df.query('year >= 2010 and year not in [2019, 2020]')

## Read in Player Database
players = pd.read_csv(DATA_DIR + r'\player_ages.csv')

## Merge game stats with player info.
## The join key is the "Last, First" name string built here.
## NOTE(review): two different players sharing a name would be merged into one,
## and a repeated name in the player file would duplicate game rows - confirm
## names are unique, or join on a player id if one is available.
df['player_name'] = df['player_last_name'].astype(str) + ", " + df['player_first_name']
joined_data = df.merge(players, on=['player_name'], how='left')

## Calculate age on match day (take year of game less year of birth, so will remain same age all season).
## Players with no match in the player file have no dob, so their age is NaN.
joined_data['age'] = pd.DatetimeIndex(joined_data['match_date']).year - pd.DatetimeIndex(joined_data['dob']).year

# Write to csv to check
# joined_data.to_csv(DATA_DIR + r'\fryziggafl_limited.csv')

# Aggregate: mean fantasy score per player / team / age / season, highest first.
# Rows with a NaN age are dropped by the groupby because age is a grouping key.
avg_scores = (
    joined_data
    .groupby(['player_team', 'player_first_name', 'player_last_name', 'player_name', 'age', 'year'])
    .agg({'afl_fantasy_score': 'mean'})
    .sort_values(by=['afl_fantasy_score'], ascending=False)
)

pd.set_option('display.max_rows', 10)
avg_scores.query('age < 22').head(10)

  df = pd.read_csv(r'C:\Users\anoble\Git\Outside_Work\AFL Fantasy\Data\Inputs\fryziggafl.csv')


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,afl_fantasy_score
player_team,player_first_name,player_last_name,player_name,age,year,Unnamed: 6_level_1
Essendon,Zach,Merrett,"Merrett, Zach",21.0,2016,117.590909
Sydney,Errol,Gulden,"Gulden, Errol",21.0,2023,112.25
Gold Coast,Brayden,Fiorini,"Fiorini, Brayden",19.0,2016,111.5
Carlton,Sam,Walsh,"Walsh, Sam",21.0,2021,108.954545
Collingwood,Nick,Daicos,"Daicos, Nick",20.0,2023,108.409091
Melbourne,Clayton,Oliver,"Oliver, Clayton",21.0,2018,107.16
Western Bulldogs,Jack,Macrae,"Macrae, Jack",20.0,2014,104.380952
Western Bulldogs,Jack,Macrae,"Macrae, Jack",21.0,2015,103.857143
Collingwood,Steele,Sidebottom,"Sidebottom, Steele",21.0,2012,103.541667
Greater Western Sydney,Adam,Treloar,"Treloar, Adam",21.0,2014,103.45


# Data cleaning (Priyanshu)
- Regular season games only (exclude finals)
- Exclude games with time on ground (TOG) < 50%
- Scale up 2020 numbers by 1.2x



In [167]:
# Identify the seasons played by each player.
# One row per (player_name, year) with:
#   matches     - number of game rows for that player in that year
#   year_played - the season as a column (mean of a constant year, so a float)
# Only seasons with at least 5 games are kept. No season is dropped by year here,
# so the latest season stays in even though it has no following-season score yet.
#
# Idea noted for later (not done in this cell): rank each player's qualifying
# seasons, most recent = 1, then 2 and so on. The rank is player-specific, e.g.
# 2021 would be season 2 for Zac Williams but season 3 for Jack Steele.
player_summ = (
    joined_data
    .groupby(['player_name', 'year'])
    .agg(matches=('match_date', 'count'), year_played=('year', 'mean'))
    .query('matches >=5')
)

player_summ

Unnamed: 0_level_0,Unnamed: 1_level_0,matches,year_played
player_name,year,Unnamed: 2_level_1,Unnamed: 3_level_1
"Aarts, Jake",2021,21,2021.0
"Aarts, Jake",2022,7,2022.0
"Ablett, Gary",2010,24,2010.0
"Ablett, Gary",2011,20,2011.0
"Ablett, Gary",2012,20,2012.0
...,...,...,...
"van Berlo, Nathan",2015,20,2015.0
"van Rooyen, Jacob",2023,20,2023.0
"vandenBerg, Aaron",2015,14,2015.0
"vandenBerg, Aaron",2016,14,2016.0


# Seasons active factor
 

In [168]:
# Count the qualifying seasons (rows of player_summ) for each player.
# NOTE(review): this is a single career-wide count per player, so when it is
# attached to an early season it also includes seasons played afterwards -
# confirm that is intended for a predictive factor.
seasons_active = (
    player_summ
    .groupby(['player_name'])
    .agg(years_active=('year_played', 'count'))
)

seasons_active

Unnamed: 0_level_0,years_active
player_name,Unnamed: 1_level_1
"Aarts, Jake",2
"Ablett, Gary",9
"Acres, Blake",7
"Adams, Leigh",5
"Adams, Marcus",5
...,...
"de Boer, Matt",10
"van Berlo, Jay",2
"van Berlo, Nathan",5
"van Rooyen, Jacob",1


## In Season change factors

In [169]:
# Split each game's fantasy score into first-half and second-half-of-season columns.
# match_round is converted to a number once; values that are not numeric become NaN,
# fail both comparisons below and therefore get NaN in both columns.
round_number = pd.to_numeric(joined_data['match_round'], errors='coerce')

# Rounds 1-10 count as the first half, rounds 11+ as the second half.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
joined_data['h1_fantasy_score'] = np.where(round_number <= 10, joined_data['afl_fantasy_score'], np.nan)
joined_data['h2_fantasy_score'] = np.where(round_number > 10, joined_data['afl_fantasy_score'], np.nan)


# Test: overall mean alongside the two half-season means (NaNs are skipped by mean)
joined_data.agg({'afl_fantasy_score': 'mean','h1_fantasy_score': 'mean','h2_fantasy_score': 'mean'})

afl_fantasy_score    69.913854
h1_fantasy_score     69.993452
h2_fantasy_score     69.969046
dtype: float64

## In game stat factors

In [170]:
# Pair every qualifying (player, season) row of player_summ with all of that
# player's games, so each year_played can be matched against any game year.
player_summ_exp = player_summ.merge(joined_data, on=['player_name'], how='inner')

# Restrict the pairs to look-back windows that end in year_played:
#   1yr - games from year_played itself
#   3yr - games from year_played and the two years before it
#   5yr - games from year_played and the four years before it
player_summ_exp_1yr = player_summ_exp.query('year_played == year')
player_summ_exp_3yr = player_summ_exp.query('year_played < year + 3 and year_played >= year')
player_summ_exp_5yr = player_summ_exp.query('year_played < year + 5 and year_played >= year')

# Row/column counts: full explosion first, then each window
for window_frame in (player_summ_exp, player_summ_exp_1yr, player_summ_exp_3yr, player_summ_exp_5yr):
    print(window_frame.shape)

# Test matching works - each year_played should only appear against game years
# inside its 3-year window (year_played - 2 up to year_played).
pd.set_option('display.max_rows', 50)
player_summ_exp_3yr.groupby(['year_played','year']).agg(countall=('player_name', 'count')).head(50)

(728863, 92)
(107461, 92)
(239416, 92)
(318909, 92)


Unnamed: 0_level_0,Unnamed: 1_level_0,countall
year_played,year,Unnamed: 2_level_1
2010.0,2010,8055
2011.0,2010,7088
2011.0,2011,8467
2012.0,2010,6357
2012.0,2011,7377
2012.0,2012,8940
2013.0,2011,6844
2013.0,2012,7852
2013.0,2013,8955
2014.0,2012,6994


In [171]:
## Average factors over 1 year of game data.
## Result: one row per (player_name, year_played); every listed column is the
## mean over that player's games in year_played, renamed with a '_1yr' suffix.
factor_cols_1yr = [
    # player attributes
    'player_height_cm', 'player_weight_kg', 'age',
    # per-game stats
    'kicks', 'marks', 'handballs', 'disposals', 'effective_disposals',
    'disposal_efficiency_percentage', 'goals', 'behinds', 'hitouts', 'tackles',
    'rebounds', 'inside_fifties', 'clearances', 'clangers', 'free_kicks_for',
    'free_kicks_against', 'brownlow_votes', 'contested_possessions',
    'uncontested_possessions', 'contested_marks', 'marks_inside_fifty',
    'one_percenters', 'bounces', 'goal_assists', 'time_on_ground_percentage',
    'centre_clearances', 'stoppage_clearances', 'score_involvements',
    'metres_gained', 'turnovers', 'intercepts', 'tackles_inside_fifty',
    'contest_def_losses', 'contest_def_one_on_ones', 'contest_off_one_on_ones',
    'contest_off_wins', 'def_half_pressure_acts', 'effective_kicks',
    'f50_ground_ball_gets', 'ground_ball_gets', 'hitouts_to_advantage',
    'hitout_win_percentage', 'intercept_marks', 'marks_on_lead', 'pressure_acts',
    'rating_points', 'ruck_contests', 'score_launches', 'shots_at_goal', 'spoils',
    # fantasy scores (full season, first half, second half)
    'afl_fantasy_score', 'h1_fantasy_score', 'h2_fantasy_score',
]

model_factors_agg_1yr = (
    player_summ_exp_1yr
    .groupby(['player_name', 'year_played'])
    .agg(dict.fromkeys(factor_cols_1yr, 'mean'))
    .add_suffix('_1yr')
)

model_factors_agg_1yr

Unnamed: 0_level_0,Unnamed: 1_level_0,player_height_cm_1yr,player_weight_kg_1yr,age_1yr,kicks_1yr,marks_1yr,handballs_1yr,disposals_1yr,effective_disposals_1yr,disposal_efficiency_percentage_1yr,goals_1yr,...,marks_on_lead_1yr,pressure_acts_1yr,rating_points_1yr,ruck_contests_1yr,score_launches_1yr,shots_at_goal_1yr,spoils_1yr,afl_fantasy_score_1yr,h1_fantasy_score_1yr,h2_fantasy_score_1yr
player_name,year_played,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
"Aarts, Jake",2021.0,177.0,76.0,,5.809524,2.523810,4.761905,10.571429,8.000000,69.809524,0.857143,...,0.190476,13.000000,6.747619,0.000000,0.809524,1.523810,0.476190,47.428571,52.700000,42.636364
"Aarts, Jake",2022.0,177.0,76.0,,2.857143,0.857143,4.142857,7.000000,4.857143,67.000000,0.285714,...,0.000000,8.428571,4.414286,0.142857,0.857143,1.142857,0.142857,27.571429,22.800000,39.500000
"Ablett, Gary",2010.0,182.0,87.0,,14.083333,4.416667,17.416667,31.500000,,,1.833333,...,,,,,,,,116.916667,127.666667,112.583333
"Ablett, Gary",2011.0,182.0,87.0,,15.200000,2.250000,15.050000,30.250000,,,0.900000,...,,,,,,,,112.150000,101.000000,118.153846
"Ablett, Gary",2012.0,182.0,87.0,,19.450000,3.050000,14.300000,33.750000,23.789474,69.000000,1.300000,...,0.210526,18.789474,21.370000,0.105263,1.684211,2.526316,0.684211,124.700000,131.625000,120.083333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"van Berlo, Nathan",2015.0,,,,9.350000,3.950000,5.950000,15.300000,10.400000,68.350000,0.200000,...,0.150000,12.350000,7.385000,0.000000,0.850000,0.600000,0.950000,67.000000,71.100000,64.250000
"van Rooyen, Jacob",2023.0,193.0,96.0,,4.500000,3.100000,4.200000,8.700000,6.800000,78.800000,1.400000,...,0.550000,9.700000,7.295000,11.900000,0.800000,2.000000,1.450000,50.900000,49.000000,53.545455
"vandenBerg, Aaron",2015.0,188.0,94.0,,10.214286,3.142857,6.571429,16.785714,9.785714,56.000000,0.357143,...,0.428571,15.285714,8.685714,0.000000,0.857143,1.500000,0.428571,73.928571,74.900000,71.500000
"vandenBerg, Aaron",2016.0,188.0,94.0,,5.714286,3.071429,8.928571,14.642857,9.785714,66.785714,0.857143,...,0.642857,15.071429,8.864286,0.071429,0.357143,1.714286,0.642857,61.142857,53.333333,63.272727


In [172]:
# Build 3 Year Player Metrics.
# Result: one row per (player_name, year_played); every listed column is the
# mean over the games in the 3-year window frame, renamed with a '_3yr' suffix.
factor_cols_3yr = [
    # per-game stats
    'kicks', 'marks', 'handballs', 'disposals', 'effective_disposals',
    'disposal_efficiency_percentage', 'goals', 'behinds', 'hitouts', 'tackles',
    'rebounds', 'inside_fifties', 'clearances', 'clangers', 'free_kicks_for',
    'free_kicks_against', 'brownlow_votes', 'contested_possessions',
    'uncontested_possessions', 'contested_marks', 'marks_inside_fifty',
    'one_percenters', 'bounces', 'goal_assists', 'time_on_ground_percentage',
    'centre_clearances', 'stoppage_clearances', 'score_involvements',
    'metres_gained', 'turnovers', 'intercepts', 'tackles_inside_fifty',
    'contest_def_losses', 'contest_def_one_on_ones', 'contest_off_one_on_ones',
    'contest_off_wins', 'def_half_pressure_acts', 'effective_kicks',
    'f50_ground_ball_gets', 'ground_ball_gets', 'hitouts_to_advantage',
    'hitout_win_percentage', 'intercept_marks', 'marks_on_lead', 'pressure_acts',
    'rating_points', 'ruck_contests', 'score_launches', 'shots_at_goal', 'spoils',
    # fantasy scores (full season, first half, second half)
    'afl_fantasy_score', 'h1_fantasy_score', 'h2_fantasy_score',
]

model_factors_agg_3yr = (
    player_summ_exp_3yr
    .groupby(['player_name', 'year_played'])
    .agg(dict.fromkeys(factor_cols_3yr, 'mean'))
    .add_suffix('_3yr')
)

model_factors_agg_3yr

Unnamed: 0_level_0,Unnamed: 1_level_0,kicks_3yr,marks_3yr,handballs_3yr,disposals_3yr,effective_disposals_3yr,disposal_efficiency_percentage_3yr,goals_3yr,behinds_3yr,hitouts_3yr,tackles_3yr,...,marks_on_lead_3yr,pressure_acts_3yr,rating_points_3yr,ruck_contests_3yr,score_launches_3yr,shots_at_goal_3yr,spoils_3yr,afl_fantasy_score_3yr,h1_fantasy_score_3yr,h2_fantasy_score_3yr
player_name,year_played,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
"Aarts, Jake",2021.0,5.809524,2.523810,4.761905,10.571429,8.000000,69.809524,0.857143,0.571429,0.000000,2.523810,...,0.190476,13.000000,6.747619,0.000000,0.809524,1.523810,0.476190,47.428571,52.700000,42.636364
"Aarts, Jake",2022.0,5.071429,2.107143,4.607143,9.678571,7.214286,69.107143,0.714286,0.500000,0.000000,2.392857,...,0.142857,11.857143,6.164286,0.035714,0.821429,1.428571,0.392857,42.464286,42.733333,42.153846
"Ablett, Gary",2010.0,14.083333,4.416667,17.416667,31.500000,,,1.833333,1.083333,0.000000,4.208333,...,,,,,,,,116.916667,127.666667,112.583333
"Ablett, Gary",2011.0,14.590909,3.431818,16.340909,30.931818,,,1.409091,0.977273,0.000000,5.000000,...,,,,,,,,114.750000,116.000000,115.480000
"Ablett, Gary",2012.0,16.109375,3.312500,15.703125,31.812500,23.789474,69.000000,1.375000,0.968750,0.000000,5.156250,...,0.210526,18.789474,21.370000,0.105263,1.684211,2.526316,0.684211,117.859375,121.208333,116.972973
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"van Berlo, Nathan",2015.0,10.153846,3.794872,5.974359,16.128205,10.461538,65.282051,0.307692,0.384615,0.076923,3.487179,...,0.282051,13.051282,7.656410,0.000000,0.846154,0.820513,0.871795,67.717949,69.764706,67.000000
"van Rooyen, Jacob",2023.0,4.500000,3.100000,4.200000,8.700000,6.800000,78.800000,1.400000,0.450000,3.700000,2.150000,...,0.550000,9.700000,7.295000,11.900000,0.800000,2.000000,1.450000,50.900000,49.000000,53.545455
"vandenBerg, Aaron",2015.0,10.214286,3.142857,6.571429,16.785714,9.785714,56.000000,0.357143,0.857143,0.071429,5.000000,...,0.428571,15.285714,8.685714,0.000000,0.857143,1.500000,0.428571,73.928571,74.900000,71.500000
"vandenBerg, Aaron",2016.0,7.964286,3.107143,7.750000,15.714286,9.785714,61.392857,0.607143,0.750000,0.071429,4.285714,...,0.535714,15.178571,8.775000,0.035714,0.607143,1.607143,0.535714,67.535714,69.923077,65.466667


In [173]:
# Build 5 Year Player Metrics.
# Result: one row per (player_name, year_played); every listed column is the
# mean over the games in the 5-year window frame, renamed with a '_5yr' suffix.
factor_cols_5yr = [
    # per-game stats
    'kicks', 'marks', 'handballs', 'disposals', 'effective_disposals',
    'disposal_efficiency_percentage', 'goals', 'behinds', 'hitouts', 'tackles',
    'rebounds', 'inside_fifties', 'clearances', 'clangers', 'free_kicks_for',
    'free_kicks_against', 'brownlow_votes', 'contested_possessions',
    'uncontested_possessions', 'contested_marks', 'marks_inside_fifty',
    'one_percenters', 'bounces', 'goal_assists', 'time_on_ground_percentage',
    'centre_clearances', 'stoppage_clearances', 'score_involvements',
    'metres_gained', 'turnovers', 'intercepts', 'tackles_inside_fifty',
    'contest_def_losses', 'contest_def_one_on_ones', 'contest_off_one_on_ones',
    'contest_off_wins', 'def_half_pressure_acts', 'effective_kicks',
    'f50_ground_ball_gets', 'ground_ball_gets', 'hitouts_to_advantage',
    'hitout_win_percentage', 'intercept_marks', 'marks_on_lead', 'pressure_acts',
    'rating_points', 'ruck_contests', 'score_launches', 'shots_at_goal', 'spoils',
    # fantasy scores (full season, first half, second half)
    'afl_fantasy_score', 'h1_fantasy_score', 'h2_fantasy_score',
]

model_factors_agg_5yr = (
    player_summ_exp_5yr
    .groupby(['player_name', 'year_played'])
    .agg(dict.fromkeys(factor_cols_5yr, 'mean'))
    .add_suffix('_5yr')
)

model_factors_agg_5yr

Unnamed: 0_level_0,Unnamed: 1_level_0,kicks_5yr,marks_5yr,handballs_5yr,disposals_5yr,effective_disposals_5yr,disposal_efficiency_percentage_5yr,goals_5yr,behinds_5yr,hitouts_5yr,tackles_5yr,...,marks_on_lead_5yr,pressure_acts_5yr,rating_points_5yr,ruck_contests_5yr,score_launches_5yr,shots_at_goal_5yr,spoils_5yr,afl_fantasy_score_5yr,h1_fantasy_score_5yr,h2_fantasy_score_5yr
player_name,year_played,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
"Aarts, Jake",2021.0,5.809524,2.523810,4.761905,10.571429,8.000000,69.809524,0.857143,0.571429,0.000000,2.523810,...,0.190476,13.000000,6.747619,0.000000,0.809524,1.523810,0.476190,47.428571,52.700000,42.636364
"Aarts, Jake",2022.0,5.071429,2.107143,4.607143,9.678571,7.214286,69.107143,0.714286,0.500000,0.000000,2.392857,...,0.142857,11.857143,6.164286,0.035714,0.821429,1.428571,0.392857,42.464286,42.733333,42.153846
"Ablett, Gary",2010.0,14.083333,4.416667,17.416667,31.500000,,,1.833333,1.083333,0.000000,4.208333,...,,,,,,,,116.916667,127.666667,112.583333
"Ablett, Gary",2011.0,14.590909,3.431818,16.340909,30.931818,,,1.409091,0.977273,0.000000,5.000000,...,,,,,,,,114.750000,116.000000,115.480000
"Ablett, Gary",2012.0,16.109375,3.312500,15.703125,31.812500,23.789474,69.000000,1.375000,0.968750,0.000000,5.156250,...,0.210526,18.789474,21.370000,0.105263,1.684211,2.526316,0.684211,117.859375,121.208333,116.972973
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"van Berlo, Nathan",2015.0,11.290698,4.406977,7.337209,18.627907,11.063492,66.687500,0.430233,0.441860,0.186047,3.930233,...,0.238095,14.174603,8.525000,0.000000,0.873016,0.904762,0.793651,78.593023,82.944444,76.111111
"van Rooyen, Jacob",2023.0,4.500000,3.100000,4.200000,8.700000,6.800000,78.800000,1.400000,0.450000,3.700000,2.150000,...,0.550000,9.700000,7.295000,11.900000,0.800000,2.000000,1.450000,50.900000,49.000000,53.545455
"vandenBerg, Aaron",2015.0,10.214286,3.142857,6.571429,16.785714,9.785714,56.000000,0.357143,0.857143,0.071429,5.000000,...,0.428571,15.285714,8.685714,0.000000,0.857143,1.500000,0.428571,73.928571,74.900000,71.500000
"vandenBerg, Aaron",2016.0,7.964286,3.107143,7.750000,15.714286,9.785714,61.392857,0.607143,0.750000,0.071429,4.285714,...,0.535714,15.178571,8.775000,0.035714,0.607143,1.607143,0.535714,67.535714,69.923077,65.466667


In [174]:
# Most common position for the year
# Calculate most common position played over past year
# Left/right variants of a position are collapsed into one group, since the
# side indicator is probably not useful.

# Work on a copy: the helper columns added below (position_grouped, matches,
# rank) must not leak into joined_data, which other cells read and re-merge.
position_data = joined_data.copy()

# Raw position code -> grouped position. Interchange and substitute are tagged
# 'Ignore' so they can be excluded when picking the main position.
POSITION_GROUPS = {
    'FB': 'FB',
    'BPL': 'BP', 'BPR': 'BP',
    'CHB': 'CHB',
    'HBFL': 'HBF', 'HBFR': 'HBF',
    'RK': 'RUCK',
    'C': 'C', 'RR': 'C', 'R': 'C',
    'WL': 'W', 'WR': 'W',
    'FF': 'FF',
    'FPL': 'FP', 'FPR': 'FP',
    'HFFL': 'HFF', 'HFFR': 'HFF',
    'CHF': 'CHF',
    'INT': 'Ignore', 'SUB': 'Ignore',
}

# Codes missing from the mapping (including missing values) fall back to 'Other'
position_data['position_grouped'] = position_data['player_position'].map(POSITION_GROUPS).fillna('Other')

## Test no positions have not been allocated. Result = 0 rows
# position_data.query('position_grouped in ["Other"]')\
#     .groupby(['player_position']).agg(matches=('match_date', 'count'))

# Games per player / season / grouped position, repeated on every game row
position_data['matches'] = position_data.groupby(['player_name', 'year', 'position_grouped'])['match_id'].transform('count')

# Find most common position, ignoring sub and interchange games.
# method="first" breaks ties by row order, so exactly one row per player/season
# gets rank 1; 'Ignore' rows are left out of the ranking and get NaN.
position_data['rank'] = (
    position_data
    .query('position_grouped not in ["Ignore"]')
    .groupby(['player_name', 'year'])['matches']
    .rank(method="first", ascending=False)
)

# One row per player/season. A season played only as INT/SUB has no rank-1 row
# and so does not appear here.
player_pos_yearly = position_data.query('rank == 1')[['player_name', 'year', 'position_grouped']]

# Test output
player_pos_yearly.query('player_name == "Himmelberg, Harry"')

Unnamed: 0,player_name,year,position_grouped
92293,"Himmelberg, Harry",2016,FP
92298,"Himmelberg, Harry",2017,FP
92308,"Himmelberg, Harry",2018,FF
92344,"Himmelberg, Harry",2021,CHF
92356,"Himmelberg, Harry",2022,CHF
92390,"Himmelberg, Harry",2023,CHB


## Response variable

In [175]:
# Response variable: average AFL fantasy score per player per season.
# Only seasons with at least 5 games and a positive average are kept,
# ordered from highest to lowest average.
season_scores = joined_data.groupby(['player_name', 'year']).agg(
    matches=('match_date', 'count'),
    year_int=('year', 'mean'),
    avg_score=('afl_fantasy_score', 'mean'),
)
response_var = (
    season_scores
    .query('matches >= 5 and avg_score > 0')
    .sort_values(by=['avg_score'], ascending=False)
)

# The score of season N is the target for factors built from season N-1,
# so key each row by the previous year for the later join.
response_var['predicted_year'] = response_var['year_int'] - 1

response_var

Unnamed: 0_level_0,Unnamed: 1_level_0,matches,year_int,avg_score,predicted_year
player_name,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"Rockliff, Tom",2014,18,2014.0,134.777778,2013.0
"Swan, Dane",2012,21,2012.0,130.666667,2011.0
"Mitchell, Tom",2018,24,2018.0,128.166667,2017.0
"Mitchell, Tom",2017,22,2017.0,127.181818,2016.0
"Ablett, Gary",2012,20,2012.0,124.700000,2011.0
...,...,...,...,...,...
"Wanganeen, Tex",2022,5,2022.0,17.600000,2021.0
"Berry, Thomas",2021,11,2021.0,16.909091,2020.0
"Morris, Josh",2021,5,2021.0,8.600000,2020.0
"Davis, Ben",2022,5,2022.0,8.400000,2021.0


## Join all factors together


In [176]:
## Player / year combos to include.
## Start from the qualifying seasons and attach, in order: career season count,
## main position for the season, then the 1yr / 3yr / 5yr averaged factors
## (all inner joins). The response is joined last with a left join keyed on
## predicted_year, so seasons with no following-season score are kept with
## NaN response columns.
season_keys = ['player_name', 'year_played']

factors = (
    player_summ
    .merge(seasons_active, on=['player_name'], how='inner')
    .merge(player_pos_yearly, left_on=season_keys, right_on=['player_name', 'year'], how='inner')
    .merge(model_factors_agg_1yr, on=season_keys, how='inner')
    .merge(model_factors_agg_3yr, on=season_keys, how='inner')
    .merge(model_factors_agg_5yr, on=season_keys, how='inner')
    .merge(response_var, left_on=season_keys, right_on=['player_name', 'predicted_year'], how='left')
)

## Add delta factors. (Lazy method for now, just using the 1yr 3yr and 5yr averages)
# Latest season's average relative to the 3-year average
factors['fantasy_score_yearly_change'] = factors['afl_fantasy_score_1yr'].sub(factors['afl_fantasy_score_3yr'])
# Second half of the season relative to the first half
factors['fantasy_score_inyear_change'] = factors['h2_fantasy_score_1yr'].sub(factors['h1_fantasy_score_1yr'])

print(factors.shape)
print(factors.head(10))

# Persist the modelling table (the row index is written as the first column)
factors.to_csv(r'C:\Users\anoble\Git\Outside_Work\AFL Fantasy\Data\Inputs\factors.csv', mode='w+')

(6434, 174)
    player_name  matches_x  year_played  years_active  year position_grouped  \
0   Aarts, Jake         21       2021.0             2  2021              HFF   
1   Aarts, Jake          7       2022.0             2  2022              HFF   
2  Ablett, Gary         24       2010.0             9  2010                C   
3  Ablett, Gary         20       2011.0             9  2011                C   
4  Ablett, Gary         20       2012.0             9  2012                C   
5  Ablett, Gary         21       2013.0             9  2013                C   
6  Ablett, Gary         15       2014.0             9  2014                C   
7  Ablett, Gary          6       2015.0             9  2015                C   
8  Ablett, Gary         14       2016.0             9  2016                C   
9  Ablett, Gary         14       2017.0             9  2017                C   

   player_height_cm_1yr  player_weight_kg_1yr  age_1yr  kicks_1yr  ...  \
0                 177.0          

In [179]:
# Eyeball the two delta factors next to the averages the yearly change is built from
delta_preview_cols = [
    'afl_fantasy_score_1yr',
    'afl_fantasy_score_3yr',
    'fantasy_score_yearly_change',
    'fantasy_score_inyear_change',
]
factors[delta_preview_cols]

Unnamed: 0,afl_fantasy_score_1yr,afl_fantasy_score_3yr,fantasy_score_yearly_change,fantasy_score_inyear_change
0,47.428571,47.428571,0.000000,-10.063636
1,27.571429,42.464286,-14.892857,16.700000
2,116.916667,116.916667,0.000000,-15.083333
3,112.150000,114.750000,-2.600000,17.153846
4,124.700000,117.859375,6.840625,-11.541667
...,...,...,...,...
6429,67.000000,67.717949,-0.717949,-6.850000
6430,50.900000,50.900000,0.000000,4.545455
6431,73.928571,73.928571,0.000000,-3.400000
6432,61.142857,67.535714,-6.392857,9.939394
