# AFL Functions

In [1]:
#packages
import polars as pl
import pickle
import numpy as np
import pandas as pd
import xlsx2csv
from datetime import datetime
from sklearn.ensemble import RandomForestClassifier
#set up R extension
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
from rpy2.robjects.lib.dplyr import DataFrame
from rpy2.robjects import pandas2ri
from rpy2.robjects import rl
import rpy2.robjects as ro
#import rpy2_arrow.polars as rpy2polars

# Notebook-wide polars display settings: string format length 150, up to 40 table
# columns and up to 50 table rows. The trailing semicolons keep each call's return
# value out of the cell output.
pl.Config.set_fmt_str_lengths(150);
pl.Config.set_tbl_cols(40);
pl.Config.set_tbl_rows(50);



## Functions to Get fitzRoy Data

In [2]:
def replaceTeamNames(data, col):
    """Map known team-name variants in one column onto a single canonical name.

    Parameters
    ----------
    data : polars DataFrame holding the column to normalise.
    col : name of the column whose values are replaced.

    Returns
    -------
    A new frame where every alias listed below is replaced by its canonical
    name in ``col``; values not listed are passed through by ``Expr.replace``.
    """
    # alias -> canonical name (dict order matches the old/new pairing)
    canonicalNames = {
        'GWS GIANTS': 'GWS Giants',
        'GWS': 'GWS Giants',
        'Gold Coast SUNS': 'Gold Coast Suns',
        'Gold Coast': 'Gold Coast Suns',
        'Narrm': 'Melbourne',
        'Yartapuulti': 'Port Adelaide',
        'Walyalup': 'Fremantle',
        'Euro-Yroke': 'St Kilda',
        'Waalitj Marawar': 'West Coast Eagles',
        'West Coast': 'West Coast Eagles',
        'Kuwarna': 'Adelaide Crows',
        'Adelaide': 'Adelaide Crows',
        'Footscray': 'Western Bulldogs',
        'Sydney': 'Sydney Swans',
        'Geelong': 'Geelong Cats',
    }
    aliases = list(canonicalNames.keys())
    replacements = list(canonicalNames.values())

    normalised = pl.col(col).replace(old=aliases, new=replacements)
    return data.with_columns(normalised)

In [3]:
def getCurrentRoundNumbers():
    """Return the current round number reported by fitzRoy's default fixture.

    The value is read from the ``compSeason.currentRoundNumber`` column of
    ``fetch_fixture()`` (first column, first row of its distinct values).
    """
    # load the R packages
    fitzroyPkg = importr('fitzRoy')
    dplyrPkg = importr('dplyr')

    # distinct values of the current-round column
    fixtureFrame = DataFrame(fitzroyPkg.fetch_fixture())
    roundValues = fixtureFrame.select('compSeason.currentRoundNumber').distinct()

    return roundValues[0][0]


In [4]:
def getSeason():
    """Return the current calendar year (from the local clock) as an int."""
    today = datetime.today()
    return today.year


In [5]:
def getPastRoundNumber():
    """Find the highest round number present in the AFL results, and its season.

    When ``getCurrentRoundNumbers()`` is 0 the previous season's results are
    used (this file treats round 0 as a season that has no results yet);
    otherwise the current season's results are used.

    Returns
    -------
    (pastRound, pastSeason) : the maximum ``round.roundNumber`` in the fetched
    results and the matching ``round.year`` cast to Int64.
    """
    # load the R packages
    fitzroyPkg = importr('fitzRoy')
    dplyrPkg = importr('dplyr')

    # Round 0: fall back to last season, otherwise stay in this season
    if getCurrentRoundNumbers() == 0:
        targetSeason = getSeason() - 1
    else:
        targetSeason = getSeason()

    rResults = DataFrame(fitzroyPkg.fetch_results_afl(season=targetSeason))
    roundInfo = pl.from_pandas(
        pandas2ri.rpy2py_dataframe(rResults.select('round.roundNumber', 'round.year'))
    )

    # keep only the rows belonging to the latest round
    roundNumber = pl.col('round.roundNumber')
    latest = roundInfo.filter(roundNumber == roundNumber.max())

    # .item() requires exactly one distinct value in each case
    pastRound = latest.select('round.roundNumber').unique().item()
    pastSeason = latest.select(pl.col('round.year').cast(pl.Int64)).unique().item()

    return pastRound, pastSeason

In [6]:
def getResults():
    """Fetch the latest completed round's results and return them as a cleaned polars frame.

    The round/season come from ``getPastRoundNumber()``. The frame is fetched
    through fitzRoy, team names are normalised with ``replaceTeamNames``,
    columns are renamed to the ``home.team.*`` / ``away.team.*`` /
    ``compSeason.year`` / ``Date`` scheme, numeric columns are cast to Int64,
    and ``home.team.id`` / ``away.team.id`` are added by left-joining the team
    index (``EloIndex``) on the team names.

    NOTE(review): ``team.A.club.id`` is dropped after the first join only, so
    the returned frame appears to keep that column from the second join —
    confirm whether that is intended.
    """
    pastRound, pastSeason = getPastRoundNumber()

    #set r functions
    r_fitzroy= importr('fitzRoy')
    r_dyplr = importr('dplyr')
    
    #Get last round's results. match.date is converted to character on the R side
    #(matchDate) and parsed back into a date on the polars side.
    results = (pl.from_pandas(pandas2ri.rpy2py_dataframe(DataFrame(r_fitzroy.fetch_results_afl(season=pastSeason, round=pastRound)).select('match.date', 'round.roundNumber', 'venue.name', 'match.homeTeam.name', 'match.awayTeam.name',
                                                                'round.year', 'homeTeamScore.matchScore.totalScore', 'homeTeamScore.matchScore.behinds',
                                                                'homeTeamScore.matchScore.goals', 'awayTeamScore.matchScore.totalScore', 'awayTeamScore.matchScore.behinds',
                                                                'awayTeamScore.matchScore.goals').mutate(matchDate=rl("as.character(match.date)"))))
                .with_columns(pl.col('matchDate').str.to_datetime().dt.date().alias('match.date'))
                .drop('matchDate'))
    
    #Normalise the team names in both name columns so they match the team index
    results = replaceTeamNames(results, 'match.homeTeam.name')
    results = replaceTeamNames(results, 'match.awayTeam.name')

    indexTeams = getTeamIndex()

    #Clean & prep the dataframe
    results = (results
                #NOTE(review): match.date was already reduced to a date above, so this looks redundant
                .with_columns(pl.col('match.date').dt.date())
                .rename({'match.date' : 'Date',
                      'round.year' : 'compSeason.year',
                      'homeTeamScore.matchScore.totalScore' : 'home.team.totalScore',
                      'homeTeamScore.matchScore.behinds' : 'home.team.behinds',
                      'homeTeamScore.matchScore.goals' : 'home.team.goals',
                      'awayTeamScore.matchScore.totalScore' : 'away.team.totalScore',
                      'awayTeamScore.matchScore.behinds' : 'away.team.behinds',
                      'awayTeamScore.matchScore.goals' : 'away.team.goals',
                      'match.homeTeam.name' : 'home.team.name',
                      'match.awayTeam.name' : 'away.team.name'})
                #cast to int64
                .with_columns(pl.col('compSeason.year').cast(pl.Int64),
                           pl.col('round.roundNumber').cast(pl.Int64),
                           pl.col('home.team.totalScore').cast(pl.Int64),
                           pl.col('home.team.behinds').cast(pl.Int64),
                           pl.col('home.team.goals').cast(pl.Int64),
                           pl.col('away.team.totalScore').cast(pl.Int64),
                           pl.col('away.team.behinds').cast(pl.Int64),
                           pl.col('away.team.goals').cast(pl.Int64))
                 #Add the team indexes: a left join leaves the id null when a team name has no match
                .join(indexTeams, how='left', left_on='home.team.name', right_on='team.A.name', coalesce=True)
                .drop('team.A.club.id')
                .rename({'EloIndex' : 'home.team.id'})
                .join(indexTeams, how='left', left_on='away.team.name', right_on='team.A.name', coalesce=True)
                .rename({'EloIndex' : 'away.team.id'})
            )
        
    
    return results


In [7]:
def getNextFixture():
    """Return the upcoming round's fixture and the byes listed for the past round.

    Returns
    -------
    (fixture, bye) : ``fixture`` is the polars frame of matches in the round
    with the earliest start date that is not before ``datetime.today()``;
    ``bye`` is the unnested distinct ``round.byes`` of the fixture fetched for
    the season/round returned by ``getPastRoundNumber()``.

    NOTE(review): 'match.date' is a date while ``datetime.today()`` is a
    datetime — confirm how a round that starts today is meant to be treated.
    """
    # load the R packages
    fitzroyPkg = importr('fitzRoy')
    dplyrPkg = importr('dplyr')
    tidyrPkg = importr('tidyr')

    # full fixture without the byes and ticket-link columns
    rawFixture = fitzroyPkg.fetch_fixture()
    trimmed = DataFrame(rawFixture).select(rl('-round.byes')).select(rl('-metadata.ticket_link'))
    allMatches = pl.from_pandas(pandas2ri.rpy2py_dataframe(trimmed))

    matchDate = pl.col('match.date')

    # tag every match with the first match date of its round
    withRoundStart = (allMatches
                      .with_columns(pl.col('utcStartTime').str.to_datetime().dt.date().alias('match.date'))
                      .with_columns(matchDate.min().over('round.id')))

    # discard rounds that have already started
    upcoming = withRoundStart.filter(matchDate >= datetime.today())

    # keep only the round with the nearest start date, then drop the helper columns
    fixture = (upcoming
               .with_columns(matchDate.min().alias('start.next.round'))
               .filter(matchDate == pl.col('start.next.round'))
               .drop('match.date', 'start.next.round'))

    # .item() raises unless the kept rows share exactly one round number
    currentRound = fixture.select('round.roundNumber').unique().item()
    pastRound, pastSeason = getPastRoundNumber()

    # byes as listed on the past round's fixture
    pastFixture = DataFrame(fitzroyPkg.fetch_fixture(season=pastSeason, round_number=pastRound))
    byeColumn = pastFixture.select(rl('round.byes')).distinct()
    bye = pl.from_pandas(pandas2ri.rpy2py_dataframe(tidyrPkg.unnest(byeColumn)))

    return fixture, bye

In [8]:
def getLadder():
    """Return the ladder (``team.name``, ``position``) as a polars frame.

    When the current round is 0 the ladder is fetched for round 24 of the past
    season; otherwise it is fetched for the past round of the current season.
    """
    pastRound, pastSeason = getPastRoundNumber()
    season = getSeason()

    # load the R packages
    fitzroyPkg = importr('fitzRoy')
    dplyrPkg = importr('dplyr')

    if getCurrentRoundNumbers() == 0:
        # last round before finals of the previous season
        rLadder = fitzroyPkg.fetch_ladder_afl(season=pastSeason, round_number=24)
    else:
        rLadder = fitzroyPkg.fetch_ladder_afl(season=season, round_number = pastRound)

    # keep only the relevant columns and convert to polars
    positions = DataFrame(rLadder).select('team.name', 'position')
    return pl.from_pandas(pandas2ri.rpy2py_dataframe(positions))

In [35]:
def getPlayerRanking():
    """Fetch per-round player rating points for last season and, once started, this season.

    The seasons are fetched one at a time through fitzRoy's
    ``fetch_player_stats``, tagged with their year, trimmed to the columns
    listed below and stacked vertically.

    Returns
    -------
    A polars DataFrame with one row per player per match, where the season
    column is named ``compSeason.year``.
    """
    #Set up season list
    season = getSeason()
    currentRound = getCurrentRoundNumbers()
    #Don't try and get player ratings from a season that hasn't started yet
    if currentRound == 0:
        seasonList = [season - 1]
    else:
        seasonList = [season - 1, season]

    #set r functions
    r_fitzroy = importr('fitzRoy')
    importr('dplyr')

    keepColumns = ('compSeason.shortName',
                   'compSeasonYear',
                   'round.roundNumber',
                   'teamId',
                   'team.name',
                   'player.playerId',
                   'player.givenName',
                   'player.surname',
                   'ratingPoints')

    #one frame per season, all with the same schema
    seasonFrames = []
    for s in seasonList:
        rStats = (DataFrame(r_fitzroy.fetch_player_stats(season=s))
                  .mutate(compSeasonYear = s)
                  .select(*keepColumns))
        seasonFrames.append(pl.from_pandas(pandas2ri.rpy2py_dataframe(rStats)))

    #stack the seasons in a single step
    players = pl.concat(seasonFrames, how='vertical')

    return players.rename({'compSeasonYear':'compSeason.year'})


In [None]:
# Manual check of getPlayerRanking(); this triggers live fitzRoy fetches.
getPlayerRanking()

i Returning data for "All Rounds, 2025"
v Returning data for "All Rounds, 2025" [324ms]

i Fetching match ids
v Fetching match ids [391ms]

i Finding player stats for 216 matches.


## Functions to get/update data

In [35]:
def getTeamIndex():
    """Read 'Data/indexTeams.xlsx' and normalise the names in 'team.A.name'."""
    indexFrame = pl.read_excel('Data/indexTeams.xlsx')
    return replaceTeamNames(indexFrame, 'team.A.name')

In [36]:
def getDistance():
    """Read 'Data/distanceTravelled.xlsx' and normalise the names in 'team.name'."""
    distanceFrame = pl.read_excel('Data/distanceTravelled.xlsx')
    return replaceTeamNames(distanceFrame, 'team.name')

In [37]:
def getElo():
    """Load and return the object pickled in 'Data/eloScores'.

    The file is unpickled as-is, so it must come from a trusted source.
    """
    with open("Data/eloScores", "rb") as eloFile:   # Unpickling
        return pickle.load(eloFile)

In [38]:
def getMatchDays():
    """Read 'Data/matchDay.xlsx' and normalise the names in 'Team'."""
    matchDayFrame = pl.read_excel('Data/matchDay.xlsx')
    return replaceTeamNames(matchDayFrame, 'Team')

In [39]:
def getMatchesBothSides():
    """Read 'Data/matchesBothSides.xlsx' with both team-name columns normalised."""
    matches = pl.read_excel('Data/matchesBothSides.xlsx')
    # apply the same name normalisation to side A, then side B
    for nameColumn in ('team.A.name', 'team.B.name'):
        matches = replaceTeamNames(matches, nameColumn)
    return matches

In [40]:
def updateElo(results_df, updateType):
    """Apply a round of results to the stored Elo ratings and save them.

    Parameters
    ----------
    results_df : polars DataFrame with the columns 'home.team.id',
        'away.team.id', 'home.team.totalScore' and 'away.team.totalScore'.
        The ids are used to index the object returned by ``getElo()``.
    updateType : when equal to 'New Season', each updated rating is also
        regressed toward 1500 via ``eloNewSeason``; any other value applies
        the plain match update.

    Side effects
    ------------
    Reads the ratings with ``getElo()`` and writes the updated ratings back to
    'Data/eloScores' (the same file ``getElo()`` reads).

    Fixes relative to the previous version: the ratings were written to
    'DataeloScores' (missing path separator), the per-row callback was invoked
    without ``updateType``, and the update relied on ``map_elements`` purely
    for its side effects.
    """
    #NOTE(review): the earlier comments cited B = 0.004 and K = 67.559 from
    #"Multifactorial analysis of factors influencing elite australian football
    #match outcomes: a machine learning approach", but the code used 0.04 and
    #20. The coded values are kept; confirm which ones are intended.
    marginScale = 0.04
    kFactor = 20

    #expected result for team A given both ratings
    def eloTeamPrediction(ratingTeamA, ratingTeamB):
        exp = (-1*(ratingTeamA - ratingTeamB))/400
        return 1/(1 + 10 ** exp)

    #actual result for team A as a logistic function of the score margin
    def eloTeamResult(scoreTeamA, scoreTeamB):
        exp = -marginScale*(scoreTeamA - scoreTeamB)
        return 1/(1 + np.exp(exp))

    #new rating for team A after one match
    def eloChange(scoreTeamA, scoreTeamB, ratingTeamA, ratingTeamB):
        prediction = eloTeamPrediction(ratingTeamA, ratingTeamB)
        actual = eloTeamResult(scoreTeamA, scoreTeamB)
        return ratingTeamA + kFactor*(actual - prediction)

    #pull a rating part of the way back to 1500
    def eloNewSeason(lastElo):
        carryOver = 0.80
        return carryOver * lastElo + 1500 * (1 - carryOver)

    #Load the ratings once, update them match by match, then save once
    elo = getElo()

    matchColumns = ['home.team.id', 'away.team.id', 'home.team.totalScore', 'away.team.totalScore']
    for match in results_df.select(matchColumns).iter_rows(named=True):
        indexA = match['home.team.id']
        indexB = match['away.team.id']
        scoreA = match['home.team.totalScore']
        scoreB = match['away.team.totalScore']

        #both updates use the pre-match ratings
        eloA = eloChange(scoreA, scoreB, elo[indexA], elo[indexB])
        eloB = eloChange(scoreB, scoreA, elo[indexB], elo[indexA])

        if updateType == 'New Season':
            eloA = eloNewSeason(eloA)
            eloB = eloNewSeason(eloB)

        elo[indexA] = eloA
        elo[indexB] = eloB

    #Write New Elo
    with open("Data/eloScores", "wb") as fp:
        pickle.dump(elo, fp)
        


In [41]:
def updateMatchDays(results_df):
    """Refresh each team's 'Date' in 'Data/matchDay.xlsx' from the given results.

    Teams that appear in ``results_df`` (as home or away) get that match's
    'Date'; all other teams keep the date already stored. The workbook is
    overwritten with the result.
    """
    storedDays = getMatchDays()

    # one row per (Date, team) covering both sides of every match
    playedDates = (results_df
                   .unpivot(index='Date', on=['home.team.name', 'away.team.name'], value_name='team.name')
                   .select('Date', 'team.name'))

    # the incoming date arrives as 'Date_right' because 'Date' already exists
    joined = storedDays.join(playedDates,
                             how='left',
                             left_on='Team',
                             right_on='team.name',
                             coalesce=True)

    # prefer the new date when there is one
    latestDate = (pl.when(pl.col('Date_right').is_null())
                  .then(pl.col('Date'))
                  .otherwise(pl.col('Date_right'))
                  .alias('Date'))
    refreshed = joined.with_columns(latestDate).drop('Date_right')

    #save updated version
    refreshed.write_excel('Data/matchDay.xlsx')

In [42]:
def updateMatchBothSides(results_df):
    """Append the given results to 'Data/matchesBothSides.xlsx' from both teams' viewpoints.

    Every match in ``results_df`` is added twice: once with the home team as
    team A and once with the away team as team A. 'round.Id' is then rebuilt
    as a running count over the sorted distinct (season, round) pairs and the
    workbook is overwritten.
    """
    history = getMatchesBothSides()

    sharedColumns = ['compSeason.year', 'round.roundNumber', 'venue.name']
    # results_df field suffix -> stored column suffix, in stored column order
    fieldMap = {'id': 'club.id',
                'name': 'name',
                'totalScore': 'score.totalScore',
                'goals': 'score.goals',
                'behinds': 'score.behinds'}

    def perspective(sideA, sideB):
        """Select/rename results_df so `sideA` becomes team A and `sideB` team B."""
        ordered = list(sharedColumns)
        renames = {}
        for side, letter in ((sideA, 'A'), (sideB, 'B')):
            for field, target in fieldMap.items():
                source = f'{side}.team.{field}'
                ordered.append(source)
                renames[source] = f'team.{letter}.{target}'
        return results_df.select(ordered).rename(renames)

    newRows = perspective('home', 'away').vstack(perspective('away', 'home'))

    combined = (history
                .drop('round.Id')
                .vstack(newRows)
                .sort('compSeason.year', 'round.roundNumber'))

    #sort out the round id
    roundLookup = (combined
                   .select('compSeason.year', 'round.roundNumber')
                   .unique()
                   .sort('compSeason.year', 'round.roundNumber')
                   .with_columns(pl.col('round.roundNumber').cum_count().alias('round.Id')))

    #Add in the roundId
    withRoundId = combined.join(roundLookup,
                                how='left',
                                on=['compSeason.year', 'round.roundNumber'],
                                coalesce=True)

    #save updated version
    withRoundId.write_excel('Data/matchesBothSides.xlsx')



## Functions That Are Checks

In [43]:
def checkUpdateStatus():
    """Decide what kind of update the stored match data needs.

    The stored matches are reduced to the rows of the latest round of the
    latest season and compared with the round from ``getPastRoundNumber()``.

    Returns
    -------
    'New Season' when that latest stored round has exactly 4 rows,
    'New Round' when its round number differs from the past round,
    'No Update' otherwise.
    """
    #Read the workbook once and reuse the filtered rows for both checks
    latestRows = (getMatchesBothSides()
        .filter(pl.col('compSeason.year') == pl.col('compSeason.year').max())
        .filter(pl.col('round.roundNumber') == pl.col('round.roundNumber').max())
        )

    mostRecentUpdate = latestRows.select('compSeason.year', 'round.roundNumber').unique()
    mostRecentUpdateSize = latestRows.height

    past = getPastRoundNumber()[0]

    #Check if new season
    #NOTE(review): the meaning of the row count 4 is not visible here — confirm
    if mostRecentUpdateSize == 4:
        updateType = 'New Season'
    elif mostRecentUpdate.select('round.roundNumber').item() != past:
        updateType = 'New Round'
    else:
        updateType = 'No Update'

    return updateType


In [44]:
def checkTeamNames(results_df):
    """Stop the run when the results contain rows that indicate a team-name mismatch.

    A row is flagged when either total score is null or, if the frame has
    them, when 'home.team.id' / 'away.team.id' is null. The ids come from a
    left join on the team name in ``getResults``, so a null id is what an
    unmatched name produces.

    Raises
    ------
    SystemExit
        When at least one row is flagged, after printing the affected names
        and the expected names. Previously the exception was created but never
        raised, and the count placeholder was printed literally.
    """
    checkColumns = ['home.team.totalScore', 'away.team.totalScore']
    #only check the id columns when the caller's frame carries them
    checkColumns += [c for c in ('home.team.id', 'away.team.id') if c in results_df.columns]

    hasNull = pl.col(checkColumns[0]).is_null()
    for column in checkColumns[1:]:
        hasNull = hasNull | pl.col(column).is_null()

    nullDF = results_df.filter(hasNull)

    if not nullDF.is_empty():
        print(f'There are at least {nullDF.height} name issues: Please update team replace name function')
        print('Team Names that are not matching:')
        print(nullDF.unpivot(index='Date', on=['home.team.name', 'away.team.name']).select('value'))
        print('Team Names should be:')
        print(getTeamIndex().select('team.A.name'))
        #exit the program
        raise SystemExit('Team name mismatch found in results')

## Update Function

In [45]:
def updateData():
    """Run the full data refresh when ``checkUpdateStatus()`` says one is needed.

    When an update is required, fetches the latest results, validates the
    team names, then updates the Elo ratings, match days and both-sides match
    history. Otherwise only prints that no update is required.
    """
    #Complete check for update type
    updateType = checkUpdateStatus()

    if updateType != 'No Update':
        #get latest results
        results = getResults()

        #check team names (the results frame is the required argument)
        checkTeamNames(results)

        #Update
        updateElo(results, updateType)
        updateMatchDays(results)
        updateMatchBothSides(results)

        print('Data Updated for New Round')
    else:
        print('No update Required')