# AFL Modelling
Predicting the winner of games, with the probability of winning and the margin of win. <br>
Using data from 2015 till 2023 with 2023 to be used as testing data.

In [1]:
#import packages
import polars as pl
import numpy as np
pl.Config.set_fmt_str_lengths(150);
pl.Config.set_tbl_rows(1000);

In [2]:
#set up R extension
%load_ext rpy2.ipython



Get the fixtures from 2015-2023, taking into account the bye infomation 2019 onwards. 

In [3]:
%%R -o seasons,seasonBye
library('fitzRoy')
library('dplyr')
library('tidyr')

seasonList <- c(2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023)
first <- TRUE
for(s in seasonList) {
    
    df <- fetch_fixture(season = s, comp = "AFLM")

    if (first) {
        seasons <- df
        first <- FALSE
    }else{
        seasons <- seasons %>% 
                      bind_rows(df)
    }
    
}

seasonBye <- seasons %>% 
                select(compSeason.id, compSeason.year, round.roundNumber, round.byes) %>%
                filter(compSeason.year >= 2019) %>%
                unnest(round.byes) %>%
                distinct() %>%
                mutate(round.nextRound = round.roundNumber + 1,
                      ByeFlag = 1) %>%
                select(compSeason.id, round.nextRound, club.id, club.name, ByeFlag)

seasons <- seasons %>% select(-round.byes)
    

i Returning data for "All Rounds, 2015"
v Returning data for "All Rounds, 2015" ... done

i Returning data for "All Rounds, 2016"
v Returning data for "All Rounds, 2016" ... done

i Returning data for "All Rounds, 2017"
v Returning data for "All Rounds, 2017" ... done

i Returning data for "All Rounds, 2018"
v Returning data for "All Rounds, 2018" ... done

i Returning data for "All Rounds, 2019"
v Returning data for "All Rounds, 2019" ... done

i Returning data for "All Rounds, 2020"
v Returning data for "All Rounds, 2020" ... done

i Returning data for "All Rounds, 2021"
v Returning data for "All Rounds, 2021" ... done

i Returning data for "All Rounds, 2022"
v Returning data for "All Rounds, 2022" ... done

i Returning data for "All Rounds, 2023"
v Returning data for "All Rounds, 2023" ... done




Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union



Assign -1 as the winner id for a draw, since 2015 there have been 15 draws. We also have a cancelled game, Cats vs. Crows 2015 R14 which we are just going to remove.

In [4]:
#Clean and prep the season bye info
seasonBye = pl.from_pandas(seasonBye).drop('club.name').with_columns(pl.col('round.nextRound').cast(pl.Int32))

In [5]:
#The inital cleaning working with the gneral match details. We can use this as our base for creating the desired attributes
seasonsBase = (pl.from_pandas(seasons)
                .sort('round.id')
                #Keep only the columns we want
                .select('id', 'status', 'compSeason.id', 'compSeason.year', 'round.roundNumber', 'home.team.club.id', 'home.team.name',
                        'away.team.club.id', 'away.team.name', 'home.score.goals', 'home.score.behinds', 'home.score.totalScore',
                        'away.score.goals', 'away.score.behinds', 'away.score.totalScore', 'venue.id', 'venue.name', 'venue.state')
                #Label the id winner (-1 for a draw)
                .with_columns(pl.when(pl.col('home.score.totalScore') > pl.col('away.score.totalScore')).then(pl.col('home.team.club.id'))
                                .when(pl.col('home.score.totalScore') < pl.col('away.score.totalScore')).then(pl.col('away.team.club.id'))
                                .otherwise(pl.lit(-1)).alias('winner'))
                #Remove the Crows vs. Cats 2015 R14 game which was cancelled
                .filter(pl.col('id') != 847)
                #Label Previous comp year
                .with_columns((pl.col('compSeason.year') - 1).alias('prevComp.year'))
                #Label in sequential order (currently missing numbers in existing round.id) to get the previous games 
     .join((pl.from_pandas(seasons)
                .select('compSeason.year', 'round.roundNumber')
                .unique()
                .sort('compSeason.year', 'round.roundNumber')
                .with_columns(pl.col('round.roundNumber').cum_count().alias('round.id'))), how='left', on=['compSeason.year', 'round.roundNumber'])
     #create the margin
     .with_columns((pl.col('home.score.totalScore') - pl.col('away.score.totalScore')).abs().alias('win.margin'))   
     #Add a bye flag
     .join(seasonBye, how='left', left_on=['compSeason.id', 'round.roundNumber', 'home.team.club.id'], right_on=['compSeason.id', 'round.nextRound', 'club.id'])
     .join(seasonBye, how='left', left_on=['compSeason.id', 'round.roundNumber', 'away.team.club.id'], right_on=['compSeason.id', 'round.nextRound', 'club.id'])
     #Rename bye flag
     .rename({'ByeFlag' : 'home.bye.flag',
              'ByeFlag_right' : 'away.bye.flag'})
     #Fill in bye flag nulls
     .with_columns(pl.col('home.bye.flag').fill_null(0).cast(pl.Int32),
                   pl.col('away.bye.flag').fill_null(0).cast(pl.Int32))
    #Fill venue blank states with china (1 is not china but NZ but is in 2015 so we wont worry about that)
    .with_columns(pl.col('venue.state').fill_null('China'))
    
)

In [6]:
#Create a dataframe with all the match details (repeated so that boths teams get to be team A and team B)
allMatchesBothSides = (seasonsBase
     .select('compSeason.year', 'round.roundNumber', 'round.id', 'home.team.club.id', 'home.team.name', 'home.score.totalScore', 'away.team.club.id', 'away.team.name', 'away.score.totalScore')
     .rename({'home.team.name': 'team.A.name',
              'home.team.club.id' : 'team.A.club.id',
              'home.score.totalScore' : 'team.A.score.totalScore',
              'away.team.name': 'team.B.name',
              'away.team.club.id' : 'team.B.club.id',
              'away.score.totalScore' : 'team.B.score.totalScore'})
     .vstack(seasonsBase
     .select('compSeason.year', 'round.roundNumber', 'round.id', 'away.team.club.id', 'away.team.name', 'away.score.totalScore', 'home.team.club.id', 'home.team.name', 'home.score.totalScore')
     .rename({'home.team.name': 'team.B.name',
              'home.team.club.id' : 'team.B.club.id',
              'home.score.totalScore' : 'team.B.score.totalScore',
              'away.team.name': 'team.A.name',
              'away.team.club.id' : 'team.A.club.id',
              'away.score.totalScore' : 'team.A.score.totalScore'}))
     .sort('team.A.club.id', 'round.id')
)

## Get the ladder poistions at beginning round and last seasons final ladder position.

In [7]:
%%R -o ladder

season <- c(2015,2016,2017,2018,2019,2020,2021, 2022, 2023)
rounds <- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23)
first <- TRUE

for(s in season){
    for(r in rounds){

        if(s==2020 && r <= 18){
            df <- fetch_ladder_afl(season=s, round_number=r)
        }else if(s!=2020){
            df <- fetch_ladder_afl(season=s, round_number=r)
        }

        if(first){
            ladder <- df
            first <- FALSE
        }else{
            ladder <- ladder %>% bind_rows(df)
        }
    }

    if(s== 2023){
        ladder <- ladder %>% bind_rows(fetch_ladder_afl(season=s, round_number=24))
    }
}

ladder = ladder %>% select(season, round_number, position, team.club.id, team.club.name)
        

In [8]:
ladderNormal = (pl.from_pandas(ladder)
     .with_columns((pl.col('round_number') + 1).alias('nextRound'))
     .drop('team.club.name', 'round_number')
                  )

In [9]:
#get the final ladder of each season
ladderFinal = (pl.from_pandas(ladder)
                   #Get the final ladder at the end of the season
                   .filter(pl.col('round_number') == pl.col('round_number').max().over('season'))
                   #get the next season (for assigning to the nest round 1)
                   .with_columns((pl.col('season') + 1).alias('Nextseason'),
                                 pl.lit(1).alias('first_round'))
              )

In [10]:
#Add home and away ladder positions before 
ladderPosition = (seasonsBase
     .select('compSeason.year', 'round.roundNumber')
     .unique()
     .sort('compSeason.year', 'round.roundNumber')
     #from round 2 till finals
     .join(ladderNormal, how='left', left_on=['compSeason.year', 'round.roundNumber'], right_on=['season', 'nextRound'])
     #for round 1 the final ladder of previous season
     .join(ladderFinal.drop('season', 'round_number'), how='left', left_on=['compSeason.year', 'round.roundNumber'], right_on=['Nextseason', 'first_round'])
     .rename({'position_right' : 'position1',
              'team.club.id_right' : 'team.club.id1'})
     #for the finals
     .join(ladderFinal.drop('PrevSeason'), how='left', left_on='compSeason.year', right_on='season')
     #put all the parts together
     .with_columns(pl.when(~pl.col('position').is_null()).then(pl.col('position'))
                     .when(~pl.col('position1').is_null()).then(pl.col('position1')).otherwise(pl.col('position_right')).alias('position'),
                   pl.when(~pl.col('team.club.id').is_null()).then(pl.col('team.club.id'))
                     .when(~pl.col('team.club.id1').is_null()).then(pl.col('team.club.id1')).otherwise(pl.col('team.club.id_right')).alias('team.club.id'))
     #select only what we want
     .select('compSeason.year', 'round.roundNumber', 'team.club.id', 'position')
     .unique()
     .sort('compSeason.year', 'round.roundNumber', 'position')
     #get the fianl positioning of the previous year
     .join(ladderFinal.drop('season', 'round_number', 'first_round', 'team.club.name'), how='left', left_on=['compSeason.year', 'team.club.id'], right_on=['Nextseason', 'team.club.id'])
     .rename({'position_right': 'last.season.position'})
                           
)           

In [11]:
seasonsStep2 = (seasonsBase
                 #join the ladderPosition to get the home teams ladder info
                 .join(ladderPosition.rename({'position':'home.ladder.position', 'last.season.position': 'home.last.season.position'}),
                       how='left',
                       left_on=['compSeason.year', 'round.roundNumber', 'home.team.club.id'],
                       right_on=['compSeason.year', 'round.roundNumber', 'team.club.id'])
                 #Clean up any duplicates
                 .unique()
                 #join the ladderPosition to get the away teams ladder info
                 .join(ladderPosition.rename({'position':'away.ladder.position', 'last.season.position': 'away.last.season.position'}),
                       how='left',
                       left_on=['compSeason.year', 'round.roundNumber', 'away.team.club.id'],
                       right_on=['compSeason.year', 'round.roundNumber', 'team.club.id'])
                 #clean up any duplicates
                 .unique()
                 #sort into order
                 .sort('compSeason.year', 'round.roundNumber')
     
                )

## Get the distnace travelled

In [12]:
#Create a table for distance between states and where the team is based (based on capital to capital)
vic = pl.Series([0, 654, 714, 2727, 1373, 0, 0, 1373, 0, 0, 0, 654, 0, 0, 0, 2727, 714, 0])
nsw = pl.Series([714, 1165, 0, 3297, 730, 714, 714, 730, 714, 714, 714, 1165, 714, 714, 714, 3297, 0, 714])
qld = pl.Series([1373, 1602, 730, 3613, 0, 1373, 1373, 0, 1373, 1373, 1373, 1602, 1373, 1373, 1373, 3613, 730, 1373])
nt = pl.Series([3140, 2609, 3144, 2647, 2846, 3140, 3140, 2846, 3140, 3140, 3140, 2609, 3140, 3140, 3140, 2647, 3144, 3140])
wa = pl.Series([2727, 2135, 3297, 0, 3613, 2727, 2727, 3613, 2727,2727, 2727, 2135, 2727, 2727, 2727, 0, 3297, 2727])
sa = pl.Series([654, 0, 1165, 2135, 1602, 654, 654, 1602, 654, 654, 654, 0, 654, 654, 654, 2135, 1165, 654])
tas = pl.Series([597, 1161, 1056, 3015, 1786, 597, 597, 1786, 597, 597, 597, 1161, 597, 597, 597, 3015, 1161, 597])
act = pl.Series([467, 960, 942, 3095, 942, 467, 467, 247, 467, 467, 467, 960, 467, 467, 467, 3095, 247, 467])
china = pl.Series([8036, 6956, 7592, 7050, 6225, 8036, 8036, 6225, 8036, 8036, 8036, 6956, 8036, 8036, 8036, 7050, 7592, 8036])
distanceTravelled = (seasonsBase
                         .select('home.team.name', 'home.team.club.id').unique()
                         .sort('home.team.club.id')
                         .with_columns(vic.alias('VIC'),
                                       nsw.alias('NSW'),
                                       qld.alias('QLD'),
                                       nt.alias('NT'),
                                       wa.alias('WA'),
                                       sa.alias('SA'),
                                       tas.alias('TAS'),
                                       act.alias('ACT'),
                                       china.alias('China'))
                         .rename({'home.team.club.id' : 'team.club.id'})
                         .drop('home.team.name')
                         .melt(id_vars='team.club.id', variable_name='State', value_name='Distance')
                        )

In [13]:
seasonStep3 = (seasonsStep2
                 #Home team distance travelled to venue
                 .join(distanceTravelled.rename({'Distance' : 'home.distance'}), how='left', left_on=['home.team.club.id', 'venue.state'], right_on=['team.club.id', 'State'])
                 #Away team distance travelled to venue
                 .join(distanceTravelled.rename({'Distance' : 'away.distance'}), how='left', left_on=['away.team.club.id', 'venue.state'], right_on=['team.club.id', 'State'])
                )

## Team ELO Rating

In [14]:
#Create a link between teams club id and the elo index 0-17 
eloIndexTeams = allMatchesBothSides.select('team.A.name', 'team.A.club.id').unique().sort('team.A.club.id').with_columns((pl.col('team.A.club.id').cum_count() - 1).alias('EloIndex'))

In [15]:
#create a function to find a teams prediction
def eloTeamPrediction(ratingTeamA, ratingTeamB):
    exp = (-1*(ratingTeamA - ratingTeamB))/400
    b = 1 + 10 ** exp
    result = 1/b
    return result

In [16]:
#create a function to find the teams result, inital B = 0.004 per Multifactorial analysis of factors influencing elite australian football match outcomes: a machine learning approach
def eloTeamResult(scoreTeamA, scoreTeamB):
    exp = -0.04*(scoreTeamA - scoreTeamB)
    b = 1 + np.exp(exp)
    result = 1/b
    return result

In [17]:
#Create a function to get change in teams elo, set K=67.559 per Multifactorial analysis of factors influencing elite australian football match outcomes: a machine learning approach
def eloChange(ratingTeamA, ratingTeamB, scoreTeamA, scoreTeamB):
    #get Team prediction
    prediction = eloTeamPrediction(ratingTeamA, ratingTeamB)
    
    #get actual result
    actual = eloTeamResult(scoreTeamA , scoreTeamB)
    
    #get elo change
    result = 20*(actual - prediction)
    
    #get new elo
    newElo = ratingTeamA + result
    
    return newElo

In [18]:
#Create a function to get the elo for the start of the season, set Carry over at 0.70628 per Multifactorial analysis of factors influencing elite australian football match outcomes: a machine learning approach
def eloNewSeason(lastElo):
    carryOver = 0.80
    result = carryOver * lastElo + 1500 * (1 - carryOver)
    return result

In [19]:
#Function to rule them all! assign elo values
def eloFucntions(roundNumber, teamAScore, teamBScore, prevEloA, prevEloB):
    #If first round of the season get the carry over elo value
    if roundNumber == 1:
        elo = eloNewSeason(prevEloA)
    #If normal round then find new score
    else:
        elo = eloChange(prevEloA, prevEloB, teamAScore, teamBScore)
    #return current elo 
    return elo   
   
    

In [20]:
#Elo list of lists to record elo's and access past elo values, with index = elo index translation found in eloIndexTeams table
#Starting elo as default 1500
eloScores = [[1500] * 18]
#starting roundIndex 

In [21]:
def eloNumber(roundId, roundNumber, clubA, clubB, roundIdA, roundIdB, indexA, indexB):
    #set roundId for indexing
    roundId = roundId - 1
    #create default elo
    elo=1500.0
    #Check if new round by checking a new list has been added for the round
    if roundId >= np.shape(eloScores)[0]:
        #Append new list for new round
        eloScores.append(eloScores[roundId - 1])
        
    #Deal with very first round we consider, simply create base for next round
    if roundId == 0:
        elo = 1500.0
    #For all other rounds get the elo
    else: 
        
        #Get the new elo score
        elo = eloFucntions(roundNumber, teamScores[clubA][roundIdA-1], teamScores[clubB][roundIdB-1], eloScores[roundId-1][indexA], eloScores[roundId-1][indexB])
        #Change list of list to reflect the new score
        eloScores[roundId][indexA] = elo
        

    #Return Elo
    return elo
        

In [22]:
teamScores = dict(allMatchesBothSides.drop('team.B.club.id', 'team.B.name', 'team.B.score.totalScore').group_by('team.A.club.id').agg(pl.col('team.A.score.totalScore')).to_numpy())

In [23]:
eloScoreDF = (allMatchesBothSides
                 #Get the cleaned indexes for A teams
                 .join(eloIndexTeams.rename({'EloIndex': 'elo.index.A'}).drop('team.A.name'), how='left', on='team.A.club.id')
                 #Get the cleaned indexes for B teams
                 .join(eloIndexTeams.rename({'EloIndex': 'elo.index.B'}).drop('team.A.name'), how='left', left_on='team.B.club.id', right_on='team.A.club.id')
                 .with_columns((pl.col('team.A.name').cum_count() - 1).over(pl.col('team.A.name')).alias('team.A.round.index'))
                 .sort('round.id', 'team.B.club.id')
                 .with_columns((pl.col('team.B.name').cum_count() - 1).over(pl.col('team.B.name')).alias('team.B.round.index'))
                 #sort so the assigning of elo scores are pasted on past club info
                 .sort('round.id', 'team.A.club.id')
                 #Get the elo scores
                 .with_columns(pl.struct(['round.roundNumber', 'round.id', 'team.A.club.id', 'team.B.club.id', 'team.A.round.index', 'team.B.round.index', 'elo.index.A', 'elo.index.B'])
                                 .map_elements(lambda x : eloNumber(x['round.id'], x['round.roundNumber'], x['team.A.club.id'], x['team.B.club.id'], x['team.A.round.index'], x['team.B.round.index'], x['elo.index.A'], x['elo.index.B']))
                                 .alias('elo.score'))
                 #Keep only the info we actually want
                 .select('compSeason.year', 'round.roundNumber', 'round.id', 'team.A.club.id', 'elo.score')
                )

In [24]:
#Join the elo scores up into the main dataframe
seasonStep3 = (seasonsStep2
     #elo scores for the home teams
     .join(eloScoreDF.rename({'elo.score' : 'home.elo.score'}),
           how='left',
           left_on=['compSeason.year', 'round.roundNumber', 'round.id', 'home.team.club.id'],
           right_on=['compSeason.year', 'round.roundNumber', 'round.id', 'team.A.club.id'])
     #elo scores for the away teams
    .join(eloScoreDF.rename({'elo.score' : 'away.elo.score'}),
           how='left',
           left_on=['compSeason.year', 'round.roundNumber', 'round.id', 'away.team.club.id'],
           right_on=['compSeason.year', 'round.roundNumber', 'round.id', 'team.A.club.id'])
)

In [25]:
#Test the predictive power of the ELO scores (sitting at about 62% accuracy in 2023 so at least more than simply choosing the home team!)
(seasonStep3.with_columns(pl.when(pl.col('home.elo.score') > pl.col('away.elo.score')).then(pl.col('home.team.club.id'))
                           .when(pl.col('away.elo.score') > pl.col('home.elo.score')).then(pl.col('away.team.club.id')).otherwise(pl.lit(-1)).alias('Prediction'))
             .with_columns(pl.when(pl.col('Prediction') == pl.col('winner')).then(pl.lit(1)).otherwise(pl.lit(0)).alias('Correct'))
             .filter(pl.col('compSeason.year') == 2023)
             .group_by('Correct')
             .len()
)

Correct,len
i32,u32
1,134
0,82


## Home Advantage/Disadvantage
Turns out (even not considering 2020, which we are) the term 'home team' can mean fairly little which might go some way to explaining how the chances of winning if the home team is only slightly elvated due to been the home team. Therefore to consider this, we will get the portion of games each time played at the venue in the previous year, this of course can mean that there are some venues that no-one played at last year and the advantage/disadvantage will be 0 (and not undefinied).

In [56]:
#Get the portion of games played at venue current season
teamRoundVenue = (allMatchesBothSides
                      #Get the venues attached for both team A and team B
                     .join(seasonsBase.select('round.id', 'home.team.club.id', 'venue.name'), how='left', left_on=['round.id', 'team.A.club.id'], right_on=['round.id', 'home.team.club.id'])
                     .join(seasonsBase.select('round.id', 'away.team.club.id', 'venue.name'), how='left', left_on=['round.id', 'team.A.club.id'], right_on=['round.id', 'away.team.club.id'])
                     #Get all the venues in the same column
                     .with_columns(pl.when(pl.col('venue.name').is_null()).then(pl.col('venue.name_right')).otherwise(pl.col('venue.name')).alias('venue.name'))
                      #Drop columns not required going forward
                     .drop('venue.name_right', 'team.A.score.totalScore', 'team.B.club.id', 'team.B.name', 'team.B.score.totalScore')
                     #Get the total number of games played each season by each team
                     .with_columns(pl.col('round.id').len().over('compSeason.year', 'team.A.club.id').alias('GamesPlayed'))
                     #Get the number of times each team played at a venue during a season
                     .group_by('compSeason.year', 'team.A.club.id', 'venue.name')
                     .agg(pl.col('team.A.name').first(),
                          pl.col('GamesPlayed').first(),
                          pl.col('round.id').len().alias('VenuePlayed'))
                     #How much of their time did the team spend at any venue
                     .with_columns((pl.col('VenuePlayed')/pl.col('GamesPlayed')).alias('VenuePortion'))
                     .sort('compSeason.year', 'team.A.club.id', 'VenuePlayed')
                     #Create the next season so we can join to get portion of time played last season
                     .with_columns((pl.col('compSeason.year') + 1).alias('NextSeason'))
                      #Keep only what we need going forward
                     .select('NextSeason', 'team.A.club.id', 'venue.name', 'VenuePortion')
                 )

In [57]:
#Get portion of games played at venue last season
teamRoundVenue = (allMatchesBothSides
                      #Again get all the venues in the same column and keep only what required
                     .join(seasonsBase.select('round.id', 'home.team.club.id', 'venue.name'), how='left', left_on=['round.id', 'team.A.club.id'], right_on=['round.id', 'home.team.club.id'])
                     .join(seasonsBase.select('round.id', 'away.team.club.id', 'venue.name'), how='left', left_on=['round.id', 'team.A.club.id'], right_on=['round.id', 'away.team.club.id'])
                     .with_columns(pl.when(pl.col('venue.name').is_null()).then(pl.col('venue.name_right')).otherwise(pl.col('venue.name')).alias('venue.name'))
                     .drop('venue.name_right', 'team.A.score.totalScore', 'team.B.club.id', 'team.B.name', 'team.B.score.totalScore')
                     #Get the previous seasons portion of games played at any venue
                     .join(teamRoundVenue, how='left', left_on=['compSeason.year', 'team.A.club.id', 'venue.name'], right_on=['NextSeason', 'team.A.club.id', 'venue.name'])
                     #For venue not played at the previous 
                     .with_columns(pl.col('VenuePortion').fill_null(0))
                     .select('round.id', 'team.A.club.id', 'venue.name', 'VenuePortion')
                )

In [58]:
teamRoundVenue.head(1)

round.id,team.A.club.id,venue.name,VenuePortion
u32,i32,str,f64
1,1,"""MCG""",0.0


In [70]:
#Attach back to the seasonsStep3 and calculate the home advatage and disadvantage
seasonStep4 = (seasonStep3
     #join portion for home teams
     .join(teamRoundVenue, how='left', left_on=['round.id', 'home.team.club.id', 'venue.name'], right_on=['round.id', 'team.A.club.id', 'venue.name'])
     #join portion for away.teams
     .join(teamRoundVenue, how='left', left_on=['round.id', 'away.team.club.id', 'venue.name'], right_on=['round.id', 'team.A.club.id', 'venue.name'])
     #claify the column names
     .rename({'VenuePortion' : 'home.venue.portion',
              'VenuePortion_right' : 'away.venue.portion'})
     #create home advantage
     .with_columns(pl.when(pl.col('away.venue.portion') == 0).then(pl.lit(1)).otherwise((pl.col('home.venue.portion') / pl.col('away.venue.portion'))).alias('home.advantage'))
     #Create away disadvantage
     .with_columns(pl.when(pl.col('home.venue.portion') == 0).then(pl.lit(1)).otherwise((pl.col('away.venue.portion')/pl.col('home.venue.portion'))).alias('away.advantage'))
)

## Days Between Matches
Have to go in and get afttables match info to get dates (would this work with upcoming matches, may need to be revised if not)

In [None]:
%%R
