In [1]:
import pandas as pd
import numpy as np

# Our own code
from src.data.data_builder import DataBuilder, BettingData, MatchData
from src.data.data_transformer import DataTransformer
from src.data.feature_builder import FeatureBuilder
from src.model.metrics import measure_estimators, regression_accuracy

In [13]:
# Load the betting and match CSVs, combine them, and clean the result.
# Each data class is paired with the CSV file passed alongside it to DataBuilder.
data_sources = (
    (BettingData, 'data/afl_betting.csv'),
    (MatchData, 'data/ft_match_list.csv'),
)
data_classes, csv_paths = zip(*data_sources)

raw_df = DataBuilder(data_classes, csv_paths).concat()
model_df = DataTransformer(raw_df).clean()

# NOTE: the feature-building step is currently disabled, so no cumulative
# features are added in this cell. To re-enable it:
#   fb = FeatureBuilder(model_df); fb.transform(); team_df = fb.df.dropna()
model_df

Unnamed: 0,home_team,away_team,home_win_odds,home_line_odds,away_win_odds,away_line_odds,home_score,away_score,round_number,year
0,Richmond,Carlton,2.89,16.5,1.42,-16.5,64.0,120.0,1,2010
1,Geelong,Essendon,1.21,-28.5,4.50,28.5,125.0,94.0,1,2010
2,Sydney,St Kilda,2.64,14.5,1.49,-14.5,88.0,96.0,1,2010
3,Brisbane,West Coast,1.32,-21.5,3.42,21.5,114.0,82.0,1,2010
4,Melbourne,Hawthorn,4.95,31.5,1.18,-31.5,61.0,117.0,1,2010
5,Port Adelaide,North Melbourne,1.42,-16.5,2.89,16.5,96.0,82.0,1,2010
6,Fremantle,Adelaide,1.96,1.5,1.85,-1.5,118.0,62.0,1,2010
7,Western Bulldogs,Collingwood,1.62,-8.5,2.31,8.5,93.0,129.0,1,2010
8,Brisbane,Carlton,1.34,-19.5,3.30,19.5,107.0,88.0,2,2010
9,West Coast,Port Adelaide,1.63,-10.5,2.30,10.5,86.0,89.0,2,2010


In [18]:
def _oddsmaker_predictions(matches):
    """Build one row of oddsmaker tips per match in ``matches``.

    Reads the odds and score columns of ``matches`` and returns a new frame
    holding the match identifiers plus:

    * ``model`` - the constant label ``'oddsmakers'``.
    * ``predicted_home_margin`` - ``home_line_odds`` with its sign flipped.
    * ``home_margin`` - ``home_score`` minus ``away_score``.
    * ``predicted_home_win`` - 1 when the home team is tipped, else 0.
    * ``home_win`` / ``draw`` - 1/0 flags derived from the two scores.
    * ``tip_point`` - 1 when the tip matched the result or the match was drawn.
    """
    home_score, away_score = matches['home_score'], matches['away_score']
    home_win_odds, away_win_odds = matches['home_win_odds'], matches['away_win_odds']
    home_line_odds, away_line_odds = matches['home_line_odds'], matches['away_line_odds']

    # The home team is tipped when either its win odds or its line odds are
    # lower than the away team's.
    lower_win_odds = home_win_odds < away_win_odds
    lower_line_odds = home_line_odds < away_line_odds
    # If odds are all equal, predict home team
    all_odds_equal = (home_win_odds == away_win_odds) & (home_line_odds == away_line_odds)
    predicted_home_win = (lower_win_odds | lower_line_odds | all_odds_equal).astype(int)

    home_win = (home_score > away_score).astype(int)
    is_draw = (home_score == away_score).astype(int)
    # A tip scores when it matches the result; drawn matches always score.
    tip_point = ((predicted_home_win == home_win) | is_draw).astype(int)

    return (matches.loc[:, ['year', 'round_number', 'home_team', 'away_team']]
                   .assign(model='oddsmakers',
                           predicted_home_margin=home_line_odds * -1,
                           home_margin=home_score - away_score,
                           predicted_home_win=predicted_home_win,
                           home_win=home_win,
                           draw=is_draw,
                           tip_point=tip_point))


predict_df = _oddsmaker_predictions(model_df)
predict_df

Unnamed: 0,year,round_number,home_team,away_team,model,predicted_home_margin,home_margin,predicted_home_win,home_win,draw,tip_point
0,2010,1,Richmond,Carlton,oddsmakers,-16.5,-56.0,0,0,0,1
1,2010,1,Geelong,Essendon,oddsmakers,28.5,31.0,1,1,0,1
2,2010,1,Sydney,St Kilda,oddsmakers,-14.5,-8.0,0,0,0,1
3,2010,1,Brisbane,West Coast,oddsmakers,21.5,32.0,1,1,0,1
4,2010,1,Melbourne,Hawthorn,oddsmakers,-31.5,-56.0,0,0,0,1
5,2010,1,Port Adelaide,North Melbourne,oddsmakers,16.5,14.0,1,1,0,1
6,2010,1,Fremantle,Adelaide,oddsmakers,-1.5,56.0,0,1,0,0
7,2010,1,Western Bulldogs,Collingwood,oddsmakers,8.5,-36.0,1,0,0,0
8,2010,2,Brisbane,Carlton,oddsmakers,19.5,19.0,1,1,0,1
9,2010,2,West Coast,Port Adelaide,oddsmakers,10.5,-3.0,1,0,0,0


In [19]:
# Persist the oddsmaker predictions as CSV, leaving out the DataFrame index.
# NOTE(review): this path is relative to the parent directory ('../data/'),
# whereas the input CSVs are referenced as 'data/...' - confirm both resolve
# to the same data folder from the notebook's working directory.
predictions_csv = '../data/model_predictions.csv'
predict_df.to_csv(predictions_csv, index=False)