Required initial steps:
    
    * The text file from https://afltables.com/afl/stats/biglists/bg3.txt is saved as llist.txt
    * The file is then processed through the bash script file a_updater.sh to convert it to a csv and make other modifications.
    * The bash file saves the processed file as newlist.csv, ready to be imported from within Python

# Round 21 2018

In [1]:
%%html
<img src="img/ess_stk.png" alt="Drawing" align="left" style="width: 800px;"/>

In [2]:
import pandas as pd
cols = ['seq','date','round','team1','gls_1','bhs_1','pts_1','team2','gls_2','bhs_2','pts_2','venue']

In [3]:
upd_round = pd.read_csv("/Users/stevegabriel/data_projects/sports_data/newlist.csv", names = cols)
trans = upd_round

In [4]:
# Modify index to start at 1 
trans.index += 1
trans.tail()

Unnamed: 0,seq,date,round,team1,gls_1,bhs_1,pts_1,team2,gls_2,bhs_2,pts_2,venue
15376,15376,11-Aug-2018,R21,GW Sydney,15,16,106,Adelaide,13,14,92,Manuka Oval
15377,15377,11-Aug-2018,R21,Collingwood,14,20,104,Brisbane Lions,11,7,73,Docklands
15378,15378,12-Aug-2018,R21,North Melbourne,12,13,85,Western Bulldogs,13,14,92,Docklands
15379,15379,12-Aug-2018,R21,Melbourne,10,18,78,Sydney,13,9,87,M.C.G.
15380,15380,12-Aug-2018,R21,Fremantle,15,11,101,Carlton,10,12,72,Perth Stadium


In [5]:
# Add states
nsw = ['S.C.G.','Sydney Showground','Stadium Australia','Blacktown','Albury']
qld = ['Gabba','Carrara','Cazaly\'s Stadium','Brisbane Exhibition']
sa = ['Football Park','Adelaide Oval']
wa = ['W.A.C.A.','Perth Stadium','Subiaco']
nt = ['Marrara Oval','Traeger Park']
tas = ['York Park','Bellerive Oval','North Hobart']
act = ['Manuka Oval','Bruce Stadium']
intl = ['Wellington','Jiangwan Stadium']

In [6]:
def locations(x):
    if x in nsw:
        return "NSW"
    elif x in sa:
        return "SA"
    elif x in wa:
        return "WA"
    elif x in nt:
        return "NT"
    elif x in tas:
        return "TAS"
    elif x in intl:
        return "INTL"
    elif x in act:
        return "ACT"
    elif x in qld:
        return "QLD"
    else: 
        return "VIC"

In [7]:
trans["state"] = trans["venue"].apply(locations)
trans["state"].value_counts()

VIC     13022
WA        638
SA        571
NSW       509
QLD       472
TAS        91
ACT        48
NT         24
INTL        5
Name: state, dtype: int64

In [8]:
## 1. HVAR & AGGR
trans['hvar'] = trans['pts_1'] - trans['pts_2']
trans['aggr'] = trans['pts_1'] + trans['pts_2']

In [9]:
## 2. YEAR & MONTH
import time
import datetime

In [10]:
trans['date'] = pd.to_datetime(trans['date'])
trans['year'], trans['month'] = trans['date'].dt.year, trans['date'].dt.month

In [11]:
## 3. ABBREVIATED CLUB NAMES
adelaide = ['Adelaide']
brisbane = ['Brisbane Bears','Brisbane Lions']
carlton = ['Carlton']
collingwood = ['Collingwood']
essendon = ['Essendon']
fitzroy = ['Fitzroy']
footscray = ['Footscray']
fremantle = ['Fremantle']
geelong = ['Geelong']
gold_coast = ['Gold Coast']
gws = ['GW Sydney']
hawthorn = ['Hawthorn']
melbourne = ['Melbourne']
north_melbourne = ['North Melbourne','Kangaroos']
port_adelaide = ['Port Adelaide']
richmond = ['Richmond']
south_melbourne = ['South Melbourne']
st_kilda = ['St Kilda']
sydney = ['Sydney']
west_coast = ['West Coast']
western_bulldogs = ['Western Bulldogs', 'Footscray']
university = ['University']

In [12]:
def team_shorten(x):
    if x in adelaide:
        return "AD"
    elif x in brisbane:
        return "BR"
    elif x in carlton:
        return "CA"
    elif x in collingwood:
        return "CO"
    elif x in essendon:
        return "ES"
    elif x in fitzroy:
        return "FI"
    elif x in fremantle:
        return "FR"
    elif x in geelong:
        return "GE"
    elif x in gold_coast:
        return "GC"
    elif x in gws:
        return "GW"
    elif x in hawthorn:
        return "HA"
    elif x in melbourne:
        return "ME"
    elif x in north_melbourne:
        return "NM"
    elif x in port_adelaide:
        return "PA"
    elif x in richmond:
        return "RI"
    elif x in south_melbourne:
        return "SM"
    elif x in st_kilda:
        return "SK"
    elif x in sydney:
        return "SY"
    elif x in west_coast:
        return "WC"
    elif x in western_bulldogs:
        return "WB"
    elif x in university:
        return "UN"
    else: 
        return "NOT_FOUND"

In [13]:
trans["t1"] = trans["team1"].apply(team_shorten)
trans["t2"] = trans["team2"].apply(team_shorten)

In [14]:
trans['t1'].value_counts().sum()

15380

##  FIX TEAM NAMES ANOMOLIES

* Merge Footscray & Western Bulldogs as **Western Bulldogs**
* Merge North Melbourne with Kangaroos as **North Melbourne**
* Merge Brisbane Lions with Brisbane Bears as **Brisbane**
* Leave Sydney and **South Melbourne** as separate
* Leave **Fitzroy** as separate
* Modify GW Sydney to **GWS Giants**

In [15]:
# Function from Analytics Vidhya
def coding(col, codeDict):
  colCoded = pd.Series(col, copy=True)
  for key, value in codeDict.items():
    colCoded.replace(key, value, inplace=True)
  return colCoded

In [16]:
trans["team1"] = coding(trans["team1"], {'Footscray':'Western Bulldogs'})
trans["team2"] = coding(trans["team2"], {'Footscray':'Western Bulldogs'})
trans["team1"] = coding(trans["team1"], {'Kangaroos':'North Melbourne'})
trans["team2"] = coding(trans["team2"], {'Kangaroos':'North Melbourne'})
trans["team1"] = coding(trans["team1"], {'Brisbane Bears':'Brisbane','Brisbane Lions':'Brisbane'})
trans["team2"] = coding(trans["team2"], {'Brisbane Bears':'Brisbane','Brisbane Lions':'Brisbane'})
trans["team1"] = coding(trans["team1"], {'GW Sydney':'GWS Giants'})
trans["team2"] = coding(trans["team2"], {'GW Sydney':'GWS Giants'})

In [17]:
trans["team2"].value_counts()

Collingwood         1272
Carlton             1251
Essendon            1220
Geelong             1218
St Kilda            1190
Melbourne           1189
Richmond            1118
North Melbourne      984
Hawthorn             976
Western Bulldogs     973
Fitzroy              960
South Melbourne      788
Sydney               430
West Coast           375
Brisbane             358
Adelaide             324
Fremantle            272
Port Adelaide        253
Gold Coast            87
GWS Giants            79
University            63
Name: team2, dtype: int64

## ALL DRAWS (BOOLEAN)

In [18]:
def reg_draw(hvar):
    if hvar == 0:
        return 1
    else: return 0

In [19]:
trans["draw_rt"] = trans["hvar"].apply(reg_draw)

In [20]:
trans['draw_rt'].value_counts()

0    15222
1      158
Name: draw_rt, dtype: int64

### The 158 draws above, represent the VFL/AFL games that have concluded in a draw result

### The two ET (extra-time) draws below are statistically relevent, since they also ended regular full-time with both teams on the same score. However extra-time was applied to create a result, due to finals constraints.

In [21]:
def et_draw(seq):
    if seq == 10794:
        return 1
    elif seq == 13203:
        return 1
    else: return 0

In [22]:
trans["etime_draw"] = trans["seq"].apply(et_draw)
trans['etime_draw'].value_counts()

0    15378
1        2
Name: etime_draw, dtype: int64

In [23]:
### Add the two columns
trans['draws_all']  = trans['draw_rt'] + trans['etime_draw']
trans['draws_all'].value_counts()

0    15220
1      160
Name: draws_all, dtype: int64

In [24]:
# Check
trans.iloc[:,:][(trans.etime_draw == 1)]

Unnamed: 0,seq,date,round,team1,gls_1,bhs_1,pts_1,team2,gls_2,bhs_2,...,state,hvar,aggr,year,month,t1,t2,draw_rt,etime_draw,draws_all
10794,10794,1994-09-10,QF,North Melbourne,15,24,114,Hawthorn,13,13,...,VIC,23,205,1994,9,NM,HA,0,1,1
13203,13203,2007-09-14,SF,Collingwood,13,15,93,West Coast,10,14,...,WA,19,167,2007,9,CO,WC,0,1,1


In [25]:
# 6. GAME CATEGORY
finals = ['EF','QF','SF','PF','GF']

In [26]:
def game_cat(x):
    if x in finals:
        return "final"
    else: 
        return "reg_season"

In [27]:
trans["type"] = trans["round"].apply(game_cat)

In [28]:
trans['type'].value_counts()

reg_season    14734
final           646
Name: type, dtype: int64

### So, there have been 646 finals up to the end of the 2017 season.

In [29]:
def grouping_2K(seq):
    n=500
    if seq <= 4*n:
        return 1
    elif seq <= 8*n:
        return 2
    elif seq <= 12*n:
        return 3
    elif seq <= 16*n:
        return 4
    elif seq <= 20*n:
        return 5
    elif seq <= 24*n:
        return 6
    elif seq <= 28*n:
        return 7
    else: return 8

In [30]:
trans["grp_2K"] = trans["seq"].apply(grouping_2K)

In [31]:
def grouping_1K(seq):
    n=1000
    if seq <= n:
        return 1
    elif seq <= 2*n:
        return 2
    elif seq <= 3*n:
        return 3
    elif seq <= 4*n:
        return 4
    elif seq <= 5*n:
        return 5
    elif seq <= 6*n:
        return 6
    elif seq <= 7*n:
        return 7
    elif seq <= 8*n:
        return 8
    elif seq <= 9*n:
        return 9
    elif seq <= 10*n:
        return 10
    elif seq <= 11*n:
        return 11
    elif seq <= 12*n:
        return 12
    elif seq <= 13*n:
        return 13
    elif seq <= 14*n:
        return 14
    elif seq <= 15*n:
        return 15
    else: return 16

In [32]:
trans["grp_1K"] = trans["seq"].apply(grouping_1K)

In [33]:
# Export the data frame for use in afl draws EDA
trans.to_csv('all_afl_wide.csv')

## ADD res1 and res2 columns as categorical 1 = win, 2 = loss, 3 = draw based on hvar (ignoring the two extra time finals)

In [34]:
def result_cat_h(hvar):
    if hvar > 0:
        return 1
    elif hvar < 0:
        return 2
    else: return 3

In [35]:
def result_cat_a(hvar):
    if hvar > 0:
        return 2
    elif hvar < 0:
        return 1
    else: return 3

In [36]:
trans['res1'] = trans['hvar'].apply(result_cat_h)
trans['res2'] = trans['hvar'].apply(result_cat_a)

In [37]:
trans.tail(9)

Unnamed: 0,seq,date,round,team1,gls_1,bhs_1,pts_1,team2,gls_2,bhs_2,...,t1,t2,draw_rt,etime_draw,draws_all,type,grp_2K,grp_1K,res1,res2
15372,15372,2018-08-10,R21,Essendon,18,14,122,St Kilda,11,13,...,ES,SK,0,0,0,reg_season,8,16,1,2
15373,15373,2018-08-11,R21,Hawthorn,10,11,71,Geelong,8,12,...,HA,GE,0,0,0,reg_season,8,16,1,2
15374,15374,2018-08-11,R21,Gold Coast,7,9,51,Richmond,19,11,...,GC,RI,0,0,0,reg_season,8,16,2,1
15375,15375,2018-08-11,R21,Port Adelaide,9,4,58,West Coast,9,8,...,PA,WC,0,0,0,reg_season,8,16,2,1
15376,15376,2018-08-11,R21,GWS Giants,15,16,106,Adelaide,13,14,...,GW,AD,0,0,0,reg_season,8,16,1,2
15377,15377,2018-08-11,R21,Collingwood,14,20,104,Brisbane,11,7,...,CO,BR,0,0,0,reg_season,8,16,1,2
15378,15378,2018-08-12,R21,North Melbourne,12,13,85,Western Bulldogs,13,14,...,NM,WB,0,0,0,reg_season,8,16,2,1
15379,15379,2018-08-12,R21,Melbourne,10,18,78,Sydney,13,9,...,ME,SY,0,0,0,reg_season,8,16,2,1
15380,15380,2018-08-12,R21,Fremantle,15,11,101,Carlton,10,12,...,FR,CA,0,0,0,reg_season,8,16,1,2


## STAGE TWO
## 9. Reduce and re-order the data frame to prepare for for Long format

In [38]:
tracker = trans
tracker = tracker[['year', 'team1','pts_1','team2','pts_2','venue','t1','t2','hvar']]
tracker.tail()

Unnamed: 0,year,team1,pts_1,team2,pts_2,venue,t1,t2,hvar
15376,2018,GWS Giants,106,Adelaide,92,Manuka Oval,GW,AD,14
15377,2018,Collingwood,104,Brisbane,73,Docklands,CO,BR,31
15378,2018,North Melbourne,85,Western Bulldogs,92,Docklands,NM,WB,-7
15379,2018,Melbourne,78,Sydney,87,M.C.G.,ME,SY,-9
15380,2018,Fremantle,101,Carlton,72,Perth Stadium,FR,CA,29


### Create two new columns with values 1 and 0 for the two subsets

In [39]:
tracker['subset_1'] = 1
tracker['subset_2'] = 0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [40]:
tracker.tail(9)

Unnamed: 0,year,team1,pts_1,team2,pts_2,venue,t1,t2,hvar,subset_1,subset_2
15372,2018,Essendon,122,St Kilda,79,Docklands,ES,SK,43,1,0
15373,2018,Hawthorn,71,Geelong,60,M.C.G.,HA,GE,11,1,0
15374,2018,Gold Coast,51,Richmond,125,Carrara,GC,RI,-74,1,0
15375,2018,Port Adelaide,58,West Coast,62,Adelaide Oval,PA,WC,-4,1,0
15376,2018,GWS Giants,106,Adelaide,92,Manuka Oval,GW,AD,14,1,0
15377,2018,Collingwood,104,Brisbane,73,Docklands,CO,BR,31,1,0
15378,2018,North Melbourne,85,Western Bulldogs,92,Docklands,NM,WB,-7,1,0
15379,2018,Melbourne,78,Sydney,87,M.C.G.,ME,SY,-9,1,0
15380,2018,Fremantle,101,Carlton,72,Perth Stadium,FR,CA,29,1,0


In [41]:
droplist_1 = ['subset_2']
droplist_2 = ['subset_1']

In [42]:
left_team = tracker.drop(droplist_1, axis = 1)
right_team = tracker.drop(droplist_2, axis = 1)

In [43]:
left_team.shape

(15380, 10)

In [44]:
right_team.shape

(15380, 10)

# Rename columns in each dframe 

In [45]:
left_team.columns = ['year','team','tm_pts','opp_team','op_pts','venue','tm','op','hvar','lr']
right_team.columns = ['year','opp_team','op_pts','team','tm_pts','venue','op','tm','hvar','lr']

### Now re-order the second dframe to match columns

### Note double brackets to re-order

In [46]:
right_team = right_team[['year', 'team','tm_pts','opp_team','op_pts','venue','tm','op','hvar','lr']]
right_team.tail()

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr
15376,2018,Adelaide,92,GWS Giants,106,Manuka Oval,AD,GW,14,0
15377,2018,Brisbane,73,Collingwood,104,Docklands,BR,CO,31,0
15378,2018,Western Bulldogs,92,North Melbourne,85,Docklands,WB,NM,-7,0
15379,2018,Sydney,87,Melbourne,78,M.C.G.,SY,ME,-9,0
15380,2018,Carlton,72,Fremantle,101,Perth Stadium,CA,FR,29,0


In [47]:
right_team.tail()

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr
15376,2018,Adelaide,92,GWS Giants,106,Manuka Oval,AD,GW,14,0
15377,2018,Brisbane,73,Collingwood,104,Docklands,BR,CO,31,0
15378,2018,Western Bulldogs,92,North Melbourne,85,Docklands,WB,NM,-7,0
15379,2018,Sydney,87,Melbourne,78,M.C.G.,SY,ME,-9,0
15380,2018,Carlton,72,Fremantle,101,Perth Stadium,CA,FR,29,0


In [48]:
left_team.tail()

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr
15376,2018,GWS Giants,106,Adelaide,92,Manuka Oval,GW,AD,14,1
15377,2018,Collingwood,104,Brisbane,73,Docklands,CO,BR,31,1
15378,2018,North Melbourne,85,Western Bulldogs,92,Docklands,NM,WB,-7,1
15379,2018,Melbourne,78,Sydney,87,M.C.G.,ME,SY,-9,1
15380,2018,Fremantle,101,Carlton,72,Perth Stadium,FR,CA,29,1


## CONCATENATION

### Now very easily, can concatenate the two dframes, as in si_week11

In [49]:
merge_ft = pd.concat([left_team, right_team], axis=0)
merge_ft.head()

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr
1,1897,Fitzroy,49,Carlton,16,Brunswick St,FI,CA,33,1
2,1897,Collingwood,41,St Kilda,16,Victoria Park,CO,SK,25,1
3,1897,Geelong,24,Essendon,47,Corio Oval,GE,ES,-23,1
4,1897,South Melbourne,27,Melbourne,44,Lake Oval,SM,ME,-17,1
5,1897,South Melbourne,40,Carlton,36,Lake Oval,SM,CA,4,1


#### The new long data frame consisting of 30,000 plus rows:

In [50]:
merge_ft.shape

(30760, 10)

# The following is to keep each game together (two rows per game)

In [51]:
merge_ft.sort_index(inplace=True)
merge_ft.head(10)

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr
1,1897,Fitzroy,49,Carlton,16,Brunswick St,FI,CA,33,1
1,1897,Carlton,16,Fitzroy,49,Brunswick St,CA,FI,33,0
2,1897,Collingwood,41,St Kilda,16,Victoria Park,CO,SK,25,1
2,1897,St Kilda,16,Collingwood,41,Victoria Park,SK,CO,25,0
3,1897,Geelong,24,Essendon,47,Corio Oval,GE,ES,-23,1
3,1897,Essendon,47,Geelong,24,Corio Oval,ES,GE,-23,0
4,1897,South Melbourne,27,Melbourne,44,Lake Oval,SM,ME,-17,1
4,1897,Melbourne,44,South Melbourne,27,Lake Oval,ME,SM,-17,0
5,1897,South Melbourne,40,Carlton,36,Lake Oval,SM,CA,4,1
5,1897,Carlton,36,South Melbourne,40,Lake Oval,CA,SM,4,0


In [78]:
merge_ft.tail()

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr,neg_dummy,new_lr,tm_var,outcome
15378,2018,North Melbourne,85,Western Bulldogs,92,Docklands,NM,WB,-7,1,7,1,-7,L
15379,2018,Sydney,87,Melbourne,78,M.C.G.,SY,ME,-9,0,9,-1,9,W
15379,2018,Melbourne,78,Sydney,87,M.C.G.,ME,SY,-9,1,9,1,-9,L
15380,2018,Fremantle,101,Carlton,72,Perth Stadium,FR,CA,29,1,-29,1,29,W
15380,2018,Carlton,72,Fremantle,101,Perth Stadium,CA,FR,29,0,-29,-1,-29,L


## There are actually 30,000 plus rows, indexing isn't simple sequential

## Create team result column

In [52]:
def neg_hvar(hvar):
        return -1*hvar

In [53]:
merge_ft["neg_dummy"] = merge_ft['hvar'].apply(neg_hvar)

In [54]:
merge_ft.tail()

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr,neg_dummy
15378,2018,North Melbourne,85,Western Bulldogs,92,Docklands,NM,WB,-7,1,7
15379,2018,Sydney,87,Melbourne,78,M.C.G.,SY,ME,-9,0,9
15379,2018,Melbourne,78,Sydney,87,M.C.G.,ME,SY,-9,1,9
15380,2018,Fremantle,101,Carlton,72,Perth Stadium,FR,CA,29,1,-29
15380,2018,Carlton,72,Fremantle,101,Perth Stadium,CA,FR,29,0,-29


In [55]:
def team_var(hvar):
    if merge_ft['lr'] == 1:
        return hvar
    else: return neg_dummy

In [56]:
def new_lr(lr):
    if lr == 0:
        return -1
    else: return 1

In [57]:
merge_ft["new_lr"] = merge_ft['lr'].apply(new_lr)
merge_ft["tm_var"] = merge_ft['new_lr']*merge_ft['hvar']

In [58]:
merge_ft.tail()

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr,neg_dummy,new_lr,tm_var
15378,2018,North Melbourne,85,Western Bulldogs,92,Docklands,NM,WB,-7,1,7,1,-7
15379,2018,Sydney,87,Melbourne,78,M.C.G.,SY,ME,-9,0,9,-1,9
15379,2018,Melbourne,78,Sydney,87,M.C.G.,ME,SY,-9,1,9,1,-9
15380,2018,Fremantle,101,Carlton,72,Perth Stadium,FR,CA,29,1,-29,1,29
15380,2018,Carlton,72,Fremantle,101,Perth Stadium,CA,FR,29,0,-29,-1,-29


### Can now remove the columns new_lr and neg_dummy, which were used to create tm_var

In [79]:
merge_ft = merge_ft.drop(['neg_dummy', 'new_lr'], axis=1)

In [80]:
merge_ft.head()

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr,tm_var,outcome
1,1897,Fitzroy,49,Carlton,16,Brunswick St,FI,CA,33,1,33,W
1,1897,Carlton,16,Fitzroy,49,Brunswick St,CA,FI,33,0,-33,L
2,1897,Collingwood,41,St Kilda,16,Victoria Park,CO,SK,25,1,25,W
2,1897,St Kilda,16,Collingwood,41,Victoria Park,SK,CO,25,0,-25,L
3,1897,Geelong,24,Essendon,47,Corio Oval,GE,ES,-23,1,-23,L


### Note that the column 'lr' denotes left right, so there are only two different values.

In [81]:
merge_ft['lr'].value_counts()

1    15380
0    15380
Name: lr, dtype: int64

## New function for win/loss/draw (W/L/D)


In [82]:
def outcome(tm_var):
    if tm_var >= 1:
        return "W"
    elif tm_var <= -1:
        return "L"
    elif tm_var == 0:
        return "D"
    else: return "Unknown"

In [83]:
merge_ft["outcome"] = merge_ft['tm_var'].apply(outcome)

In [84]:
merge_ft.head()

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr,tm_var,outcome
1,1897,Fitzroy,49,Carlton,16,Brunswick St,FI,CA,33,1,33,W
1,1897,Carlton,16,Fitzroy,49,Brunswick St,CA,FI,33,0,-33,L
2,1897,Collingwood,41,St Kilda,16,Victoria Park,CO,SK,25,1,25,W
2,1897,St Kilda,16,Collingwood,41,Victoria Park,SK,CO,25,0,-25,L
3,1897,Geelong,24,Essendon,47,Corio Oval,GE,ES,-23,1,-23,L


In [85]:
merge_ft.tail()

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr,tm_var,outcome
15378,2018,North Melbourne,85,Western Bulldogs,92,Docklands,NM,WB,-7,1,-7,L
15379,2018,Sydney,87,Melbourne,78,M.C.G.,SY,ME,-9,0,9,W
15379,2018,Melbourne,78,Sydney,87,M.C.G.,ME,SY,-9,1,-9,L
15380,2018,Fremantle,101,Carlton,72,Perth Stadium,FR,CA,29,1,29,W
15380,2018,Carlton,72,Fremantle,101,Perth Stadium,CA,FR,29,0,-29,L


## Example filtering view

# Hawks last 10 games at the Sydney Cricket Ground

In [86]:
merge_ft.iloc[:,:][(merge_ft.tm == 'HA') & (merge_ft.venue == 'S.C.G.')].tail(10)

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr,tm_var,outcome
11830,2000,Hawthorn,78,Sydney,109,S.C.G.,HA,SY,31,0,-31,L
12430,2003,Hawthorn,110,Sydney,93,S.C.G.,HA,SY,-17,0,17,W
12537,2004,Hawthorn,79,Sydney,80,S.C.G.,HA,SY,1,0,-1,L
12657,2005,Hawthorn,55,Sydney,118,S.C.G.,HA,SY,63,0,-63,L
13196,2007,Hawthorn,69,Sydney,141,S.C.G.,HA,SY,72,0,-72,L
13724,2010,Hawthorn,85,Sydney,129,S.C.G.,HA,SY,44,0,-44,L
13831,2011,Hawthorn,106,Sydney,60,S.C.G.,HA,SY,-46,0,46,W
14143,2012,Hawthorn,102,Sydney,95,S.C.G.,HA,SY,-7,0,7,W
14922,2016,Hawthorn,75,Sydney,70,S.C.G.,HA,SY,-5,0,5,W
15075,2017,Hawthorn,81,Sydney,75,S.C.G.,HA,SY,-6,0,6,W


## Demon's last 10 games against GWS

In [87]:
merge_ft.iloc[:,:][(merge_ft.tm == 'ME') & (merge_ft.op == 'GW')].tail(10)

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr,tm_var,outcome
14066,2012,Melbourne,135,GWS Giants,57,M.C.G.,ME,GW,78,1,78,W
14133,2012,Melbourne,84,GWS Giants,59,Manuka Oval,ME,GW,-25,0,25,W
14200,2013,Melbourne,144,GWS Giants,103,M.C.G.,ME,GW,41,1,41,W
14321,2013,Melbourne,87,GWS Giants,124,Sydney Showground,ME,GW,37,0,-37,L
14398,2014,Melbourne,47,GWS Giants,79,Sydney Showground,ME,GW,32,0,-32,L
14552,2014,Melbourne,34,GWS Giants,98,M.C.G.,ME,GW,-64,1,-64,L
14592,2015,Melbourne,56,GWS Giants,101,Manuka Oval,ME,GW,45,0,-45,L
14776,2015,Melbourne,103,GWS Giants,77,Docklands,ME,GW,26,1,26,W
14788,2016,Melbourne,80,GWS Giants,78,M.C.G.,ME,GW,2,1,2,W
15157,2017,Melbourne,62,GWS Giants,97,Manuka Oval,ME,GW,35,0,-35,L


## Demon's last 10 games against GWS at the MCG

In [89]:
merge_ft.iloc[:,:][(merge_ft.tm == 'ME') & (merge_ft.op == 'GW') & (merge_ft.venue == 'M.C.G.')].tail(4)

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr,tm_var,outcome
14066,2012,Melbourne,135,GWS Giants,57,M.C.G.,ME,GW,78,1,78,W
14200,2013,Melbourne,144,GWS Giants,103,M.C.G.,ME,GW,41,1,41,W
14552,2014,Melbourne,34,GWS Giants,98,M.C.G.,ME,GW,-64,1,-64,L
14788,2016,Melbourne,80,GWS Giants,78,M.C.G.,ME,GW,2,1,2,W


## Kangaroos last 8

In [90]:
merge_ft.iloc[:,:][(merge_ft.tm == 'NM') & (merge_ft.year == 2018)].tail(8)

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr,tm_var,outcome
15316,2018,North Melbourne,77,Western Bulldogs,75,Docklands,NM,WB,-2,0,2,W
15325,2018,North Melbourne,108,Essendon,125,Docklands,NM,ES,17,0,-17,L
15333,2018,North Melbourne,95,Gold Coast,58,Docklands,NM,GC,37,1,37,W
15343,2018,North Melbourne,98,Sydney,104,Docklands,NM,SY,-6,1,-6,L
15346,2018,North Melbourne,64,Collingwood,130,M.C.G.,NM,CO,66,0,-66,L
15360,2018,North Melbourne,81,West Coast,41,Bellerive Oval,NM,WC,40,1,40,W
15365,2018,North Melbourne,107,Brisbane,104,Gabba,NM,BR,-3,0,3,W
15378,2018,North Melbourne,85,Western Bulldogs,92,Docklands,NM,WB,-7,1,-7,L


## Ballarat games - check venue is shown correctly

In [98]:
merge_ft.iloc[:,:][(merge_ft.tm == 'WB') & (merge_ft.venue != 'Docklands') & (merge_ft.year == 2017)].tail(10)

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr,tm_var,outcome
15017,2017,Western Bulldogs,73,Fremantle,89,Subiaco,WB,FR,16,0,-16,L
15039,2017,Western Bulldogs,73,GWS Giants,75,Manuka Oval,WB,GW,2,0,-2,L
15057,2017,Western Bulldogs,61,West Coast,69,Subiaco,WB,WC,8,0,-8,L
15066,2017,Western Bulldogs,81,Geelong,104,Kardinia Park,WB,GE,23,0,-23,L
15089,2017,Western Bulldogs,42,Sydney,88,S.C.G.,WB,SY,46,0,-46,L
15120,2017,Western Bulldogs,45,Adelaide,104,Adelaide Oval,WB,AD,59,0,-59,L
15136,2017,Western Bulldogs,82,Carlton,62,M.C.G.,WB,CA,-20,0,20,W
15141,2017,Western Bulldogs,110,Gold Coast,56,Cazaly's Stadium,WB,GC,54,1,54,W
15159,2017,Western Bulldogs,103,Brisbane,89,Gabba,WB,BR,-14,0,14,W
15175,2017,Western Bulldogs,79,Port Adelaide,96,Eureka Stadium,WB,PA,-17,1,-17,L
