### Data file preparation steps:
    
    * The raw text file from https://afltables.com/afl/stats/biglists/bg3.txt is saved as llist.txt
    * The file llist.txt is then processed through the bash script file a_updater.sh to add commas at the appropriate locations and make other modifications, by typing ./a_updater into Terminal - ready to be imported from within Jupyter.

# Post week 13 of 2019 season

In [1]:
#%%html
#<img src="img/ess_stk.png" alt="Drawing" align="left" style="width: 800px;"/>

In [2]:
import os 
os.getcwd() 

'/Users/stevegabriel/data_projects/sports_data'

In [3]:
import pandas as pd

In [4]:
cols = ['seq','date','round','team1','gls_1','bhs_1','pts_1','team2','gls_2','bhs_2','pts_2','venue']

#### The file will contain all matches starting from the first ever VFL game in 1987 - more than fifteen thousand games.

In [5]:
upd_round = pd.read_csv("newlist.csv", names = cols)
trans = upd_round

In [6]:
# Modify index to start at 1 
trans.index += 1
trans.tail()

Unnamed: 0,seq,date,round,team1,gls_1,bhs_1,pts_1,team2,gls_2,bhs_2,pts_2,venue
15514,15514,14-Jun-2019,R13,Essendon,14,12,96,Hawthorn,11,11,77,Docklands
15515,15515,15-Jun-2019,R13,Gold Coast,11,10,76,St Kilda,11,14,80,Riverway Stadium
15516,15516,15-Jun-2019,R13,Fremantle,15,10,100,Port Adelaide,12,7,79,Perth Stadium
15517,15517,15-Jun-2019,R13,Carlton,15,10,100,Western Bulldogs,15,13,103,Docklands
15518,15518,16-Jun-2019,R13,North Melbourne,10,8,68,GW Sydney,14,7,91,Bellerive Oval


#### Shown above, the most four finals matches played on the weekend.

### Adding additional information including state, and addition team names as two letter codes

In [7]:
# Add states
nsw = ['S.C.G.','Sydney Showground','Stadium Australia','Blacktown','Albury']
qld = ['Gabba','Carrara','Cazaly\'s Stadium','Brisbane Exhibition']
sa = ['Football Park','Adelaide Oval']
wa = ['W.A.C.A.','Perth Stadium','Subiaco']
nt = ['Marrara Oval','Traeger Park']
tas = ['York Park','Bellerive Oval','North Hobart']
act = ['Manuka Oval','Bruce Stadium']
intl = ['Wellington','Jiangwan Stadium']

In [8]:
def locations(x):
    if x in nsw:
        return "NSW"
    elif x in sa:
        return "SA"
    elif x in wa:
        return "WA"
    elif x in nt:
        return "NT"
    elif x in tas:
        return "TAS"
    elif x in intl:
        return "INTL"
    elif x in act:
        return "ACT"
    elif x in qld:
        return "QLD"
    else: 
        return "VIC"

In [9]:
trans["state"] = trans["venue"].apply(locations)
trans["state"].value_counts()

VIC     13095
WA        654
SA        585
NSW       522
QLD       486
TAS        95
ACT        50
NT         25
INTL        6
Name: state, dtype: int64

In [10]:
## 1. HVAR & AGGR
trans['hvar'] = trans['pts_1'] - trans['pts_2']
trans['aggr'] = trans['pts_1'] + trans['pts_2']

In [11]:
## 2. YEAR & MONTH
import time
import datetime

In [12]:
trans['date'] = pd.to_datetime(trans['date'])
trans['year'], trans['month'] = trans['date'].dt.year, trans['date'].dt.month

In [13]:
## 3. ABBREVIATED CLUB NAMES
adelaide = ['Adelaide']
brisbane = ['Brisbane Bears','Brisbane Lions']
carlton = ['Carlton']
collingwood = ['Collingwood']
essendon = ['Essendon']
fitzroy = ['Fitzroy']
footscray = ['Footscray']
fremantle = ['Fremantle']
geelong = ['Geelong']
gold_coast = ['Gold Coast']
gws = ['GW Sydney']
hawthorn = ['Hawthorn']
melbourne = ['Melbourne']
north_melbourne = ['North Melbourne','Kangaroos']
port_adelaide = ['Port Adelaide']
richmond = ['Richmond']
south_melbourne = ['South Melbourne']
st_kilda = ['St Kilda']
sydney = ['Sydney']
west_coast = ['West Coast']
western_bulldogs = ['Western Bulldogs', 'Footscray']
university = ['University']

In [14]:
def team_shorten(x):
    if x in adelaide:
        return "AD"
    elif x in brisbane:
        return "BR"
    elif x in carlton:
        return "CA"
    elif x in collingwood:
        return "CO"
    elif x in essendon:
        return "ES"
    elif x in fitzroy:
        return "FI"
    elif x in fremantle:
        return "FR"
    elif x in geelong:
        return "GE"
    elif x in gold_coast:
        return "GC"
    elif x in gws:
        return "GW"
    elif x in hawthorn:
        return "HA"
    elif x in melbourne:
        return "ME"
    elif x in north_melbourne:
        return "NM"
    elif x in port_adelaide:
        return "PA"
    elif x in richmond:
        return "RI"
    elif x in south_melbourne:
        return "SM"
    elif x in st_kilda:
        return "SK"
    elif x in sydney:
        return "SY"
    elif x in west_coast:
        return "WC"
    elif x in western_bulldogs:
        return "WB"
    elif x in university:
        return "UN"
    else: 
        return "NOT_FOUND"

In [15]:
trans["t1"] = trans["team1"].apply(team_shorten)
trans["t2"] = trans["team2"].apply(team_shorten)

In [16]:
trans['t1'].value_counts().sum()

15518

##  FIX TEAM NAMES ANOMOLIES

* Merge Footscray & Western Bulldogs as **Western Bulldogs**
* Merge North Melbourne with Kangaroos as **North Melbourne**
* Merge Brisbane Lions with Brisbane Bears as **Brisbane**
* Leave Sydney and **South Melbourne** as separate
* Leave **Fitzroy** as separate
* Modify GW Sydney to **GWS Giants**

In [17]:
# Function from Analytics Vidhya
def coding(col, codeDict):
  colCoded = pd.Series(col, copy=True)
  for key, value in codeDict.items():
    colCoded.replace(key, value, inplace=True)
  return colCoded

In [18]:
trans["team1"] = coding(trans["team1"], {'Footscray':'Western Bulldogs'})
trans["team2"] = coding(trans["team2"], {'Footscray':'Western Bulldogs'})
trans["team1"] = coding(trans["team1"], {'Kangaroos':'North Melbourne'})
trans["team2"] = coding(trans["team2"], {'Kangaroos':'North Melbourne'})
trans["team1"] = coding(trans["team1"], {'Brisbane Bears':'Brisbane','Brisbane Lions':'Brisbane'})
trans["team2"] = coding(trans["team2"], {'Brisbane Bears':'Brisbane','Brisbane Lions':'Brisbane'})
trans["team1"] = coding(trans["team1"], {'GW Sydney':'GWS Giants'})
trans["team2"] = coding(trans["team2"], {'GW Sydney':'GWS Giants'})

In [19]:
trans["team2"].value_counts()

Collingwood         1280
Carlton             1258
Essendon            1228
Geelong             1225
Melbourne           1197
St Kilda            1196
Richmond            1126
North Melbourne      992
Hawthorn             986
Western Bulldogs     981
Fitzroy              960
South Melbourne      788
Sydney               438
West Coast           382
Brisbane             365
Adelaide             331
Fremantle            279
Port Adelaide        261
Gold Coast            94
GWS Giants            88
University            63
Name: team2, dtype: int64

#### The dataframe with the additional column. Currently it has one game per row.

In [20]:
trans.head(10)

Unnamed: 0,seq,date,round,team1,gls_1,bhs_1,pts_1,team2,gls_2,bhs_2,pts_2,venue,state,hvar,aggr,year,month,t1,t2
1,1,1897-05-08,R1,Fitzroy,6,13,49,Carlton,2,4,16,Brunswick St,VIC,33,65,1897,5,FI,CA
2,2,1897-05-08,R1,Collingwood,5,11,41,St Kilda,2,4,16,Victoria Park,VIC,25,57,1897,5,CO,SK
3,3,1897-05-08,R1,Geelong,3,6,24,Essendon,7,5,47,Corio Oval,VIC,-23,71,1897,5,GE,ES
4,4,1897-05-08,R1,South Melbourne,3,9,27,Melbourne,6,8,44,Lake Oval,VIC,-17,71,1897,5,SM,ME
5,5,1897-05-15,R2,South Melbourne,6,4,40,Carlton,5,6,36,Lake Oval,VIC,4,76,1897,5,SM,CA
6,6,1897-05-15,R2,Essendon,4,6,30,Collingwood,8,2,50,East Melbourne,VIC,-20,80,1897,5,ES,CO
7,7,1897-05-15,R2,St Kilda,3,8,26,Fitzroy,10,6,66,Junction Oval,VIC,-40,92,1897,5,SK,FI
8,8,1897-05-15,R2,Melbourne,9,10,64,Geelong,3,1,19,M.C.G.,VIC,45,83,1897,5,ME,GE
9,9,1897-05-22,R3,Collingwood,6,5,41,Geelong,5,7,37,Victoria Park,VIC,4,78,1897,5,CO,GE
10,10,1897-05-22,R3,Fitzroy,5,9,39,Melbourne,7,8,50,Brunswick St,VIC,-11,89,1897,5,FI,ME


## ALL DRAWS (BOOLEAN)

In [21]:
def reg_draw(hvar):
    if hvar == 0:
        return 1
    else: return 0

In [22]:
trans["draw_rt"] = trans["hvar"].apply(reg_draw)

In [23]:
trans['draw_rt'].value_counts()

0    15360
1      158
Name: draw_rt, dtype: int64

### The 158 draws above, represent the VFL/AFL games that have concluded in a draw result

### The two ET (extra-time) draws below are statistically relevent, since they also ended regular full-time with both teams on the same score. However extra-time was applied to create a result, due to finals constraints.

In [24]:
def et_draw(seq):
    if seq == 10794:
        return 1
    elif seq == 13203:
        return 1
    else: return 0

In [25]:
trans["etime_draw"] = trans["seq"].apply(et_draw)
trans['etime_draw'].value_counts()

0    15516
1        2
Name: etime_draw, dtype: int64

In [26]:
### Add the two columns
trans['draws_all']  = trans['draw_rt'] + trans['etime_draw']
trans['draws_all'].value_counts()

0    15358
1      160
Name: draws_all, dtype: int64

In [27]:
# Check
trans.iloc[:,:][(trans.etime_draw == 1)]

Unnamed: 0,seq,date,round,team1,gls_1,bhs_1,pts_1,team2,gls_2,bhs_2,...,state,hvar,aggr,year,month,t1,t2,draw_rt,etime_draw,draws_all
10794,10794,1994-09-10,QF,North Melbourne,15,24,114,Hawthorn,13,13,...,VIC,23,205,1994,9,NM,HA,0,1,1
13203,13203,2007-09-14,SF,Collingwood,13,15,93,West Coast,10,14,...,WA,19,167,2007,9,CO,WC,0,1,1


In [28]:
# 6. GAME CATEGORY
finals = ['EF','QF','SF','PF','GF']

In [29]:
def game_cat(x):
    if x in finals:
        return "final"
    else: 
        return "reg_season"

In [30]:
trans["type"] = trans["round"].apply(game_cat)

In [31]:
trans['type'].value_counts()

reg_season    14863
final           655
Name: type, dtype: int64

### So, there have been 650 finals up to the end of week one of 2018 finals series.

In [32]:
def grouping_2K(seq):
    n=500
    if seq <= 4*n:
        return 1
    elif seq <= 8*n:
        return 2
    elif seq <= 12*n:
        return 3
    elif seq <= 16*n:
        return 4
    elif seq <= 20*n:
        return 5
    elif seq <= 24*n:
        return 6
    elif seq <= 28*n:
        return 7
    else: return 8

In [33]:
trans["grp_2K"] = trans["seq"].apply(grouping_2K)

In [34]:
def grouping_1K(seq):
    n=1000
    if seq <= n:
        return 1
    elif seq <= 2*n:
        return 2
    elif seq <= 3*n:
        return 3
    elif seq <= 4*n:
        return 4
    elif seq <= 5*n:
        return 5
    elif seq <= 6*n:
        return 6
    elif seq <= 7*n:
        return 7
    elif seq <= 8*n:
        return 8
    elif seq <= 9*n:
        return 9
    elif seq <= 10*n:
        return 10
    elif seq <= 11*n:
        return 11
    elif seq <= 12*n:
        return 12
    elif seq <= 13*n:
        return 13
    elif seq <= 14*n:
        return 14
    elif seq <= 15*n:
        return 15
    else: return 16

In [35]:
trans["grp_1K"] = trans["seq"].apply(grouping_1K)

In [36]:
# Export the data frame for use in afl draws EDA
trans.to_csv('all_afl_wide.csv')

## ADD res1 and res2 columns as categorical 1 = win, 2 = loss, 3 = draw based on hvar (ignoring the two extra time finals)

In [37]:
def result_cat_h(hvar):
    if hvar > 0:
        return 1
    elif hvar < 0:
        return 2
    else: return 3

In [38]:
def result_cat_a(hvar):
    if hvar > 0:
        return 2
    elif hvar < 0:
        return 1
    else: return 3

In [39]:
trans['res1'] = trans['hvar'].apply(result_cat_h)
trans['res2'] = trans['hvar'].apply(result_cat_a)

In [40]:
trans.tail(9)

Unnamed: 0,seq,date,round,team1,gls_1,bhs_1,pts_1,team2,gls_2,bhs_2,...,t1,t2,draw_rt,etime_draw,draws_all,type,grp_2K,grp_1K,res1,res2
15510,15510,2019-06-08,R12,Adelaide,12,10,82,GWS Giants,9,7,...,AD,GW,0,0,0,reg_season,8,16,1,2
15511,15511,2019-06-09,R12,Sydney,18,8,116,West Coast,10,11,...,SY,WC,0,0,0,reg_season,8,16,1,2
15512,15512,2019-06-10,R12,Collingwood,15,8,98,Melbourne,7,15,...,CO,ME,0,0,0,reg_season,8,16,1,2
15513,15513,2019-06-13,R13,Adelaide,15,11,101,Richmond,9,14,...,AD,RI,0,0,0,reg_season,8,16,1,2
15514,15514,2019-06-14,R13,Essendon,14,12,96,Hawthorn,11,11,...,ES,HA,0,0,0,reg_season,8,16,1,2
15515,15515,2019-06-15,R13,Gold Coast,11,10,76,St Kilda,11,14,...,GC,SK,0,0,0,reg_season,8,16,2,1
15516,15516,2019-06-15,R13,Fremantle,15,10,100,Port Adelaide,12,7,...,FR,PA,0,0,0,reg_season,8,16,1,2
15517,15517,2019-06-15,R13,Carlton,15,10,100,Western Bulldogs,15,13,...,CA,WB,0,0,0,reg_season,8,16,2,1
15518,15518,2019-06-16,R13,North Melbourne,10,8,68,GWS Giants,14,7,...,NM,GW,0,0,0,reg_season,8,16,2,1


## STAGE TWO
## 9. Reduce and re-order the data frame to prepare for for Long format

In [41]:
tracker = trans
tracker = tracker[['year', 'team1','pts_1','team2','pts_2','venue','t1','t2','hvar']]
tracker.tail()

Unnamed: 0,year,team1,pts_1,team2,pts_2,venue,t1,t2,hvar
15514,2019,Essendon,96,Hawthorn,77,Docklands,ES,HA,19
15515,2019,Gold Coast,76,St Kilda,80,Riverway Stadium,GC,SK,-4
15516,2019,Fremantle,100,Port Adelaide,79,Perth Stadium,FR,PA,21
15517,2019,Carlton,100,Western Bulldogs,103,Docklands,CA,WB,-3
15518,2019,North Melbourne,68,GWS Giants,91,Bellerive Oval,NM,GW,-23


### Create two new columns with values 1 and 0 for the two subsets

In [42]:
tracker['subset_1'] = 1
tracker['subset_2'] = 0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [43]:
tracker.tail(9)

Unnamed: 0,year,team1,pts_1,team2,pts_2,venue,t1,t2,hvar,subset_1,subset_2
15510,2019,Adelaide,82,GWS Giants,61,Adelaide Oval,AD,GW,21,1,0
15511,2019,Sydney,116,West Coast,71,S.C.G.,SY,WC,45,1,0
15512,2019,Collingwood,98,Melbourne,57,M.C.G.,CO,ME,41,1,0
15513,2019,Adelaide,101,Richmond,68,Adelaide Oval,AD,RI,33,1,0
15514,2019,Essendon,96,Hawthorn,77,Docklands,ES,HA,19,1,0
15515,2019,Gold Coast,76,St Kilda,80,Riverway Stadium,GC,SK,-4,1,0
15516,2019,Fremantle,100,Port Adelaide,79,Perth Stadium,FR,PA,21,1,0
15517,2019,Carlton,100,Western Bulldogs,103,Docklands,CA,WB,-3,1,0
15518,2019,North Melbourne,68,GWS Giants,91,Bellerive Oval,NM,GW,-23,1,0


In [44]:
droplist_1 = ['subset_2']
droplist_2 = ['subset_1']

In [45]:
left_team = tracker.drop(droplist_1, axis = 1)
right_team = tracker.drop(droplist_2, axis = 1)

In [46]:
left_team.shape

(15518, 10)

In [47]:
right_team.shape

(15518, 10)

# Rename columns in each dframe 

In [48]:
left_team.columns = ['year','team','tm_pts','opp_team','op_pts','venue','tm','op','hvar','lr']
right_team.columns = ['year','opp_team','op_pts','team','tm_pts','venue','op','tm','hvar','lr']

### Now re-order the second dframe to match columns

### Note double brackets to re-order

In [49]:
right_team = right_team[['year', 'team','tm_pts','opp_team','op_pts','venue','tm','op','hvar','lr']]
right_team.tail()

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr
15514,2019,Hawthorn,77,Essendon,96,Docklands,HA,ES,19,0
15515,2019,St Kilda,80,Gold Coast,76,Riverway Stadium,SK,GC,-4,0
15516,2019,Port Adelaide,79,Fremantle,100,Perth Stadium,PA,FR,21,0
15517,2019,Western Bulldogs,103,Carlton,100,Docklands,WB,CA,-3,0
15518,2019,GWS Giants,91,North Melbourne,68,Bellerive Oval,GW,NM,-23,0


In [50]:
right_team.tail()

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr
15514,2019,Hawthorn,77,Essendon,96,Docklands,HA,ES,19,0
15515,2019,St Kilda,80,Gold Coast,76,Riverway Stadium,SK,GC,-4,0
15516,2019,Port Adelaide,79,Fremantle,100,Perth Stadium,PA,FR,21,0
15517,2019,Western Bulldogs,103,Carlton,100,Docklands,WB,CA,-3,0
15518,2019,GWS Giants,91,North Melbourne,68,Bellerive Oval,GW,NM,-23,0


In [51]:
left_team.tail()

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr
15514,2019,Essendon,96,Hawthorn,77,Docklands,ES,HA,19,1
15515,2019,Gold Coast,76,St Kilda,80,Riverway Stadium,GC,SK,-4,1
15516,2019,Fremantle,100,Port Adelaide,79,Perth Stadium,FR,PA,21,1
15517,2019,Carlton,100,Western Bulldogs,103,Docklands,CA,WB,-3,1
15518,2019,North Melbourne,68,GWS Giants,91,Bellerive Oval,NM,GW,-23,1


## CONCATENATION

### Can now concatenate the two dframes, as in si_week11

In [52]:
merge_ft = pd.concat([left_team, right_team], axis=0)
merge_ft.head()

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr
1,1897,Fitzroy,49,Carlton,16,Brunswick St,FI,CA,33,1
2,1897,Collingwood,41,St Kilda,16,Victoria Park,CO,SK,25,1
3,1897,Geelong,24,Essendon,47,Corio Oval,GE,ES,-23,1
4,1897,South Melbourne,27,Melbourne,44,Lake Oval,SM,ME,-17,1
5,1897,South Melbourne,40,Carlton,36,Lake Oval,SM,CA,4,1


#### The new long data frame consisting of 30,000 plus rows:

In [53]:
merge_ft.shape

(31036, 10)

# The following is to keep each game together (two rows per game)

In [54]:
merge_ft.sort_index(inplace=True)
merge_ft.head(10)

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr
1,1897,Fitzroy,49,Carlton,16,Brunswick St,FI,CA,33,1
1,1897,Carlton,16,Fitzroy,49,Brunswick St,CA,FI,33,0
2,1897,Collingwood,41,St Kilda,16,Victoria Park,CO,SK,25,1
2,1897,St Kilda,16,Collingwood,41,Victoria Park,SK,CO,25,0
3,1897,Geelong,24,Essendon,47,Corio Oval,GE,ES,-23,1
3,1897,Essendon,47,Geelong,24,Corio Oval,ES,GE,-23,0
4,1897,South Melbourne,27,Melbourne,44,Lake Oval,SM,ME,-17,1
4,1897,Melbourne,44,South Melbourne,27,Lake Oval,ME,SM,-17,0
5,1897,South Melbourne,40,Carlton,36,Lake Oval,SM,CA,4,1
5,1897,Carlton,36,South Melbourne,40,Lake Oval,CA,SM,4,0


In [55]:
merge_ft.tail()

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr
15516,2019,Fremantle,100,Port Adelaide,79,Perth Stadium,FR,PA,21,1
15517,2019,Western Bulldogs,103,Carlton,100,Docklands,WB,CA,-3,0
15517,2019,Carlton,100,Western Bulldogs,103,Docklands,CA,WB,-3,1
15518,2019,North Melbourne,68,GWS Giants,91,Bellerive Oval,NM,GW,-23,1
15518,2019,GWS Giants,91,North Melbourne,68,Bellerive Oval,GW,NM,-23,0


## There are actually 30,804 rows, indexing isn't  sequential

## Create team result column

In [56]:
def neg_hvar(hvar):
        return -1*hvar

In [57]:
merge_ft["neg_dummy"] = merge_ft['hvar'].apply(neg_hvar)

In [58]:
merge_ft.tail()

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr,neg_dummy
15516,2019,Fremantle,100,Port Adelaide,79,Perth Stadium,FR,PA,21,1,-21
15517,2019,Western Bulldogs,103,Carlton,100,Docklands,WB,CA,-3,0,3
15517,2019,Carlton,100,Western Bulldogs,103,Docklands,CA,WB,-3,1,3
15518,2019,North Melbourne,68,GWS Giants,91,Bellerive Oval,NM,GW,-23,1,23
15518,2019,GWS Giants,91,North Melbourne,68,Bellerive Oval,GW,NM,-23,0,23


In [59]:
def team_var(hvar):
    if merge_ft['lr'] == 1:
        return hvar
    else: return neg_dummy

In [60]:
def new_lr(lr):
    if lr == 0:
        return -1
    else: return 1

In [61]:
merge_ft["new_lr"] = merge_ft['lr'].apply(new_lr)
merge_ft["tm_var"] = merge_ft['new_lr']*merge_ft['hvar']

In [62]:
merge_ft.tail()

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr,neg_dummy,new_lr,tm_var
15516,2019,Fremantle,100,Port Adelaide,79,Perth Stadium,FR,PA,21,1,-21,1,21
15517,2019,Western Bulldogs,103,Carlton,100,Docklands,WB,CA,-3,0,3,-1,3
15517,2019,Carlton,100,Western Bulldogs,103,Docklands,CA,WB,-3,1,3,1,-3
15518,2019,North Melbourne,68,GWS Giants,91,Bellerive Oval,NM,GW,-23,1,23,1,-23
15518,2019,GWS Giants,91,North Melbourne,68,Bellerive Oval,GW,NM,-23,0,23,-1,23


### Can now remove the columns new_lr and neg_dummy, which were used to create tm_var

In [63]:
merge_ft = merge_ft.drop(['neg_dummy', 'new_lr'], axis=1)

In [64]:
merge_ft.head()

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr,tm_var
1,1897,Fitzroy,49,Carlton,16,Brunswick St,FI,CA,33,1,33
1,1897,Carlton,16,Fitzroy,49,Brunswick St,CA,FI,33,0,-33
2,1897,Collingwood,41,St Kilda,16,Victoria Park,CO,SK,25,1,25
2,1897,St Kilda,16,Collingwood,41,Victoria Park,SK,CO,25,0,-25
3,1897,Geelong,24,Essendon,47,Corio Oval,GE,ES,-23,1,-23


### Note that the column 'lr' denotes left right, so there are only two different values (1 denotes left)

In [65]:
merge_ft['lr'].value_counts()

1    15518
0    15518
Name: lr, dtype: int64

## New function for win/loss/draw (W/L/D)


In [66]:
def outcome(tm_var):
    if tm_var >= 1:
        return "W"
    elif tm_var <= -1:
        return "L"
    elif tm_var == 0:
        return "D"
    else: return "Unknown"

In [67]:
merge_ft["outcome"] = merge_ft['tm_var'].apply(outcome)

In [68]:
merge_ft.head()

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr,tm_var,outcome
1,1897,Fitzroy,49,Carlton,16,Brunswick St,FI,CA,33,1,33,W
1,1897,Carlton,16,Fitzroy,49,Brunswick St,CA,FI,33,0,-33,L
2,1897,Collingwood,41,St Kilda,16,Victoria Park,CO,SK,25,1,25,W
2,1897,St Kilda,16,Collingwood,41,Victoria Park,SK,CO,25,0,-25,L
3,1897,Geelong,24,Essendon,47,Corio Oval,GE,ES,-23,1,-23,L


In [69]:
merge_ft.tail(9)

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr,tm_var,outcome
15514,2019,Essendon,96,Hawthorn,77,Docklands,ES,HA,19,1,19,W
15515,2019,St Kilda,80,Gold Coast,76,Riverway Stadium,SK,GC,-4,0,4,W
15515,2019,Gold Coast,76,St Kilda,80,Riverway Stadium,GC,SK,-4,1,-4,L
15516,2019,Port Adelaide,79,Fremantle,100,Perth Stadium,PA,FR,21,0,-21,L
15516,2019,Fremantle,100,Port Adelaide,79,Perth Stadium,FR,PA,21,1,21,W
15517,2019,Western Bulldogs,103,Carlton,100,Docklands,WB,CA,-3,0,3,W
15517,2019,Carlton,100,Western Bulldogs,103,Docklands,CA,WB,-3,1,-3,L
15518,2019,North Melbourne,68,GWS Giants,91,Bellerive Oval,NM,GW,-23,1,-23,L
15518,2019,GWS Giants,91,North Melbourne,68,Bellerive Oval,GW,NM,-23,0,23,W


## Research for Week 14 of 2019 using Pandas filtering:
### * WC v Essendon
### * Sydney v Hawthorn

### Recent contests Sydney v Hawthorn

In [74]:
merge_ft.iloc[:,:][(merge_ft.tm == 'SY') & (merge_ft.op == 'HA')].tail(8)

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr,tm_var,outcome
14648,2015,Sydney,73,Hawthorn,69,M.C.G.,SY,HA,-4,0,4,W
14710,2015,Sydney,57,Hawthorn,146,Stadium Australia,SY,HA,-89,1,-89,L
14859,2016,Sydney,69,Hawthorn,55,M.C.G.,SY,HA,-14,0,14,W
14922,2016,Sydney,70,Hawthorn,75,S.C.G.,SY,HA,-5,1,-5,L
15075,2017,Sydney,75,Hawthorn,81,S.C.G.,SY,HA,-6,1,-6,L
15147,2017,Sydney,66,Hawthorn,72,M.C.G.,SY,HA,6,0,-6,L
15264,2018,Sydney,79,Hawthorn,71,M.C.G.,SY,HA,-8,0,8,W
15395,2018,Sydney,74,Hawthorn,83,S.C.G.,SY,HA,-9,1,-9,L


### Recent contests West Coast v Essendon in W.A.

In [75]:
merge_ft.iloc[:,:][(merge_ft.tm == 'WC') & (merge_ft.op == 'ES')].tail(12)

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr,tm_var,outcome
13602,2010,West Coast,101,Essendon,78,Subiaco,WC,ES,23,1,23,W
13701,2010,West Coast,132,Essendon,100,Docklands,WC,ES,-32,0,32,W
13814,2011,West Coast,90,Essendon,106,Docklands,WC,ES,16,0,-16,L
13929,2011,West Coast,134,Essendon,77,Subiaco,WC,ES,57,1,57,W
14019,2012,West Coast,52,Essendon,113,Docklands,WC,ES,61,0,-61,L
14275,2013,West Coast,91,Essendon,98,Subiaco,WC,ES,-7,1,-7,L
14335,2013,West Coast,120,Essendon,67,Docklands,WC,ES,-53,0,53,W
14548,2014,West Coast,74,Essendon,77,Docklands,WC,ES,3,0,-3,L
14673,2015,West Coast,96,Essendon,46,Subiaco,WC,ES,50,1,50,W
14907,2016,West Coast,130,Essendon,52,Subiaco,WC,ES,78,1,78,W


## Stk v Bris in Melb

In [78]:
merge_ft.iloc[:,:][(merge_ft.tm == 'SK') & (merge_ft.op == 'BR') & (merge_ft.venue == 'Docklands')].tail(10)

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr,tm_var,outcome
11771,2000,St Kilda,66,Brisbane,105,Docklands,SK,BR,-39,1,-39,L
12365,2003,St Kilda,90,Brisbane,85,Docklands,SK,BR,5,1,5,W
12511,2004,St Kilda,92,Brisbane,91,Docklands,SK,BR,1,1,1,W
12824,2005,St Kilda,186,Brisbane,47,Docklands,SK,BR,139,1,139,W
12854,2006,St Kilda,124,Brisbane,87,Docklands,SK,BR,37,1,37,W
13463,2009,St Kilda,97,Brisbane,81,Docklands,SK,BR,16,1,16,W
14982,2016,St Kilda,161,Brisbane,103,Docklands,SK,BR,58,1,58,W
15018,2017,St Kilda,107,Brisbane,76,Docklands,SK,BR,31,1,31,W
15203,2018,St Kilda,107,Brisbane,82,Docklands,SK,BR,25,1,25,W


## PA v Gee in Adelaide

In [81]:
merge_ft.iloc[:,:][(merge_ft.tm == 'PA') & (merge_ft.op == 'GE') & (merge_ft.venue != 'Kardinia Park')].tail(10)

Unnamed: 0,year,team,tm_pts,opp_team,op_pts,venue,tm,op,hvar,lr,tm_var,outcome
12685,2005,Port Adelaide,95,Geelong,99,Football Park,PA,GE,-4,1,-4,L
13094,2007,Port Adelaide,60,Geelong,116,Football Park,PA,GE,-56,1,-56,L
13207,2007,Port Adelaide,44,Geelong,163,M.C.G.,PA,GE,119,0,-119,L
13209,2008,Port Adelaide,96,Geelong,105,Football Park,PA,GE,-9,1,-9,L
14240,2013,Port Adelaide,68,Geelong,116,Football Park,PA,GE,-48,1,-48,L
14369,2013,Port Adelaide,80,Geelong,96,M.C.G.,PA,GE,16,0,-16,L
14427,2014,Port Adelaide,107,Geelong,67,Adelaide Oval,PA,GE,40,1,40,W
14671,2015,Port Adelaide,69,Geelong,92,Adelaide Oval,PA,GE,-23,1,-23,L
14827,2016,Port Adelaide,59,Geelong,107,Adelaide Oval,PA,GE,-48,1,-48,L
15240,2018,Port Adelaide,50,Geelong,84,Adelaide Oval,PA,GE,-34,1,-34,L
