In [2]:
import urllib.request
import csv
import glob
import time
import pandas as pd
import requests
from bs4 import BeautifulSoup
from io import StringIO
import numpy as np
import os

# Scrape Fanfooty data
This notebook is used to scrape the following data from fanfooty:

1. Current player list
2. Match stats for each player
3. Match results/fixture

## REQUIRED - Specify match IDs to scrape:

In [27]:
start_match = 8455
end_match = 8465

# start_match = 7901
# end_match = 8044

### Create folder to save all outputs

In [28]:
timestr = time.strftime("%Y%m%d-%H%M%S")
destination = "exports/scrape_{}".format(timestr)
os.mkdir(destination)

## 1. Current player list

### Save historical fanfooty player IDs csv file

In [29]:
url = "https://www.fanfooty.com.au/resource/player.php?type=all"
url_headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.80 Safari/537.36"
}
headers = "fanfooty_player_id,fanfooty_afl_player_id,fanfooty_first_name,fanfooty_surname,fanfooty_team,fanfooty_status,fanfooty_number,fanfooty_birth_date,fanfooty_height,fanfooty_weight,fanfooty_state_of_origin,fanfooty_recruited_from,fanfooty_games,fanfooty_goals"

res = requests.get(url, headers=url_headers,verify=False)
split_rows_list = res.text.split('"\r\n')
f = open('{}/fanfooty_player_ids_historical.csv'.format(destination),'w')
f.write('{}\n'.format(headers)) 
for row in split_rows_list:
    f.write('{}\n'.format(row)) 
f.close()



### Clean data:
* Remove duplicate players
* Standard team name

In [30]:
df_player_list = pd.read_csv('{}/fanfooty_player_ids_historical.csv'.format(destination), encoding = "ISO-8859-1")
df_team_names = pd.read_csv('inputs/all_sources_team_names.csv')
df_player_list['short_team'] = df_player_list['fanfooty_team'].map(dict(df_team_names[['fanfooty_team_name_mid', 'sc_team_name']].values))
df_player_list = df_player_list.dropna(subset=['fanfooty_afl_player_id'])
df_player_list = df_player_list[df_player_list['fanfooty_afl_player_id'] != 0]

remove_duplicate_players = [
    {'player_id': 271128, 'first_name': 'Danny', 'surname': 'Butcher', 'team': 'Port Adelaide'},
    {'player_id': 296214, 'first_name': 'Jake', 'surname': 'Barratt', 'team': 'GWS'},
    {'player_id': 991933, 'first_name': 'Jason', 'surname': 'Cantstandya', 'team': 'Richmond'},
    {'player_id': 992351, 'first_name': 'Bailey', 'surname': 'Williams', 'team': 'None'}
]

for player in remove_duplicate_players:
    indexNames = df_player_list[
        (df_player_list['fanfooty_afl_player_id'] == player['player_id']) & 
        (df_player_list['fanfooty_first_name'] == player['first_name']) &
        (df_player_list['fanfooty_surname'] == player['surname']) &
        (df_player_list['fanfooty_team'] == player['team'])
    ].index
    df_player_list.drop(indexNames , inplace=True)

df_player_list.to_csv('{}/fanfooty_player_ids_historical.csv'.format(destination))
df_player_list

ParserError: Error tokenizing data. C error: EOF inside string starting at row 1

## 2. Match stats for each player

### Scrape match files from Fanfooty website

In [31]:
def return_list_of_urls(match_id):
    full_url_list = []
    for match in match_id:
        url = "https://www.fanfooty.com.au/live/"
        extension = ".txt"
        full_url = "{}{}{}".format(url, match, extension)
        full_url_list.append(full_url)
    return full_url_list

matches = list(range(start_match, end_match + 1))

list_of_urls = return_list_of_urls(matches)

for url in list_of_urls:
    print(url)

for url in list_of_urls:
    response = requests.get(url, headers=url_headers, verify=False)
    webContent = response.text
    filename = url[-8:]
    f = open("inputs/All Match Data/{}".format(filename), 'w', encoding="utf-8")
    f.write(webContent)
    print(filename)

https://www.fanfooty.com.au/live/8455.txt
https://www.fanfooty.com.au/live/8456.txt
https://www.fanfooty.com.au/live/8457.txt
https://www.fanfooty.com.au/live/8458.txt
https://www.fanfooty.com.au/live/8459.txt
https://www.fanfooty.com.au/live/8460.txt
https://www.fanfooty.com.au/live/8461.txt
https://www.fanfooty.com.au/live/8462.txt
https://www.fanfooty.com.au/live/8463.txt
https://www.fanfooty.com.au/live/8464.txt
https://www.fanfooty.com.au/live/8465.txt
8455.txt




8456.txt
8457.txt




8458.txt
8459.txt




8460.txt
8461.txt




8462.txt
8463.txt




8464.txt
8465.txt




### Headers of each field in match file

In [32]:
column_header_names = [
    'Fanfooty Match ID',
    'Fanfooty Match URL',
    'Round',
    'Year',
    'Player ID',
    'First Name',
    'Surname',
    'Team',
    'null',
    'DT',
    'SC',
    'null2',
    'null3',
    'null4',
    'Kicks',
    'Handballs',
    'Marks',
    'Tackles',
    'Hitouts',
    'Frees for',
    'Frees against',
    'Goals',
    'Behinds',
    'Not sure',
    'Tag',
    'Tag Notes',
    'Tag 2',
    'Tag 2 Notes',
    'null5',
    'null6',
    'null7',
    'null8',
    'Position',
    'Jumper Number',
    'null9',
    'null10',
    'null11',
    'DT own %',
    'SC own %',
    'AF own %',
    'null12',
    'AF Breakeven',
    'null13',
    'Contested Possessions',
    'Clearances',
    'Clangers',
    'Disposal efficiency',
    'Time on ground',
    'Metres gained'
    ,'Bench staus'
]

### Read match files and write to csv

In [34]:
df_fanfooty_player_raw = pd.DataFrame()
def get_number_of_lines_in_file(data):
    return len(data.split('\n'))


def get_match_id(data):
    name = data.split('\n', 1)[0]
    return name[-8:-4]


def get_url_of_match(data):
    name = data.split('\n', 1)[0]
    url = "http://live.fanfooty.com.au/game/matchcentre.html?id=" + name[-8:-4]
    return url


def get_round(data):
    line = data.split('\n', 1)[1]
    stripped_line = [x.strip() for x in line.split(',')]
    afl_round = stripped_line[4]
    return afl_round


def get_year(data):
    second_line = data.splitlines()[2]
    stripped_second_line = [x.strip() for x in second_line.split(',')]
    afl_year = stripped_second_line[1]
    return afl_year


def get_match_data_list():
    data_list = []
    path = "inputs/All Match Data/*.txt"

    for item in glob.glob(path):
        file = open(item, 'r')
        name = file.name
        data = file.read()
        data_list.append(name + '\n' + data)
    return data_list


def return_player_match_data(data_list):
    player_data_for_match = []

    for match in data_list:
        match = os.linesep.join([s for s in match.splitlines() if s])
        print(match.split('\n', 1)[0])
        number_of_lines = get_number_of_lines_in_file(match)
        afl_round = get_round(match)
        afl_year = get_year(match)
        name = get_url_of_match(match)
        match_id = get_match_id(match)

        for line in range(5, number_of_lines - 1):
            line_data = match.splitlines()[line]
            line_data = [x.strip() for x in line_data.split(',')]
            line_data = [match_id] + [name] + [afl_round] + [afl_year] + line_data
            player_data_for_match.append(line_data)
    return player_data_for_match


match_data_list = get_match_data_list()
player_data = return_player_match_data(match_data_list)
file_name = "fanfooty_match_data_{}.csv".format(timestr)
with open("{}/{}".format(destination, file_name), "w", newline='') as f:
    writer = csv.writer(f)
    writer.writerow(column_header_names)
    for item in player_data:
        writer.writerow(item)

df_fanfooty_player_raw = pd.read_csv("{}/{}".format(destination, file_name), error_bad_lines=False)
df_fanfooty_player_raw

inputs/All Match Data\3425.txt
inputs/All Match Data\3426.txt
inputs/All Match Data\3427.txt
inputs/All Match Data\3428.txt
inputs/All Match Data\3429.txt
inputs/All Match Data\3430.txt
inputs/All Match Data\3431.txt
inputs/All Match Data\3432.txt
inputs/All Match Data\3433.txt
inputs/All Match Data\3434.txt
inputs/All Match Data\3435.txt
inputs/All Match Data\3436.txt
inputs/All Match Data\3437.txt
inputs/All Match Data\3438.txt
inputs/All Match Data\3439.txt
inputs/All Match Data\3440.txt
inputs/All Match Data\3441.txt
inputs/All Match Data\3442.txt
inputs/All Match Data\3443.txt
inputs/All Match Data\3444.txt
inputs/All Match Data\3445.txt
inputs/All Match Data\3446.txt
inputs/All Match Data\3447.txt
inputs/All Match Data\3448.txt
inputs/All Match Data\3449.txt
inputs/All Match Data\3450.txt
inputs/All Match Data\3451.txt
inputs/All Match Data\3452.txt
inputs/All Match Data\3453.txt
inputs/All Match Data\3454.txt
inputs/All Match Data\3455.txt
inputs/All Match Data\3456.txt
inputs/A



  df_fanfooty_player_raw = pd.read_csv("{}/{}".format(destination, file_name), error_bad_lines=False)
  df_fanfooty_player_raw = pd.read_csv("{}/{}".format(destination, file_name), error_bad_lines=False)


Unnamed: 0,Fanfooty Match ID,Fanfooty Match URL,Round,Year,Player ID,First Name,Surname,Team,null,DT,...,null12,AF Breakeven,null13,Contested Possessions,Clearances,Clangers,Disposal efficiency,Time on ground,Metres gained,Bench staus
0,425.0,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,990020.0,Andrew,Embley,WC,30.0,111.0,...,,,,,,,,,,
1,425.0,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,230254.0,Adam,Selwood,WC,50.0,107.0,...,,,,,,,,,,
2,425.0,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,200112.0,Dean,Cox,WC,27.0,99.0,...,,,,,,,,,,
3,425.0,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,240016.0,Beau,Waters,WC,26.0,98.0,...,,,,,,,,,,
4,425.0,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,261911.0,Brad,Ebert,WC,26.0,94.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122024,634.0,http://live.fanfooty.com.au/game/matchcentre.h...,GF,2023,293581.0,Lincoln,McCarthy,BL,9.0,40.0,...,0.0,,,2.0,0.0,2.0,71.0,74.0,220.0,1.0
122025,634.0,http://live.fanfooty.com.au/game/matchcentre.h...,GF,2023,990609.0,Charlie,Cameron,BL,13.0,40.0,...,0.0,,,4.0,0.0,2.0,100.0,89.0,111.0,0.0
122026,634.0,http://live.fanfooty.com.au/game/matchcentre.h...,GF,2023,1002235.0,Cam,Rayner,BL,3.0,35.0,...,0.0,,,4.0,2.0,4.0,63.0,75.0,248.0,0.0
122027,634.0,http://live.fanfooty.com.au/game/matchcentre.h...,GF,2023,1023708.0,Jaspa,Fletcher,BL,2.0,32.0,...,0.0,,,6.0,0.0,2.0,66.0,64.0,132.0,1.0


### Clean player data

In [35]:
df_fanfooty_player_raw['SC'] = pd.to_numeric(df_fanfooty_player_raw['SC'], errors='coerce')
df_fanfooty_player_raw = df_fanfooty_player_raw.dropna(subset=['SC'])
df_fanfooty_player_raw['SC'] = df_fanfooty_player_raw['SC'].astype('int64')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_fanfooty_player_raw['SC'] = df_fanfooty_player_raw['SC'].astype('int64')


### Identify when players were injured during a match
Fanfooty has amazing "tags" that can be used to identify when a player has been injured during a match

If they have certain tags (e.g. concussed) and score below 80 supercoach points, they are judged as injured.

In [36]:
injured_tags = [
    'sore',
    'injured',
    'longterminjured',
    'concussed',
    'heart',
    'subbed'
]

def get_injured_status(row):
#     if (row['Tag'] in injured_tags or row['Tag 2'] in injured_tags) and row['SC'] < 80:
    if (row['Tag'] in injured_tags or row['Tag 2'] in injured_tags):
        return True
    else:
        return False

df_fanfooty_player_raw['Injured'] = df_fanfooty_player_raw.apply(lambda row: get_injured_status(row), axis=1)
df_fanfooty_player_raw

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_fanfooty_player_raw['Injured'] = df_fanfooty_player_raw.apply(lambda row: get_injured_status(row), axis=1)


Unnamed: 0,Fanfooty Match ID,Fanfooty Match URL,Round,Year,Player ID,First Name,Surname,Team,null,DT,...,AF Breakeven,null13,Contested Possessions,Clearances,Clangers,Disposal efficiency,Time on ground,Metres gained,Bench staus,Injured
0,425.0,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,990020.0,Andrew,Embley,WC,30.0,111.0,...,,,,,,,,,,False
1,425.0,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,230254.0,Adam,Selwood,WC,50.0,107.0,...,,,,,,,,,,False
2,425.0,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,200112.0,Dean,Cox,WC,27.0,99.0,...,,,,,,,,,,False
3,425.0,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,240016.0,Beau,Waters,WC,26.0,98.0,...,,,,,,,,,,False
4,425.0,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,261911.0,Brad,Ebert,WC,26.0,94.0,...,,,,,,,,,,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122024,634.0,http://live.fanfooty.com.au/game/matchcentre.h...,GF,2023,293581.0,Lincoln,McCarthy,BL,9.0,40.0,...,,,2.0,0.0,2.0,71.0,74.0,220.0,1.0,False
122025,634.0,http://live.fanfooty.com.au/game/matchcentre.h...,GF,2023,990609.0,Charlie,Cameron,BL,13.0,40.0,...,,,4.0,0.0,2.0,100.0,89.0,111.0,0.0,False
122026,634.0,http://live.fanfooty.com.au/game/matchcentre.h...,GF,2023,1002235.0,Cam,Rayner,BL,3.0,35.0,...,,,4.0,2.0,4.0,63.0,75.0,248.0,0.0,False
122027,634.0,http://live.fanfooty.com.au/game/matchcentre.h...,GF,2023,1023708.0,Jaspa,Fletcher,BL,2.0,32.0,...,,,6.0,0.0,2.0,66.0,64.0,132.0,1.0,False


## 3. Match results/fixture

### Get fixture data from Fanfooty website

In [37]:
url = "http://www.fanfooty.com.au/resource/draw.php"
headers = ["FanFooty draw ID", "year", "competition", "round", "gametime (AET)", "day", "home team", "away team", "ground", "timeslot", "TV coverage", "home supergoals", "home goals", "home behinds", "home points", "away supergoals", "away goals", "away behinds", "away points", "match status"]
data_list = []


res = requests.get(url, headers=url_headers, verify=False)
display(res)
if res.text == 'Too many connections':
    raise
        
split_rows_list = res.text.split('"\r\n')
for row in split_rows_list:
    field_list = row.split(',')
    data_list.append(field_list)
    
df_fixture = pd.DataFrame(data_list, columns=headers)

for i, col in enumerate(df_fixture.columns):
    df_fixture.iloc[:, i] = df_fixture.iloc[:, i].str.replace('"', '')
    
df_fixture['FanFooty draw ID'] = pd.to_numeric(df_fixture['FanFooty draw ID'])
df_fixture



<Response [200]>

Unnamed: 0,FanFooty draw ID,year,competition,round,gametime (AET),day,home team,away team,ground,timeslot,TV coverage,home supergoals,home goals,home behinds,home points,away supergoals,away goals,away behinds,away points,match status
0,1006.0,1993,HA,1,1993-03-26 20:08:00,Friday,Western Bulldogs,Collingwood,MCG,N,,,13,17,95,,17,13,115,Full Time
1,1000.0,1993,HA,1,1993-03-27 14:00:00,Saturday,North Melbourne,Brisbane Bears,MCG,D,,,24,22,166,,22,11,143,Full Time
2,1001.0,1993,HA,1,1993-03-27 14:00:00,Saturday,Carlton,Fitzroy,Princes Park,D,,,17,10,112,,17,16,118,Full Time
3,1002.0,1993,HA,1,1993-03-27 14:00:00,Saturday,Hawthorn,Melbourne,Waverley,D,,,13,15,93,,11,4,70,Full Time
4,1005.0,1993,HA,1,1993-03-27 14:08:00,Saturday,Geelong,St Kilda,Kardinia,D,,,20,16,136,,16,16,112,Full Time
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6603,8865.0,2024,HA,24,2024-08-23 12:00:00,Friday,Richmond,Gold Coast,MCG,N,,,,,,,,,,
6604,8866.0,2024,HA,24,2024-08-23 12:00:00,Friday,Sydney,Adelaide,SCG,N,,,,,,,,,,
6605,8867.0,2024,HA,24,2024-08-23 12:00:00,Friday,Western Bulldogs,GWS,Ballarat,N,,,,,,,,,,
6606,8868.0,2024,HA,24,2024-08-23 14:00:00,Friday,Fremantle,Port Adelaide,Perth,D,,,,,,,,,,


### Clean Fixture data

In [38]:
df_fixture = df_fixture.loc[df_fixture['match status'] == ' Full Time']
df_fixture['FanFooty draw ID'] = df_fixture['FanFooty draw ID'].astype('int')
df_fixture.to_csv('inputs/fanfooty_fixture.csv')
df_fixture

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_fixture['FanFooty draw ID'] = df_fixture['FanFooty draw ID'].astype('int')


Unnamed: 0,FanFooty draw ID,year,competition,round,gametime (AET),day,home team,away team,ground,timeslot,TV coverage,home supergoals,home goals,home behinds,home points,away supergoals,away goals,away behinds,away points,match status
0,1006,1993,HA,1,1993-03-26 20:08:00,Friday,Western Bulldogs,Collingwood,MCG,N,,,13,17,95,,17,13,115,Full Time
1,1000,1993,HA,1,1993-03-27 14:00:00,Saturday,North Melbourne,Brisbane Bears,MCG,D,,,24,22,166,,22,11,143,Full Time
2,1001,1993,HA,1,1993-03-27 14:00:00,Saturday,Carlton,Fitzroy,Princes Park,D,,,17,10,112,,17,16,118,Full Time
3,1002,1993,HA,1,1993-03-27 14:00:00,Saturday,Hawthorn,Melbourne,Waverley,D,,,13,15,93,,11,4,70,Full Time
4,1005,1993,HA,1,1993-03-27 14:08:00,Saturday,Geelong,St Kilda,Kardinia,D,,,20,16,136,,16,16,112,Full Time
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6395,8630,2023,SF,26,2023-09-15 19:50:00,Friday,Melbourne,Carlton,MCG,N,,,9,17,71,,11,7,73,Full Time
6396,8631,2023,SF,26,2023-09-16 19:40:00,Saturday,Port Adelaide,GWS,Adelaide,N,,,9,16,70,,13,15,93,Full Time
6397,8632,2023,PF,27,2023-09-22 19:50:00,Friday,Collingwood,GWS,MCG,N,,,8,10,58,,8,9,57,Full Time
6398,8633,2023,PF,27,2023-09-23 17:15:00,Saturday,Brisbane Lions,Carlton,Gabba,T,,,11,13,79,,9,9,63,Full Time


# Get standard team name

In [39]:
df_team_names = pd.read_csv("inputs/all_sources_team_names.csv")
df_fixture = df_fixture.merge(df_team_names[['fanfooty_team_name', 'fanfooty_team_name_mid']], left_on='home team' ,right_on='fanfooty_team_name_mid', how='left')
df_fixture = df_fixture.rename(columns={"fanfooty_team_name": "home_team_short"})
del df_fixture['fanfooty_team_name_mid']
df_fixture = df_fixture.merge(df_team_names[['fanfooty_team_name', 'fanfooty_team_name_mid']], left_on='away team' ,right_on='fanfooty_team_name_mid', how='left')
df_fixture = df_fixture.rename(columns={"fanfooty_team_name": "away_team_short"})
del df_fixture['fanfooty_team_name_mid']
df_fixture.drop(df_fixture.tail(1).index,inplace=True)
df_fixture

Unnamed: 0,FanFooty draw ID,year,competition,round,gametime (AET),day,home team,away team,ground,timeslot,...,home goals,home behinds,home points,away supergoals,away goals,away behinds,away points,match status,home_team_short,away_team_short
0,1006,1993,HA,1,1993-03-26 20:08:00,Friday,Western Bulldogs,Collingwood,MCG,N,...,13,17,95,,17,13,115,Full Time,WB,CO
1,1000,1993,HA,1,1993-03-27 14:00:00,Saturday,North Melbourne,Brisbane Bears,MCG,D,...,24,22,166,,22,11,143,Full Time,NM,
2,1001,1993,HA,1,1993-03-27 14:00:00,Saturday,Carlton,Fitzroy,Princes Park,D,...,17,10,112,,17,16,118,Full Time,CA,
3,1002,1993,HA,1,1993-03-27 14:00:00,Saturday,Hawthorn,Melbourne,Waverley,D,...,13,15,93,,11,4,70,Full Time,HW,ME
4,1005,1993,HA,1,1993-03-27 14:08:00,Saturday,Geelong,St Kilda,Kardinia,D,...,20,16,136,,16,16,112,Full Time,GE,SK
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6326,8629,2023,QF,25,2023-09-09 19:25:00,Saturday,Brisbane Lions,Port Adelaide,Gabba,N,...,19,9,123,,11,9,75,Full Time,BL,PA
6327,8630,2023,SF,26,2023-09-15 19:50:00,Friday,Melbourne,Carlton,MCG,N,...,9,17,71,,11,7,73,Full Time,ME,CA
6328,8631,2023,SF,26,2023-09-16 19:40:00,Saturday,Port Adelaide,GWS,Adelaide,N,...,9,16,70,,13,15,93,Full Time,PA,WS
6329,8632,2023,PF,27,2023-09-22 19:50:00,Friday,Collingwood,GWS,MCG,N,...,8,10,58,,8,9,57,Full Time,CO,WS


### Get the total SuperCoach and AFL Fantasy scores for each team, for every match

In [40]:
# Create a summary to get the total SC and AF points for each match
df_match_summary = pd.pivot_table(df_fanfooty_player_raw, index=['Fanfooty Match ID'], values=['SC'], columns=['Team'], aggfunc=np.sum)
df_match_summary = df_match_summary.reset_index()
headings = [x[1] for x in df_match_summary.columns]
headings[0] = 'Fanfooty Match ID'
df_match_summary.columns = headings
df_match_summary

Unnamed: 0,Fanfooty Match ID,AD,BL,CA,CO,ES,FR,GC,GE,HW,ME,NM,PA,RI,SK,SY,WB,WC,WS
0,0.0,,,,,,,,3482.0,,,,,1328.0,,,1576.0,,
1,1.0,1776.0,1435.0,3409.0,,,,,1452.0,1841.0,,,,1712.0,,1794.0,,1512.0,1375.0
2,2.0,,,1756.0,1637.0,,1540.0,3430.0,,,,3159.0,,1390.0,,1742.0,1624.0,,
3,3.0,1663.0,,,1540.0,1570.0,,,,1615.0,3459.0,1712.0,,,1433.0,1682.0,,,1596.0
4,4.0,,1840.0,,1742.0,,,,,1541.0,1344.0,,3424.0,1502.0,,3523.0,,1366.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
994,995.0,,3241.0,,,,,,1664.0,1473.0,,,,,,,1868.0,1436.0,
995,996.0,1647.0,,,,,,3352.0,,1405.0,1631.0,,,,,,,1683.0,
996,997.0,1680.0,,,,,,,,,,1476.0,1844.0,,4678.0,,,,
997,998.0,,,3004.0,1651.0,1877.0,,,,,,,,,,,1420.0,,1669.0


### Join each team's SC total to fixture data + save file

In [41]:
def get_sc_total(row, team_short):
    match_id = row['FanFooty draw ID']
    team_name = row[team_short]
    if match_id >= 3425:
        sc_total = df_match_summary.loc[df_match_summary['Fanfooty Match ID'] == match_id, team_name]
        if len(sc_total) > 0: 
            return sc_total.values[0]

df_fixture['Home SC total'] = df_fixture.apply(lambda row: get_sc_total(row, 'home_team_short'), axis=1)
df_fixture['Away SC total'] = df_fixture.apply(lambda row: get_sc_total(row, 'away_team_short'), axis=1)
df_fixture.to_csv('{}/fanfooty_fixture.csv'.format(destination))
df_fixture

Unnamed: 0,FanFooty draw ID,year,competition,round,gametime (AET),day,home team,away team,ground,timeslot,...,home points,away supergoals,away goals,away behinds,away points,match status,home_team_short,away_team_short,Home SC total,Away SC total
0,1006,1993,HA,1,1993-03-26 20:08:00,Friday,Western Bulldogs,Collingwood,MCG,N,...,95,,17,13,115,Full Time,WB,CO,,
1,1000,1993,HA,1,1993-03-27 14:00:00,Saturday,North Melbourne,Brisbane Bears,MCG,D,...,166,,22,11,143,Full Time,NM,,,
2,1001,1993,HA,1,1993-03-27 14:00:00,Saturday,Carlton,Fitzroy,Princes Park,D,...,112,,17,16,118,Full Time,CA,,,
3,1002,1993,HA,1,1993-03-27 14:00:00,Saturday,Hawthorn,Melbourne,Waverley,D,...,93,,11,4,70,Full Time,HW,ME,,
4,1005,1993,HA,1,1993-03-27 14:08:00,Saturday,Geelong,St Kilda,Kardinia,D,...,136,,16,16,112,Full Time,GE,SK,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6326,8629,2023,QF,25,2023-09-09 19:25:00,Saturday,Brisbane Lions,Port Adelaide,Gabba,N,...,123,,11,9,75,Full Time,BL,PA,,
6327,8630,2023,SF,26,2023-09-15 19:50:00,Friday,Melbourne,Carlton,MCG,N,...,71,,11,7,73,Full Time,ME,CA,,
6328,8631,2023,SF,26,2023-09-16 19:40:00,Saturday,Port Adelaide,GWS,Adelaide,N,...,70,,13,15,93,Full Time,PA,WS,,
6329,8632,2023,PF,27,2023-09-22 19:50:00,Friday,Collingwood,GWS,MCG,N,...,58,,8,9,57,Full Time,CO,WS,,


### Add home/away + opposition columns to player data

In [42]:
def get_opposition(row):
    match_id = row['Fanfooty Match ID']
    team = row['Team']
    home_team = df_fixture.loc[(df_fixture['FanFooty draw ID'] == match_id) & (df_fixture['home_team_short'] == team)]
    away_team = df_fixture.loc[(df_fixture['FanFooty draw ID'] == match_id) & (df_fixture['away_team_short'] == team)]
    if len(home_team) > 0:
        return home_team['away_team_short'].values[0]
    if len(away_team) > 0:
        return away_team['home_team_short'].values[0]
    

df_fanfooty_player_raw['Opposition'] = df_fanfooty_player_raw.apply(lambda row: get_opposition(row), axis=1)
df_fanfooty_player_raw.to_csv("{}/{}".format(destination, file_name))
df_fanfooty_player_raw

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_fanfooty_player_raw['Opposition'] = df_fanfooty_player_raw.apply(lambda row: get_opposition(row), axis=1)


Unnamed: 0,Fanfooty Match ID,Fanfooty Match URL,Round,Year,Player ID,First Name,Surname,Team,null,DT,...,null13,Contested Possessions,Clearances,Clangers,Disposal efficiency,Time on ground,Metres gained,Bench staus,Injured,Opposition
0,425.0,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,990020.0,Andrew,Embley,WC,30.0,111.0,...,,,,,,,,,False,
1,425.0,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,230254.0,Adam,Selwood,WC,50.0,107.0,...,,,,,,,,,False,
2,425.0,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,200112.0,Dean,Cox,WC,27.0,99.0,...,,,,,,,,,False,
3,425.0,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,240016.0,Beau,Waters,WC,26.0,98.0,...,,,,,,,,,False,
4,425.0,http://live.fanfooty.com.au/game/matchcentre.h...,R4,2010,261911.0,Brad,Ebert,WC,26.0,94.0,...,,,,,,,,,False,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122024,634.0,http://live.fanfooty.com.au/game/matchcentre.h...,GF,2023,293581.0,Lincoln,McCarthy,BL,9.0,40.0,...,,2.0,0.0,2.0,71.0,74.0,220.0,1.0,False,
122025,634.0,http://live.fanfooty.com.au/game/matchcentre.h...,GF,2023,990609.0,Charlie,Cameron,BL,13.0,40.0,...,,4.0,0.0,2.0,100.0,89.0,111.0,0.0,False,
122026,634.0,http://live.fanfooty.com.au/game/matchcentre.h...,GF,2023,1002235.0,Cam,Rayner,BL,3.0,35.0,...,,4.0,2.0,4.0,63.0,75.0,248.0,0.0,False,
122027,634.0,http://live.fanfooty.com.au/game/matchcentre.h...,GF,2023,1023708.0,Jaspa,Fletcher,BL,2.0,32.0,...,,6.0,0.0,2.0,66.0,64.0,132.0,1.0,False,


In [43]:
"{}/{}".format(destination, file_name)

'exports/scrape_20240223-165738/fanfooty_match_data_20240223-165738.csv'