In [3]:
from io import StringIO

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup


In [19]:
# Define the base URL and parameters for the Supercoach data page
base_url = 'https://www.footywire.com/afl/footy/supercoach_round'
params = {'p': '', 's': 'T'}

# Define the years and rounds to iterate over
years = range(2018, 2023)
rounds = range(1, 24)

# Create an empty list to store the DataFrames for each year and round
dataframes = []

# Iterate over each year and round
for year in years:
    for rnd in rounds:
        # Set the year and round parameters in the URL
        params['year'] = year
        params['round'] = rnd

        print(year, rnd)

        # Construct the full URL with parameters
        url = base_url + '?' + '&'.join([f'{k}={v}' for k, v in params.items()])

        # Send a GET request to the URL and store the response
        response = requests.get(url)

        # Check that the response was successful
        if response.status_code == 200:
            # Parse the HTML content of the response using BeautifulSoup
            soup = BeautifulSoup(response.content, 'html.parser')

            # Find the div element with ID 'supercoach-round-div'
            supercoach_div = soup.find('div', {'id': 'supercoach-round-div'})

            if supercoach_div is not None:
                # Convert the div element into a Pandas DataFrame and add year and round columns
                df = pd.read_html(str(supercoach_div), header=0)[0]
                df['Year'] = year
                df['Round'] = rnd
                
                # Rename the columns to 'RoundScore' and 'RoundSalary'
                df = df.rename(columns={f'{year} R{rnd}Score': 'RoundScore', f'{year} R{rnd}Salary': 'RoundSalary', f'*{year} R{rnd}Value': 'RoundValue'})

                print(df.columns)
                
                # Append the DataFrame to the list
                dataframes.append(df)
                
            else:
                print(f"Error: Could not find div element with ID 'supercoach-round-div' on page for year {year}, round {rnd}.")
        else:
            print(f"Error: Request to URL {url} was unsuccessful with status code {response.status_code}.")

# Concatenate all the DataFrames in the list into a single DataFrame
all_data = pd.concat(dataframes, ignore_index=True)

# Print the first few rows of the combined DataFrame
print(all_data.head())


2018 1
Index(['Rank', 'Player', 'Team', 'CurrentSalary', 'RoundSalary', 'RoundScore',
       'RoundValue', 'Year', 'Round'],
      dtype='object')
2018 2
Index(['Rank', 'Player', 'Team', 'CurrentSalary', 'RoundSalary', 'RoundScore',
       'RoundValue', 'Year', 'Round'],
      dtype='object')
2018 3
Index(['Rank', 'Player', 'Team', 'CurrentSalary', 'RoundSalary', 'RoundScore',
       'RoundValue', 'Year', 'Round'],
      dtype='object')
2018 4
Index(['Rank', 'Player', 'Team', 'CurrentSalary', 'RoundSalary', 'RoundScore',
       'RoundValue', 'Year', 'Round'],
      dtype='object')
2018 5
Index(['Rank', 'Player', 'Team', 'CurrentSalary', 'RoundSalary', 'RoundScore',
       'RoundValue', 'Year', 'Round'],
      dtype='object')
2018 6
Index(['Rank', 'Player', 'Team', 'CurrentSalary', 'RoundSalary', 'RoundScore',
       'RoundValue', 'Year', 'Round'],
      dtype='object')
2018 7
Index(['Rank', 'Player', 'Team', 'CurrentSalary', 'RoundSalary', 'RoundScore',
       'RoundValue', 'Year', 'Ro

In [20]:
# save as csv
with open('supercoachData.csv', 'w') as f:
    all_data.to_csv(f)

Now fetch general AFL match data (the teams and scores of every game) from the Squiggle API.

In [26]:
import requests
import pandas as pd

# create an empty DataFrame to store the match data
match_data = pd.DataFrame(columns=["year", "round", "team1", "team2", "hscore", "ascore"])

# iterate over each year from 2018 to 2022 (inclusive)
for year in range(2018, 2023):
    # set the API endpoint URL for the current year
    url = f"https://api.squiggle.com.au/?q=games;year={year}"

    # make a GET request to the API endpoint
    response = requests.get(url)

    # check if the request was successful
    if response.status_code == 200:
        # get the JSON data from the response
        data = response.json()

        # iterate over each match in the data
        for match in data["games"]:
            # extract the relevant match data
            round = match.get("round")
            date = match.get("date")
            team1 = match.get("ateam")
            team2 = match.get("hteam")
            hscore = match.get("hscore")
            ascore = match.get("ascore")

            # check if any key is missing in the match dictionary
            if round is None or date is None or team1 is None or team2 is None or hscore is None or ascore is None:
                print(f"Match data is missing a key: {match}")
                continue

            # create a dictionary with the match data
            match_dict = {"year": year, "round": round, "team1": team1, "team2": team2, "hscore": hscore, "ascore": ascore}

            # add the match data to the DataFrame
            match_data = match_data.append(match_dict, ignore_index=True)
    else:
        print(f"Failed to retrieve match data for year {year}.")

# print the match data
print(match_data)


     year round             team1                   team2 hscore ascore
0    2018     1          Adelaide                Essendon     99     87
1    2018     1   North Melbourne              Gold Coast     55     39
2    2018     1  Western Bulldogs  Greater Western Sydney    133     51
3    2018     1       Collingwood                Hawthorn    101     67
4    2018     1           Geelong               Melbourne     94     97
..    ...   ...               ...                     ...    ...    ...
985  2022    25         Fremantle             Collingwood     79     59
986  2022    25    Brisbane Lions               Melbourne     79     92
987  2022    26    Brisbane Lions                 Geelong    120     49
988  2022    26       Collingwood                  Sydney     95     94
989  2022    27            Sydney                 Geelong    133     52

[990 rows x 6 columns]


In [27]:
# save match_data to csv
with open('matchData.csv', 'w') as f:
    match_data.to_csv(f)