In [1]:
import json
import requests
import pandas as pd
from IPython.display import HTML
import matplotlib.pyplot as plt


# COMP30760 Assignment 1 Task 1
## Data Identification and Collection

In this task, we collect race results data from the Formula 1 racing API provided by Ergast. The API is documented at [https://ergast.com/mrd](https://ergast.com/mrd); the results endpoint queried in this notebook lives under `https://ergast.com/api/f1/`.


## Preparing API and Gathering Data

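We define the URL template used to retrieve Formula 1 race results from the Ergast API. Results are requested for every season from 2000 to 2022 (inclusive) and, within each season, for round numbers 1 to 22; a round number that returns no race for a given season simply contributes no rows.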


In [2]:
# Request configuration for the collection step.
# URL template for one race's results; `year` and `round` are filled in per request.
api_url_pattern = "https://ergast.com/api/f1/{year}/{round}/results.json"
# Seasons 2000 through 2022; the upper bound is written as "last + 1" because
# range() excludes its stop value.
selected_years = range(2000, 2022 + 1)
# Round numbers 1 through 22 are requested for every season.
selected_rounds = range(1, 22 + 1)


## Data Collection Workflow

The following code uses nested loops over the chosen years and rounds to request the results of each race. For every result it extracts the driver's name, constructor, finishing position, points, starting (grid) position, and fastest lap time (left empty when the API supplies none). The rows are then assembled into a DataFrame and saved to `race_results.csv`.


In [None]:
# Seconds to wait on a single request before giving up. Without a timeout a
# stalled connection would hang the whole collection run indefinitely.
REQUEST_TIMEOUT_SECONDS = 30


def extract_result_rows(race_data, year):
    """Flatten one parsed results payload into a list of row dicts.

    Parameters
    ----------
    race_data : dict
        Parsed JSON body of a results response; races are read from
        ``race_data['MRData']['RaceTable']['Races']``.
    year : int
        Season the request was made for; stored in the ``Year`` column.

    Returns
    -------
    list of dict
        One dict per entry in each race's ``Results`` list, with the keys
        ``Year``, ``Round``, ``Driver``, ``Team``, ``Position``, ``Points``,
        ``Start_Position`` and ``Fastest_Lap_Time``. Empty when the payload
        contains no races. Values are kept exactly as delivered by the API
        (no type conversion is applied here).
    """
    rows = []
    for race in race_data['MRData']['RaceTable']['Races']:
        for result in race['Results']:
            driver = result['Driver']
            # Fastest-lap data is optional: fall back to None when the
            # 'FastestLap' entry, or its nested 'Time', is missing instead
            # of raising KeyError on a partially populated record.
            fastest_lap_time = (
                result.get('FastestLap', {}).get('Time', {}).get('time')
            )
            rows.append({
                'Year': year,
                'Round': race['round'],
                'Driver': driver['givenName'] + ' ' + driver['familyName'],
                'Team': result['Constructor']['name'],
                'Position': result['position'],
                'Points': result['points'],
                'Start_Position': result['grid'],
                'Fastest_Lap_Time': fastest_lap_time,
            })
    return rows


# Accumulates one row dict per driver result across all requested races
collected_data = []

# A single Session reuses the underlying connection across the many requests
# (one per year/round combination) instead of reconnecting every time.
with requests.Session() as session:
    for year in selected_years:
        for rnd in selected_rounds:
            url = api_url_pattern.format(year=year, round=rnd)
            response = session.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
            # Fail with a clear HTTP error rather than attempting to parse an
            # error page as JSON further down.
            response.raise_for_status()
            collected_data.extend(extract_result_rows(response.json(), year))

# Create a pandas DataFrame from the collected rows
race_results_df = pd.DataFrame(collected_data)

# Save the DataFrame to a CSV file
race_results_df.to_csv('race_results.csv', index=False)

# Display the initial records in the DataFrame (rich display of the first rows
# rather than printing the entire frame)
race_results_df.head()


## Interpretation of Results

The collected data has been processed and stored in a DataFrame. It includes information about race results for various years and rounds, such as driver names, constructors, positions, points, starting positions, and fastest lap times. This data can be further analyzed and visualized to gain insights into Formula 1 racing performance over the years.
