In [0]:
import os
import time
import requests
import pandas as pd

In [0]:
# Notebook parameter: the season to extract, supplied through a Databricks text widget.
dbutils.widgets.text("year", "", "Año")
# The value is later interpolated into API query strings and Delta table names,
# so surrounding whitespace typed into the widget is removed here.
year = dbutils.widgets.get("year").strip()
print(f"------------------------------------------YEAR: {year}------------------------------------------")

In [0]:
class Extract:
    """Downloads OpenF1 API data into pandas DataFrames and stores them as Delta tables.

    Attributes:
        API_URL: Base URL that every endpoint string is appended to.
        BASE_DIR: Working directory at construction time.
        REQUEST_TIMEOUT: Seconds passed as ``timeout`` to every ``requests.get`` call.
    """

    def __init__(self):
        self.API_URL = "https://api.openf1.org/v1"
        self.BASE_DIR = os.getcwd()
        # Without a timeout a stalled connection would block the notebook forever.
        self.REQUEST_TIMEOUT = 60

    def _fetch_frame(self, url):
        """Request ``url`` and return its JSON payload as a DataFrame.

        Returns ``None`` (after printing the body) when the response is not
        declared as JSON, so the caller can skip it instead of reusing the
        payload of an earlier request.
        """
        print({"complete url": url})
        r = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        # NOTE(review): the HTTP status code is not inspected, so a JSON error
        # body would be normalized like regular data - confirm how the API
        # reports errors before adding raise_for_status().
        if not r.headers.get("Content-Type", "").startswith("application/json"):
            print("Respuesta no es JSON:")
            print(r.text)
            return None
        return pd.json_normalize(r.json())

    def _collect(self, urls, pause=0):
        """Fetch every URL in order and return all rows in a single DataFrame.

        Args:
            urls: Fully formatted URLs to request.
            pause: Seconds to sleep after each request (0 disables the pause).

        Returns:
            The concatenated rows (an empty DataFrame when nothing was
            retrieved), or ``None`` as soon as any request raises.
        """
        frames = []
        try:
            for url in urls:
                frame = self._fetch_frame(url)
                if frame is not None and not frame.empty:
                    frames.append(frame)
                if pause:
                    time.sleep(pause)
            # One concat at the end instead of growing a frame per iteration.
            return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
        except requests.exceptions.RequestException as e:
            # HTTPError is a subclass of RequestException, so it is handled here too.
            print(f"Error during request: {e}")
            return None
        except Exception as e:
            print(f"An unexpected error occurred: {e}")
            return None

    def extract_data(self, _endpoint_: str = None, df: pd.DataFrame = None, meeting_key = None, session_key=None):
        """Download the data for one endpoint.

        The endpoint kind is chosen by substring match on ``_endpoint_``, tested
        in this order: 'sessions'/'meetings', 'laps', 'drivers', 'car_data'.

        Args:
            _endpoint_: Path plus query string appended to ``API_URL``. For the
                'laps', 'drivers' and 'car_data' kinds it is a ``str.format``
                template filled from columns of ``df``:
                laps -> (session_key, driver_number), one request per distinct pair;
                drivers -> (session_key), one request per distinct key;
                car_data -> (driver_number, session_key), one request per row.
            df: Source of the template values; not used for sessions/meetings.
            meeting_key: Unused; kept for backward compatibility.
            session_key: Unused; kept for backward compatibility.

        Returns:
            A DataFrame with every retrieved row (possibly empty), or ``None``
            when a request fails or the endpoint is missing/unrecognised.
        """
        if not _endpoint_:
            return None
        template = self.API_URL + _endpoint_

        if 'sessions' in _endpoint_ or 'meetings' in _endpoint_:
            # Single request; the endpoint is used verbatim (no formatting).
            return self._collect([template], pause=5)

        if 'laps' in _endpoint_:
            columns, distinct, pause = ["session_key", "driver_number"], True, 0
        elif 'drivers' in _endpoint_:
            columns, distinct, pause = ["session_key"], True, 5
        elif 'car_data' in _endpoint_:
            columns, distinct, pause = ["driver_number", "session_key"], False, 0
        else:
            return None

        if df is None or df.empty:
            return pd.DataFrame()

        params = df[columns]
        if distinct:
            # Distinct, key-sorted combinations with missing keys dropped; this
            # does not require any column other than the keys themselves.
            params = params.dropna().drop_duplicates().sort_values(columns)
        # itertuples keeps each column's own dtype, so integer keys are not
        # rendered as floats in the URL when another column holds floats.
        urls = [template.format(*values) for values in params.itertuples(index=False, name=None)]
        return self._collect(urls, pause=pause)

    def save_raw_data(self, __df__: pd.DataFrame, __file_name__, year):
        """Write a non-empty DataFrame to the Delta table ``formula_1.raw_<name>_<year>``.

        The table is written in overwrite mode. Nothing is written when
        ``__df__`` is ``None`` or empty.

        Args:
            __df__: Rows to persist.
            __file_name__: Dataset name used inside the table name.
            year: Suffix of the table name.
        """
        if __df__ is None or __df__.empty:
            print(f"No data to save for {__file_name__}")
            return
        print(f"EL DATAFRAME POSEE {__df__.shape[0]} FILAS")
        # `spark` is the session object provided by the notebook runtime.
        df = spark.createDataFrame(__df__)
        df.write.format("delta").mode("overwrite").saveAsTable(f"formula_1.raw_{__file_name__}_{year}")
        # The previous message claimed an append although the write mode is overwrite.
        print(f"Data was written in overwrite mode to table {__file_name__}")
        print(f"Table formula_1.raw_{__file_name__}_{year} was saved")

In [0]:
# Shared Extract instance; main() reads this module-level name.
extract = Extract()

In [0]:
def main(year, min_date, max_date):
    """Extract and persist every dataset for one season, stage by stage.

    Stages run in this order: sessions, meetings, drivers, cars data, laps.
    The drivers stage is driven by the sessions result; the cars data and laps
    stages are driven by the drivers result. Each stage prints a banner and the
    frame summary, then saves the frame through the module-level ``extract``.

    Args:
        year: Used in the meetings query and as the suffix of every table name.
        min_date: Lower bound inserted into the sessions and meetings queries.
        max_date: Upper bound inserted into the sessions query.

    Raises:
        RuntimeError: If a stage's extraction returns ``None`` (a failed
            request), instead of failing later with an AttributeError.
    """

    def run_stage(banner, table_name, endpoint, source=None):
        """Run one extract-and-save stage and return the extracted frame."""
        print(banner)
        frame = extract.extract_data(endpoint, source)
        if frame is None:
            raise RuntimeError(f"Extraction of '{table_name}' failed; see the messages above")
        # info() prints its own summary and returns None, so it is not wrapped in print().
        frame.info()
        extract.save_raw_data(frame, table_name, year)
        print("")
        print("")
        print("")
        return frame

    sessions = run_stage(
        "-----------------------------------------sessions-----------------------------------------",
        "sessions",
        f"/sessions?date_start>{min_date}&date_end<{max_date}",
    )
    run_stage(
        "-----------------------------------------meetings-----------------------------------------",
        "meetings",
        f"/meetings?date_start>{min_date}&year={year}",
    )
    drivers = run_stage(
        "-----------------------------------------drivers-----------------------------------------",
        "drivers",
        "/drivers?session_key={}",
        sessions,
    )
    run_stage(
        "-----------------------------------------cars data-----------------------------------------",
        "cars_data",
        "/car_data?driver_number={}&session_key={}&speed>=290",
        drivers,
    )
    run_stage(
        "-----------------------------------------laps-----------------------------------------",
        "laps",
        "/laps?session_key={}&driver_number={}",
        drivers,
    )

In [0]:
# The widget defaults to an empty string; a blank or malformed year would build
# invalid API filters and a table name with no year suffix, so stop early.
if not (len(year) == 4 and year.isdigit()):
    raise ValueError(f"Widget 'year' must be a four-digit year, got {year!r}")

# Date bounds for the season, used as filters by main().
min_date = f'{year}-01-01T00:00:00+00:00'
max_date = f'{year}-12-31T00:00:00+00:00'
main(year, min_date, max_date)