In [1]:
import fastf1
import s3fs
import pandas as pd

# --- 1. Configure S3 (MinIO) Connection ---
# The host part of the endpoint is the 'minio' service name from docker-compose.
endpoint_url = 'http://minio:9000'

# Filesystem handle used by the rest of the script for every bucket/object call.
s3 = s3fs.S3FileSystem(
    key='minioadmin',
    secret='minioadmin',
    client_kwargs=dict(endpoint_url=endpoint_url),
)

# --- 2. Create Buckets (safe to re-run) ---
# Each bucket is attempted on its own. With a single try-block around both
# mkdir calls, a FileExistsError for 'raw-data' would skip 'processed-data'
# entirely, leaving it missing when only the first bucket already exists.
created_buckets = []
for bucket_name in ('raw-data', 'processed-data'):
    try:
        s3.mkdir(bucket_name)
    except FileExistsError:
        # Already present: nothing to do for this bucket, move on to the next.
        continue
    created_buckets.append(bucket_name)

# Report "created" if at least one bucket was newly made on this run.
if created_buckets:
    print("Buckets created!")
else:
    print("Buckets already exist.")

# --- 3. Ingest Data ---
# Point the FastF1 cache at the directory mounted as a Docker volume.
fastf1.Cache.enable_cache('/home/jovyan/.cache/fastf1')

print("Loading session data...")
# 'R' selects the Race session of the 2024 Bahrain event.
session = fastf1.get_session(2024, 'Bahrain', 'R')
# Request laps and telemetry; weather data is explicitly switched off.
session.load(
    laps=True,
    telemetry=True,
    weather=False,
)

# --- 4. Get Laps and Save to MinIO as Parquet ---
# Laps come from the loaded session; the target object lives in 'raw-data'.
laps_df = session.laps
file_path = 's3://raw-data/2024_bahrain_laps.parquet'

print("Saving laps to {}...".format(file_path))
# Open the destination object for binary writing and let pandas write
# the Parquet output into that file handle.
with s3.open(file_path, mode='wb') as parquet_out:
    laps_df.to_parquet(parquet_out)

print("Ingestion Complete!")

# You can repeat this for telemetry
# Note: Telemetry is huge! This is your "Big Data"
# car_data = session.car_data
# ... save to parquet

done
