In [None]:
# Notebook: 10_Streaming_Trigger.ipynb
# Run this notebook *while* 09_Streaming_Prediction_Service.ipynb is running

import s3fs
import pandas as pd
import time

# --- 1. Configure S3 (MinIO) Connection ---
# MinIO is addressed through the S3 API; the only non-standard setting is the
# custom endpoint below.
# NOTE(review): key/secret look like MinIO's stock credentials -- fine for a
# local sandbox, confirm before pointing this at anything shared.
endpoint_url = "http://minIO:9000"

# One options dict is shared by the s3fs filesystem and by pandas readers.
storage_options = dict(
    key="minioadmin",
    secret="minioadmin",
    client_kwargs=dict(endpoint_url=endpoint_url),
)

# Filesystem handle used to create folders and open objects for writing.
s3 = s3fs.S3FileSystem(**storage_options)

# --- 2. Define Input and Output Paths ---
# Input: laps are borrowed from a race that was already downloaded;
# Monaco (Round 8) serves as the sample.
source_file = "s3://raw-data/laps/2024_08_Monaco.parquet"

# Output: the folder the *other* notebook is watching. It is created up front
# (exist_ok=True makes this a no-op on re-runs) so the later file drop has a
# destination.
output_dir = "s3://streaming-input/"
s3.mkdirs(output_dir, exist_ok=True)

print(f"Loading sample data from {source_file}...")

# --- 3. Load Sample Data ---
# Only the columns our stream expects are forwarded.
stream_columns = [
    "Driver", "LapTime", "LapNumber", "TyreLife", "Compound", "IsAccurate"
]

# Loading and uploading are guarded separately so that a failed upload is not
# misreported as a failed load. Errors are printed rather than raised to keep
# the cell's best-effort behaviour.
try:
    df = pd.read_parquet(source_file, storage_options=storage_options)

    # Let's just simulate 5 laps from VER
    simulated_data = df.query("Driver == 'VER'").head(5)[stream_columns]
except Exception as e:
    print(f"Error loading sample data: {e}")
    print("Make sure '07_Bulk_Ingestion' ran successfully and the Monaco file exists.")
else:
    print("Sample data loaded:")
    print(simulated_data)

    if simulated_data.empty:
        # An empty file would "trigger" the stream without delivering any laps.
        print("No laps found for driver 'VER' - nothing to send to the stream.")
    else:
        # --- 4. "Drop" the File into the Streaming Folder ---
        # Nanosecond resolution keeps the name unique even when the cell is
        # re-run within the same second (a reused path would overwrite the
        # previous object instead of adding a new file).
        output_filename = f'new_laps_{time.time_ns()}.parquet'
        output_path = f'{output_dir}{output_filename}'

        print(f"\n---> Triggering stream by saving file to: {output_path}")

        try:
            with s3.open(output_path, 'wb') as f:
                simulated_data.to_parquet(
                    f,
                    index=False,
                    version='2.4',
                    coerce_timestamps='us'
                )
        except Exception as e:
            print(f"Error writing file to {output_path}: {e}")
            print("Make sure the MinIO endpoint is reachable and the streaming-input location is writable.")
        else:
            print("File dropped! Check your other notebook... (Notebook 09)")