### Austin Crash Data Extract Script

In [7]:
# 📦 Importing libraries
import requests
import pandas as pd
import json
import os
import io
from azure.storage.blob import BlobServiceClient
from time import sleep

# 🔐 Load config from config.json
def load_config(config_path="config.json"):
    """Read the Azure settings used by this script from a JSON file.

    Args:
        config_path: Path of the JSON file to read. Defaults to
            ``config.json`` relative to the current working directory.

    Returns:
        A ``(connection_string, container_name)`` tuple holding the values
        of the ``AZURE_CONNECTION_STRING`` and ``austin_container`` keys.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If either expected key is absent.
    """
    with open(config_path, mode="r", encoding="utf-8") as handle:
        settings = json.loads(handle.read())

    connection_string = settings["AZURE_CONNECTION_STRING"]
    container_name = settings["austin_container"]
    return connection_string, container_name

# Read the Azure connection string and target container name once, when this
# cell runs; load_config() is called with its default path ("config.json").
AZURE_CONNECTION_STRING, CONTAINER_NAME = load_config()

# 🌐 API Setup
# API_ENDPOINT is the URL that fetch_all_data() requests; LIMIT is the page
# size it sends as "$limit" and the step by which it advances "$offset".
API_ENDPOINT = "https://data.austintexas.gov/resource/y2wy-tgr5.json"
LIMIT = 1000

def fetch_all_data():
    """Download every row of the dataset at ``API_ENDPOINT``, page by page.

    Pages of ``LIMIT`` rows are requested with the ``$limit``/``$offset``
    query parameters until the API returns an empty page.

    Returns:
        list: The decoded JSON rows from all pages, in the order received.

    Raises:
        requests.HTTPError: If any page request returns an error status.
        requests.Timeout: If a request does not complete within the timeout.
    """
    all_records = []
    offset = 0

    # One session for the whole run so the HTTP connection is reused
    # instead of being re-established for every page.
    with requests.Session() as session:
        while True:
            print(f"📥 Fetching rows {offset} to {offset + LIMIT - 1}")
            response = session.get(
                API_ENDPOINT,
                params={
                    "$limit": LIMIT,
                    "$offset": offset,
                    # Offset paging is only reliable with a stable sort;
                    # without it, pages may overlap or skip rows.
                    "$order": ":id",
                },
                # Without a timeout a stalled connection blocks forever.
                timeout=30,
            )
            response.raise_for_status()
            batch = response.json()

            if not batch:
                break  # No more data

            all_records.extend(batch)
            offset += LIMIT
            sleep(0.2)  # Rate limit protection

    print(f"✅ Total records retrieved: {len(all_records)}")
    return all_records

# 🧼 Fetch and convert to CSV string
# Runs the full download, loads the returned rows into a DataFrame, and
# serialises it to CSV text in memory (index=False leaves out the row index).
records = fetch_all_data()
df = pd.DataFrame(records)
csv_string = df.to_csv(index=False)

# ☁️ Upload to Azure Blob Storage
def upload_to_azure(csv_text, blob_name):
    """Upload CSV text as a blob in the configured Azure container.

    The container named by ``CONTAINER_NAME`` is created if it does not
    exist yet; an existing blob with the same name is overwritten.

    Args:
        csv_text: The CSV content to upload, as a ``str``.
        blob_name: Name of the blob to create inside the container.

    Raises:
        azure.core.exceptions.AzureError: If creating the container fails for
            any reason other than it already existing, or if the upload fails.
    """
    # Imported here so the block stays self-contained; azure-core is a
    # required dependency of azure-storage-blob.
    from azure.core.exceptions import ResourceExistsError

    blob_service_client = BlobServiceClient.from_connection_string(AZURE_CONNECTION_STRING)
    container_client = blob_service_client.get_container_client(CONTAINER_NAME)

    try:
        container_client.create_container()
    except ResourceExistsError:
        # Only "already exists" is expected here. Auth or network failures
        # must surface instead of being reported as an existing container.
        print(f"📦 Container already exists: {CONTAINER_NAME}")
    else:
        print(f"📦 Created container: {CONTAINER_NAME}")

    print(f"☁️ Uploading to Azure Blob: {blob_name}")
    # upload_blob accepts bytes directly, so no BytesIO wrapper is needed.
    container_client.upload_blob(
        name=blob_name,
        data=csv_text.encode("utf-8"),
        overwrite=True,
    )
    print("✅ Upload complete!")

# 🧾 Final upload call: send the in-memory CSV text to the target blob
upload_to_azure(csv_text=csv_string, blob_name="austincrashdatafull.csv")


📥 Fetching rows 0 to 999
📥 Fetching rows 1000 to 1999
📥 Fetching rows 2000 to 2999
📥 Fetching rows 3000 to 3999
📥 Fetching rows 4000 to 4999
📥 Fetching rows 5000 to 5999
📥 Fetching rows 6000 to 6999
📥 Fetching rows 7000 to 7999
📥 Fetching rows 8000 to 8999
📥 Fetching rows 9000 to 9999
📥 Fetching rows 10000 to 10999
📥 Fetching rows 11000 to 11999
📥 Fetching rows 12000 to 12999
📥 Fetching rows 13000 to 13999
📥 Fetching rows 14000 to 14999
📥 Fetching rows 15000 to 15999
📥 Fetching rows 16000 to 16999
📥 Fetching rows 17000 to 17999
📥 Fetching rows 18000 to 18999
📥 Fetching rows 19000 to 19999
📥 Fetching rows 20000 to 20999
📥 Fetching rows 21000 to 21999
📥 Fetching rows 22000 to 22999
📥 Fetching rows 23000 to 23999
📥 Fetching rows 24000 to 24999
📥 Fetching rows 25000 to 25999
📥 Fetching rows 26000 to 26999
📥 Fetching rows 27000 to 27999
📥 Fetching rows 28000 to 28999
📥 Fetching rows 29000 to 29999
📥 Fetching rows 30000 to 30999
📥 Fetching rows 31000 to 31999
📥 Fetching rows 32000 to 32999
📥