# In [1]:
import requests
import pandas as pd

# Step 1: Get all Makes for the 2024 Model Year
base_url = "https://api.nhtsa.gov/products/vehicle/"
model_year = 2024
makes_url = f"{base_url}makes?modelYear={model_year}&issueType=c"

# requests applies no timeout unless one is given, so without it a stalled
# connection would block the script indefinitely.
response = requests.get(makes_url, timeout=30)

# Check the HTTP status before parsing, so a server error is reported as
# such instead of surfacing as a JSON decoding failure on an error page.
if not response.ok:
    raise ValueError(
        f"Failed to fetch makes data (HTTP {response.status_code})."
    )
makes_data = response.json()

# The rest of the script cannot proceed without the list of makes.
if 'results' not in makes_data:
    raise ValueError("Failed to fetch makes data.")

# Each entry in 'results' is indexed by 'make'; an entry lacking that key
# raises KeyError here.
makes_list = [make['make'] for make in makes_data['results']]

# Step 2: Get all Models for each Make
# Builds `models`, a list of {'modelYear', 'make', 'model'} dicts collected
# across every make in `makes_list`, then wraps it in `models_df`.
models_url = f"{base_url}models"
models = []
for make in makes_list:
    # Pass the query through `params` so requests URL-encodes every value.
    # Interpolating `make` straight into the URL breaks the query string
    # for names containing characters such as '&', '#' or '+'.
    response = requests.get(
        models_url,
        params={'modelYear': model_year, 'make': make, 'issueType': 'c'},
        timeout=30,  # do not hang forever on a stalled connection
    )
    # One failing make should not abort the whole crawl: report it and
    # move on, as is already done for responses without 'results'.
    if not response.ok:
        print(f"Skipping make {make!r}: HTTP {response.status_code}")
        continue
    models_data = response.json()
    if 'results' in models_data:
        models.extend(
            {
                'modelYear': model['modelYear'],
                'make': model['make'],
                'model': model['model'],
            }
            for model in models_data['results']
        )

models_df = pd.DataFrame(models)

# Step 3: Get all complaints for each Make and Model combination
# Builds `complaints`, one dict per complaint returned for each row of
# `models_df`, then wraps it in `complaints_df`.
complaints_url = "https://api.nhtsa.gov/complaints/complaintsByVehicle"
complaints = []
for _, row in models_df.iterrows():
    # Pass the query through `params` so requests URL-encodes every value.
    # Make and model names interpolated directly into the URL corrupt the
    # query string when they contain characters such as '&', '#' or '+'.
    response = requests.get(
        complaints_url,
        params={
            'make': row['make'],
            'model': row['model'],
            'modelYear': row['modelYear'],
        },
        timeout=30,  # do not hang forever on a stalled connection
    )
    # One failing vehicle should not abort the whole crawl: report it and
    # move on, as is already done for responses without 'results'.
    if not response.ok:
        print(
            f"Skipping {row['make']!r} {row['model']!r}: "
            f"HTTP {response.status_code}"
        )
        continue
    complaints_data = response.json()
    if 'results' in complaints_data:
        complaints.extend(
            {
                # Vehicle identity comes from the row that was queried.
                'make': row['make'],
                'model': row['model'],
                'modelYear': row['modelYear'],
                # These fields are indexed directly, so a record lacking
                # any of them raises KeyError.
                'odiNumber': complaint['odiNumber'],
                'manufacturer': complaint['manufacturer'],
                'crash': complaint['crash'],
                'fire': complaint['fire'],
                'numberOfInjuries': complaint['numberOfInjuries'],
                'numberOfDeaths': complaint['numberOfDeaths'],
                # These fields are optional: a missing one becomes None.
                'dateOfIncident': complaint.get('dateOfIncident'),
                'dateComplaintFiled': complaint.get('dateComplaintFiled'),
                'vin': complaint.get('vin'),
                'components': complaint.get('components'),
                'summary': complaint.get('summary'),
            }
            for complaint in complaints_data['results']
        )

complaints_df = pd.DataFrame(complaints)

# Save to Parquet format
# The file name is derived from `model_year` so that it always matches the
# model year the data was fetched for (for 2024 this is
# "vehicle_complaints_2024.parquet").
output_file = f"vehicle_complaints_{model_year}.parquet"
# index=False: the DataFrame index is not written to the file.
complaints_df.to_parquet(output_file, index=False)

print(f"Data saved to {output_file}")
