In [1]:
import os
import pandas as pd
import json

# NOTE: `__file__` is not defined inside a notebook kernel, so the script-style
# BASE_DIR/DATA_DIR lookup cannot be used here; both paths are resolved
# relative to the kernel's current working directory.
CSV_FILE = "cars.csv"
JSON_FILE = "cars.json"

# Columns the conversion below reads from every row.
REQUIRED_COLUMNS = ["make", "model", "car_type", "upcharge_percentage"]

print("Looking for CSV at:", CSV_FILE)

# read_csv defaults to comma-separated input. For a tab- or whitespace-
# separated export pass sep="\t" or sep=r"\s+" instead.
df = pd.read_csv(CSV_FILE)

# Normalize headers so lookups are insensitive to case and stray whitespace.
df.columns = df.columns.str.strip().str.lower()
print("Columns found:", df.columns.tolist())  # should show ['make','model','car_type','upcharge_percentage']

# Fail early with a readable message instead of a bare KeyError mid-loop.
missing_columns = [name for name in REQUIRED_COLUMNS if name not in df.columns]
if missing_columns:
    raise KeyError(f"{CSV_FILE} is missing required column(s): {missing_columns}")

# Rows with a missing value would otherwise become the literal string "nan"
# (make/model/car_type) or a bare NaN token in the output, which is not valid
# JSON. Skip them and report how many were dropped.
complete_rows = df[REQUIRED_COLUMNS].dropna()
skipped_rows = len(df) - len(complete_rows)
if skipped_rows:
    print(f"Skipped {skipped_rows} row(s) with missing values")

# Nested structure: {make: {"label": make, "models": {model: {...}}}}
cars = {}

for record in complete_rows.itertuples(index=False):
    make = str(record.make).strip()
    model = str(record.model).strip()

    make_entry = cars.setdefault(make, {"label": make, "models": {}})
    # A repeated make/model pair overwrites the earlier entry (last row wins).
    make_entry["models"][model] = {
        "label": model,
        "car_type": str(record.car_type).strip(),
        "upcharge_percentage": float(record.upcharge_percentage),
    }

# Save JSON
with open(JSON_FILE, "w", encoding="utf-8") as f:
    json.dump(cars, f, indent=2, ensure_ascii=False)

print(f"✅ cars.json created successfully at {JSON_FILE}")

# Show first 2 makes as a sample
sample = {make: cars[make] for make in list(cars)[:2]}
print(json.dumps(sample, indent=2, ensure_ascii=False))

Looking for CSV at: cars.csv
Columns found: ['make', 'model', 'car_type', 'upcharge_percentage']
Columns found: ['make', 'model', 'car_type', 'upcharge_percentage']
✅ cars.json created successfully at cars.json
{
  "Skoda": {
    "label": "Skoda",
    "models": {
      "Octavia": {
        "label": "Octavia",
        "car_type": "Standard",
        "upcharge_percentage": 0.0
      }
    }
  },
  "Acura": {
    "label": "Acura",
    "models": {
      "MDX": {
        "label": "MDX",
        "car_type": "Standard",
        "upcharge_percentage": 0.0
      },
      "RDX": {
        "label": "RDX",
        "car_type": "Standard",
        "upcharge_percentage": 0.0
      }
    }
  }
}


In [1]:
import requests
import pandas as pd

def get_car_makes(timeout: float = 30.0):
    """
    Fetch the makes the NHTSA vPIC API lists for vehicle type "car".

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests can block indefinitely on a stalled connection.

    Returns:
        A list of dicts: [{'id': ..., 'name': ...}, ...], built from the
        MakeId / MakeName fields of each entry in the response's "Results".

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if no response arrives within `timeout` seconds.
    """
    url = "https://vpic.nhtsa.dot.gov/api/vehicles/GetMakesForVehicleType/car?format=json"
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()
    results = res.json()["Results"]
    return [{"id": entry["MakeId"], "name": entry["MakeName"]} for entry in results]

def get_models_for_make_id(make_id: int, timeout: float = 30.0):
    """
    Fetch the model names the NHTSA vPIC API lists for one make.

    Args:
        make_id: The make's vPIC id (the "id" value from get_car_makes()).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests can block indefinitely on a stalled connection.

    Returns:
        A list with the Model_Name of every entry in the response's "Results".

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if no response arrives within `timeout` seconds.
    """
    url = f"https://vpic.nhtsa.dot.gov/api/vehicles/GetModelsForMakeId/{make_id}?format=json"
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()
    results = res.json()["Results"]
    return [entry["Model_Name"] for entry in results]

# Build a flat list of {"make", "model"} records: one API call for the list of
# makes, then one call per make for its models.
all_rows = []
makes = get_car_makes()

for make in makes:
    # Best-effort: a failure for one make is reported and the loop moves on,
    # keeping whatever rows were already collected.
    try:
        models = get_models_for_make_id(make["id"])
        for model in models:
            all_rows.append({
                "make": make["name"].strip(),
                "model": model.strip()
            })
    except Exception as e:
        print(f"Skipping {make['name']} (ID {make['id']}): {e}")

# Create the DataFrame and drop duplicates once. Naming the columns explicitly
# means the CSV still has the expected header when no rows were collected.
df = (
    pd.DataFrame(all_rows, columns=["make", "model"])
    .drop_duplicates()
    .reset_index(drop=True)
)

df.to_csv("car_makes_models_from_api.csv", index=False)
print("✅ Saved car_makes_models_from_api.csv with", len(df), "rows")
# Preview first 20 rows
# df.head(20)




✅ Saved car_makes_models_from_api.csv with 3130 rows


In [2]:
import requests
import pandas as pd
import time

def get_car_makes(timeout: float = 30.0):
    """
    Fetch the makes the NHTSA vPIC API lists for vehicle type "car" and print
    how long the request took.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests can block indefinitely on a stalled connection.

    Returns:
        A list of dicts: [{'id': ..., 'name': ...}, ...], built from the
        MakeId / MakeName fields of each entry in the response's "Results".

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if no response arrives within `timeout` seconds.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    started = time.perf_counter()
    url = "https://vpic.nhtsa.dot.gov/api/vehicles/GetMakesForVehicleType/car?format=json"
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()
    results = res.json()["Results"]
    makes = [{"id": entry["MakeId"], "name": entry["MakeName"]} for entry in results]
    elapsed = time.perf_counter() - started
    print(f"⏱ get_car_makes() took {elapsed:.2f} seconds")
    return makes

def get_models_for_make_id(make_id: int, timeout: float = 30.0):
    """
    Fetch the model names the NHTSA vPIC API lists for one make and print how
    long the request took.

    Args:
        make_id: The make's vPIC id (the "id" value from get_car_makes()).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests can block indefinitely on a stalled connection.

    Returns:
        A list with the Model_Name of every entry in the response's "Results".

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if no response arrives within `timeout` seconds.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    started = time.perf_counter()
    url = f"https://vpic.nhtsa.dot.gov/api/vehicles/GetModelsForMakeId/{make_id}?format=json"
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()
    results = res.json()["Results"]
    models = [entry["Model_Name"] for entry in results]
    elapsed = time.perf_counter() - started
    print(f"   ↳ get_models_for_make_id({make_id}) took {elapsed:.2f} seconds, got {len(models)} models")
    return models

# Measure the whole process with a monotonic clock so the reported duration
# cannot be skewed by system clock adjustments.
overall_start = time.perf_counter()

# Build a flat list of {"make", "model"} records: one API call for the list of
# makes, then one call per make for its models.
all_rows = []
makes = get_car_makes()

for make in makes:
    # Best-effort: a failure for one make is reported and the loop moves on,
    # keeping whatever rows were already collected.
    try:
        models = get_models_for_make_id(make["id"])
        for model in models:
            all_rows.append({
                "make": make["name"].strip(),
                "model": model.strip()
            })
    except Exception as e:
        print(f"Skipping {make['name']} (ID {make['id']}): {e}")

# Create the DataFrame and drop duplicates. Naming the columns explicitly
# means the CSV still has the expected header when no rows were collected.
df = (
    pd.DataFrame(all_rows, columns=["make", "model"])
    .drop_duplicates()
    .reset_index(drop=True)
)

df.to_csv("car_makes_models_from_api.csv", index=False)

overall_end = time.perf_counter()

print("✅ Saved car_makes_models_from_api.csv with", len(df), "rows")
print(f"⏱ Total process time: {overall_end - overall_start:.2f} seconds")


⏱ get_car_makes() took 0.46 seconds
   ↳ get_models_for_make_id(440) took 0.17 seconds, got 18 models
   ↳ get_models_for_make_id(441) took 0.18 seconds, got 7 models
   ↳ get_models_for_make_id(442) took 0.19 seconds, got 17 models
   ↳ get_models_for_make_id(443) took 0.18 seconds, got 14 models
   ↳ get_models_for_make_id(445) took 0.19 seconds, got 15 models
   ↳ get_models_for_make_id(448) took 0.21 seconds, got 56 models
   ↳ get_models_for_make_id(449) took 0.19 seconds, got 60 models
   ↳ get_models_for_make_id(452) took 0.19 seconds, got 258 models
   ↳ get_models_for_make_id(454) took 0.20 seconds, got 6 models
   ↳ get_models_for_make_id(456) took 0.20 seconds, got 8 models
   ↳ get_models_for_make_id(460) took 0.24 seconds, got 149 models
   ↳ get_models_for_make_id(464) took 0.17 seconds, got 17 models
   ↳ get_models_for_make_id(465) took 0.17 seconds, got 17 models
   ↳ get_models_for_make_id(466) took 0.21 seconds, got 19 models
   ↳ get_models_for_make_id(467) took 0.1

In [3]:
import requests
import pandas as pd
import time

def get_car_makes(timeout: float = 30.0):
    """
    Fetch the makes the NHTSA vPIC API lists for vehicle type "car" and print
    how long the request took.

    NOTE(review): the previous docstring said "in the US"; the request itself
    carries no country filter — confirm against the vPIC API documentation.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests can block indefinitely on a stalled connection.

    Returns:
        A list of dicts: [{'id': ..., 'name': ...}, ...], built from the
        MakeId / MakeName fields of each entry in the response's "Results".

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if no response arrives within `timeout` seconds.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    started = time.perf_counter()
    url = "https://vpic.nhtsa.dot.gov/api/vehicles/GetMakesForVehicleType/car?format=json"
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()
    results = res.json()["Results"]
    makes = [{"id": entry["MakeId"], "name": entry["MakeName"]} for entry in results]
    elapsed = time.perf_counter() - started
    print(f"⏱ get_car_makes() took {elapsed:.2f} seconds")
    return makes

def get_models_for_make_id_year(make_id: int, year: int, timeout: float = 30.0):
    """
    Fetch the model names the NHTSA vPIC API lists for one make, one model
    year and vehicle type "car", and print how long the request took.

    NOTE(review): the previous docstring said the result is limited to cars
    "certified/sold" in the US that year; the request only filters by make id,
    model year and vehicle type — confirm the exact semantics in the vPIC docs.

    Args:
        make_id: The make's vPIC id (the "id" value from get_car_makes()).
        year: Model year to query.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests can block indefinitely on a stalled connection.

    Returns:
        A list with the Model_Name of every entry in the response's "Results".

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if no response arrives within `timeout` seconds.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    started = time.perf_counter()
    url = f"https://vpic.nhtsa.dot.gov/api/vehicles/GetModelsForMakeIdYear/makeId/{make_id}/modelyear/{year}/vehicleType/car?format=json"
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()
    results = res.json()["Results"]
    models = [entry["Model_Name"] for entry in results]
    elapsed = time.perf_counter() - started
    print(f"   ↳ get_models_for_make_id_year({make_id}, {year}) took {elapsed:.2f} seconds, got {len(models)} models")
    return models

# Measure the whole process with a monotonic clock so the reported duration
# cannot be skewed by system clock adjustments.
overall_start = time.perf_counter()

# Model years to query: 1996 through 2025 (the range end is exclusive).
# This issues one request per make per year, so narrow the range for a
# quicker run.
years = range(1996, 2026)

all_rows = []
makes = get_car_makes()

for make in makes:
    for year in years:
        # The try sits inside the year loop so that one failed request only
        # loses that make/year combination instead of every remaining year
        # for the make.
        try:
            models = get_models_for_make_id_year(make["id"], year)
            for model in models:
                all_rows.append({
                    "make": make["name"].strip(),
                    "model": model.strip(),
                    "year": year
                })
        except Exception as e:
            print(f"Skipping {make['name']} (ID {make['id']}) for {year}: {e}")

# Create the DataFrame and drop duplicates. Naming the columns explicitly
# means the CSV still has the expected header when no rows were collected.
df = (
    pd.DataFrame(all_rows, columns=["make", "model", "year"])
    .drop_duplicates()
    .reset_index(drop=True)
)

df.to_csv("car_makes_models_us.csv", index=False)

overall_end = time.perf_counter()

print("✅ Saved car_makes_models_us.csv with", len(df), "rows")
print(f"⏱ Total process time: {overall_end - overall_start:.2f} seconds")


⏱ get_car_makes() took 0.54 seconds
   ↳ get_models_for_make_id_year(440, 1996) took 0.18 seconds, got 1 models
   ↳ get_models_for_make_id_year(440, 1997) took 0.18 seconds, got 1 models
   ↳ get_models_for_make_id_year(440, 1998) took 0.36 seconds, got 1 models
   ↳ get_models_for_make_id_year(440, 1999) took 0.18 seconds, got 1 models
   ↳ get_models_for_make_id_year(440, 2000) took 0.18 seconds, got 1 models
   ↳ get_models_for_make_id_year(440, 2001) took 0.22 seconds, got 2 models
   ↳ get_models_for_make_id_year(440, 2002) took 0.18 seconds, got 2 models
   ↳ get_models_for_make_id_year(440, 2003) took 0.22 seconds, got 2 models
   ↳ get_models_for_make_id_year(440, 2004) took 0.24 seconds, got 1 models
   ↳ get_models_for_make_id_year(440, 2005) took 0.20 seconds, got 2 models
   ↳ get_models_for_make_id_year(440, 2006) took 0.17 seconds, got 3 models
   ↳ get_models_for_make_id_year(440, 2007) took 0.16 seconds, got 3 models
   ↳ get_models_for_make_id_year(440, 2008) took 0.2

In [12]:
#CarQuery API Call


import requests
import json
import time

# Browser-like headers sent with every CarQuery request.
headers = {
    "Accept": "application/json",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/120.0.0.0 Safari/537.36"
}

REQUEST_TIMEOUT = 30   # seconds to wait for a response before giving up
REQUEST_DELAY = 0.2    # pause between requests: be polite, avoid rate-limit


def _fetch_makes_for_year(year):
    """
    Return the make_display names CarQuery reports for `year`.

    Every failure mode (network error, HTTP error status, empty body, invalid
    JSON, payload without a "Makes" list) is reported with a message and
    yields an empty list, so the caller can continue with the next year.
    """
    url = f"https://www.carqueryapi.com/api/0.3/?cmd=getMakes&year={year}&sold_in_us=1"
    try:
        res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        res.raise_for_status()
    except requests.RequestException as e:
        print(f"Request failed for year {year}: {e}")
        return []

    if not res.text.strip():
        print(f"No data for year {year}")
        return []

    try:
        data = json.loads(res.text)
    except Exception as e:
        print(f"Failed for year {year}: {e}")
        print("Raw response:", res.text[:200])
        return []

    # A well-formed JSON body without a "Makes" list (for example an error
    # object) used to be treated as "zero makes" without any message, which
    # makes an all-empty result impossible to diagnose.
    if not isinstance(data, dict) or not isinstance(data.get("Makes"), list):
        print(f"Unexpected payload for year {year}: {res.text[:200]}")
        return []

    return [make["make_display"] for make in data["Makes"]]


all_makes = set()

for year in range(1941, 2023):
    all_makes.update(_fetch_makes_for_year(year))
    # Sleep after every request, including failed ones, so error responses
    # do not turn the loop into a rapid-fire retry against the API.
    time.sleep(REQUEST_DELAY)

print("✅ Total unique makes:", len(all_makes))
print(sorted(all_makes))


✅ Total unique makes: 0
[]
