In [1]:
# Shell escape: fetch ipecho.net/plain with curl and show the response in the cell output.
# NOTE(review): the saved output is an IP address (presumably this machine's public IP);
# consider clearing the output before sharing the notebook.
!curl ipecho.net/plain

34.106.88.226

In [2]:
import pandas as pd
import requests
import json
import time
import csv

In [None]:
# Workbook that holds the zip codes to query.
file_name = 'Zips.xlsx'

# Parse the workbook; the 'ZipCode' column is read as str rather than as a number.
df = pd.read_excel(file_name, dtype={'ZipCode': str})

# Fail early when the expected 'ZipCode' column is absent.
if 'ZipCode' not in df.columns:
    raise ValueError("The 'ZipCode' column was not found in the Excel file.")

# Keep only the non-missing zip codes, as a plain Python list.
zip_codes = df['ZipCode'].dropna().tolist()

# Split the list into consecutive groups of `chunk_size`; the last group may be shorter.
chunk_size = 10
chunk_starts = range(0, len(zip_codes), chunk_size)
chunks = [zip_codes[start:start + chunk_size] for start in chunk_starts]

# Show every chunk with its 1-based position, i.e. "Chunk N" is chunks[N - 1].
for i, chunk in enumerate(chunks, start=1):
    print(f"Chunk {i}: {chunk}")

Chunk 1: ['90011', '92154', '95213', '94112', '93722', '95823', '90805', '94601', '93309', '92805']
Chunk 2: ['02124', '01604', '01109', '02139', '01852', '02301', '01902', '02169', '02704', '02720']
Chunk 3: ['97206', '97402', '97301', '97080', '97124', '97702', '97008', '97504', '97330', '97477']
Chunk 4: ['77036', '78251', '75217', '78745', '76244', '79936', '76010', '78414', '75025', '79424']
Chunk 5: ['32218', '33125', '33647', '32811', '33710', '34953', '33914', '33012', '32304', '33312']
Chunk 6: ['30318', '30901', '31907', '31201', '31419', '30601', '30268', '30328', '30075', '31088']


In [None]:
# Inspect the second chunk (index 1); the printout above labels it "Chunk 2".
chunks[1]

['02124',
 '01604',
 '01109',
 '02139',
 '01852',
 '02301',
 '01902',
 '02169',
 '02704',
 '02720']

##1st time fetching

In [None]:
# Define the URL
# Search endpoint that fetch_vehicles POSTs to; read as a module-level name inside that function.
url = "https://apik.carvana.io/merch/search/api/v2/search"

# Define the zip codes to fetch data for
# NOTE(review): chunks[5] is the sixth chunk in the printout above ('30318', ...), yet this
# cell later writes its results to 'FL_vehicles.csv' — confirm that the index and the
# output file name agree.
zip_codes = chunks[5]

# Set to keep track of unique vehicles
# Holds the stockNumber of every vehicle recorded so far, shared across all zip codes.
unique_vehicles = set()

# Prepare to store vehicle data
# One flattened dict per recorded vehicle; this list becomes the rows of the CSV.
vehicle_data = []

# Function to recursively flatten nested dictionaries
def flatten_dict(d, parent_key='', sep='_'):
    """Collapse a nested dictionary into a single-level dictionary.

    Keys of nested dictionaries are joined to their parent's key with ``sep``
    (``{'a': {'b': 1}}`` becomes ``{'a_b': 1}``). List values are serialised
    with ``json.dumps`` so each one fits into a single CSV cell. Every other
    value is copied unchanged. An empty nested dictionary contributes no keys.

    Args:
        d: Dictionary to flatten.
        parent_key: Prefix for every produced key; empty for the top level.
        sep: Separator placed between a parent key and a child key.

    Returns:
        A new flat dictionary; ``d`` itself is not modified.
    """
    flat = {}
    for key, value in d.items():
        # Top-level keys are kept as-is; deeper keys carry their ancestors' path.
        path = f"{parent_key}{sep}{key}" if parent_key else key
        if isinstance(value, dict):
            flat.update(flatten_dict(value, path, sep=sep))
        elif isinstance(value, list):
            flat[path] = json.dumps(value)
        else:
            flat[path] = value
    return flat

# Collect all possible headers dynamically
all_headers = set()

# Function to fetch vehicles for a specific zip code
def fetch_vehicles(zip_code, target_count=50, max_pages=1000, timeout=30):
    """Record vehicles for one zip code that have not been recorded yet.

    Pages through the search endpoint stored in the module-level ``url``
    (24 results per page). Every vehicle whose ``stockNumber`` is not already
    in ``unique_vehicles`` is flattened with ``flatten_dict``, tagged with a
    ``zip_code`` field, appended to ``vehicle_data``, and its keys are merged
    into ``all_headers``.

    Paging stops as soon as one of these holds:
      * at least ``target_count`` new vehicles were recorded for this zip code
        (checked between pages, so the final count can exceed the target);
      * a page comes back without vehicles;
      * the request fails, returns a non-200 status, or returns invalid JSON;
      * ``max_pages`` pages have been requested.

    Only the first condition existed before, so a zip code with fewer than
    ``target_count`` new vehicles (or a failing endpoint) kept the loop
    running forever.

    Args:
        zip_code: Zip code sent as ``zip5`` in the request payload.
        target_count: Number of new vehicles after which paging stops.
        max_pages: Upper bound on the number of pages requested.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        None. Results are stored in the module-level collections and a
        summary line is printed.
    """
    request_headers = {
        'Content-Type': 'application/json',
        # '*/*' is the catch-all media range; a bare '/' is not a valid Accept entry.
        'Accept': 'application/json, text/plain, */*',
        'Authorization': 'Bearer',
        'Origin': 'https://www.carvana.com',
        'Referer': 'https://www.carvana.com/',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36'
    }

    page = 1
    vehicles_fetched = 0

    while vehicles_fetched < target_count and page <= max_pages:
        payload = {
            "analyticsData": {
                "browser": "Chrome",
                "clientId": "home_ui",
                "deviceName": "",
                "isMobileDevice": False,
                "referrer": ""
            },
            "pagination": {
                "page": page,
                "pageSize": 24
            },
            "zip5": zip_code
        }

        try:
            response = requests.post(
                url,
                headers=request_headers,
                data=json.dumps(payload),
                timeout=timeout,  # without a timeout a stalled connection blocks forever
            )
        except requests.exceptions.RequestException as e:
            print(f"Request failed for zip code {zip_code} on page {page}: {e}")
            break

        if response.status_code != 200:
            print(f"Failed to fetch data for zip code {zip_code} on page {page}, status code: {response.status_code}")
            break

        try:
            response_data = response.json()
        except ValueError as e:  # JSON decode errors are ValueError subclasses
            print(f"Failed to decode JSON response for zip code {zip_code} on page {page}: {e}")
            break

        # Tolerate a missing or null 'inventory' / 'vehicles' entry.
        vehicles = (response_data.get('inventory') or {}).get('vehicles') or []
        if not vehicles:
            # An empty page means the results are exhausted.
            break

        for vehicle in vehicles:
            stock_number = vehicle.get('stockNumber')
            if not stock_number or stock_number in unique_vehicles:
                continue

            unique_vehicles.add(stock_number)

            # Flatten the vehicle dictionary and remember which zip code produced it
            flat_vehicle = flatten_dict(vehicle)
            flat_vehicle["zip_code"] = zip_code

            # Collect headers so the CSV gets a column for every key seen
            all_headers.update(flat_vehicle.keys())

            vehicle_data.append(flat_vehicle)
            vehicles_fetched += 1

        # Move to the next page
        page += 1

        # Sleep to avoid hitting rate limits
        time.sleep(2)

    print(f"Total vehicles fetched for zip code {zip_code}: {vehicles_fetched}")

# Fetch vehicles for each zip code
for zip_code in zip_codes:
    print(f"Fetching vehicles for zip code {zip_code}")
    fetch_vehicles(zip_code)

# Write the collected vehicle data to a CSV file.
# The context manager closes the file even when writing a row raises, which the
# previous explicit open()/close() pair did not guarantee.
with open('FL_vehicles.csv', 'w', newline='', encoding='utf-8') as csv_file:
    # Columns are the alphabetically sorted union of the keys of all flattened vehicles.
    csv_writer = csv.DictWriter(csv_file, fieldnames=sorted(all_headers))

    # Write headers and rows
    csv_writer.writeheader()
    csv_writer.writerows(vehicle_data)

print(f"Successfully saved vehicle data for {len(unique_vehicles)} unique vehicles!")


Fetching vehicles for zip code 30318


KeyboardInterrupt: 

##Extracted VIN numbers

In [None]:
# Function to fetch VIN numbers from the CSV and store them in a DataFrame
def extract_vin_from_csv(file_name):
    """Load a CSV file and return its 'vin' column as a one-column DataFrame.

    Args:
        file_name: Path of the CSV file to read.

    Returns:
        A DataFrame containing only the 'vin' column, or None when the file is
        missing, empty, lacks a 'vin' column, or cannot be read for any other
        reason. In every failure case a message describing the problem is
        printed instead of an exception being raised.
    """
    problem = None
    try:
        records = pd.read_csv(file_name)
        if 'vin' in records.columns:
            # Double brackets keep the result a DataFrame rather than a Series.
            return records[['vin']]
        raise ValueError("The column 'vin' does not exist in the CSV file")
    except FileNotFoundError:
        problem = f"Error: The file '{file_name}' was not found."
    except pd.errors.EmptyDataError:
        problem = "Error: The file is empty."
    except Exception as exc:
        # Also reports the missing-column ValueError raised above.
        problem = f"An error occurred: {exc}"

    print(problem)
    return None

# Specify the CSV file name
# NOTE(review): 'unique_car_records.csv' is not written by any cell visible in this
# notebook — confirm where it comes from.
file_name = 'unique_car_records.csv'

# Extract the VIN numbers
# vin_df is None when extract_vin_from_csv could not produce the column; the refetching
# cell below indexes vin_df['vin'], so it needs this load to have succeeded.
vin_df = extract_vin_from_csv(file_name)

# Check and print the result
if vin_df is not None:
    print("VIN Numbers extracted successfully:")
    print(vin_df)


VIN Numbers extracted successfully:
                     vin
0      WDBSK7BA7CF168963
1      1C6RR6LT6JS180829
2      1C6HJTAG5NL164399
3      3VWC57BU6MM089869
4      5UXCR6C51KLK80556
...                  ...
34669  3C4NJCAB6LT102396
34670  KL4CJHSB9DB139488
34671  5YJ3E1EB7PF609953
34672  3GNKBBRA0MS557703
34673  5J6RM3H79DL030904

[34674 rows x 1 columns]


##Refetching code

In [None]:
# Define the URL
# Search endpoint that fetch_vehicles POSTs to; read as a module-level name inside that function.
url = "https://apik.carvana.io/merch/search/api/v2/search"

# Define the zip codes to fetch data for
# chunks[1] is the second chunk in the printout above ('02124', ...); this cell writes
# its results to 'MA_vehicles.csv'.
zip_codes = chunks[1]

# Prepare to store vehicle data
# Reset to empty each time this cell runs; fetch_vehicles appends one flattened dict per vehicle.
vehicle_data = []

# Function to recursively flatten nested dictionaries
def flatten_dict(d, parent_key='', sep='_'):
    """Turn a nested dictionary into a flat one with joined keys.

    A value that is itself a dictionary is expanded recursively, and its keys
    are prefixed with the parent key and ``sep``. A list value is stored as
    its ``json.dumps`` text so it can live in one CSV cell. Anything else is
    copied as-is. An empty nested dictionary yields no keys.

    NOTE(review): this repeats the flatten_dict defined in the first fetching
    cell of this notebook; consider defining it once.

    Args:
        d: Dictionary to flatten.
        parent_key: Key prefix inherited from the enclosing level ('' at the top).
        sep: String inserted between the prefix and each key.

    Returns:
        A newly built flat dictionary.
    """
    flattened = {}
    for name, item in d.items():
        full_name = f"{parent_key}{sep}{name}" if parent_key else name
        if isinstance(item, dict):
            flattened.update(flatten_dict(item, full_name, sep=sep))
            continue
        # Lists become JSON text; scalars pass through untouched.
        flattened[full_name] = json.dumps(item) if isinstance(item, list) else item
    return flattened

# Collect all possible headers dynamically
all_headers = set()

# Function to fetch vehicles for a specific zip code
def fetch_vehicles(zip_code, target_count=1000, max_pages=1000, timeout=30):
    """Record vehicles for one zip code whose VIN is not already known.

    Pages through the search endpoint stored in the module-level ``url``
    (24 results per page). A vehicle is recorded when its ``vin`` is not in
    ``vin_df['vin']`` and its ``stockNumber`` is not already present in
    ``vehicle_data``. Recorded vehicles are flattened with ``flatten_dict``,
    tagged with a ``zip_code`` field, appended to ``vehicle_data``, and their
    keys are merged into ``all_headers``.

    Paging stops as soon as one of these holds:
      * at least ``target_count`` vehicles were recorded for this zip code
        (checked between pages, so the final count can exceed the target);
      * a page comes back without vehicles;
      * the request fails or returns an HTTP error status;
      * ``max_pages`` pages have been requested.

    A page whose body is not valid JSON is skipped and the next page is
    requested. Previously the same page was retried without advancing or
    sleeping, which could loop forever.

    Args:
        zip_code: Zip code sent as ``zip5`` in the request payload.
        target_count: Number of recorded vehicles after which paging stops.
        max_pages: Upper bound on the number of pages requested.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        None. Results are stored in the module-level collections and a
        summary line is printed.

    Note:
        ``vin_df`` must be a DataFrame with a 'vin' column; it is None when
        ``extract_vin_from_csv`` failed.
    """
    request_headers = {
        'Content-Type': 'application/json',
        # '*/*' is the catch-all media range; a bare '/' is not a valid Accept entry.
        'Accept': 'application/json, text/plain, */*',
        'Authorization': 'Bearer',
        'Origin': 'https://www.carvana.com',
        'Referer': 'https://www.carvana.com/',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36'
    }

    # Build the lookup sets once; scanning the VIN column and rebuilding the
    # stock-number list for every single vehicle made the loop quadratic.
    known_vins = set(vin_df['vin'])
    seen_stock_numbers = {row.get('stockNumber') for row in vehicle_data}

    page = 1
    vehicles_fetched = 0

    while vehicles_fetched < target_count and page <= max_pages:
        payload = {
            "analyticsData": {
                "browser": "Chrome",
                "clientId": "home_ui",
                "deviceName": "",
                "isMobileDevice": False,
                "referrer": ""
            },
            "pagination": {
                "page": page,
                "pageSize": 24
            },
            "zip5": zip_code
        }

        try:
            response = requests.post(
                url,
                headers=request_headers,
                data=json.dumps(payload),
                timeout=timeout,  # without a timeout a stalled connection blocks forever
            )
            response.raise_for_status()  # Will raise an HTTPError for bad responses
        except requests.exceptions.RequestException as e:
            print(f"Request failed for zip code {zip_code} on page {page}: {e}")
            break  # Stop further attempts if there's a request error

        vehicles = []
        if response.status_code == 200:
            try:
                response_data = response.json()
            except ValueError as e:  # JSON decode errors are ValueError subclasses
                # Skip this page but still advance below, so the loop cannot spin on it.
                print(f"Failed to decode JSON response for zip code {zip_code} on page {page}: {e}")
            else:
                # Tolerate a missing or null 'inventory' / 'vehicles' entry.
                vehicles = (response_data.get('inventory') or {}).get('vehicles') or []
                if not vehicles:
                    # An empty page means the results are exhausted.
                    break
        else:
            print(f"Failed to fetch data for zip code {zip_code} on page {page}, status code: {response.status_code}")

        for vehicle in vehicles:
            vin = vehicle.get('vin')
            stock_number = vehicle.get('stockNumber')

            if not vin or vin in known_vins:
                continue  # already present in vin_df
            if not stock_number or stock_number in seen_stock_numbers:
                continue  # already recorded during this run

            seen_stock_numbers.add(stock_number)

            # Flatten the vehicle dictionary and remember which zip code produced it
            flat_vehicle = flatten_dict(vehicle)
            flat_vehicle["zip_code"] = zip_code

            # Collect headers so the CSV gets a column for every key seen
            all_headers.update(flat_vehicle.keys())

            vehicle_data.append(flat_vehicle)
            vehicles_fetched += 1

        # Move to the next page
        page += 1

        # Sleep to avoid hitting rate limits
        time.sleep(2)

    print(f"Total vehicles fetched for zip code {zip_code}: {vehicles_fetched}")

# Fetch vehicles for each zip code
# vehicle_data and all_headers are shared across the calls, so they accumulate
# the results of every zip code in zip_codes.
for zip_code in zip_codes:
    print(f"Fetching vehicles for zip code {zip_code}")
    fetch_vehicles(zip_code)

# Write the collected vehicle data to a CSV file
# The `with` block closes the file on exit; an IOError is reported by the handler
# below instead of propagating.
try:
    with open('MA_vehicles.csv', 'w', newline='', encoding='utf-8') as csv_file:
        # Columns are the alphabetically sorted union of the keys of all flattened vehicles.
        csv_writer = csv.DictWriter(csv_file, fieldnames=sorted(all_headers))

        # Write headers and rows
        csv_writer.writeheader()
        csv_writer.writerows(vehicle_data)
    print(f"Successfully saved vehicle data for {len(vehicle_data)} vehicles!")
except IOError as e:
    print(f"Failed to write to CSV file: {e}")


Fetching vehicles for zip code 02124
Total vehicles fetched for zip code 02124: 1017
Fetching vehicles for zip code 01604
Total vehicles fetched for zip code 01604: 1004
Fetching vehicles for zip code 01109
Total vehicles fetched for zip code 01109: 1012
Fetching vehicles for zip code 02139
Total vehicles fetched for zip code 02139: 225
Fetching vehicles for zip code 01852
Total vehicles fetched for zip code 01852: 57
Fetching vehicles for zip code 02301
Total vehicles fetched for zip code 02301: 21
Fetching vehicles for zip code 01902
Total vehicles fetched for zip code 01902: 8
Fetching vehicles for zip code 02169
Total vehicles fetched for zip code 02169: 12
Fetching vehicles for zip code 02704
Total vehicles fetched for zip code 02704: 1004
Fetching vehicles for zip code 02720
Total vehicles fetched for zip code 02720: 8
Successfully saved vehicle data for 4368 vehicles!


###EXTRA - Details from NHTSA API

In [1]:
import requests

# Define the URL and payload
url = "https://vpic.nhtsa.dot.gov/api/vehicles/DecodeVINValuesBatch/"
data = {
    'DATA': '3VV4B7AX3KM063031',  # VIN to decode (hard-coded sample value)
    'format': 'JSON'
}

# Start from None so that a failed request cannot leave a stale result from an
# earlier run (or an undefined name) for the cells that read `decoded_data`.
decoded_data = None

# Send a POST request
try:
    # The timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.post(url, data=data, timeout=30)
    response.raise_for_status()  # Raise an exception for HTTP errors

    # Parse the JSON response
    if response.status_code == 200:
        decoded_data = response.json()
        print(decoded_data)
    else:
        # Reached only for non-error statuses other than 200.
        print(f"Error: {response.status_code}, {response.text}")
except requests.exceptions.RequestException as e:
    print(f"An error occurred: {e}")




In [3]:
import json
# Pretty-print the JSON data
# `decoded_data` is produced by the NHTSA request cell above; run that cell first.
print(json.dumps(decoded_data, indent=4))

{
    "Count": 1,
    "Message": "Results returned successfully. NOTE: Any missing decoded values should be interpreted as NHTSA does not have data on the specific variable. Missing value should NOT be interpreted as an indication that a feature or technology is unavailable for a vehicle.",
    "SearchCriteria": "",
    "Results": [
        {
            "ABS": "Standard",
            "ActiveSafetySysNote": "",
            "AdaptiveCruiseControl": "Standard",
            "AdaptiveDrivingBeam": "Standard",
            "AdaptiveHeadlights": "",
            "AdditionalErrorText": "",
            "AirBagLocCurtain": "All Rows",
            "AirBagLocFront": "1st Row (Driver and Passenger)",
            "AirBagLocKnee": "",
            "AirBagLocSeatCushion": "",
            "AirBagLocSide": "1st Row (Driver and Passenger)",
            "AutoReverseSystem": "Standard",
            "AutomaticPedestrianAlertingSound": "",
            "AxleConfiguration": "",
            "Axles": "2",
        