In [42]:
import os
import pandas as pd
import requests
from dotenv import load_dotenv
from urllib.parse import urlencode

# Load environment variables
load_dotenv()

# API key from .env file
API_KEY = os.getenv('DISTANCE_MATRIX_API_KEY')

# Load the geocoded data
walmart_df = pd.read_csv('/Users/pintoza/Desktop/dev/data-science/walmart-proximity/data/interim/walmart_sample_geocoded.csv')
zips_df = pd.read_csv('/Users/pintoza/Desktop/dev/data-science/walmart-proximity/data/interim/zips_sample_geocoded.csv')

# Extract the state from the Walmart addresses and add it as a new column
walmart_df['State'] = walmart_df['Address'].apply(lambda x: x.split(',')[-1].strip())

# Function to split a list into chunks of a specific size
def split_into_chunks(lst, chunk_size):
    for i in range(0, len(lst), chunk_size):
        yield lst[i:i + chunk_size]

# Function to construct the API URL
def construct_api_url(origins, destinations):
    params = {
        'origins': '|'.join(origins),
        'destinations': '|'.join(destinations),
        'key': API_KEY
    }
    return f"https://api.distancematrix.ai/maps/api/distancematrix/json?{urlencode(params)}"

# Function to make the API request
def make_api_request(url):
    response = requests.get(url)
    return response.json() if response.status_code == 200 else None

# Function to process the API response and extract travel times
def process_response(response, origin_chunk, destinations):
    travel_times = []
    print(f"Origin Chunk: {origin_chunk}, Destinations: {destinations}")  # Debugging
    if response and response['status'] == 'OK':
        rows = response['rows']
        for i, row in enumerate(rows):
            elements = row['elements']
            if i >= len(origin_chunk) or not elements:
                print(f"Skipping row {i} due to index out of range or empty elements")
                continue
            for j, element in enumerate(elements):
                if j >= len(destinations):
                    print(f"Skipping element {j} as destination index out of range")
                    continue
                if element['status'] == 'OK':
                    origin = origin_chunk[i] if i < len(origin_chunk) else "Unknown"
                    destination = destinations[j] if j < len(destinations) else "Unknown"
                    time = element['duration']['text']
                    travel_times.append((origin, destination, time))
                else:
                    print(f"Error: {element['status']} for origin {origin_chunk[i] if i < len(origin_chunk) else 'Unknown'} and destination {destinations[j] if j < len(destinations) else 'Unknown'}")
    return travel_times


# Initialize a list to store the results
results = []

# Set the maximum number of elements per request
max_elements = 100  # Adjust as needed based on API limits

# Iterate over each state in the ZIP codes dataset
# Iterate over each state in the ZIP codes dataset
for state in zips_df['state_name'].unique():
    state_walmarts = walmart_df[walmart_df['State'] == state]
    state_zips = zips_df[zips_df['state_name'] == state]

    # Skip states with no Walmart or ZIP code data
    if state_walmarts.empty or state_zips.empty:
        print(f"No data available for state: {state}")
        continue

    # Construct destinations list
    destinations = state_walmarts['latitude'].astype(str) + ',' + state_walmarts['longitude'].astype(str)
    
    # Ensure there are destinations before proceeding
    if len(destinations) == 0:
        print(f"No destinations for state {state}")
        continue

    # Calculate chunk size, ensuring no division by zero
    chunk_size = max_elements // len(destinations) if len(destinations) > 0 else 0

    # If chunk_size is zero, skip this iteration
    if chunk_size == 0:
        print(f"Chunk size zero for state {state}, skipping...")
        continue

    print(f"Processing state: {state}, Destinations: {len(destinations)}, Chunk Size: {chunk_size}")

    # Split origins into chunks
    origin_chunks = list(split_into_chunks(state_zips['latitude'].astype(str) + ',' + state_zips['longitude'].astype(str), chunk_size))
    print(f"Number of chunks for state {state}: {len(origin_chunks)}")  # Debugging

    # Loop over each chunk of origins and make the request
    for origin_chunk in origin_chunks:
        print(f"Processing chunk of size {len(origin_chunk)} for state {state}")  # Debugging

        url = construct_api_url(origin_chunk, destinations.tolist())
        response = make_api_request(url)

        if response is None or response.get('status') != 'OK':
            print(f"Failed API request for state {state}, URL: {url}")
            continue

        travel_times = process_response(response, origin_chunk, destinations.tolist())
        if not travel_times:
            print(f"No travel times extracted for state {state}")

        results.extend(travel_times)

    print(f"Completed processing for state {state}, Total results so far: {len(results)}")

# Convert results to DataFrame and save to CSV
results_df = pd.DataFrame(results, columns=['Origin', 'Destination', 'Travel Time'])
results_df.to_csv('/Users/pintoza/Desktop/dev/data-science/walmart-proximity/data/interim/temp.csv', index=False)

print("Finished processing. Total results:", len(results))


Processing state: Alabama, Destinations: 14, Chunk Size: 7
Number of chunks for state Alabama: 1
Processing chunk of size 7 for state Alabama
Origin Chunk: 0    34.049571,-87.552646
1    32.536635,-85.697354
2       32.77522,-86.1216
3     34.00233,-86.224672
4    33.916778,-86.243593
5     33.45717,-86.864909
6     30.64109,-88.062248
dtype: object, Destinations: ['34.830588,-86.638833', '34.214321,-86.156222', '32.340084,-86.175233', '33.236132,-87.615214', '30.382341,-87.6857', '34.657472,-86.485773', '30.649611,-88.162241', '30.885783,-87.789945', '33.448237,-86.821089', '33.64555,-87.837726', '33.637739,-86.684404', '32.370693,-86.268412', '34.556786,-86.997067', '33.970063,-86.451534']
Completed processing for state Alabama, Total results so far: 98
Processing state: Alaska, Destinations: 2, Chunk Size: 50
Number of chunks for state Alaska: 1
Processing chunk of size 2 for state Alaska
Origin Chunk: 7      61.52704,-144.29413
8    55.208706,-132.825903
dtype: object, Destinations

KeyError: 0

In [43]:
walmart_df

Unnamed: 0,Address,Designation,Store #,Open Date,latitude,longitude,State
0,"1151 Stonecrest Blvd., Tega Cay, South Carolina",Walmart Supercenter,3733,"March 7, 2008",35.046191,-80.994007,South Carolina
1,"2150 E Tangerine Rd., Oro Valley, Arizona",Walmart Supercenter,3379,"October 29, 2008",32.426144,-110.939906,Arizona
2,"3200 Lusk Dr., Neosho, Missouri",Walmart Supercenter,17,"May 1, 1969",36.841710,-94.394270,Missouri
3,"10300 E Highway 350, Raytown, Missouri",Walmart Supercenter,1094,"October 1, 1987",38.984768,-94.460438,Missouri
4,"4517 N Midland Dr., Midland, Texas",Walmart Supercenter,608,"June 29, 1984",32.029424,-102.145934,Texas
...,...,...,...,...,...,...,...
375,"5219 Highway 51 N, Senatobia, Mississippi",Walmart Supercenter,155,"August 1, 1977",34.575831,-89.968427,Mississippi
376,"2550 S Kolb Rd., Tucson, Arizona",Neighborhood Market,3049,"April 19, 2013",32.192946,-110.842481,Arizona
377,"2121 N Collins St., Arlington, Texas",Neighborhood Market,5613,"January 16, 2013",32.768159,-97.095910,Texas
378,"2586 N Slappey Blvd., Albany, Georgia",Neighborhood Market,4517,"January 20, 2016",31.613019,-84.174321,Georgia
