In [7]:
import h3
import pandas as pd

# Read the raw sales records from disk.
df = pd.read_csv('dataset/sales_4.csv')

# Step 1: Keep only the first row seen for each distinct
# (store_id, latitude, longitude) combination; later repeats are discarded.
store_key_columns = ['store_id', 'latitude', 'longitude']
is_repeat = df.duplicated(subset=store_key_columns, keep='first')
df_unique = df[~is_repeat]

# Step 2: Map each store_id to the H3 cell (resolution 3) containing its
# latitude/longitude.
#
# The three columns are iterated directly rather than via
# DataFrame.iterrows(): iterrows() packs each row into a single Series, so
# when every column is numeric an integer store_id is upcast to float
# (1 -> 1.0) and would be stored -- and later exported -- in that form.
#
# NOTE: if a store_id occurs with more than one coordinate pair, the last
# occurrence wins, exactly as with a plain assignment loop.
store_h3_map = {
    store_id: h3.latlng_to_cell(latitude, longitude, 3)
    for store_id, latitude, longitude in zip(
        df_unique['store_id'], df_unique['latitude'], df_unique['longitude']
    )
}

# Step 3: For every store, list the other stores located in nearby H3 cells.
# The result maps store_id -> list of neighbouring store_ids, in the same
# order the stores appear in store_h3_map.
neighbors = {}

for store_id, h3_index in store_h3_map.items():
    # All cells within grid distance 2 of the store's own cell (the cell
    # itself included). NOTE(review): this was previously described as a
    # "1-cell radius", but the code has always passed k=2 -- confirm which
    # radius is actually wanted.
    # Converted to a set so each membership test below is O(1) instead of a
    # linear scan over the returned collection.
    neighbor_cells = set(h3.grid_disk(h3_index, 2))

    # Every other store whose cell falls inside that disk. Stores sharing the
    # exact same cell count as neighbours; the store itself is excluded.
    neighbors[store_id] = [
        other_store_id
        for other_store_id, other_h3_index in store_h3_map.items()
        if other_store_id != store_id and other_h3_index in neighbor_cells
    ]

# Step 4: Export the result to a CSV file.
# The path is defined once so the file written and the confirmation message
# cannot drift apart (the message used to name a different file).
output_path = 'neighbor_store_ids.csv'

# Flatten the mapping into one row per (store, neighbour) pair.
output_data = [
    {'store_id': store_id, 'neighbor_store_id': neighbor_store_id}
    for store_id, neighbor_store_ids in neighbors.items()
    for neighbor_store_id in neighbor_store_ids
]

# Columns are named explicitly so the CSV still gets its header row when no
# store has any neighbour (an empty list would otherwise yield no columns).
output_df = pd.DataFrame(output_data, columns=['store_id', 'neighbor_store_id'])
output_df.to_csv(output_path, index=False)

print(f"Neighbor store IDs have been exported to '{output_path}'.")


Neighbor store IDs have been exported to 'neighbor_store_ids_2.csv'.
