In [19]:
import sys
sys.path.append('../')  # Adjust the path to point to the directory containing 'src'
import src.utils as utils
import src.snowflake_utils as snowflake_utils

from snowflake.snowpark import functions as SF
import random

# Generate Data


In [2]:
# Area of interest: latitude/longitude bounding box
min_lat = 33.0633
max_lat = 33.2182
min_lon = -96.9162
max_lon = -96.6718

# Size of the synthetic data set (the tuples are presumably (min, max) ranges -- confirm in src.utils)
num_stores = 10
num_customers_per_store = (20, 50)
num_orders_per_customer = (5, 50)

# Customer search radius range, in km
radius_range = (0.5, 2.0)

# Random seed for reproducibility
seed = 1033

In [None]:
# Generate stores, then customers, then orders; every step receives the same seed.
# Stores: bounding box and store count come from the configuration cell.
utils.generate_store_locations(
    min_lat=min_lat, max_lat=max_lat,
    min_lon=min_lon, max_lon=max_lon,
    num_stores=num_stores, seed=seed,
)

# Customers: search radius range and per-store customer count range.
utils.generate_customers(
    search_radius_range=radius_range, num_customers_range=num_customers_per_store, seed=seed,
)

# Orders: per-customer order count range.
utils.generate_orders(num_orders_per_customer=num_orders_per_customer, seed=seed)

# Setup Snowflake

In [8]:
# Open the Snowflake session that the remaining cells reuse for setup, queries and uploads
new_session = snowflake_utils.create_session()

In [15]:
# Run the project setup helper first, then upload each generated data set
snowflake_utils.setup_snowflake_panuccis_project(new_session)

# Each frame is fetched immediately before its upload: stores, customers, orders
stores_df = utils.Store.display_all_stores()
snowflake_utils.upload_store_data(store_df=stores_df, session=new_session)

customers_df = utils.Customer.display_all_customers()
snowflake_utils.upload_customer_data(customer_df=customers_df, session=new_session)

orders_df = utils.Order.display_all_orders()
snowflake_utils.upload_order_data(order_df=orders_df, session=new_session)

# What is my coverage in the city?

For geospatial analytics, Snowflake has native `geometry` and `geography` data types that support various geospatial operations. They are fairly similar, but there are some differences. Depending on the task at hand, you may choose one or the other. Certain geospatial functions are limited to one of the two variants. Another thing to keep in mind is that the default unit of measure (UoM) for these types is based on the chosen SRID, e.g., 4326 uses degrees and 3857 uses meters.


1. Geometry Data Type
    - The geometry data type is used for planar (flat, 2D) spatial data. It assumes a Cartesian coordinate system, where the Earth is treated as a flat plane.
    This data type is ideal for small-scale geospatial operations where the curvature of the Earth can be ignored, such as city planning, local mapping, or small-area analytics.
    - Operations using the geometry type are performed in a flat, Euclidean space, which makes calculations simpler and faster for small areas.
    Example Use Case:

    - Calculating distances between points in a city or determining whether a point lies within a polygon representing a neighborhood.

2.  Geography Data Type
    - The geography data type is used for geospatial data on a spherical model of the Earth. It accounts for the Earth's curvature and uses a geodetic coordinate system (latitude and longitude).
    - This data type is suitable for large-scale geospatial operations, such as global mapping, navigation, or analytics that span large distances.
    Operations using the geography type are performed on a spherical surface, which ensures accuracy over long distances.
    Example Use Case:

    - Calculating the shortest distance between two cities on different continents or determining whether a point lies within a country boundary.

In [21]:
# Database, schema and stores table names, taken from the helper module
db, schema = snowflake_utils.db, snowflake_utils.schema
table_stores = snowflake_utils.table_stores

# Name of the view that sits downstream of the stores data and includes the geospatial columns
view_name = "STORES_W_GEOSPATIAL"

stores_view = (
    new_session.sql(f"SELECT STORE_ID, LONGITUDE, LATITUDE from {db}.{schema}.{table_stores}")
    # Geospatial point for the store location, built from (longitude, latitude)
    .with_column(
        "STORE_LOCATION_GEOGRAPHY",
        SF.call_function("ST_MAKEPOINT", SF.col("LONGITUDE"), SF.col("LATITUDE")),
    )
    # Geometry counterpart of the same point.
    # NOTE(review): no reprojection (e.g. ST_TRANSFORM) is applied here, so the coordinates
    # presumably stay in decimal degrees rather than Mercator meters -- confirm.
    .with_column(
        "STORE_LOCATION_GEOMETRY",
        SF.call_function("TO_GEOMETRY", SF.col("STORE_LOCATION_GEOGRAPHY")),
    )
    # The raw coordinate columns are dropped once the geospatial columns exist
    .drop(["LONGITUDE", "LATITUDE"])
)

# Create a radius of 1 mile and 2 miles around each store



#### Explanation of Using ST_BUFFER for Drawing a Radius in Snowflake

The ST_BUFFER function in Snowflake is a geospatial function that creates a buffer (a circular or polygonal area) around a given geometry object. However, it is important to note that ST_BUFFER only works with geometry data types, which assume a flat, Cartesian coordinate system. This means that the function does not inherently account for the curvature of the Earth, and distances must be provided in the same units as the geometry's coordinate system.

#### Base Data in Latitude and Longitude
In your case, the base data points (store locations) are in latitude and longitude, which are expressed in decimal degrees. Since ST_BUFFER operates on geometry data types, the radius argument must also be provided in decimal degrees, not in meters or miles. This requires converting the desired radius (e.g., 1 mile, 2 miles, or 5 miles) into decimal degrees.

#### Conversion of Miles to Decimal Degrees
The conversion from miles to decimal degrees depends on the location on Earth because the distance represented by one degree of latitude or longitude varies based on the Earth's curvature. For simplicity:

>Latitude:\
>One degree of latitude is approximately 69 miles everywhere on Earth.\
>Therefore, 1 mile ≈ 1 / 69 ≈ 0.0145 degrees of latitude.

>Longitude:\
>The distance represented by one degree of longitude varies with latitude. At the equator, 1 degree of longitude is approximately 69 miles, but this decreases as you move toward the poles.\
>In `Frisco, Texas` (latitude ~33°N), 1 degree of longitude is approximately 57 miles.\
>Therefore, 1 mile ≈ 1 / 57 ≈ 0.0176 degrees of longitude.

Average Conversion:

To simplify the calculation, you can use an average value for both latitude and longitude. For example:\
1 mile ≈ 0.01605 degrees (average of 0.0145 and 0.0176).

In [22]:
# Delivery radii to draw, in miles
mile_list = [1, 2]

# Averaged miles-to-degrees conversion factor (mean of the latitude and longitude factors)
DEGREES_PER_MILE = 0.01605

# The same radii expressed in decimal degrees, rounded to 5 decimal places
degree_list = [round(miles * DEGREES_PER_MILE, 5) for miles in mile_list]

In [23]:
# Add one ST_BUFFER column per delivery radius, named after the radius in miles
for miles, radius_deg in zip(mile_list, degree_list):
    radius_column = f"STORE_LOCATION_{miles}MILE_DELIVERY_RADIUS"
    # The buffer distance is passed in degrees, matching the geometry column's coordinates
    buffer_expr = SF.call_function("ST_BUFFER", SF.col("STORE_LOCATION_GEOMETRY"), radius_deg)
    stores_view = stores_view.with_column(radius_column, buffer_expr)

In [24]:
# Persist the stores DataFrame (with its geospatial columns) as a Snowflake view
qualified_view_name = f'{db}.{schema}.{view_name}'
stores_view.create_or_replace_view(qualified_view_name)

[Row(status='View STORES_W_GEOSPATIAL successfully created.')]

# Use folium to visualize the delivery radius

In [25]:
# Read back the view: the store id, the point's X/Y coordinates exposed as
# LONGITUDE/LATITUDE, and the two delivery-radius polygon columns.
query = f"""
    SELECT 
        STORE_ID, 
        ST_X(STORE_LOCATION_GEOMETRY) AS LONGITUDE, 
        ST_Y(STORE_LOCATION_GEOMETRY) AS LATITUDE,
        STORE_LOCATION_1MILE_DELIVERY_RADIUS, 
        STORE_LOCATION_2MILE_DELIVERY_RADIUS
    FROM {db}.{schema}.{view_name}
"""

In [26]:
import json

import geopandas as gpd
from shapely.geometry import shape

# Run the query and pull the result into pandas
df = new_session.sql(query).to_pandas()

# Point geometry per store, built from the LONGITUDE/LATITUDE columns
gpd_df = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.LONGITUDE, df.LATITUDE))

# Convert delivery radius columns to polygons.
# The values are parsed with json.loads instead of eval: eval would execute whatever
# text the database returns and fails on JSON literals such as true/false/null.
# NOTE(review): assumes the columns arrive as GeoJSON strings -- confirm the session's
# geometry output format.
for radius_column in (
    'STORE_LOCATION_1MILE_DELIVERY_RADIUS',
    'STORE_LOCATION_2MILE_DELIVERY_RADIUS',
):
    gpd_df[radius_column] = gpd_df[radius_column].apply(
        lambda geojson_text: shape(json.loads(geojson_text))
    )

In [27]:
# Inspect the stores together with their point geometry and delivery-radius polygons
gpd_df

Unnamed: 0,STORE_ID,LONGITUDE,LATITUDE,STORE_LOCATION_1MILE_DELIVERY_RADIUS,STORE_LOCATION_2MILE_DELIVERY_RADIUS,geometry
0,81670,-96.86502,33.21178,"MULTIPOLYGON (((-96.84897 33.21178, -96.849278...","MULTIPOLYGON (((-96.83292 33.21178, -96.833536...",POINT (-96.86502 33.21178)
1,39630,-96.75526,33.16184,"MULTIPOLYGON (((-96.73921 33.16184, -96.739518...","MULTIPOLYGON (((-96.72316000000001 33.16184, -...",POINT (-96.75526 33.16184)
2,43273,-96.82455,33.12194,"MULTIPOLYGON (((-96.8085 33.12194, -96.8088083...","MULTIPOLYGON (((-96.79245 33.12194, -96.793066...",POINT (-96.82455 33.12194)
3,88102,-96.81952,33.08763,"MULTIPOLYGON (((-96.80346999999999 33.08763, -...","MULTIPOLYGON (((-96.78742 33.08763, -96.788036...",POINT (-96.81952 33.08763)
4,27394,-96.79401,33.07763,"MULTIPOLYGON (((-96.77796 33.07763, -96.778268...","MULTIPOLYGON (((-96.76191 33.07763, -96.762526...",POINT (-96.79401 33.07763)
5,27005,-96.76022,33.19113,"MULTIPOLYGON (((-96.74417 33.19113, -96.744478...","MULTIPOLYGON (((-96.72812 33.19113, -96.728736...",POINT (-96.76022 33.19113)
6,83895,-96.7081,33.11404,"MULTIPOLYGON (((-96.69205 33.11404, -96.692358...","MULTIPOLYGON (((-96.676 33.11404, -96.67661679...",POINT (-96.7081 33.11404)
7,31205,-96.68787,33.17873,"MULTIPOLYGON (((-96.67182 33.17873, -96.672128...","MULTIPOLYGON (((-96.65577 33.17873, -96.656386...",POINT (-96.68787 33.17873)
8,72317,-96.91058,33.06567,"MULTIPOLYGON (((-96.89452999999999 33.06567, -...","MULTIPOLYGON (((-96.87848 33.06567, -96.879096...",POINT (-96.91058 33.06567)
9,32977,-96.76396,33.12521,"MULTIPOLYGON (((-96.74790999999999 33.12521, -...","MULTIPOLYGON (((-96.73186 33.12521, -96.732476...",POINT (-96.76396 33.12521)


In [28]:
import folium


def _radius_style(color):
    """Return a folium style_function that draws a thin, lightly filled shape in ``color``."""
    return lambda x: {'color': color, 'weight': 1, 'fillOpacity': 0.2}


# (polygon column, layer label, outline color) for each delivery radius, in drawing order
radius_layers = (
    ('STORE_LOCATION_1MILE_DELIVERY_RADIUS', "1 Mile Radius", 'green'),
    ('STORE_LOCATION_2MILE_DELIVERY_RADIUS', "2 Mile Radius", 'orange'),
)

# Center the map on the average latitude and longitude of the stores
map_center = [df['LATITUDE'].mean(), df['LONGITUDE'].mean()]
mymap = folium.Map(location=map_center, zoom_start=12, tiles="cartodb-darkmatter")

# One marker per store, with the store id in the popup
for _, store in gpd_df.iterrows():
    marker = folium.Marker(
        location=[store['LATITUDE'], store['LONGITUDE']],
        popup=f"Store ID: {store['STORE_ID']}",
    )
    marker.add_to(mymap)

# Delivery radius polygons, added after all markers: 1 mile then 2 miles for each store
for _, store in gpd_df.iterrows():
    for polygon_column, label, color in radius_layers:
        folium.GeoJson(
            store[polygon_column],
            name=f"{label} - Store {store['STORE_ID']}",
            style_function=_radius_style(color),
        ).add_to(mymap)

# Display the map
mymap

In [30]:
# Write the map to an HTML file under ../data
mymap.save("../data/store_delivery_radius_map.html")
# Close the Snowflake session; no later cell in this notebook queries Snowflake
new_session.close()