In [2]:
import warnings

import pandas as pd

# Suppress every Python warning from this point on. pandas is imported before
# the filter and matplotlib after it, so only the latter is imported silently.
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt

# pandas display settings used by every table rendered in this notebook.
for option_name, option_value in {
    'display.max_colwidth': 0,
    'display.max_rows': 200,
    'display.max_columns': 500,
    'display.width': 1000,
    'display.float_format': '{:.5f}'.format,  # floats are shown with 5 decimals
}.items():
    pd.set_option(option_name, option_value)

# Venue polygon and shuttle points

In [59]:
%%time

# Create a polygon for the event venue (Circuit of the Americas) from four
# corner coordinates given in WGS84 degrees, then project it to EPSG:3857.
from shapely.geometry import Polygon
import geopandas as gpd

# Element i of both lists describes the same corner of the venue outline.
lat_point_list = [30.131962, 30.146337, 30.140386, 30.122629]
lon_point_list = [-97.647388, -97.635686, -97.619652, -97.631465]

# Vertices are passed as (x, y) = (longitude, latitude) pairs.
polygon_geom = Polygon(zip(lon_point_list, lat_point_list))

gdf_venue = gpd.GeoDataFrame(index=[0], crs='epsg:4326', geometry=[polygon_geom])
gdf_venue['name'] = 'the Circuit of the Americas'
gdf_venue = gdf_venue.to_crs("EPSG:3857")

# Display the projected venue once (the original repeated this expression;
# the first occurrence had no effect).
gdf_venue

CPU times: user 118 ms, sys: 4.01 ms, total: 122 ms
Wall time: 219 ms


Unnamed: 0,geometry,name
0,"POLYGON ((-10870057.509 3520523.631, -10868754.849 3522374.003, -10866969.952 3521607.949, -10868284.969 3519322.417, -10870057.509 3520523.631))",the Circuit of the Americas


In [18]:
import shapely.geometry as geom

# Pick-up locations as shapely points; arguments are (longitude, latitude).
point_waterloo_park = geom.Point(-97.736285, 30.273726)
point_barton_creek_square = geom.Point(-97.805046, 30.257509)
point_expo_center = geom.Point(-97.622544, 30.297062)
point_ridehailing = geom.Point(-97.614135, 30.178718)

point_shuttle_list = [
    point_waterloo_park,
    point_barton_creek_square,
    point_expo_center,
    point_ridehailing,
]

# Labels are listed in the same order as `point_shuttle_list`.
shuttle_labels = [
    'Shuttle_Waterloo_Park',
    'Shuttle_Barton_Creek_Square',
    'Shuttle_Expo_Center',
    'Uber_DelValle_HighSchool',
]

# Declare the coordinates as WGS84, then reproject to EPSG:3857.
gdf_shuttle = gpd.GeoDataFrame(
    {'Shuttle_Location': shuttle_labels},
    geometry=point_shuttle_list,
    crs="EPSG:4326",
).to_crs("EPSG:3857")
gdf_shuttle

Unnamed: 0,Shuttle_Location,geometry
0,Shuttle_Waterloo_Park,POINT (-10879953.478 3538783.517)
1,Shuttle_Barton_Creek_Square,POINT (-10887607.918 3536693.356)
2,Shuttle_Expo_Center,POINT (-10867291.888 3541791.831)
3,Uber_DelValle_HighSchool,POINT (-10866355.802 3526543.124)


# Road network data

In [16]:
# Load the TMC road-segment table and keep only the columns used below:
# segment id, road name, start/end coordinates, and the `miles` column.
road_columns = [
    'tmc',
    'intersection',
    'start_latitude',
    'start_longitude',
    'end_latitude',
    'end_longitude',
    'miles',
]
df_road = pd.read_csv("data/TMC_Identification.csv", sep=',', header=0)[road_columns]
print('total number of road segments:', len(df_road))
df_road.head(3)

total number of road segments: 4460


Unnamed: 0,tmc,intersection,start_latitude,start_longitude,end_latitude,end_longitude,miles
0,112P13033,W MARTIN LUTHER KING JR BLVD,30.28172,-97.74193,30.28232,-97.74207,0.04249
1,112+08908,FM-20,30.11066,-97.41049,30.11082,-97.37323,2.22783
2,112+16538,GATTIS SCHOOL RD,30.49057,-97.67514,30.49351,-97.67573,0.20831


In [17]:
%%time
from shapely.geometry import LineString
from geopandas import GeoDataFrame


def _segment_line(segment):
    """Return a two-point LineString from a row's start to its end coordinate.

    Coordinates are passed to shapely as (longitude, latitude) pairs.
    """
    start_xy = (segment['start_longitude'], segment['start_latitude'])
    end_xy = (segment['end_longitude'], segment['end_latitude'])
    return LineString([start_xy, end_xy])


# Adds a `geometry` column to `df_road` itself (one straight line per row).
df_road['geometry'] = df_road.apply(_segment_line, axis=1)

# Road network as a GeoDataFrame: declared as WGS84, reprojected to EPSG:3857.
gdf_road = GeoDataFrame(df_road, geometry='geometry', crs="EPSG:4326").to_crs("EPSG:3857")

CPU times: user 190 ms, sys: 545 µs, total: 191 ms
Wall time: 189 ms


## Airbnb data

In [20]:
from shapely.geometry import Point
from geopandas import GeoDataFrame

# Read the Airbnb listings snapshot and report how many rows it contains.
df_airbnb = pd.read_csv("data/airbnb_listings_202312.csv", sep=',', header=0)
print('total number of airbnb:', len(df_airbnb))

# One shapely point per listing, built as (longitude, latitude).
geometry = [
    Point(lon, lat)
    for lon, lat in zip(df_airbnb.longitude, df_airbnb.latitude)
]

# Declare the points as WGS84, then reproject to EPSG:3857.
gdf_airbnb = GeoDataFrame(df_airbnb, crs="EPSG:4326", geometry=geometry).to_crs("EPSG:3857")

total number of airbnb: 15419


# Feature engineering 
Dynamic features: event timing, attendance size, proximity to the event venue, proximity to shuttle services, hotel occupancy rates

Static features: population density, hotel density

In [33]:
# Slim copy of the listings for the spatial join: listing id, location, and a
# constant `airbnb_count` of 1 per listing.
# `.assign` returns a new frame, so the new column is never written into a
# slice of `gdf_airbnb` (the original chained assignment triggers pandas'
# SettingWithCopyWarning, which the notebook-wide warning filter hides).
gdf_airbnb_need = gdf_airbnb[['id', 'geometry']].assign(airbnb_count=1)

In [151]:
%%time
# Spatial join of Airbnb locations and road segments: count the listings that
# fall within `buffer_distance` of each segment and attach the count to the
# road table as `airbnb_count`.

buffer_distance = 500  # Buffer distance in meters

# The layers are stored in EPSG:3857, whose units are stretched relative to
# ground metres away from the equator. Build the buffer in the UTM zone
# estimated from the data (true metres) and project the result back so it can
# be joined with the EPSG:3857 listing points.
metric_crs = gdf_road.estimate_utm_crs()
gdf_polyline_buffered = gdf_road.copy()
gdf_polyline_buffered["geometry"] = (
    gdf_road.geometry
    .to_crs(metric_crs)
    .buffer(buffer_distance)
    .to_crs(gdf_road.crs)
)

# Left join: every road segment is kept, including those with no listing nearby.
gdf_road_w_airbnb = gpd.sjoin(
    gdf_polyline_buffered, gdf_airbnb_need, predicate="intersects", how="left"
)
print(gdf_road_w_airbnb.tmc.unique().shape[0])

# `count` ignores the NaN that unmatched segments carry, so they end up with 0.
point_counts = gdf_road_w_airbnb.groupby("tmc").airbnb_count.count().reset_index()

# Join on the segment id explicitly instead of relying on whichever column
# names the two frames happen to share.
gdf_road_merged = gdf_road.merge(point_counts, on="tmc", how="left")

# Only the count is defaulted to 0; missing values in other columns (for
# example a missing road name) are left as they are rather than turned into 0.
gdf_road_merged["airbnb_count"] = gdf_road_merged["airbnb_count"].fillna(0).astype(int)

4460
CPU times: user 340 ms, sys: 6.44 ms, total: 346 ms
Wall time: 344 ms


In [104]:
# gdf_road_merged.explore()

## Distance to venue and shuttles

In [153]:
def compute_midpoint(linestring):
    """Return a Point at the mean x / mean y of the line's vertices.

    For a two-vertex line this is the midpoint of the segment. For lines with
    more vertices it is the plain average of the vertices, not the point
    halfway along the line's length.
    """
    vertices = list(linestring.coords)
    vertex_count = len(vertices)
    mean_x = sum([vertex[0] for vertex in vertices]) / vertex_count
    mean_y = sum([vertex[1] for vertex in vertices]) / vertex_count
    return Point(mean_x, mean_y)


# One midpoint per road segment, stored alongside the segment geometry.
road_geometries = gdf_road_merged["geometry"]
gdf_road_merged["midpoint"] = road_geometries.apply(compute_midpoint)

In [152]:
# For every shuttle / ride-hailing pick-up point add two columns to
# `gdf_road_merged`:
#   <Shuttle_Location>              the pick-up point, repeated on every row
#   distance_to_<Shuttle_Location>  distance from the segment midpoint to it
# Distances are in the units of the layers' CRS (EPSG:3857), not ground metres.

# First point per location name, matching the original `.iloc[0]` lookups.
shuttle_points = (
    gdf_shuttle
    .drop_duplicates("Shuttle_Location")
    .set_index("Shuttle_Location")
    .geometry
)

# Constant point columns first, so the column order stays
# [points..., distances...] as before.
for shuttle_name, shuttle_point in shuttle_points.items():
    # Build the geometry column directly; no Point -> str -> WKT round trip,
    # and the CRS of the shuttle layer is kept on the column.
    gdf_road_merged[shuttle_name] = gpd.GeoSeries(
        [shuttle_point] * len(gdf_road_merged),
        index=gdf_road_merged.index,
        crs=gdf_shuttle.crs,
    )

# `GeoSeries.distance` accepts a single geometry and applies it to every row.
for shuttle_name, shuttle_point in shuttle_points.items():
    gdf_road_merged[f"distance_to_{shuttle_name}"] = (
        gdf_road_merged["midpoint"].distance(shuttle_point)
    )

In [175]:
# Distance to venue centroid.
# Both the venue and the road midpoints are in EPSG:3857, so the distance is
# expressed in that CRS's units rather than ground metres.
venue_centroid = gdf_venue.iloc[0].geometry.centroid

# Keep the centroid as a column (same value on every row) for downstream use;
# built directly instead of through a row-wise `apply`.
gdf_road_merged["venue_centroid"] = gpd.GeoSeries(
    [venue_centroid] * len(gdf_road_merged),
    index=gdf_road_merged.index,
    crs=gdf_venue.crs,
)

# `GeoSeries.distance` accepts a single geometry and applies it to every row.
gdf_road_merged["distance_to_venue"] = gdf_road_merged["midpoint"].distance(venue_centroid)

# Visualization

In [54]:
%%time
# Base layer: road segments coloured by `airbnb_count` on a continuous
# (non-categorical) "rainbow" scale over the CartoDB dark_matter basemap.
road_layer_options = dict(
    column='airbnb_count',
    cmap="rainbow",
    tiles="CartoDB dark_matter",
    categorical=False,
)
m = gdf_road_merged.explore(**road_layer_options)

# Overlay the shuttle / ride-hailing points on the same map object.
# NOTE(review): markersize / linewidth / edgecolor look like matplotlib-style
# keywords; confirm that `explore` actually honours them.
shuttle_layer_options = dict(
    markersize=40,
    linewidth=2,
    edgecolor="black",
)
m = gdf_shuttle.explore(m=m, **shuttle_layer_options)

CPU times: user 541 ms, sys: 0 ns, total: 541 ms
Wall time: 581 ms


In [55]:
%%time
# Draw the venue polygon on top of the existing map `m`.
gdf_venue.explore(m=m)

CPU times: user 118 ms, sys: 9.09 ms, total: 127 ms
Wall time: 125 ms
