In [None]:
import geopandas as gpd

# Load the taxi-zone dataset into a GeoDataFrame (`gpd` is geopandas, imported
# above) and preview its first rows.

taxi_zones_gpd = gpd.read_file("../../data/raw/taxi_zones")
taxi_zones_gpd.head()

In [None]:
# Print the coordinate reference system (CRS) attached to taxi_zones_gpd so it
# can be compared with the CRS the pickup coordinates are later declared in.
print(taxi_zones_gpd.crs)

In [None]:
# Reproject the taxi zones to WGS84 (EPSG:4326). The call is unconditional:
# no check of the current CRS is made before converting.
taxi_zones_wgs84 = taxi_zones_gpd.to_crs("EPSG:4326")
taxi_zones_wgs84.head()

## Analysing the Busiest Uber Pickup Zones

In [None]:
import pandas as pd

# Read the raw Uber pickup records from CSV and preview the first rows.
uber_transactions_df = pd.read_csv('../../data/raw/uber-raw-data-sep14.csv')
uber_transactions_df.head()

In [None]:
# Build the point geometries for the Uber transactions DataFrame.
from shapely.geometry import Point  # import retained: other cells may still reference Point
# Vectorised construction from the pickup longitude (x) and latitude (y)
# columns; this replaces a per-row Python loop over shapely `Point(...)` calls.
# The result is a geometry array rather than a plain list; it still supports
# slicing for previews and is accepted by `GeoDataFrame(geometry=...)`.
geometry = gpd.points_from_xy(uber_transactions_df['Lon'], uber_transactions_df['Lat'])

In [None]:
# Preview the first five pickup point geometries.
geometry[0:5]

In [None]:
# Wrap the Uber transactions and their point geometries in a GeoDataFrame,
# declaring the coordinates as EPSG:4326 (WGS84 longitude/latitude).
uber_transactions_gpd = gpd.GeoDataFrame(
    data=uber_transactions_df,
    geometry=geometry,
    crs="EPSG:4326",
)
uber_transactions_gpd.head()

In [None]:
# List the taxi-zone columns to see which attributes are available to carry
# over in the spatial join.
taxi_zones_wgs84.columns

In [None]:
# Tag every Uber pickup with the taxi zone whose polygon it lies within.
# The left join keeps pickups that match no zone; their zone fields stay empty.
uber_transactions_with_zones = uber_transactions_gpd.sjoin(
    taxi_zones_wgs84[['zone', 'LocationID', 'borough', 'geometry']],
    how='left',
    predicate='within',
)
uber_transactions_with_zones.head()

In [None]:
# Pickup counts for the 15 zones with the most pickups.
uber_transactions_with_zones['zone'].value_counts().head(15)

In [None]:
# Zone names in the order produced by value_counts().
uber_transactions_with_zones['zone'].value_counts().index

## Finding the Top Pickup Zones

In [None]:
# Count pickups per zone and keep the 15 zones with the highest counts.
zone_pickup_counts = uber_transactions_with_zones['zone'].value_counts()
top_zones = zone_pickup_counts.head(15)
top_zones

In [None]:
# Turn the top-15 Series into a two-column summary table:
# the zone name ('zone') and its number of pickups ('count').
top_zones_df = top_zones.rename_axis('zone').reset_index(name='count')
top_zones_df.head()

In [None]:
# Attach the zone attributes and geometry to the top-zones table. The inner
# join drops every taxi zone that is not listed in top_zones_df.
# NOTE(review): if several taxi-zone rows share one zone name, each of them is
# kept and carries the same count - confirm this is intended.
top_zones_gpd = taxi_zones_wgs84.merge(top_zones_df, on='zone', how='inner')
top_zones_gpd.head()

In [None]:
# Reproject the top zones to EPSG:2263 into a new frame; top_zones_gpd itself
# is not modified. Presumably chosen so the centroids computed in a later cell
# are taken in a projected CRS - confirm.
top_zones_gpd_2263 = top_zones_gpd.to_crs(epsg=2263)

In [None]:
# Display the reprojected top-zones table.
top_zones_gpd_2263

In [None]:
# Store each zone polygon's centroid, computed in the frame's EPSG:2263
# coordinates, in an extra 'centroid' column.
zone_polygons = top_zones_gpd_2263['geometry']
top_zones_gpd_2263['centroid'] = zone_polygons.centroid
top_zones_gpd_2263.head()

In [None]:
# Bring both geometry columns back to WGS84 (EPSG:4326) for visualization.
# Work on a copy so the EPSG:2263 frame keeps its projected coordinates.
temp_gpd = top_zones_gpd_2263.copy()
for geometry_column in ("geometry", "centroid"):
    temp_gpd[geometry_column] = temp_gpd[geometry_column].to_crs("EPSG:4326")
top_zones_gpd_4326 = temp_gpd
top_zones_gpd_4326.head()

In [None]:
# Geospatial visualization using Folium
import folium

# Base map centred on latitude 40.75, longitude -73.97.
base_map = folium.Map(
    location=[40.75, -73.97],
    zoom_start=12,
    tiles="cartodbpositron",
)

In [None]:
# Draw one circle per top zone at its centroid. The radius is the pickup
# count divided by 2000, and the tooltip shows the zone name and the count.
for zone_name, pickup_count, centroid_point in zip(
    top_zones_gpd_4326["zone"],
    top_zones_gpd_4326["count"],
    top_zones_gpd_4326["centroid"],
):
    marker = folium.CircleMarker(
        location=[centroid_point.y, centroid_point.x],  # folium expects [lat, lon]
        radius=pickup_count / 2000,
        color="crimson",
        fill=True,
        fill_opacity=0.6,
        tooltip=f"{zone_name} : {int(pickup_count)} pickups",
    )
    marker.add_to(base_map)

In [None]:
from IPython.display import display
# Render the top-pickup-zones map inline in the notebook output.
display(base_map)

## Create a Marker Cluster for Analysis

In [None]:
# Re-inspect the Uber transactions before binning their coordinates.
uber_transactions_df.head()

In [None]:
# Snap every pickup to a 0.001-degree grid by rounding its coordinates to
# three decimals; the rounded values go into the Lat_bins / Lon_bins columns.
for coord in ("Lat", "Lon"):
    uber_transactions_df[f"{coord}_bins"] = uber_transactions_df[coord].round(3)

In [None]:
# Count the pickups in every (Lat_bins, Lon_bins) grid cell, then show the
# cells ordered from busiest to quietest.
rush_uber_bin = (
    uber_transactions_df
    .groupby(['Lat_bins', 'Lon_bins'])
    .size()
    .reset_index(name='size')
)
rush_uber_bin.sort_values('size', ascending=False)

In [None]:
import folium
from folium.plugins import FastMarkerCluster

# Separate base map for the cluster view, same centre and zoom as the zone map.
base_map_cluster = folium.Map(
    location=[40.75, -73.97],
    zoom_start=12,
    tiles='cartodbpositron',
)

# One entry per grid cell: latitude bin, longitude bin and pickup count.
cluster_points = rush_uber_bin[['Lat_bins', 'Lon_bins', 'size']]
cluster_layer = FastMarkerCluster(cluster_points)
cluster_layer.add_to(base_map_cluster)

display(base_map_cluster)

## Uber Demand Over Time

In [None]:
# Parse the 'Date/Time' strings into pandas datetimes and store them in a new
# 'data_time' column (name kept as-is because a later cell reads it).
uber_transactions_df['data_time'] = pd.to_datetime(uber_transactions_df['Date/Time'])

In [None]:
# Extract the hour of day from the parsed timestamps into an 'hour' column.
uber_transactions_df['hour'] = uber_transactions_df['data_time'].dt.hour

In [None]:
# Check results: the preview should now include the data_time and hour columns.
uber_transactions_df.head()

In [None]:
# Build the per-hour input for folium's HeatMapWithTime: one list per hour of
# day (0-23), each holding [Lat_bins, Lon_bins, weight] entries.
#
# HeatMapWithTime documents the weight as lying in (0, 1], so the raw pickup
# counts are divided by the largest count found in any (hour, bin) pair.
# Using one global maximum keeps intensities comparable across hours.
hourly_bin_counts = (
    uber_transactions_df
    .groupby(['hour', 'Lat_bins', 'Lon_bins'])
    .size()
    .reset_index(name='count')
)
# For an empty frame max() is NaN; the division then simply yields no rows.
hourly_bin_counts['weight'] = hourly_bin_counts['count'] / hourly_bin_counts['count'].max()

heatmap_data = []

for h in range(24):
    # Hours without any pickup contribute an empty list, so the outer list
    # always has exactly 24 time steps.
    hour_bins = hourly_bin_counts[hourly_bin_counts['hour'] == h]
    heatmap_data.append(hour_bins[['Lat_bins', 'Lon_bins', 'weight']].values.tolist())

In [None]:
# Preview the first five entries of the first time step. The original cell
# held an unterminated subscript, which is a SyntaxError.
heatmap_data[0][:5]

In [None]:
import folium
from folium.plugins import HeatMapWithTime

# Base map for the animated demand view (zoomed out one level from the zone map).
base_map_demand = folium.Map(
    location=[40.75, -73.97],
    zoom_start=11,
    tiles='cartodbpositron',
)

# One label per time step, "0:00" through "23:00", matching the 24 hourly
# lists in heatmap_data.
hour_labels = [f"{h}:00" for h in range(24)]

demand_layer = HeatMapWithTime(
    heatmap_data,
    index=hour_labels,
    auto_play=True,
    max_opacity=0.7,
)
demand_layer.add_to(base_map_demand)

In [None]:
# Render the animated hourly demand heat map inline.
base_map_demand

In [None]:
# Save the map to an HTML file. The results directory is created first so the
# save does not fail when the directory does not exist yet.
from pathlib import Path

heatmap_html_path = Path("../../results/uber_demand_heatmap.html")
heatmap_html_path.parent.mkdir(parents=True, exist_ok=True)
base_map_demand.save(str(heatmap_html_path))