In [7]:
import folium
import pandas as pd
import json

# Build a choropleth of the mean fare per pickup zone and save it as HTML.

# load data
taxi_data = pd.read_csv('../data/yellow/taxi_y_cleaned.csv')

# make sure fare is right format; values that cannot be parsed become NaN
# (errors='coerce') and are skipped by mean() below
taxi_data['fare_amount'] = pd.to_numeric(taxi_data['fare_amount'], errors='coerce')

# gather data by pickup location and calculate the average fare
average_fare_data = (
    taxi_data.groupby('pulocationid')['fare_amount']
    .mean()
    .reset_index()
)
average_fare_data.columns = ['LocationID', 'Average_Fare']

# load the GeoJSON file; the encoding is explicit so the read does not depend
# on the platform's default locale
with open('../data/NYC Taxi Zones.geojson', encoding='utf-8') as f:
    nyc_taxi_zones_geojson = json.load(f)

# Choropleth joins `data` to the GeoJSON by comparing key values, so both sides
# must carry the same type. NOTE(review): the GeoJSON is presumed to store
# `location_id` as text while the CSV yields numbers -- confirm against the
# file. Casting both sides to str makes the join independent of either source
# and of how a given folium release compares keys.
for zone_feature in nyc_taxi_zones_geojson['features']:
    zone_properties = zone_feature['properties']
    zone_properties['location_id'] = str(zone_properties['location_id'])

# Separate frame for plotting so `average_fare_data` keeps its original dtype.
# groupby() already dropped NaN keys, so the int cast is safe.
average_fare_by_zone = average_fare_data.assign(
    LocationID=average_fare_data['LocationID'].astype(int).astype(str)
)

# create the choropleth map
nyc_map = folium.Map(location=[40.7128, -74.0060], zoom_start=11)

folium.Choropleth(
    geo_data=nyc_taxi_zones_geojson,
    name='choropleth',
    data=average_fare_by_zone,
    columns=['LocationID', 'Average_Fare'],
    key_on='feature.properties.location_id',
    fill_color='YlOrRd',  # Choose a color scheme
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Average Fare Amount'
).add_to(nyc_map)

# Save the map to an HTML file (this cell does not render it inline)
nyc_map.save('../data/nyc_taxi_average_fare_map.html')


In [9]:
# Build a choropleth of the number of pickups per taxi zone and save it as HTML.

# Load the taxi data and taxi zone lookup data
taxi_data = pd.read_csv('../data/yellow/taxi_y_cleaned.csv')
taxi_zone_lookup = pd.read_csv('../data/taxi+_zone_lookup.csv')

# Convert fare_amount to numeric, just in case it's not.
# The pickup counts below do not read this column; the conversion is kept so
# `taxi_data` ends up in the same state as before for any later cells.
taxi_data['fare_amount'] = pd.to_numeric(taxi_data['fare_amount'], errors='coerce')

# Ensure that the column names match for the merging process
taxi_zone_lookup = taxi_zone_lookup.rename(columns={'LocationID': 'locationid'})

# Merge for pickup location using lowercase column names.
# validate='many_to_one' makes pandas raise if the lookup ever contains a
# duplicated locationid, which would otherwise silently inflate the counts.
taxi_data_merged_pickup = pd.merge(
    taxi_data,
    taxi_zone_lookup,
    left_on='pulocationid',
    right_on='locationid',
    how='left',
    validate='many_to_one'
)

# Aggregate the pickup data by location ID to get counts.
# Trips whose pickup ID is absent from the lookup have NaN in `locationid`
# after the left merge and are dropped by groupby().
pickup_counts = (
    taxi_data_merged_pickup
    .groupby('locationid')
    .size()
    .reset_index(name='Pickup_Count')
)

# Load the GeoJSON file for the NYC Taxi Zones; the encoding is explicit so the
# read does not depend on the platform's default locale
with open('../data/NYC Taxi Zones.geojson', 'r', encoding='utf-8') as f:
    nyc_taxi_zones_geojson = json.load(f)

# Choropleth joins `data` to the GeoJSON by comparing key values, so both sides
# must carry the same type. A left merge with unmatched rows upcasts
# `locationid` to float, and the GeoJSON is presumed to store `location_id` as
# text (NOTE(review): confirm against the file). Casting both sides to str
# makes the join independent of either source and of the folium release.
for zone_feature in nyc_taxi_zones_geojson['features']:
    zone_properties = zone_feature['properties']
    zone_properties['location_id'] = str(zone_properties['location_id'])

# Separate frame for plotting so `pickup_counts` keeps its original dtype.
# groupby() already dropped NaN keys, so the int cast is safe.
pickup_counts_by_zone = pickup_counts.assign(
    locationid=pickup_counts['locationid'].astype(int).astype(str)
)

# Create a map using Folium centered around New York City
nyc_map = folium.Map(location=[40.7128, -74.0060], zoom_start=11)

# Add the GeoJSON overlay with the pickup counts
folium.Choropleth(
    geo_data=nyc_taxi_zones_geojson,
    name='choropleth',
    data=pickup_counts_by_zone,
    columns=['locationid', 'Pickup_Count'],
    key_on='feature.properties.location_id',
    fill_color='YlGnBu',  # Color scheme for the choropleth
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Pickup Count'
).add_to(nyc_map)

# Save the map to an HTML file
output_map_file = '../data/nyc_taxi_pickup_map.html'
nyc_map.save(output_map_file)

# Last expression: show where the map was written
output_map_file



'../data/nyc_taxi_pickup_map.html'