In [None]:
# To install folium, remove the leading '#' from the %pip line below and run this cell

# %pip install folium


In [2]:
# Import necessary libraries
from azureml.opendatasets import NycTlcGreen
from datetime import datetime
from dateutil import parser
import pandas as pd
import folium
from folium.plugins import HeatMap

# Build a pickup/drop-off heatmap of NYC green-taxi trips and save it as HTML.

# Set the date range
end_date = parser.parse('2018-06-06')
start_date = parser.parse('2018-05-01')

# Fetch the dataset and load it into a pandas DataFrame
nyc_tlc = NycTlcGreen(start_date=start_date, end_date=end_date)
nyc_tlc_df = nyc_tlc.to_pandas_dataframe()

# Display the columns and basic info to verify the names and data
print(nyc_tlc_df.columns)
nyc_tlc_df.info()

# Ensure necessary columns are present
required_columns = ['pickupLatitude', 'pickupLongitude', 'dropoffLatitude', 'dropoffLongitude']
if not all(col in nyc_tlc_df.columns for col in required_columns):
    print("Required columns are missing. Available columns are:")
    print(nyc_tlc_df.columns)
else:
    # Keep only trips whose pickup AND drop-off lie inside a rough bounding box
    # (latitude 40..42, longitude -75..-72). `between` evaluates to False for
    # missing values, so rows without coordinates are removed here too.
    in_nyc_bounds = (
        nyc_tlc_df['pickupLatitude'].between(40, 42)
        & nyc_tlc_df['pickupLongitude'].between(-75, -72)
        & nyc_tlc_df['dropoffLatitude'].between(40, 42)
        & nyc_tlc_df['dropoffLongitude'].between(-75, -72)
    )
    nyc_tlc_df = nyc_tlc_df[in_nyc_bounds]

    # An empty result would otherwise produce a blank map with a success
    # message and no hint of what went wrong, so report it explicitly.
    # NOTE(review): newer TLC records are believed to carry zone IDs
    # (puLocationId/doLocationId) instead of coordinates - confirm for this
    # date range.
    if nyc_tlc_df.empty:
        print("Warning: no trips with coordinates inside the NYC bounding box were found; "
              "the saved map will contain no heatmap data.")

    # Create a base map
    base_map = folium.Map(location=[40.7128, -74.0060], zoom_start=11)

    # Prepare data for pickup heatmap as a list of [lat, lon] pairs
    pickup_locations = nyc_tlc_df[['pickupLatitude', 'pickupLongitude']].dropna()
    pickup_locations = pickup_locations.values.tolist()

    # Prepare data for dropoff heatmap as a list of [lat, lon] pairs
    dropoff_locations = nyc_tlc_df[['dropoffLatitude', 'dropoffLongitude']].dropna()
    dropoff_locations = dropoff_locations.values.tolist()

    # Add pickup heatmap to the base map (named so LayerControl can toggle it)
    HeatMap(pickup_locations, radius=8, max_zoom=13, name='Pickups').add_to(base_map)

    # Add dropoff heatmap to the base map (named so LayerControl can toggle it)
    HeatMap(dropoff_locations, radius=8, max_zoom=13, name='Drop-offs').add_to(base_map)

    # Add layer control
    folium.LayerControl().add_to(base_map)

    # Save the map to an HTML file
    base_map.save('nyc_taxi_heatmap.html')

    print("Heatmap has been saved to 'nyc_taxi_heatmap.html'")


{'infer_column_types': 'False', 'activity': 'download'}
{'infer_column_types': 'False', 'activity': 'download', 'activityApp': 'FileDataset'}
[Info] read from /var/folders/m1/v2p5dlc135x8ctp72qfss6200000gn/T/tmpq06drayn/https%3A/%2Fazureopendatastorage.azurefd.net/nyctlc/green/puYear=2018/puMonth=5/part-00087-tid-4753095944193949832-fee7e113-666d-4114-9fcb-bcd3046479f3-2657-1.c000.snappy.parquet
[Info] read from /var/folders/m1/v2p5dlc135x8ctp72qfss6200000gn/T/tmpq06drayn/https%3A/%2Fazureopendatastorage.azurefd.net/nyctlc/green/puYear=2018/puMonth=6/part-00171-tid-4753095944193949832-fee7e113-666d-4114-9fcb-bcd3046479f3-2741-1.c000.snappy.parquet
Index(['vendorID', 'lpepPickupDatetime', 'lpepDropoffDatetime',
       'passengerCount', 'tripDistance', 'puLocationId', 'doLocationId',
       'pickupLongitude', 'pickupLatitude', 'dropoffLongitude',
       'dropoffLatitude', 'rateCodeID', 'storeAndFwdFlag', 'paymentType',
       'fareAmount', 'extra', 'mtaTax', 'improvementSurcharge', 'tipA

In [3]:
# Import necessary libraries
from azureml.opendatasets import NycTlcGreen
from datetime import datetime
from dateutil import parser
import pandas as pd
import folium
from folium.plugins import HeatMap

# Build a pickup/drop-off heatmap with one marker per distinct drop-off point
# and save it as HTML.

# Set the date range
end_date = parser.parse('2018-06-06')
start_date = parser.parse('2018-05-01')

# Fetch the dataset and load it into a pandas DataFrame
nyc_tlc = NycTlcGreen(start_date=start_date, end_date=end_date)
nyc_tlc_df = nyc_tlc.to_pandas_dataframe()

# Display the columns and basic info to verify the names and data
print(nyc_tlc_df.columns)
nyc_tlc_df.info()

# Ensure necessary columns are present
required_columns = ['pickupLatitude', 'pickupLongitude', 'dropoffLatitude', 'dropoffLongitude']
if not all(col in nyc_tlc_df.columns for col in required_columns):
    print("Required columns are missing. Available columns are:")
    print(nyc_tlc_df.columns)
else:
    # Keep only trips whose pickup AND drop-off lie inside a rough bounding box
    # (latitude 40..42, longitude -75..-72). `between` evaluates to False for
    # missing values, so rows without coordinates are removed here too.
    in_nyc_bounds = (
        nyc_tlc_df['pickupLatitude'].between(40, 42)
        & nyc_tlc_df['pickupLongitude'].between(-75, -72)
        & nyc_tlc_df['dropoffLatitude'].between(40, 42)
        & nyc_tlc_df['dropoffLongitude'].between(-75, -72)
    )
    nyc_tlc_df = nyc_tlc_df[in_nyc_bounds]

    # An empty result would otherwise produce a blank map (no heat, no
    # markers) with a success message, so report it explicitly.
    # NOTE(review): newer TLC records are believed to carry zone IDs
    # (puLocationId/doLocationId) instead of coordinates - confirm for this
    # date range.
    if nyc_tlc_df.empty:
        print("Warning: no trips with coordinates inside the NYC bounding box were found; "
              "the saved map will contain no heatmap data or markers.")

    # Create a base map
    base_map = folium.Map(location=[40.7128, -74.0060], zoom_start=11)

    # Prepare data for pickup heatmap as a list of [lat, lon] pairs
    pickup_locations = nyc_tlc_df[['pickupLatitude', 'pickupLongitude']].dropna()
    pickup_locations = pickup_locations.values.tolist()

    # Prepare data for dropoff heatmap as a list of [lat, lon] pairs
    dropoff_locations = nyc_tlc_df[['dropoffLatitude', 'dropoffLongitude']].dropna()
    dropoff_locations = dropoff_locations.values.tolist()

    # Add pickup heatmap to the base map (named so LayerControl can toggle it)
    HeatMap(pickup_locations, radius=8, max_zoom=13, name='Pickups').add_to(base_map)

    # Add dropoff heatmap to the base map (named so LayerControl can toggle it)
    HeatMap(dropoff_locations, radius=8, max_zoom=13, name='Drop-offs').add_to(base_map)

    # Add markers for unique drop-off locations (one marker per distinct lat/lon pair)
    unique_dropoff_locations = nyc_tlc_df[['dropoffLatitude', 'dropoffLongitude']].drop_duplicates().dropna()
    for idx, row in unique_dropoff_locations.iterrows():
        folium.Marker([row['dropoffLatitude'], row['dropoffLongitude']],
                      popup=f"Drop-off Location: ({row['dropoffLatitude']}, {row['dropoffLongitude']})").add_to(base_map)

    # Add layer control
    folium.LayerControl().add_to(base_map)

    # Save the map to an HTML file
    base_map.save('nyc_taxi_heatmap_with_markers.html')

    # Print unique drop-off locations
    unique_dropoff_locations_list = unique_dropoff_locations.values.tolist()
    print("Unique Drop-off Locations:")
    for location in unique_dropoff_locations_list:
        print(location)

    print("Heatmap with markers has been saved to 'nyc_taxi_heatmap_with_markers.html'")


{'infer_column_types': 'False', 'activity': 'download'}
{'infer_column_types': 'False', 'activity': 'download', 'activityApp': 'FileDataset'}
[Info] read from /var/folders/m1/v2p5dlc135x8ctp72qfss6200000gn/T/tmpyxp9pmjh/https%3A/%2Fazureopendatastorage.azurefd.net/nyctlc/green/puYear=2018/puMonth=5/part-00087-tid-4753095944193949832-fee7e113-666d-4114-9fcb-bcd3046479f3-2657-1.c000.snappy.parquet
[Info] read from /var/folders/m1/v2p5dlc135x8ctp72qfss6200000gn/T/tmpyxp9pmjh/https%3A/%2Fazureopendatastorage.azurefd.net/nyctlc/green/puYear=2018/puMonth=6/part-00171-tid-4753095944193949832-fee7e113-666d-4114-9fcb-bcd3046479f3-2741-1.c000.snappy.parquet
Index(['vendorID', 'lpepPickupDatetime', 'lpepDropoffDatetime',
       'passengerCount', 'tripDistance', 'puLocationId', 'doLocationId',
       'pickupLongitude', 'pickupLatitude', 'dropoffLongitude',
       'dropoffLatitude', 'rateCodeID', 'storeAndFwdFlag', 'paymentType',
       'fareAmount', 'extra', 'mtaTax', 'improvementSurcharge', 'tipA

In [4]:
# Import necessary libraries
from azureml.opendatasets import NycTlcGreen
from datetime import datetime
from dateutil import parser
import pandas as pd
import folium
from folium.plugins import HeatMap

# Build a pickup/drop-off heatmap with a red marker per distinct drop-off
# point and save it as HTML.

# Set the date range
end_date = parser.parse('2018-06-06')
start_date = parser.parse('2018-05-01')

# Fetch the dataset and load it into a pandas DataFrame
nyc_tlc = NycTlcGreen(start_date=start_date, end_date=end_date)
nyc_tlc_df = nyc_tlc.to_pandas_dataframe()

# Display the columns and basic info to verify the names and data
print(nyc_tlc_df.columns)
nyc_tlc_df.info()

# Ensure necessary columns are present
required_columns = ['pickupLatitude', 'pickupLongitude', 'dropoffLatitude', 'dropoffLongitude']
if not all(col in nyc_tlc_df.columns for col in required_columns):
    print("Required columns are missing. Available columns are:")
    print(nyc_tlc_df.columns)
else:
    # Keep only trips whose pickup AND drop-off lie inside a rough bounding box
    # (latitude 40..42, longitude -75..-72). `between` evaluates to False for
    # missing values, so rows without coordinates are removed here too.
    in_nyc_bounds = (
        nyc_tlc_df['pickupLatitude'].between(40, 42)
        & nyc_tlc_df['pickupLongitude'].between(-75, -72)
        & nyc_tlc_df['dropoffLatitude'].between(40, 42)
        & nyc_tlc_df['dropoffLongitude'].between(-75, -72)
    )
    nyc_tlc_df = nyc_tlc_df[in_nyc_bounds]

    # An empty result would otherwise produce a blank map (no heat, no
    # markers) with a success message, so report it explicitly.
    # NOTE(review): newer TLC records are believed to carry zone IDs
    # (puLocationId/doLocationId) instead of coordinates - confirm for this
    # date range.
    if nyc_tlc_df.empty:
        print("Warning: no trips with coordinates inside the NYC bounding box were found; "
              "the saved map will contain no heatmap data or markers.")

    # Create a base map
    base_map = folium.Map(location=[40.7128, -74.0060], zoom_start=11)

    # Prepare data for pickup heatmap as a list of [lat, lon] pairs
    pickup_locations = nyc_tlc_df[['pickupLatitude', 'pickupLongitude']].dropna()
    pickup_locations = pickup_locations.values.tolist()

    # Prepare data for dropoff heatmap as a list of [lat, lon] pairs
    dropoff_locations = nyc_tlc_df[['dropoffLatitude', 'dropoffLongitude']].dropna()
    dropoff_locations = dropoff_locations.values.tolist()

    # Add pickup heatmap to the base map (named so LayerControl can toggle it)
    HeatMap(pickup_locations, radius=8, max_zoom=13, name='Pickups').add_to(base_map)

    # Add dropoff heatmap to the base map (named so LayerControl can toggle it)
    HeatMap(dropoff_locations, radius=8, max_zoom=13, name='Drop-offs').add_to(base_map)

    # Add markers for unique drop-off locations, styled with a red 'info-sign' icon.
    # A fresh Icon is built for every marker inside the loop.
    unique_dropoff_locations = nyc_tlc_df[['dropoffLatitude', 'dropoffLongitude']].drop_duplicates().dropna()
    for idx, row in unique_dropoff_locations.iterrows():
        folium.Marker(
            [row['dropoffLatitude'], row['dropoffLongitude']],
            popup=f"Drop-off Location: ({row['dropoffLatitude']}, {row['dropoffLongitude']})",
            icon=folium.Icon(color='red', icon='info-sign')
        ).add_to(base_map)

    # Add layer control
    folium.LayerControl().add_to(base_map)

    # Save the map to an HTML file
    base_map.save('nyc_taxi_heatmap_with_red_markers.html')

    # Print unique drop-off locations
    unique_dropoff_locations_list = unique_dropoff_locations.values.tolist()
    print("Unique Drop-off Locations:")
    for location in unique_dropoff_locations_list:
        print(location)

    print("Heatmap with red markers has been saved to 'nyc_taxi_heatmap_with_red_markers.html'")



{'infer_column_types': 'False', 'activity': 'download'}
{'infer_column_types': 'False', 'activity': 'download', 'activityApp': 'FileDataset'}
[Info] read from /var/folders/m1/v2p5dlc135x8ctp72qfss6200000gn/T/tmpo0u13b97/https%3A/%2Fazureopendatastorage.azurefd.net/nyctlc/green/puYear=2018/puMonth=5/part-00087-tid-4753095944193949832-fee7e113-666d-4114-9fcb-bcd3046479f3-2657-1.c000.snappy.parquet
[Info] read from /var/folders/m1/v2p5dlc135x8ctp72qfss6200000gn/T/tmpo0u13b97/https%3A/%2Fazureopendatastorage.azurefd.net/nyctlc/green/puYear=2018/puMonth=6/part-00171-tid-4753095944193949832-fee7e113-666d-4114-9fcb-bcd3046479f3-2741-1.c000.snappy.parquet
Index(['vendorID', 'lpepPickupDatetime', 'lpepDropoffDatetime',
       'passengerCount', 'tripDistance', 'puLocationId', 'doLocationId',
       'pickupLongitude', 'pickupLatitude', 'dropoffLongitude',
       'dropoffLatitude', 'rateCodeID', 'storeAndFwdFlag', 'paymentType',
       'fareAmount', 'extra', 'mtaTax', 'improvementSurcharge', 'tipA

In [5]:
# Import necessary libraries
from azureml.opendatasets import NycTlcGreen
from datetime import datetime
from dateutil import parser
import pandas as pd
import folium
from folium.plugins import HeatMap

# Build a pickup/drop-off heatmap with a large red image marker per distinct
# drop-off point and save it as HTML.

# Set the date range
end_date = parser.parse('2018-06-06')
start_date = parser.parse('2018-05-01')

# Fetch the dataset and load it into a pandas DataFrame
nyc_tlc = NycTlcGreen(start_date=start_date, end_date=end_date)
nyc_tlc_df = nyc_tlc.to_pandas_dataframe()

# Display the columns and basic info to verify the names and data
print(nyc_tlc_df.columns)
nyc_tlc_df.info()

# Ensure necessary columns are present
required_columns = ['pickupLatitude', 'pickupLongitude', 'dropoffLatitude', 'dropoffLongitude']
if not all(col in nyc_tlc_df.columns for col in required_columns):
    print("Required columns are missing. Available columns are:")
    print(nyc_tlc_df.columns)
else:
    # Keep only trips whose pickup AND drop-off lie inside a rough bounding box
    # (latitude 40..42, longitude -75..-72). `between` evaluates to False for
    # missing values, so rows without coordinates are removed here too.
    in_nyc_bounds = (
        nyc_tlc_df['pickupLatitude'].between(40, 42)
        & nyc_tlc_df['pickupLongitude'].between(-75, -72)
        & nyc_tlc_df['dropoffLatitude'].between(40, 42)
        & nyc_tlc_df['dropoffLongitude'].between(-75, -72)
    )
    nyc_tlc_df = nyc_tlc_df[in_nyc_bounds]

    # An empty result would otherwise produce a blank map (no heat, no
    # markers) with a success message, so report it explicitly.
    # NOTE(review): newer TLC records are believed to carry zone IDs
    # (puLocationId/doLocationId) instead of coordinates - confirm for this
    # date range.
    if nyc_tlc_df.empty:
        print("Warning: no trips with coordinates inside the NYC bounding box were found; "
              "the saved map will contain no heatmap data or markers.")

    # Create a base map
    base_map = folium.Map(location=[40.7128, -74.0060], zoom_start=11)

    # Prepare data for pickup heatmap as a list of [lat, lon] pairs
    pickup_locations = nyc_tlc_df[['pickupLatitude', 'pickupLongitude']].dropna()
    pickup_locations = pickup_locations.values.tolist()

    # Prepare data for dropoff heatmap as a list of [lat, lon] pairs
    dropoff_locations = nyc_tlc_df[['dropoffLatitude', 'dropoffLongitude']].dropna()
    dropoff_locations = dropoff_locations.values.tolist()

    # Add pickup heatmap to the base map (named so LayerControl can toggle it)
    HeatMap(pickup_locations, radius=8, max_zoom=13, name='Pickups').add_to(base_map)

    # Add dropoff heatmap to the base map (named so LayerControl can toggle it)
    HeatMap(dropoff_locations, radius=8, max_zoom=13, name='Drop-offs').add_to(base_map)

    # Add markers for unique drop-off locations with larger (36x36) red icons.
    # The icon image is requested over HTTPS: a plain-HTTP image is blocked as
    # mixed content when the saved page is served from an HTTPS origin.
    # A fresh CustomIcon is built for every marker inside the loop.
    unique_dropoff_locations = nyc_tlc_df[['dropoffLatitude', 'dropoffLongitude']].drop_duplicates().dropna()
    for idx, row in unique_dropoff_locations.iterrows():
        folium.Marker(
            [row['dropoffLatitude'], row['dropoffLongitude']],
            popup=f"Drop-off Location: ({row['dropoffLatitude']}, {row['dropoffLongitude']})",
            icon=folium.CustomIcon(icon_image='https://maps.google.com/mapfiles/ms/icons/red-dot.png', icon_size=(36, 36))
        ).add_to(base_map)

    # Add layer control
    folium.LayerControl().add_to(base_map)

    # Save the map to an HTML file
    base_map.save('nyc_taxi_heatmap_with_large_red_markers.html')

    # Print unique drop-off locations
    unique_dropoff_locations_list = unique_dropoff_locations.values.tolist()
    print("Unique Drop-off Locations:")
    for location in unique_dropoff_locations_list:
        print(location)

    print("Heatmap with large red markers has been saved to 'nyc_taxi_heatmap_with_large_red_markers.html'")


{'infer_column_types': 'False', 'activity': 'download'}
{'infer_column_types': 'False', 'activity': 'download', 'activityApp': 'FileDataset'}
[Info] read from /var/folders/m1/v2p5dlc135x8ctp72qfss6200000gn/T/tmp4bld5g9c/https%3A/%2Fazureopendatastorage.azurefd.net/nyctlc/green/puYear=2018/puMonth=5/part-00087-tid-4753095944193949832-fee7e113-666d-4114-9fcb-bcd3046479f3-2657-1.c000.snappy.parquet
[Info] read from /var/folders/m1/v2p5dlc135x8ctp72qfss6200000gn/T/tmp4bld5g9c/https%3A/%2Fazureopendatastorage.azurefd.net/nyctlc/green/puYear=2018/puMonth=6/part-00171-tid-4753095944193949832-fee7e113-666d-4114-9fcb-bcd3046479f3-2741-1.c000.snappy.parquet
Index(['vendorID', 'lpepPickupDatetime', 'lpepDropoffDatetime',
       'passengerCount', 'tripDistance', 'puLocationId', 'doLocationId',
       'pickupLongitude', 'pickupLatitude', 'dropoffLongitude',
       'dropoffLatitude', 'rateCodeID', 'storeAndFwdFlag', 'paymentType',
       'fareAmount', 'extra', 'mtaTax', 'improvementSurcharge', 'tipA