Import Libraries

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from PIL import Image, ImageDraw , ImageFont
import folium
%matplotlib inline
from folium import plugins
import os
import io
# Add the relative 'phantomjs' directory to the executable search path
# (presumably so the browser driver used for PNG export can be found -- confirm).
# The membership check keeps PATH from growing each time this cell is re-run.
if 'phantomjs' not in os.environ["PATH"].split(os.pathsep):
    os.environ["PATH"] += os.pathsep + 'phantomjs'

Load Data

In [2]:
# Load the full trip table from the CSV file in the working directory.
taxi_data_with_all_info = pd.read_csv("taxi_data_with_all_info.csv")

In [3]:
# The CSV spells the pickup column "pickup_neighboorhood"; normalise it so the
# pickup and drop-off columns follow the same naming scheme.
# Only the column label is renamed -- the row index is left untouched, since
# nothing in this notebook needs string row labels.
taxi_data_with_all_info = taxi_data_with_all_info.rename(
    columns={"pickup_neighboorhood": "pickup_neighborhood"}
)

Generate a unique id for each pickup and dropoff neighborhood

In [4]:
# Store pickup neighborhoods as a categorical column and expose the integer
# category code as the neighborhood id (pandas uses -1 for missing values).
taxi_data_with_all_info["pickup_neighborhood"] = pd.Categorical(
    taxi_data_with_all_info["pickup_neighborhood"]
)
taxi_data_with_all_info["pickup_neighborhood_id"] = (
    taxi_data_with_all_info["pickup_neighborhood"].cat.codes
)

In [5]:
# Store drop-off neighborhoods as a categorical column and expose the integer
# category code as the neighborhood id (pandas uses -1 for missing values).
taxi_data_with_all_info["dropoff_neighborhood"] = pd.Categorical(
    taxi_data_with_all_info["dropoff_neighborhood"]
)
taxi_data_with_all_info["dropoff_neighborhood_id"] = (
    taxi_data_with_all_info["dropoff_neighborhood"].cat.codes
)

Define a function to count the trips departing from each pickup neighborhood during a given hour

In [6]:
def get_trip_counts_by_hour_pickup(selected_hour):
    """Count the trips that started in each pickup neighborhood in one hour.

    Reads the module-level ``taxi_data_with_all_info`` frame.

    Parameters
    ----------
    selected_hour
        Value matched for equality against the ``pickup_hour`` column.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``pickup_neighborhood_id``, one row per neighborhood that
        has at least one pickup in ``selected_hour``. Columns, in order:
        ``"Departure Count"``, ``"pickup_latitude"``, ``"pickup_longitude"``,
        ``"pickup_neighborhood"``.
    """
    # One representative location/name per neighborhood: the first non-null
    # value of each column within the neighborhood, taken over ALL hours so the
    # coordinates do not change from hour to hour.
    locations = (
        taxi_data_with_all_info
        .groupby("pickup_neighborhood_id")[
            ["pickup_latitude", "pickup_longitude", "pickup_neighborhood"]
        ]
        .first()
    )

    # Trips picked up during the requested hour.
    subset = taxi_data_with_all_info[
        taxi_data_with_all_info["pickup_hour"] == selected_hour
    ]

    # size() counts rows per group; unlike count() on an arbitrary first
    # column it is not affected by missing values in that column.
    departure_counts = (
        subset.groupby("pickup_neighborhood_id")
        .size()
        .to_frame("Departure Count")
    )

    trip_counts = departure_counts.join(locations)
    return trip_counts

# print a sample
get_trip_counts_by_hour_pickup(3).head()

Unnamed: 0_level_0,Departure Count,pickup_latitude,pickup_longitude,pickup_neighborhood
pickup_neighborhood_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,246,40.774471,-73.913033,Astoria
2,1,40.768295,-73.89949,Astoria Heights
4,1,40.599155,-73.989265,Bath Beach
5,28,40.711201,-74.016022,Battery Park
6,2,40.624229,-74.030792,Bay Ridge


Define a function to interpolate pickup trip count between hours 

In [7]:
def interpolate(df1, df2, x):
    """Blend two dataframes linearly: ``(1 - x) * df1 + x * df2``.

    The frames are aligned on their labels by pandas. Any cell that is
    missing from either input (and therefore NaN after the arithmetic) is
    returned as 0.
    """
    blended = (1 - x) * df1 + x * df2
    return blended.fillna(0)
  

def get_trip_counts_by_minute_pickup(float_hour, data):
    """Return pickup counts for a fractional hour by blending hourly counts.

    The counts of the hour containing ``float_hour`` and of the following
    hour are linearly interpolated with weight ``float_hour % 1``.

    Parameters
    ----------
    float_hour : float
        Time of day as a fractional hour, e.g. ``9.5`` for 09:30.
    data
        Unused. The hourly counts come from
        ``get_trip_counts_by_hour_pickup``, which reads the module-level
        table; the parameter is kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``pickup_neighborhood_id`` with columns, in order:
        ``"pickup_latitude"``, ``"pickup_longitude"``, ``"Departure Count"``,
        ``"pickup_neighborhood"``.
    """
    count_column = "Departure Count"
    fixed_columns = ["pickup_latitude", "pickup_longitude", "pickup_neighborhood"]

    # Wrap around midnight so that 23.x blends towards hour 0 instead of a
    # non-existent hour 24 (which would yield an empty frame).
    start_hour = int(float_hour) % 24
    end_hour = (start_hour + 1) % 24
    df1 = get_trip_counts_by_hour_pickup(start_hour)
    df2 = get_trip_counts_by_hour_pickup(end_hour)

    # A neighborhood absent from one of the two hours had zero trips in that
    # hour; give it an explicit 0 so the blend is not wiped out by NaN.
    all_ids = df1.index.union(df2.index)
    counts = interpolate(
        df1[[count_column]].reindex(all_ids, fill_value=0),
        df2[[count_column]].reindex(all_ids, fill_value=0),
        float_hour % 1,
    )

    # Coordinates and names are properties of the neighborhood, not of the
    # hour: take them from whichever hourly frame has the row rather than
    # interpolating them.
    fixed = pd.concat([df1[fixed_columns], df2[fixed_columns]])
    fixed = fixed[~fixed.index.duplicated(keep="first")].reindex(all_ids)

    df = fixed[["pickup_latitude", "pickup_longitude"]].join(counts)
    df["pickup_neighborhood"] = fixed["pickup_neighborhood"]
    return df

Define a plotting function that takes the counts produced by the previous function and a boolean selecting the day (light) or night (dark) basemap

In [18]:
def plot_counts_pickup(trip_counts, day):
    """Draw one red circle per row of ``trip_counts`` on a new folium map.

    Parameters
    ----------
    trip_counts : pandas.DataFrame
        Must provide ``"pickup_latitude"``, ``"pickup_longitude"`` and
        ``"Departure Count"`` columns.
    day : bool
        Truthy selects the "cartodbpositron" tiles, falsy selects
        "CartoDB dark_matter".

    Returns
    -------
    folium.Map
        The map with all circle markers added.
    """
    # Pick the basemap first so the map itself is only built in one place.
    basemap = "cartodbpositron" if day else "CartoDB dark_matter"
    folium_map = folium.Map(location=[40.7245, -73.9419],
                            zoom_start=11.5,
                            tiles=basemap,
                            width='80%')

    # Walk the three needed columns in parallel, one circle marker per row.
    markers = zip(trip_counts["pickup_latitude"],
                  trip_counts["pickup_longitude"],
                  trip_counts["Departure Count"])
    for latitude, longitude, departures in markers:
        # Circle radius is the departure count scaled down by 250.
        folium.CircleMarker(location=(latitude, longitude),
                            radius=departures / 250,
                            color="#ff0000",
                            fill=True).add_to(folium_map)

    return folium_map

Test the plotting and interpolation function

In [19]:
# Interpolate the pickup counts for 09:30 and display them on the dark basemap
# (day=False); the map is the cell's last expression, so it renders inline.
data = get_trip_counts_by_minute_pickup(9.5, taxi_data_with_all_info)
plot_counts_pickup(data,False)

Define a function for generating and saving an image based on an hour input value and a save path.

In [10]:
def frame_pickup(i, hour_of_day, save_path):
    """Render one captioned frame of pickup counts and save it as a PNG.

    Parameters
    ----------
    i : int
        Frame number; the file is written as ``frame_<i>.png`` with ``i``
        left-padded with zeros to five characters.
    hour_of_day : float
        Time of day as a fractional hour (e.g. ``9.5`` for 09:30). Values
        below 17 are plotted with the day flag set, others with it cleared.
    save_path : str
        Directory that receives the PNG. It is created if it does not exist.

    Returns
    -------
    PIL.Image.Image
        The captioned image that was written to disk.
    """
    # Create the output directory up front so a missing folder is not only
    # discovered after the expensive map render. An empty path means the
    # current directory and needs no creation.
    if save_path:
        os.makedirs(save_path, exist_ok=True)

    # create the map object
    data = get_trip_counts_by_minute_pickup(hour_of_day, taxi_data_with_all_info)
    my_frame = plot_counts_pickup(data, hour_of_day < 17)

    # generate the png file as a byte array
    # NOTE(review): _to_png() is a private folium method; presumably it relies
    # on an external browser driver being installed -- confirm.
    png = my_frame._to_png()

    # split the fractional hour into whole hours and minutes for the caption
    hour = int(hour_of_day)
    minutes = int((hour_of_day % 1) * 60)

    # create a PIL image object and a drawing context for it
    image = Image.open(io.BytesIO(png))
    draw = ImageDraw.Draw(image)

    # load the caption font; the font file must be resolvable by Pillow
    font = ImageFont.truetype("Montserrat-Regular.ttf", 30)

    # draw time of day text near the bottom-left corner
    draw.text((20, image.height - 50),
              "time: {:0>2}:{:0>2}h".format(hour, minutes),
              fill=(255, 0, 0),
              font=font)

    # draw title near the top-right corner
    draw.text((image.width - 400, 20),
              "Pickups vs Time of Day",
              fill=(255, 0, 0),
              font=font)

    # write to a png file
    filename = os.path.join(save_path, "frame_{:0>5}.png".format(i))
    image.save(filename, "PNG")
    return image

Repeat for drop-off Data

In [11]:
def get_trip_counts_by_hour_dropoff(selected_hour):
    """Count the trips that ended in each drop-off neighborhood in one hour.

    Reads the module-level ``taxi_data_with_all_info`` frame.

    Parameters
    ----------
    selected_hour
        Value matched for equality against the ``dropoff_hour`` column.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``dropoff_neighborhood_id``, one row per neighborhood that
        has at least one drop-off in ``selected_hour``. Columns, in order:
        ``"Arrival Count"``, ``"dropoff_latitude"``, ``"dropoff_longitude"``,
        ``"dropoff_neighborhood"``.
    """
    # One representative location/name per neighborhood: the first non-null
    # value of each column within the neighborhood, taken over ALL hours so the
    # coordinates do not change from hour to hour.
    locations = (
        taxi_data_with_all_info
        .groupby("dropoff_neighborhood_id")[
            ["dropoff_latitude", "dropoff_longitude", "dropoff_neighborhood"]
        ]
        .first()
    )

    # Trips dropped off during the requested hour.
    subset = taxi_data_with_all_info[
        taxi_data_with_all_info["dropoff_hour"] == selected_hour
    ]

    # size() counts rows per group; unlike count() on an arbitrary first
    # column it is not affected by missing values in that column.
    arrival_counts = (
        subset.groupby("dropoff_neighborhood_id")
        .size()
        .to_frame("Arrival Count")
    )

    # attach the neighborhood location and name to the arrival counts
    trip_counts = arrival_counts.join(locations)
    return trip_counts

# print a sample to check our code works
get_trip_counts_by_hour_dropoff(6).head()

Unnamed: 0_level_0,Arrival Count,dropoff_latitude,dropoff_longitude,dropoff_neighborhood
dropoff_neighborhood_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
4,2,40.587467,-73.802635,Arverne
5,284,40.765919,-73.918854,Astoria
6,2,40.767582,-73.889427,Astoria Heights
7,3,40.76157,-73.792847,Auburndale
8,3,40.599152,-73.98922,Bath Beach


In [12]:
def get_trip_counts_by_minute_dropoff(float_hour, data):
    """Return drop-off counts for a fractional hour by blending hourly counts.

    The counts of the hour containing ``float_hour`` and of the following
    hour are linearly interpolated with weight ``float_hour % 1``.

    Parameters
    ----------
    float_hour : float
        Time of day as a fractional hour, e.g. ``9.5`` for 09:30.
    data
        Unused. The hourly counts come from
        ``get_trip_counts_by_hour_dropoff``, which reads the module-level
        table; the parameter is kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``dropoff_neighborhood_id`` with columns, in order:
        ``"dropoff_latitude"``, ``"dropoff_longitude"``, ``"Arrival Count"``,
        ``"dropoff_neighborhood"``.
    """
    count_column = "Arrival Count"
    fixed_columns = ["dropoff_latitude", "dropoff_longitude", "dropoff_neighborhood"]

    # Wrap around midnight so that 23.x blends towards hour 0 instead of a
    # non-existent hour 24 (which would yield an empty frame).
    start_hour = int(float_hour) % 24
    end_hour = (start_hour + 1) % 24
    df1 = get_trip_counts_by_hour_dropoff(start_hour)
    df2 = get_trip_counts_by_hour_dropoff(end_hour)

    # A neighborhood absent from one of the two hours had zero trips in that
    # hour; give it an explicit 0 so the blend is not wiped out by NaN.
    all_ids = df1.index.union(df2.index)
    counts = interpolate(
        df1[[count_column]].reindex(all_ids, fill_value=0),
        df2[[count_column]].reindex(all_ids, fill_value=0),
        float_hour % 1,
    )

    # Coordinates and names are properties of the neighborhood, not of the
    # hour: take them from whichever hourly frame has the row rather than
    # interpolating them.
    fixed = pd.concat([df1[fixed_columns], df2[fixed_columns]])
    fixed = fixed[~fixed.index.duplicated(keep="first")].reindex(all_ids)

    df = fixed[["dropoff_latitude", "dropoff_longitude"]].join(counts)
    df["dropoff_neighborhood"] = fixed["dropoff_neighborhood"]
    return df

In [20]:
def plot_counts_dropoff(trip_counts, day):
    """Draw one green circle per row of ``trip_counts`` on a new folium map.

    Parameters
    ----------
    trip_counts : pandas.DataFrame
        Must provide ``"dropoff_latitude"``, ``"dropoff_longitude"`` and
        ``"Arrival Count"`` columns.
    day : bool
        Truthy selects the "cartodbpositron" tiles, falsy selects
        "CartoDB dark_matter".

    Returns
    -------
    folium.Map
        The map with all circle markers added.
    """
    # Pick the basemap first so the map itself is only built in one place.
    basemap = "cartodbpositron" if day else "CartoDB dark_matter"
    folium_map = folium.Map(location=[40.7245, -73.9419],
                            zoom_start=11.5,
                            tiles=basemap,
                            width='100%')

    # Walk the three needed columns in parallel, one circle marker per row.
    markers = zip(trip_counts["dropoff_latitude"],
                  trip_counts["dropoff_longitude"],
                  trip_counts["Arrival Count"])
    for latitude, longitude, arrivals in markers:
        # Circle radius is the arrival count scaled down by 250.
        folium.CircleMarker(location=(latitude, longitude),
                            radius=arrivals / 250,
                            color="#00FF00",
                            fill=True).add_to(folium_map)

    return folium_map

In [21]:
# Interpolate the drop-off counts for 09:30 and display them on the dark basemap
# (day=False). Note that this rebinds the notebook-level name `data`.
data = get_trip_counts_by_minute_dropoff(9.5, taxi_data_with_all_info)
plot_counts_dropoff(data,False)

In [15]:
def frame_dropoff(i, hour_of_day, save_path):
    """Render one captioned frame of drop-off counts and save it as a PNG.

    Parameters
    ----------
    i : int
        Frame number; the file is written as ``frame_<i>.png`` with ``i``
        left-padded with zeros to five characters.
    hour_of_day : float
        Time of day as a fractional hour (e.g. ``9.5`` for 09:30). Values
        below 17 are plotted with the day flag set, others with it cleared.
    save_path : str
        Directory that receives the PNG. It is created if it does not exist.

    Returns
    -------
    PIL.Image.Image
        The captioned image that was written to disk.
    """
    # Create the output directory up front so a missing folder is not only
    # discovered after the expensive map render. An empty path means the
    # current directory and needs no creation.
    if save_path:
        os.makedirs(save_path, exist_ok=True)

    # create the map object
    data = get_trip_counts_by_minute_dropoff(hour_of_day, taxi_data_with_all_info)
    my_frame = plot_counts_dropoff(data, hour_of_day < 17)

    # generate the png file as a byte array
    # NOTE(review): _to_png() is a private folium method; presumably it relies
    # on an external browser driver being installed -- confirm.
    png = my_frame._to_png()

    # split the fractional hour into whole hours and minutes for the caption
    hour = int(hour_of_day)
    minutes = int((hour_of_day % 1) * 60)

    # create a PIL image object and a drawing context for it
    image = Image.open(io.BytesIO(png))
    draw = ImageDraw.Draw(image)

    # load the caption font; the font file must be resolvable by Pillow
    font = ImageFont.truetype("Montserrat-Regular.ttf", 30)

    # draw time of day text near the bottom-left corner
    draw.text((20, image.height - 50),
              "time: {:0>2}:{:0>2}h".format(hour, minutes),
              fill=(0, 255, 0),
              font=font)

    # draw title near the top-right corner
    draw.text((image.width - 400, 20),
              "Drop-offs vs Time of Day",
              fill=(0, 255, 0),
              font=font)

    # write to a png file
    filename = os.path.join(save_path, "frame_{:0>5}.png".format(i))
    image.save(filename, "PNG")
    return image

In [None]:
# Render a pickup frame and a drop-off frame for every half-hour step
# from 5.0 up to (but not including) 24.0.
times = np.arange(5, 24, 0.5)
for frame_number, hour_of_day in enumerate(times):
    frame_pickup(frame_number, hour_of_day, "frames_pickup_Manhattan")
    frame_dropoff(frame_number, hour_of_day, "frames_dropoff_Manhattan")

Generate a time-dependent heatmap based on pickup data

In [None]:
map_new = folium.Map(location=[40.7245, -73.9419],
                    zoom_start = 11.5)

# Build one list of [latitude, longitude] pairs per pickup hour from 5 to 23.
# The coordinates are selected column-wise in a single step per hour instead
# of visiting every row with iterrows(), which is far slower on a large table
# and produces the same nested lists.
heat_data = [
    taxi_data_with_all_info.loc[
        taxi_data_with_all_info['pickup_hour'] == i,
        ['pickup_latitude', 'pickup_longitude'],
    ].values.tolist()
    for i in range(5, 24)
]

# Plot it on the map: one animation step per hour
hm = plugins.HeatMapWithTime(heat_data,auto_play=True,max_opacity=0.5)
hm.add_to(map_new)

Save the heatmap to file

In [None]:
# Write the time-dependent heatmap to an HTML file in the working directory.
map_new.save("map_pickup.html")