# Address Search Analysis:

#### 1) Longitude/latitude search
#### 2) Number of rides taken to and from location this year and by month
#### 3) Top zip code zones rides are being taken to or from this year and by month
#### 4) Passenger count breakdown this year and by month
#### 5) Average ride length this year and by month
#### 6) Average ride fare this year and by month

In [1]:
import pandas as pd
import numpy as np
import mongoengine as me
from datetime import datetime
import requests
import dateutil
import googlemaps
from bokeh.charts import Histogram, show
from bokeh.models import GeoJSONDataSource
from bokeh.plotting import figure

In [2]:
# Standard-library import kept local so this cell stays self-contained.
import os

# mongoengine connection: registers the default connection to the local
# 'cruz-dev' database.
me.connect('cruz-dev', host='localhost', port=27017)

# google api client
# SECURITY: API keys must never be committed to source control. They are read
# from the environment instead; the keys that used to be hard-coded here
# should be treated as leaked and rotated.
google_api_keys = {
    "surya": os.environ.get("GOOGLE_API_KEY_SURYA"),
    "graham": os.environ.get("GOOGLE_API_KEY_GRAHAM"),
}
if not google_api_keys["surya"]:
    # Fail early with an actionable message instead of an opaque client error.
    raise RuntimeError(
        "Set the GOOGLE_API_KEY_SURYA environment variable to a Google Maps API key")
gmaps = googlemaps.Client(key=google_api_keys["surya"])

In [3]:
class Ride(me.Document):
    """A single ride document stored in the ``rides_15`` collection.

    Besides the field definitions, the class offers two geo-query managers
    (``pickups_nearby`` / ``dropoffs_nearby``) and two converters that flatten
    a ride into plain Python / pandas structures for analysis.
    """

    pickup_datetime = me.DateTimeField()
    dropoff_datetime = me.DateTimeField()
    pickup_zipcode = me.IntField()
    pickup_borough = me.StringField()
    pickup_county = me.StringField()
    pickup_long_lat = me.PointField()
    dropoff_zipcode = me.IntField()
    dropoff_borough = me.StringField()
    dropoff_county = me.StringField()
    dropoff_long_lat = me.PointField()
    total_amount = me.FloatField()
    fare_amount = me.FloatField()
    tip_amount = me.FloatField()
    passenger_count = me.IntField()
    trip_distance = me.FloatField()

    @me.queryset_manager
    def pickups_nearby(doc_cls, queryset, long, lat, distance):
        """Return rides picked up near ``[long, lat]``, most recent pickup first.

        ``distance`` is passed through as the ``max_distance`` of the geo query.
        NOTE(review): presumably meters for a GeoJSON point on a 2dsphere
        index -- confirm against the MongoDB/mongoengine documentation.
        """
        return queryset.filter(
            pickup_long_lat__near=[long, lat],
            pickup_long_lat__max_distance=distance,
        ).order_by('-pickup_datetime')

    @me.queryset_manager
    def dropoffs_nearby(doc_cls, queryset, long, lat, distance):
        """Return rides dropped off near ``[long, lat]``, most recent dropoff first.

        ``distance`` is passed through as the ``max_distance`` of the geo query.
        NOTE(review): presumably meters for a GeoJSON point on a 2dsphere
        index -- confirm against the MongoDB/mongoengine documentation.
        """
        return queryset.filter(
            dropoff_long_lat__near=[long, lat],
            dropoff_long_lat__max_distance=distance,
        ).order_by('-dropoff_datetime')

    @staticmethod
    def _coordinates(point):
        """Return the coordinate pair of a point field value.

        A point may be a GeoJSON-style dict (``{'coordinates': [...]}``), a
        raw ``[long, lat]`` sequence, or ``None`` when the field is unset.
        Indexing every value with ``['coordinates']`` raised ``TypeError`` for
        the latter two, so they are handled explicitly here.
        """
        if point is None:
            return None
        if isinstance(point, dict):
            return point['coordinates']
        return point

    def to_json(self):
        """Return the ride as a plain dict with point fields reduced to coordinates.

        NOTE(review): despite the name this returns a dict, not a JSON string,
        and it shadows mongoengine's own ``Document.to_json``.
        """
        return {
            "pickup_datetime": self.pickup_datetime,
            "dropoff_datetime": self.dropoff_datetime,
            "pickup_zipcode": self.pickup_zipcode,
            "pickup_borough": self.pickup_borough,
            "pickup_county": self.pickup_county,
            "pickup_long_lat": self._coordinates(self.pickup_long_lat),
            "dropoff_zipcode": self.dropoff_zipcode,
            "dropoff_borough": self.dropoff_borough,
            "dropoff_county": self.dropoff_county,
            "dropoff_long_lat": self._coordinates(self.dropoff_long_lat),
            "total_amount": self.total_amount,
            "fare_amount": self.fare_amount,
            "tip_amount": self.tip_amount,
            "passenger_count": self.passenger_count,
            "trip_distance": self.trip_distance,
        }

    def to_series(self):
        """Return the ride as a ``pandas.Series`` with the same keys as ``to_json``."""
        # Single source of truth for the field mapping: reuse to_json() rather
        # than maintaining a second copy of the same dict literal.
        return pd.Series(self.to_json())

    # Compound indexes pairing each geo field with its timestamp (in both
    # orders) plus timestamp/borough/zipcode indexes; documents live in the
    # 'rides_15' collection.
    meta = {
        'indexes': [[("pickup_long_lat", "2dsphere"), ("pickup_datetime", 1)],
                    [("dropoff_long_lat", "2dsphere"), ("dropoff_datetime", 1)],
                    [("pickup_datetime", 1), ("pickup_borough", 1), ("pickup_zipcode", 1)],
                    [("dropoff_datetime", 1), ("dropoff_borough", 1), ("dropoff_zipcode", 1)],
                    [("pickup_datetime", 1), ("pickup_long_lat", "2dsphere")],
                    [("dropoff_datetime", 1), ("dropoff_long_lat", "2dsphere")]],
        'collection': 'rides_15'
    }

In [4]:
# 1) Longitude/latitude search

def long_lat_from_address(address):
    """Geocode ``address`` with the module-level Google Maps client ``gmaps``.

    Returns a dict:
      * on success: ``{"value": <location of the first geocode result>,
        "error": "None"}`` -- callers read ``"lng"`` and ``"lat"`` from the
        location;
      * on any failure (no results, unexpected payload, API or transport
        error): ``{"error": "Long Lat Not Found"}`` with no ``"value"`` key.

    Note that the "no error" marker is the string ``"None"``, not ``None``.
    """
    response = {}
    try:
        response["value"] = gmaps.geocode(address)[0]['geometry']['location']
    except Exception:
        # Deliberately best-effort, but unlike a bare ``except:`` this no
        # longer swallows KeyboardInterrupt / SystemExit.
        response["error"] = "Long Lat Not Found"
    else:
        response["error"] = "None"
    return response

In [6]:
# Example search: geocode an address, load the rides picked up nearby and
# chart pickups per month and the most common dropoff zip codes.
address = "PHD"
long_lat_response = long_lat_from_address(address)
if long_lat_response["error"] == "None":
    location = long_lat_response["value"]
    # 100 is the max_distance handed to the geo query.
    rides = Ride.pickups_nearby(location["lng"], location["lat"], 100)
    rides_df = pd.DataFrame([ride.to_series() for ride in rides])
    if rides_df.empty:
        # An empty result yields a DataFrame without columns, which the
        # charts below cannot handle.
        print("No pickups found near {0}".format(address))
    else:
        # Ride.to_series() has no 'pickup_month' entry, so derive it from the
        # pickup timestamp before charting by month.
        rides_df['pickup_month'] = pd.to_datetime(rides_df['pickup_datetime']).dt.month
        pickup_frequency = Histogram(rides_df, 'pickup_month', title="Number of Pickups by Month for {0}".format(address))
        show(pickup_frequency)
        top_dropoff_zones = Histogram(rides_df, 'dropoff_zipcode', bins=10, title="Top Dropoff Zones for {0}".format(address))
        show(top_dropoff_zones)
else:
    # Surface the geocoding failure instead of silently doing nothing.
    print(long_lat_response["error"])
    
    



In [None]:

# Plot the NYC zip code tabulation area polygons from a public GeoJSON file.
zipcode_geojson_url = "http://catalog.civicdashboards.com/dataset/11fd957a-8885-42ef-aa49-5c879ec93fac/resource/28377e88-8a50-428f-807c-40ba1f09159b/download/nyc-zip-code-tabulation-areas-polygons.geojson"
zipcode_response = requests.get(zipcode_geojson_url)
# Stop here on an HTTP error rather than feeding an error page to bokeh.
zipcode_response.raise_for_status()
# GeoJSONDataSource expects a JSON *string*. str() of the parsed dict gives a
# Python repr (single quotes, True/None), which is not valid JSON, so the raw
# response text is used instead.
zipcode_geojson = zipcode_response.text

geo_source = GeoJSONDataSource(geojson=zipcode_geojson)

p = figure()
# The features are polygons, which the data source exposes as 'xs'/'ys'
# columns; the 'x'/'y' columns read by circle() are only filled for points.
p.patches(xs='xs', ys='ys', alpha=0.9, source=geo_source)

show(p)



In [None]:
from bokeh.io import show
from bokeh.models import (
    ColumnDataSource,
    HoverTool,
    LogColorMapper
)
from bokeh.palettes import Viridis6 as palette
from bokeh.plotting import figure

from bokeh.sampledata.us_counties import data as counties
from bokeh.sampledata.unemployment import data as unemployment

# Choropleth of unemployment rates for Texas counties, built from bokeh's
# bundled sample data.

# Reverse a *copy* of the palette. The in-place palette.reverse() mutated the
# shared sequence inside bokeh.palettes (so re-running the cell flipped the
# colours back) and fails outright when the palette is a tuple.
palette = palette[::-1]

# Keep only the counties whose state is Texas.
counties = {
    code: county for code, county in counties.items() if county["state"] == "tx"
}

# One list of longitudes / latitudes per county outline.
county_xs = [county["lons"] for county in counties.values()]
county_ys = [county["lats"] for county in counties.values()]

county_names = [county['name'] for county in counties.values()]
# Iterating the dict yields county ids in the same order as .values() above.
county_rates = [unemployment[county_id] for county_id in counties]
color_mapper = LogColorMapper(palette=palette)

source = ColumnDataSource(data=dict(
    x=county_xs,
    y=county_ys,
    name=county_names,
    rate=county_rates,
))

TOOLS = "pan,wheel_zoom,box_zoom,reset,hover,save"

p = figure(
    title="Texas Unemployment, 2009", tools=TOOLS,
    x_axis_location=None, y_axis_location=None
)
p.grid.grid_line_color = None

# Fill colour is driven by the 'rate' column through the colour mapper.
p.patches('x', 'y', source=source,
          fill_color={'field': 'rate', 'transform': color_mapper},
          fill_alpha=0.7, line_color="white", line_width=0.5)

hover = p.select_one(HoverTool)
hover.point_policy = "follow_mouse"
hover.tooltips = [
    ("Name", "@name"),
    # Label previously read "Unemployment rate)" with a stray parenthesis.
    ("Unemployment rate", "@rate%"),
    ("(Long, Lat)", "($x, $y)"),
]

show(p)

In [None]:
# Inspect the (county id, county record) pairs of `counties`. If the Texas
# choropleth cell has been run, `counties` is the Texas-only subset.
counties.items()

In [None]:
from bokeh.sampledata.us_counties import data as counties

In [None]:
# Display whatever `counties` is currently bound to (notebook cell output).
counties