In [2]:
import datetime
import dateutil.parser
import itertools
import logging
import pickle
import math
import requests
import time
import json

import pymongo
import moves as mvs
import pandas as pd
import numpy as np

In [3]:
client = pymongo.MongoClient('localhost', 27017)
db = client.carbon_calculator

In [4]:
profile = db.users.find_one({'userId': 32734778124657154})
moves = mvs.MovesClient(access_token=profile['user']['access_token'])

### GeoJsonify

In [9]:
transports = list(db.moves2.find({}, {'_id':0}))

In [5]:
def make_geometry(trackPoint):
    return {
        'type': 'Point',
        'coordinates': [trackPoint['lon'], trackPoint['lat']]
    }

In [6]:
def make_property(trackPoint):
    return {
        'latitude': trackPoint['lat'],
        'longitude': trackPoint['lon'],
        'time': trackPoint['time'],
        'id': 'transport'
    }

In [7]:
def make_feature(trackPoint):
    return {
        'type': 'Feature',
        'properties': make_property(trackPoint),
        'geometry': make_geometry(trackPoint)
    }

In [8]:
def make_feature_collection(transport):
    return {
        'type': 'FeatureCollection',
        'features': [make_feature(tp) for tp in transport['trackPoints']]
    }

In [10]:
def add_feature_collection(transport):
    transport['geojson'] = make_feature_collection(transport)
    return transport

In [11]:
transports = [add_feature_collection(t) for t in transports]

In [15]:
ids = db.moves2.insert(transports)

In [56]:
points = {
    'points': [make_feature_collection(s) for s in sample]
}

In [57]:
# point = make_feature_collection(sample[90])

In [58]:
# with open('../static/testPoint.json', 'w') as w:
#     json.dump(point, w)

### Pipeline

In [281]:
def existing_dates_(profile, record_type):
    """ Finds the earliest update for a moves record. """
    docs = db.moves2.find({
        'record_type': record_type,
        'userId': profile['userId']
        }, {'date': 1})
    dates = [doc['date'].date() for doc in docs]

    return dates

In [282]:
def service_daterange(start_date):
    """ Creates a list of datatime date objects from starting with
    the date the person joined Moves to today.
    """
    base_date = dateutil.parser.parse(start_date)
    today = datetime.datetime.today()
    numdays = (today - base_date).days
    dates = [(today - datetime.timedelta(days=x)).date()
                for x in range(0, numdays)]
    return dates

In [283]:
def missing_dates(service_dates, existing_dates):
    """ Returns a list of dates that haven't been fetched """
    return [date for date in service_dates if date not in existing_dates]

In [284]:
def fetch_resource(resource, date, update_since=None):
    """ Fetches a user's Moves summary for a given date range

    Args:
        resource: String of the moves resource to fetch.
        start_date: String of the start date.
        end_date: String of the end date.

    Returns:
        resources: List of resouce dicts from the Moves API

    Raises:
        ValueError: resource requested is not a moves resource.
    """
    if resource not in ['summary', 'activities', 'places', 'storyline']:
        raise ValueError('Invalid Moves resource.')

    rsrc_path = "user/{}/daily/{}?".format(resource, date)

    if resource == 'storyline':
        rsrc_path = "%s&trackPoints=true" % rsrc_path

    if update_since:
        rsrc_path = "%s&updateSince>T%sZ" % (rsrc_path, update_since)

    try:
        resources = moves.api(rsrc_path, 'GET').json()
    except Exception, exception:
        logging.error(exception.message)
        raise

    return resources

In [285]:
def transform_resource(resource, record_type, profile):
    """ Adds metadata to a move source record. """
    date_datetime = dateutil.parser.parse(resource['date'])

    if resource.has_key('lastUpdate'):
        update_datetime = dateutil.parser.parse(resource['lastUpdate'])
    else:
        update_datetime = date_datetime

    transformed = {
        'userId': profile['userId'],
        'record_type': record_type,
        'last_update': update_datetime,
        'date': date_datetime,
        'data': resource
    }

    return transformed

In [286]:
def transform_resources(resources, record_type, profile):
    """ Adds some metadata to raw Moves resources. """
    for resource in resources:
        yield transform_resource(resource, record_type, profile)

In [287]:
def insert_resources(transformed_resources):
    """ Inserts a collection of transformed resources into
    the moves staging database.
    """
    try:
        res = db.moves2.insert(transformed_resources)
    except pymongo.errors.BulkWriteError, results:
        res = db.moves2.remove(results)
        logging.error('BulkWriteError')
    except Exception, exception:
        logging.error(exception.message)
        res = None

    return res

In [288]:
def fetch_resources(resource_type, missing_dates):
    resources = []
    for date in missing_dates[:30]:
        resource = fetch_resource(resource_type, date)
        resources.append(resource[0])
    return resources

In [289]:
def extract_segments(storylines):
    segments = [s['data']['segments'] for s in storylines 
                if s['data']['segments']]
    return itertools.chain(*segments)

In [290]:
def extract_activities(segments):
    activities = [s['activities'] for s in segments 
                  if s.has_key('activities')]
    return itertools.chain(*activities)

In [227]:
labels = ['subway', 'bus', 'car', 'airplane']
model = pickle.load(open('../models/gradient_boosting.p', 'rb'))`

In [201]:
subways_entrances = db.subway_entrances.find_one()
features = subways_entrances['features']
station_points = [p['geometry']['coordinates'] for p in features]

In [243]:
# labels[model.predict(create_features(transports[0]))]

In [240]:
def predict_transport_type(transport, model):
    X = create_features(transport)
    pred = model.predict(X)
    return labels[pred]

In [244]:
def predict_transport_types(transports, model):
    X = np.array([create_features(t) for t in transports])
    preds = model.predict(X)
    return [labels[pred] for pred in preds]

In [216]:
# def create_features(transport):
#     start_time = dateutil.parser.parse(transport['startTime'])
#     end_time = dateutil.parser.parse(transport['endTime'])
#     fp = transport['trackPoints'][0]
#     lp = transport['trackPoints'][-1]
#     return {
#         'distance': transport['distance'], 
#         'duration': transport['duration'], 
#         'startTimeSec': datetime_to_seconds(start_time),
#         'endTimeSec': datetime_to_seconds(end_time), 
#         'startHour': start_time.hour, 
#         'trackpointCount': len(transport['trackPoints']),
#         'closestSubwayEntrance': compute_total_distance([fp['lon'], fp['lat']], [lp['lon'], lp['lat']]),
#     }

In [248]:
def create_features(transport):
    start_time = dateutil.parser.parse(transport['startTime'])
    end_time = dateutil.parser.parse(transport['endTime'])
    fp = transport['trackPoints'][0]
    lp = transport['trackPoints'][-1]
    feats = [
        transport['distance'], 
        transport['duration'], 
        datetime_to_seconds(start_time),
        datetime_to_seconds(end_time), 
        start_time.hour, 
        len(transport['trackPoints']),
        compute_total_distance([fp['lon'], fp['lat']], [lp['lon'], lp['lat']]),
    ]
    return np.array(feats).reshape(1, -1)

In [185]:
def datetime_to_seconds(dt):
    return (dt.hour * 3600) + (dt.minute * 60) + dt.second

In [186]:
def compute_min_distance(lat_lng):
    distances = [distance_on_unit_sphere(lat_lng[1], lat_lng[0], 
                    point[1], point[0]) for point in station_points] 
    return min(distances)

In [189]:
def compute_total_distance(first_point, last_point):
    start = compute_min_distance(first_point)
    end = compute_min_distance(last_point)
    
    return start + end

In [188]:
def distance_on_unit_sphere(lat1, long1, lat2, long2):
    # Convert latitude and longitude to 
    # spherical coordinates in radians.
    degrees_to_radians = math.pi/180.0
        
    # phi = 90 - latitude
    phi1 = (90.0 - lat1)*degrees_to_radians
    phi2 = (90.0 - lat2)*degrees_to_radians
        
    # theta = longitude
    theta1 = long1*degrees_to_radians
    theta2 = long2*degrees_to_radians
        
    # Compute spherical distance from spherical coordinates.
        
    # For two locations in spherical coordinates 
    # (1, theta, phi) and (1, theta, phi)
    # cosine( arc length ) = 
    #    sin phi sin phi' cos(theta-theta') + cos phi cos phi'
    # distance = rho * arc length
    
    cos = (math.sin(phi1)*math.sin(phi2)*math.cos(theta1 - theta2) + 
           math.cos(phi1)*math.cos(phi2))
    arc = math.acos(cos)

    # Remember to multiply arc by the radius of the earth 
    # in your favorite set of units to get length.
    earth_rad_miles = 3963.1676
    earth_rad_feet = earth_rad_miles
    
    return arc * earth_rad_feet

In [165]:
transformed_resources = list(transform_resources(resources, 'storyline', profile))

In [273]:
def add_carbon(transport):
    time.sleep(.05)
    kgs = compute_carbon_kg(transports_with_type[30])
    transport['carbon'] = kgs
    return transport

In [253]:
def add_prediction(transport, prediction):
    transport['type'] = prediction
    return transport

In [256]:
KEY = '20f32c9fad4aebc9998f8ce569bdc358'
BASE = 'http://impact.brighterplanet.com/' 
TYPES = {
 'car': 'automobile_trips.json',
 'subway': 'rail_trips.json?class=commuter',
 'airplane': 'flights.json'
}

In [257]:
def compute_carbon_kg(transport):
    url = BASE + TYPES[transport['type']]
    params = {
        'distance': transport['distance'],
        'key': KEY
    }
    res = requests.get(url, params=params).json()
    kgs = res['decisions']['carbon']['object']['value']
    
    return kgs

In [266]:
[ix for ix, p in enumerate(preds) if p != 'subway']

[30, 34, 36, 54, 84, 88, 96, 121]

In [291]:
membership_dates = service_daterange(profile['profile']['firstDate'])
existing_dates = existing_dates_(profile, 'storyline')
non_existing_dates = missing_dates(membership_dates, existing_dates)
resources = fetch_resources('storyline', non_existing_dates)
transformed_resources = list(transform_resources(resources, 'storyline', profile))
segments = list(extract_segments(transformed_resources))
activities = list(extract_activities(segments))
transports = [a for a in activities if a['activity'] == 'transport']
preds = [predict_transport_type(transport, model) for transport in transports]
transports_with_type = [add_prediction(t, preds[ix]) for ix, t in enumerate(transports)]
transports_with_carbon = [add_carbon(t) for t in transports_with_type]
insert_resources(transports_with_carbon)



[ObjectId('5760a7b622fc958324032c21'),
 ObjectId('5760a7b622fc958324032c22'),
 ObjectId('5760a7b622fc958324032c23'),
 ObjectId('5760a7b622fc958324032c24'),
 ObjectId('5760a7b622fc958324032c25'),
 ObjectId('5760a7b622fc958324032c26'),
 ObjectId('5760a7b622fc958324032c27'),
 ObjectId('5760a7b622fc958324032c28'),
 ObjectId('5760a7b622fc958324032c29'),
 ObjectId('5760a7b622fc958324032c2a'),
 ObjectId('5760a7b622fc958324032c2b'),
 ObjectId('5760a7b622fc958324032c2c'),
 ObjectId('5760a7b622fc958324032c2d'),
 ObjectId('5760a7b622fc958324032c2e'),
 ObjectId('5760a7b622fc958324032c2f'),
 ObjectId('5760a7b622fc958324032c30'),
 ObjectId('5760a7b622fc958324032c31'),
 ObjectId('5760a7b622fc958324032c32'),
 ObjectId('5760a7b622fc958324032c33'),
 ObjectId('5760a7b622fc958324032c34'),
 ObjectId('5760a7b622fc958324032c35'),
 ObjectId('5760a7b622fc958324032c36'),
 ObjectId('5760a7b622fc958324032c37'),
 ObjectId('5760a7b622fc958324032c38'),
 ObjectId('5760a7b622fc958324032c39'),
 ObjectId('5760a7b622fc95

In [277]:
transports_with_carbon[0]

{u'activity': u'transport',
 'carbon': 2172.080498999573,
 u'distance': 468.0,
 u'duration': 450.0,
 u'endTime': u'20160414T105904-0400',
 u'group': u'transport',
 u'manual': False,
 u'startTime': u'20160414T105134-0400',
 u'trackPoints': [{u'lat': 40.766929,
   u'lon': -73.98125,
   u'time': u'20160414T105134-0400'},
  {u'lat': 40.7634, u'lon': -73.982542, u'time': u'20160414T105848-0400'},
  {u'lat': 40.7634, u'lon': -73.982542, u'time': u'20160414T105904-0400'}],
 'type': 'subway'}