This notebook visualizes how closely multiple high-accuracy, fast-sampling location streams, collected on two different platforms (Android and iOS), agree with each other while riding Caltrain.

Remember to load the Caltrain data using `Pull_entries_from_server` first, if you have not already done so.

In [None]:
import sys
import logging
from uuid import UUID

import emission.core.get_database as edb
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import datetime as pydt
import emission.storage.timeseries.abstract_timeseries as esta
import emission.storage.timeseries.timequery as estt
import numpy as np

# Log everything from DEBUG upwards, and record the module search path once
# so that import problems are easy to diagnose.
logging.getLogger().setLevel(logging.DEBUG)
logging.debug(sys.path)

# Identifiers of the phones whose streams are compared, one list per platform.
iphone_ids = list(map(UUID, (
    "079e0f1a-c440-3d7c-b0e7-de160f748e35",
    "c76a0487-7e5a-3b17-a449-47be666b36f6",
    "c528bcd2-a88b-3e82-be62-ef4f2396967a",
    "95e70727-a04e-3e33-b7fe-34ab19194f8b",
)))
android_ids = list(map(UUID, (
    "e471711e-bd14-3dbe-80b6-9c7d92ecc296",
    "fd7b4c2e-2c8b-3bfa-94f0-d1e3ecbd5fb7",
    "86842c35-da28-32ed-a90e-2da6663c5c73",
    "3bc0f91f-7660-34a2-b005-5c399598a369",
)))

In [None]:
# Peek at the first five "config/sensor_config" entries that carry a
# data.battery_status field, ordered by metadata.write_ts.
list(
    edb.get_timeseries_db()
       .find({"metadata.key": "config/sensor_config",
              "data.battery_status": {"$exists": True}})
       .sort("metadata.write_ts")
       .limit(5)
)

In [None]:
# One TimeSeries handle per phone, in the same order as the id lists.
# These are materialized as real lists because a later cell indexes them
# (android_ts[0], iphone_ts[0]); under Python 3 a bare `map` is a lazy,
# single-use iterator that cannot be subscripted.
iphone_ts = list(map(esta.TimeSeries.get_time_series, iphone_ids))
android_ts = list(map(esta.TimeSeries.get_time_series, android_ids))

In [None]:
# Malformed rows in ranges.csv are skipped (with a warning) instead of aborting
# the load. `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0 in
# favour of `on_bad_lines`; try the current keyword first and fall back to the
# legacy one so the cell runs on either side of that change.
try:
    ranges_df = pd.read_csv("../results_spring_2016/ranges.csv", on_bad_lines="warn")
except TypeError:
    # Older pandas does not know `on_bad_lines` and rejects it with a TypeError.
    ranges_df = pd.read_csv("../results_spring_2016/ranges.csv", error_bad_lines=False)

In [None]:
ranges_df.head()

In [None]:
regime_grouped_ranges_df = ranges_df.groupby(['state', 'android_regime', 'ios_regime'])

In [None]:
invalid_df = pd.read_csv("../results_spring_2016/invalid.csv")

In [None]:
invalid_df

In [None]:
caltrain_df = pd.read_csv("../results_spring_2016/caltrain_trips.csv")

In [None]:
caltrain_df

In [None]:
import geojson as gj

In [None]:
# Read the route geometry through a context manager so the file handle is
# closed as soon as parsing finishes, instead of lingering until garbage collection.
with open("../caltrain_routes.geojson") as routes_file:
    caltrain_routes = gj.load(routes_file)

In [None]:
len(caltrain_routes)

In [None]:
caltrain_routes["features"][0]

In [None]:
local_route = caltrain_routes["features"][3]

In [None]:
local_route

In [None]:
# Stop coordinates as [longitude, latitude] pairs, one stop per line.
baby_bullet_stops_sj_2_sf = [
    [-121.903447, 37.328642],
    [-122.075954, 37.394458],
    [-122.164182, 37.44334],
    [-122.297001, 37.537416],
    [-122.386097, 37.599223],
    [-122.395406, 37.776541],
]

In [None]:
def get_points_from_ts_for_range(ts, start_ts, end_ts):
    """Fetch the "background/location" entries of one timeseries for a time window.

    The window is applied to `metadata.write_ts` and is bounded by
    `start_ts` and `end_ts`.

    :param ts: object exposing `get_data_df(key, time_query)`
    :param start_ts: start of the window
    :param end_ts: end of the window
    :return: whatever `ts.get_data_df` returns for that key and window
    """
    write_window = estt.TimeQuery('metadata.write_ts', start_ts, end_ts)
    return ts.get_data_df("background/location", write_window)

In [None]:
def get_points_for_range(ts, range_df):
    """Fetch the location points of one timeseries for every row of `range_df`.

    :param ts: timeseries handle passed through to `get_points_from_ts_for_range`
    :param range_df: DataFrame with `start_ts` and `end_ts` columns, one row per range
    :return: list with one result per row of `range_df`, in row order

    A real list is returned (rather than `map(...)`) so the result can be
    indexed and iterated more than once on Python 3 as well as Python 2.
    """
    return [get_points_from_ts_for_range(ts, row['start_ts'], row['end_ts'])
            for row in range_df.to_dict('records')]

In [None]:
def get_points_for_group(ts_list, range_df):
    """Fetch the per-range location points for every timeseries in `ts_list`.

    :param ts_list: iterable of timeseries handles
    :param range_df: DataFrame with `start_ts` and `end_ts` columns
    :return: list (one entry per timeseries, in input order) of the results
        of `get_points_for_range`

    A real list is returned (rather than `map(...)`) so the result can be
    indexed and iterated more than once on Python 3 as well as Python 2.
    """
    return [get_points_for_range(ts, range_df) for ts in ts_list]

In [None]:
schedule_csv = pd.read_csv('../mtv_millbrae_local_times.csv')

In [None]:
import emission.analysis.plotting.geojson.geojson_feature_converter as gfc
import emission.analysis.plotting.leaflet_osm.our_plotter as lo
import emission.analysis.plotting.leaflet_osm.ipython_helper as ipy

In [None]:
coordinate_array = local_route["geometry"]["coordinates"][0][4:17]

In [None]:
# The call form of print works both as a Python 2 statement and as the
# Python 3 function; the bare `print x` statement is a syntax error on Python 3.
print(len(coordinate_array))
coordinate_array

In [None]:
# Convert each [longitude, latitude] pair into a record for DataFrame construction.
# Materialized with list() so that, on Python 3, the records survive being
# consumed more than once (a bare `map` would be empty on a second use).
coordinate_dict = list(map(lambda c: {'longitude': c[0], 'latitude': c[1]}, coordinate_array))

In [None]:
ground_truth = pd.DataFrame(coordinate_dict)

In [None]:
import arrow

arrow.get('2016-04-01T'+schedule_csv.arrival_time.iloc[0]).timestamp

In [None]:
# The calendar date is only an arbitrary anchor for parsing the time-of-day
# strings: the following cell keeps just the offsets from the first stop
# (ts_delta), and move_ground_truth later rebuilds ts from those offsets.
def _arrival_epoch(tstr):
    """Return the epoch seconds (int) for one schedule arrival-time string."""
    stamp = arrow.get('2016-04-01T'+tstr).timestamp
    # arrow < 1.0 exposes `timestamp` as an int property; arrow >= 1.0 turned it
    # into a method returning a float. Handle both so the column always holds numbers.
    return int(stamp() if callable(stamp) else stamp)

ground_truth['ts'] = schedule_csv.arrival_time.apply(_arrival_epoch)

In [None]:
ground_truth['ts_delta'] = ground_truth.ts - ground_truth.ts.iloc[0]

In [None]:
ground_truth

In [None]:
import scipy.interpolate as spi
import numpy as np

In [None]:
def move_ground_truth(gt_df, start_ts):
    """Re-anchor a ground-truth trajectory so that it starts at `start_ts`.

    The `ts` column is overwritten with `ts_delta + start_ts`.

    :param gt_df: DataFrame with a `ts_delta` column (offset of each row from
        the first row); modified IN PLACE
    :param start_ts: timestamp to assign to the row whose `ts_delta` is 0
    :return: the same `gt_df` object, to allow display or chaining
    """
    # Item assignment (not `gt_df.ts = ...`) so the column is created when it does
    # not exist yet; attribute assignment would only set a Python attribute then.
    gt_df["ts"] = gt_df["ts_delta"] + start_ts
    return gt_df

In [None]:
pd.options.display.float_format = '{:.3f}'.format

In [None]:
# Re-anchor the schedule-based ground truth to start at 1459449090.275.
# move_ground_truth overwrites ground_truth.ts in place and returns the same
# frame, so this cell both updates `ground_truth` for later cells and displays it.
move_ground_truth(ground_truth, 1459449090.275)

In [None]:
def get_interp_df(raw_df):
    """Resample a trajectory onto a one-second grid by interpolation.

    :param raw_df: DataFrame with `ts`, `latitude` and `longitude` columns.
        The first and last rows are taken as the start and end of the
        trajectory (assumes rows are in time order -- TODO confirm for callers).
    :return: DataFrame indexed by the integer timestamps of the grid, with
        columns `ts`, `fmt_time` (ISO string from `datetime.fromtimestamp`,
        i.e. in the machine's local time zone), `latitude` and `longitude`.
        The grid runs from int(first ts) up to, but excluding, int(last ts).
    """
    start_ts = raw_df.ts.iloc[0]
    end_ts = raw_df.ts.iloc[-1]
    # bounds_error=False makes grid points outside the sampled interval come
    # back as NaN instead of raising; those rows are removed by dropna() below.
    latf = spi.interp1d(x=raw_df.ts, y=raw_df.latitude, bounds_error=False)
    lngf = spi.interp1d(x=raw_df.ts, y=raw_df.longitude, bounds_error=False)
    ts_new = np.arange(int(start_ts), int(end_ts), 1)
    # A real list rather than `map(...)`: on Python 3 `map` is a lazy iterator,
    # which is not a valid column value for the DataFrame constructor.
    fmt_time_new = [pydt.datetime.fromtimestamp(ts).isoformat() for ts in ts_new]
    interp_df = pd.DataFrame({"ts": ts_new, "fmt_time": fmt_time_new,
                              "latitude": latf(ts_new), "longitude": lngf(ts_new)}, index=ts_new)
    interp_df = interp_df.dropna()
    return interp_df

In [None]:
ground_truth_df = get_interp_df(ground_truth)

In [None]:
len(ground_truth_df)

In [None]:
import emission.analysis.point_features as pf
import attrdict as ad

In [None]:
def calDistance(row):
    """Distance between the Android and the iOS position held in one joined row.

    :param row: mapping with `latitude_android`, `longitude_android`,
        `latitude_ios` and `longitude_ios` entries
    :return: the value of `pf.calDistance` for the two points
        (units are whatever that helper uses)
    """
    android_point = ad.AttrDict({"latitude": row['latitude_android'], "longitude": row['longitude_android']})
    ios_point = ad.AttrDict({"latitude": row['latitude_ios'], "longitude": row['longitude_ios']})
    return pf.calDistance(android_point, ios_point)

In [None]:
def get_ground_truth(android_ts, ios_ts, start_ts, end_ts, max_distance=40):
    """Build a candidate ground-truth trajectory from one Android and one iOS stream.

    Both streams are interpolated onto a one-second grid, joined on the
    timestamps they have in common, and only the instants where the two
    phones agree to within `max_distance` are kept. The reported position at
    each kept instant is the midpoint of the two phones' positions.

    :param android_ts: timeseries handle of the Android phone
    :param ios_ts: timeseries handle of the iOS phone
    :param start_ts: start of the `metadata.write_ts` window to read
    :param end_ts: end of the `metadata.write_ts` window to read
    :param max_distance: exclusive upper bound on the Android/iOS distance for
        an instant to be kept, in the units returned by `calDistance`
        (presumably meters -- TODO confirm). Defaults to 40.
    :return: DataFrame indexed by timestamp with columns `ts`, `fmt_time`,
        `latitude`, `longitude` and `same_point_distances`
    """
    # Reuse the shared query helper so both platforms are read in exactly the same way.
    android_df = get_interp_df(get_points_from_ts_for_range(android_ts, start_ts, end_ts))
    ios_df = get_interp_df(get_points_from_ts_for_range(ios_ts, start_ts, end_ts))
    # Both frames are indexed by integer timestamp, so the inner join keeps only
    # the seconds for which both phones have an interpolated position.
    joined_df = android_df.join(ios_df, how='inner', lsuffix="_android", rsuffix="_ios")
    joined_df['same_point_distances'] = joined_df.apply(calDistance, axis=1)
    filtered_df = joined_df[joined_df.same_point_distances < max_distance]
    combined_df = pd.DataFrame({"ts": filtered_df.ts_android, "fmt_time": filtered_df.fmt_time_android,
                            "latitude": (filtered_df.latitude_android + filtered_df.latitude_ios)/2,
                            "longitude": (filtered_df.longitude_android + filtered_df.longitude_ios)/2,
                            "same_point_distances": filtered_df.same_point_distances}, index=np.array(filtered_df.ts_android))
    return combined_df

In [None]:
ranges_df.iloc[20]

In [None]:
potential_ground_truth_31_out = get_ground_truth(android_ts[0], iphone_ts[0], caltrain_df.iloc[1].start_ts, caltrain_df.iloc[1].end_ts)

In [None]:
def calDistanceGround(row):
    """Distance between the ground-truth and the compared position in one joined row.

    :param row: mapping with `latitude_ground`, `longitude_ground`,
        `latitude_compare` and `longitude_compare` entries
    :return: the value of `pf.calDistance` for the two points
        (units are whatever that helper uses)
    """
    ground_point = ad.AttrDict({"latitude": row['latitude_ground'], "longitude": row['longitude_ground']})
    compare_point = ad.AttrDict({"latitude": row['latitude_compare'], "longitude": row['longitude_compare']})
    return pf.calDistance(ground_point, compare_point)

In [None]:
def compare_trajectories(ground_truth_df, compare_df):
    """Join two trajectories on their index and measure how far apart they are.

    :param ground_truth_df: reference trajectory; its overlapping columns get
        the `_ground` suffix
    :param compare_df: trajectory to evaluate; its overlapping columns get the
        `_compare` suffix
    :return: the inner join of both frames with an added `compare_distance`
        column computed row by row with `calDistanceGround`
    """
    aligned_df = ground_truth_df.join(compare_df, how='inner', lsuffix="_ground", rsuffix="_compare")
    row_distances = aligned_df.apply(calDistanceGround, axis=1)
    aligned_df['compare_distance'] = row_distances
    return aligned_df

In [None]:
compare_ground_truth_31_out = compare_trajectories(ground_truth_df, potential_ground_truth_31_out)
compare_ground_truth_31_out.head()

In [None]:
# Histogram of the per-second distance between the schedule-based ground truth
# and the phone-derived trajectory.
ax = compare_ground_truth_31_out.compare_distance.plot(kind='hist', bins=100, figsize=(16,4))
# Label the figure so it can be read on its own when the notebook is skimmed.
ax.set_title("Distance between ground truth and compared trajectory")
ax.set_xlabel("compare_distance")
ax.set_ylabel("number of points")
# Assigning the return value keeps the tick objects out of the cell output.
ticks_list = ax.set_xticks(np.arange(0,1800,50))

In [None]:
import emission.analysis.plotting.leaflet_osm.our_plotter as lo
import emission.analysis.plotting.geojson.geojson_feature_converter as gfc
import emission.analysis.plotting.leaflet_osm.ipython_helper as ipy

In [None]:
# Map the first 100 joined rows twice: once using the *_ground columns and once
# using the *_compare columns. The two results of get_maps_for_geojson_unsectioned
# are concatenated with `+` and passed to inline_maps as a single inner list
# (presumably one row showing both maps -- confirm against ipy.inline_maps).
ipy.inline_maps([lo.get_maps_for_geojson_unsectioned([gfc.get_feature_list_from_df(compare_ground_truth_31_out.head(n=100), 
                    ts="ts_ground", latitude="latitude_ground", longitude="longitude_ground", fmt_time="fmt_time_ground")]) +
lo.get_maps_for_geojson_unsectioned([gfc.get_feature_list_from_df(compare_ground_truth_31_out.head(n=100),
                    ts="ts_compare", latitude="latitude_compare", longitude="longitude_compare", fmt_time="fmt_time_compare")])])