This notebook visualizes how closely multiple high-accuracy, fast-sampling location streams, collected on two different platforms, correspond to each other while on Caltrain.

Remember to load the Caltrain data from `Pull_entries_from_server` if you have not already done so.

In [None]:
import sys
import logging
from uuid import UUID

import emission.core.get_database as edb
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import datetime as pydt
import emission.storage.timeseries.abstract_timeseries as esta
import emission.storage.timeseries.timequery as estt
import numpy as np

# Log everything at DEBUG on the root logger and record the module search path.
logging.getLogger().setLevel(logging.DEBUG)
logging.debug(sys.path)

# One UUID per test phone. The comparison cells further down use position 0
# of each list as the ground-truth phone and positions 1-3 as the phones under test.
iphone_ids = [
    UUID("079e0f1a-c440-3d7c-b0e7-de160f748e35"),
    UUID("c76a0487-7e5a-3b17-a449-47be666b36f6"),
    UUID("c528bcd2-a88b-3e82-be62-ef4f2396967a"),
    UUID("95e70727-a04e-3e33-b7fe-34ab19194f8b"),
]
android_ids = [
    UUID("e471711e-bd14-3dbe-80b6-9c7d92ecc296"),
    UUID("fd7b4c2e-2c8b-3bfa-94f0-d1e3ecbd5fb7"),
    UUID("86842c35-da28-32ed-a90e-2da6663c5c73"),
    UUID("3bc0f91f-7660-34a2-b005-5c399598a369"),
]

In [None]:
# Peek at up to five "config/sensor_config" entries that carry a battery
# status, ordered by the time they were written.
list(
    edb.get_timeseries_db()
    .find({"metadata.key": "config/sensor_config",
           "data.battery_status": {"$exists": True}})
    .sort("metadata.write_ts")
    .limit(5)
)

In [None]:
# One timeseries handle per phone, in the same order as the id lists.
# Built as real lists because later cells index into them (e.g. iphone_ts[0]).
iphone_ts = [esta.TimeSeries.get_time_series(phone_uuid) for phone_uuid in iphone_ids]
android_ts = [esta.TimeSeries.get_time_series(phone_uuid) for phone_uuid in android_ids]

In [None]:
# Load the experiment ranges; error_bad_lines=False makes pandas skip malformed
# rows instead of raising.
# NOTE(review): newer pandas releases replace this flag with on_bad_lines="skip".
ranges_df = pd.read_csv("../results_spring_2016/ranges.csv", error_bad_lines=False)

In [None]:
# Preview the first rows of the ranges table.
ranges_df.head()

In [None]:
# Group the ranges by state and by the android/ios regime columns.
regime_grouped_ranges_df = ranges_df.groupby(['state', 'android_regime', 'ios_regime'])

In [None]:
# Load invalid.csv from the spring 2016 results directory.
invalid_df = pd.read_csv("../results_spring_2016/invalid.csv")

In [None]:
# Display the full invalid table.
invalid_df

In [None]:
# Load the Caltrain trips; the cells below read start_ts and end_ts from each row.
caltrain_df = pd.read_csv("../results_spring_2016/caltrain_trips.csv")

In [None]:
# Display the full table of Caltrain trips.
caltrain_df

In [None]:
def get_points_from_ts_for_range(ts, start_ts, end_ts):
    """Fetch the "background/location" entries of one timeseries for a time window.

    The window is a TimeQuery on 'metadata.write_ts' running from start_ts to
    end_ts. Returns whatever ts.get_data_df returns for that query.
    """
    write_window = estt.TimeQuery('metadata.write_ts', start_ts, end_ts)
    return ts.get_data_df("background/location", write_window)

In [None]:
def get_points_for_range(ts, range_df):
    """Fetch the location points of one timeseries for every row of range_df.

    Each row of range_df must provide 'start_ts' and 'end_ts'. Returns a list
    with one result of get_points_from_ts_for_range per row, in row order.
    """
    return [get_points_from_ts_for_range(ts, window['start_ts'], window['end_ts'])
            for window in range_df.to_dict('records')]

In [None]:
def get_points_for_group(ts_list, range_df):
    """Run get_points_for_range for every timeseries in ts_list.

    Returns a list with one entry per timeseries, in the order of ts_list.
    """
    return [get_points_for_range(phone_ts, range_df) for phone_ts in ts_list]

In [None]:
import emission.analysis.plotting.geojson.geojson_feature_converter as gfc
import emission.analysis.plotting.leaflet_osm.our_plotter as lo
import emission.analysis.plotting.leaflet_osm.ipython_helper as ipy

In [None]:
import scipy.interpolate as spi
import numpy as np

In [None]:
# Render floats with three decimal places in every displayed table.
pd.set_option('display.float_format', '{:.3f}'.format)

In [None]:
def get_interp_df(raw_df):
    """Resample a location dataframe onto a one-second grid by linear interpolation.

    Parameters
    ----------
    raw_df : DataFrame with numeric `ts` (epoch seconds), `latitude` and
        `longitude` columns. Rows do not need to be sorted by `ts`.

    Returns
    -------
    DataFrame indexed by the integer grid timestamps with columns `ts`,
    `fmt_time` (ISO string in local time), `latitude` and `longitude`.
    Grid points outside the observed time span are dropped. The result is
    empty when raw_df has fewer than two rows, since there is nothing to
    interpolate between.
    """
    if raw_df is None or len(raw_df) < 2:
        return pd.DataFrame(columns=["ts", "fmt_time", "latitude", "longitude"])

    # Use min/max rather than first/last row: the points are queried by write
    # time, so they are not guaranteed to arrive ordered by `ts`.
    start_ts = raw_df.ts.min()
    end_ts = raw_df.ts.max()
    # bounds_error=False yields NaN outside [start_ts, end_ts] instead of raising.
    latf = spi.interp1d(x=raw_df.ts, y=raw_df.latitude, bounds_error=False)
    lngf = spi.interp1d(x=raw_df.ts, y=raw_df.longitude, bounds_error=False)
    ts_new = np.arange(int(start_ts), int(end_ts), 1)
    # A real list (not a lazy map object) so the column is built correctly
    # on both Python 2 and Python 3.
    fmt_time_new = [pydt.datetime.fromtimestamp(ts).isoformat() for ts in ts_new]
    interp_df = pd.DataFrame({"ts": ts_new, "fmt_time": fmt_time_new,
                              "latitude": latf(ts_new), "longitude": lngf(ts_new)},
                             index=ts_new)
    # int() truncation can put the first grid point just before start_ts,
    # where the interpolators return NaN; drop such rows.
    return interp_df.dropna()

In [None]:
import emission.analysis.point_features as pf
import attrdict as ad

In [None]:
def calDistance(row):
    """Distance between the android and ios positions stored in one joined row.

    Reads latitude_android/longitude_android and latitude_ios/longitude_ios
    from `row` and returns the result of pf.calDistance for the two points.
    """
    android_point = ad.AttrDict({"latitude": row['latitude_android'],
                                 "longitude": row['longitude_android']})
    ios_point = ad.AttrDict({"latitude": row['latitude_ios'],
                             "longitude": row['longitude_ios']})
    return pf.calDistance(android_point, ios_point)

In [None]:
def get_ground_truth(android_ts, ios_ts, start_ts, end_ts, max_distance=40):
    """Build a ground-truth track by averaging an android and an ios stream.

    Both streams are read for the write-time window [start_ts, end_ts],
    interpolated with get_interp_df and inner-joined on the timestamp index.
    Rows where the two platforms disagree by `max_distance` or more (as
    measured by calDistance) are discarded; the remaining rows are averaged.

    Parameters
    ----------
    android_ts, ios_ts : timeseries objects exposing get_data_df.
    start_ts, end_ts : bounds of the metadata.write_ts query.
    max_distance : exclusive upper bound on the android/ios distance for a
        row to be kept. Defaults to 40, the value previously hard-coded.
        # presumably metres; confirm against pf.calDistance

    Returns
    -------
    (pct_retained, combined_df): pct_retained is 100 * retained rows divided
    by the window length in whole seconds (0.0 for a window without positive
    length); combined_df has columns ts, fmt_time, latitude, longitude and
    same_point_distances, indexed by the android timestamp.
    """
    android_df = get_interp_df(android_ts.get_data_df(
        "background/location", estt.TimeQuery("metadata.write_ts", start_ts, end_ts)))
    ios_df = get_interp_df(ios_ts.get_data_df(
        "background/location", estt.TimeQuery("metadata.write_ts", start_ts, end_ts)))
    joined_df = android_df.join(ios_df, how='inner', lsuffix="_android", rsuffix="_ios")
    if len(joined_df) == 0:
        # apply(axis=1) on an empty frame returns a frame, not a series, and
        # cannot be assigned to a single column; add an empty float column.
        joined_df['same_point_distances'] = pd.Series(dtype='float64')
    else:
        joined_df['same_point_distances'] = joined_df.apply(calDistance, axis=1)
    filtered_df = joined_df[joined_df.same_point_distances < max_distance]
    combined_df = pd.DataFrame({"ts": filtered_df.ts_android, "fmt_time": filtered_df.fmt_time_android,
                            "latitude": (filtered_df.latitude_android + filtered_df.latitude_ios)/2,
                            "longitude": (filtered_df.longitude_android + filtered_df.longitude_ios)/2,
                            "same_point_distances": filtered_df.same_point_distances}, index=np.array(filtered_df.ts_android))
    duration_secs = int(end_ts) - int(start_ts)
    if duration_secs > 0:
        pct_retained = float(len(combined_df) * 100) / duration_secs
    else:
        # Avoid dividing by zero for a degenerate window.
        pct_retained = 0.0
    return pct_retained, combined_df

In [None]:
# Show the first Caltrain trip, which the next cells use as the time window.
caltrain_df.iloc[0]

In [None]:
# Ground truth for the first trip, built from the phones at index 0.
pct_retained_30_in, ground_truth_30_in = get_ground_truth(
    android_ts[0], iphone_ts[0],
    caltrain_df.iloc[0].start_ts, caltrain_df.iloc[0].end_ts)

In [None]:
# Same averaging for the phone pair at index 1, over the first trip.
pct_retained_compare_30_in_1, compare_truth_30_in_1 = get_ground_truth(
    android_ts[1], iphone_ts[1],
    caltrain_df.iloc[0].start_ts, caltrain_df.iloc[0].end_ts)

In [None]:
# Same averaging for the phone pair at index 2, over the first trip.
pct_retained_compare_30_in_2, compare_truth_30_in_2 = get_ground_truth(
    android_ts[2], iphone_ts[2],
    caltrain_df.iloc[0].start_ts, caltrain_df.iloc[0].end_ts)

In [None]:
# Same averaging for the phone pair at index 3, over the first trip.
pct_retained_compare_30_in_3, compare_truth_30_in_3 = get_ground_truth(
    android_ts[3], iphone_ts[3],
    caltrain_df.iloc[0].start_ts, caltrain_df.iloc[0].end_ts)

In [None]:
# Percent of the trip retained for the ground-truth pair and the three other pairs.
print("%s %s %s %s" % (pct_retained_30_in, pct_retained_compare_30_in_1,
                       pct_retained_compare_30_in_2, pct_retained_compare_30_in_3))

In [None]:
def calDistanceGround(row):
    """Distance between the ground-truth and compared positions of one joined row.

    Reads latitude_ground/longitude_ground and latitude_compare/longitude_compare
    from `row` and returns the result of pf.calDistance for the two points.
    """
    ground_point = ad.AttrDict({"latitude": row['latitude_ground'],
                                "longitude": row['longitude_ground']})
    compare_point = ad.AttrDict({"latitude": row['latitude_compare'],
                                 "longitude": row['longitude_compare']})
    return pf.calDistance(ground_point, compare_point)

In [None]:
def compare_trajectories(ground_truth_df, compare_df):
    """Join a track against the ground truth and measure the per-row distance.

    The two frames are inner-joined on their index; overlapping column names
    get the suffixes "_ground" and "_compare". A `compare_distance` column is
    added, holding calDistanceGround of each row.

    Returns the joined frame. When the two frames share no index values the
    result is empty but still carries a float `compare_distance` column, so
    callers can run e.g. `.compare_distance.describe()` on it.
    """
    joined_df = ground_truth_df.join(compare_df, how='inner', lsuffix="_ground", rsuffix="_compare")
    if len(joined_df) == 0:
        # apply(axis=1) on an empty frame returns a frame, not a series, and
        # cannot be assigned to a single column; add an empty float column.
        joined_df['compare_distance'] = pd.Series(dtype='float64')
    else:
        joined_df['compare_distance'] = joined_df.apply(calDistanceGround, axis=1)
    return joined_df

In [None]:
# Join the ground truth with the averaged track of phone pair 1 and preview the result.
compare_ground_truth_30_in_1 = compare_trajectories(ground_truth_30_in, compare_truth_30_in_1)
compare_ground_truth_30_in_1.head()

In [None]:
# Histogram of the distance between ground truth and phone pair 1.
ax = compare_ground_truth_30_in_1.compare_distance.plot(kind='hist', bins=50, figsize=(16,4))
# Tick every 2 units from 0 to 48; the result is assigned so the cell does not echo it.
ticks_list = ax.set_xticks(np.arange(0,50,2))

In [None]:
import emission.analysis.plotting.leaflet_osm.our_plotter as lo
import emission.analysis.plotting.geojson.geojson_feature_converter as gfc
import emission.analysis.plotting.leaflet_osm.ipython_helper as ipy

In [None]:
# Show the first 100 matched points on one map: the ground-truth positions
# together with the positions of phone pair 1.
ground_maps = lo.get_maps_for_geojson_unsectioned([
    gfc.get_feature_list_from_df(
        compare_ground_truth_30_in_1.head(n=100),
        ts="ts_ground", latitude="latitude_ground",
        longitude="longitude_ground", fmt_time="fmt_time_ground")])
compare_maps = lo.get_maps_for_geojson_unsectioned([
    gfc.get_feature_list_from_df(
        compare_ground_truth_30_in_1.head(n=100),
        ts="ts_compare", latitude="latitude_compare",
        longitude="longitude_compare", fmt_time="fmt_time_compare")])
ipy.inline_maps([ground_maps + compare_maps])

In [None]:
# Same join against the ground truth for the averaged tracks of phone pairs 2 and 3.
compare_ground_truth_30_in_2 = compare_trajectories(ground_truth_30_in, compare_truth_30_in_2)
compare_ground_truth_30_in_3 = compare_trajectories(ground_truth_30_in, compare_truth_30_in_3)

In [None]:
# Percentage of the first trip's seconds that survive the join with the
# ground truth, for each of the three compared phone pairs.
first_trip_secs = int(caltrain_df.iloc[0].end_ts) - int(caltrain_df.iloc[0].start_ts)
pct_retained_1 = float(len(compare_ground_truth_30_in_1) * 100) / first_trip_secs
pct_retained_2 = float(len(compare_ground_truth_30_in_2) * 100) / first_trip_secs
pct_retained_3 = float(len(compare_ground_truth_30_in_3) * 100) / first_trip_secs

print("%s %s %s" % (pct_retained_1, pct_retained_2, pct_retained_3))

In [None]:
# Summary statistics of the distance to ground truth for phone pair 1.
compare_ground_truth_30_in_1.compare_distance.describe()

In [None]:
# Summary statistics of the distance to ground truth for phone pair 2.
compare_ground_truth_30_in_2.compare_distance.describe()

In [None]:
# Summary statistics of the distance to ground truth for phone pair 3.
compare_ground_truth_30_in_3.compare_distance.describe()

In [None]:
# Compare the interpolated stream of iphone 1 alone (not averaged with its
# android partner) against the ground truth for the first trip.
ios_1_raw_df = iphone_ts[1].get_data_df(
    "background/location",
    estt.TimeQuery("metadata.write_ts", caltrain_df.iloc[0].start_ts, caltrain_df.iloc[0].end_ts))
compare_raw_trajectory_ios_1 = compare_trajectories(ground_truth_30_in, get_interp_df(ios_1_raw_df))

In [None]:
# Summary statistics of the distance between iphone 1 and the ground truth.
compare_raw_trajectory_ios_1.compare_distance.describe()

In [None]:
# Compare the interpolated stream of android phone 1 alone against the
# ground truth for the first trip.
android_1_raw_df = android_ts[1].get_data_df(
    "background/location",
    estt.TimeQuery("metadata.write_ts", caltrain_df.iloc[0].start_ts, caltrain_df.iloc[0].end_ts))
compare_raw_trajectory_android_1 = compare_trajectories(ground_truth_30_in, get_interp_df(android_1_raw_df))

In [None]:
# Summary statistics of the distance between android phone 1 and the ground truth.
compare_raw_trajectory_android_1.compare_distance.describe()

In [None]:
# Distance statistics of iphone 3 alone against the ground truth, first trip.
ios_3_raw_df = iphone_ts[3].get_data_df(
    "background/location",
    estt.TimeQuery("metadata.write_ts", caltrain_df.iloc[0].start_ts, caltrain_df.iloc[0].end_ts))
compare_trajectories(ground_truth_30_in, get_interp_df(ios_3_raw_df)).compare_distance.describe()

In [None]:
# Distance statistics of android phone 3 alone against the ground truth, first trip.
android_3_raw_df = android_ts[3].get_data_df(
    "background/location",
    estt.TimeQuery("metadata.write_ts", caltrain_df.iloc[0].start_ts, caltrain_df.iloc[0].end_ts))
compare_trajectories(ground_truth_30_in, get_interp_df(android_3_raw_df)).compare_distance.describe()

In [None]:
# Show the whole matched track on one map: the ground-truth positions
# together with the positions of android phone 1.
raw_ground_maps = lo.get_maps_for_geojson_unsectioned([
    gfc.get_feature_list_from_df(
        compare_raw_trajectory_android_1,
        ts="ts_ground", latitude="latitude_ground",
        longitude="longitude_ground", fmt_time="fmt_time_ground")])
raw_android_maps = lo.get_maps_for_geojson_unsectioned([
    gfc.get_feature_list_from_df(
        compare_raw_trajectory_android_1,
        ts="ts_compare", latitude="latitude_compare",
        longitude="longitude_compare", fmt_time="fmt_time_compare")])
ipy.inline_maps([raw_ground_maps + raw_android_maps])

In [None]:
def compare_accuracy(ios_regime, android_regime, start_ts, end_ts, phone_indices=(1, 2, 3)):
    """Compare each test phone against the ground truth for one time window.

    The ground truth comes from get_ground_truth on the phones at index 0 of
    the notebook-level `android_ts` / `iphone_ts` lists. For every index in
    `phone_indices`, the ios and then the android stream is interpolated,
    joined against the ground truth, and summarised with describe().

    Parameters
    ----------
    ios_regime, android_regime : labels stored in the 'label' field of the
        ios and android results respectively.
    start_ts, end_ts : bounds of the metadata.write_ts query.
    phone_indices : positions in `iphone_ts` / `android_ts` to evaluate.
        Defaults to (1, 2, 3), the values previously hard-coded.

    Returns
    -------
    (["<ios_regime>:<android_regime>", pct_retained], stats_list) where
    stats_list holds one describe() series per phone and platform, extended
    with 'platform', 'label' and 'phone' entries.
    """
    # First, get ground truth from hafs phones
    (pct_retained, ground_truth_df) = get_ground_truth(android_ts[0], iphone_ts[0], start_ts, end_ts)
    print("Retained %s percent" % pct_retained)

    # (platform name, regime label, timeseries list, result message) so the
    # ios and android passes share one code path. The two result messages
    # differ in the original output and are kept as they were.
    platforms = [
        ("ios", ios_regime, iphone_ts, "Result for ios phone-%s = %s"),
        ("android", android_regime, android_ts, "Result for android phone-%s is %s"),
    ]

    stats_list = []
    for i in phone_indices:
        for (platform, regime, ts_list, result_msg) in platforms:
            print("Checking %s phone-%s" % (platform, i))
            raw_df = ts_list[i].get_data_df("background/location",
                                            estt.TimeQuery("metadata.write_ts", start_ts, end_ts))
            compared_df = compare_trajectories(ground_truth_df, get_interp_df(raw_df))
            stats = compared_df.compare_distance.describe()
            stats['platform'] = platform
            stats['label'] = regime
            stats['phone'] = i
            print(result_msg % (i, stats))
            stats_list.append(stats)

    return (["%s:%s" % (ios_regime, android_regime), pct_retained], stats_list)

In [None]:
# Accumulators filled by the compare_accuracy cells below. Re-run this cell
# before re-running them, otherwise results are appended twice.
retained_list, result_list = [], []

In [None]:
# Trip 0 of caltrain_df.
ret, res = compare_accuracy(
    "best+1m", "high+1sec",
    caltrain_df.iloc[0].start_ts, caltrain_df.iloc[0].end_ts)
retained_list.append(ret)
result_list.extend(res)

In [None]:
# Trip 1 of caltrain_df.
ret, res = compare_accuracy(
    "best-v-10m+1m-v-30m", "high-v-balanced+1sec-v-30sec",
    caltrain_df.iloc[1].start_ts, caltrain_df.iloc[1].end_ts)
retained_list.append(ret)
result_list.extend(res)

In [None]:
# Trip 2 of caltrain_df.
# NOTE(review): uses the same regime labels as trip 1, so the two trips share
# one group in the per-label plots - confirm this is intended.
ret, res = compare_accuracy(
    "best-v-10m+1m-v-30m", "high-v-balanced+1sec-v-30sec",
    caltrain_df.iloc[2].start_ts, caltrain_df.iloc[2].end_ts)
retained_list.append(ret)
result_list.extend(res)

In [None]:
# Show the collected [regime, pct_retained] pairs.
retained_list

In [None]:
# Bar chart of the retained percentage per regime pair.
pd.DataFrame(retained_list, columns=["regime", "retained"]).plot(kind='bar', x="regime", rot=45)

In [None]:
# One figure per regime label: bar charts of the summary statistics by phone.
for key, df in pd.DataFrame(result_list).reset_index(drop=True).groupby('label'):
    # Drop the columns on a new frame instead of in place: the frames yielded
    # by groupby are slices of the parent, and mutating them in place triggers
    # SettingWithCopyWarning without any benefit.
    stats_df = df.drop(["count", "std"], axis=1)
    stats_df.plot(subplots=True, x='phone', kind='bar', title=key, figsize=(10,5), layout = (2,3))