This notebook attempts to determine experimentally the distance threshold to use as a filter when computing the baseline.
In other words: up to what distance should we treat two points as "close enough", and beyond what distance should we discard them?

Remember to load the `moving`, `high+1sec` data from `Pull_entries_from_server` if you have not already done so.

In [None]:
import sys
import logging
from uuid import UUID

import emission.core.get_database as edb
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import datetime as pydt
import emission.storage.timeseries.abstract_timeseries as esta
import emission.storage.timeseries.timequery as estt
import numpy as np
import scipy.interpolate as spi

# Emit DEBUG-level messages from the root logger and record the module search
# path up front, which makes import problems easier to diagnose.
root_logger = logging.getLogger()
root_logger.setLevel(logging.DEBUG)
logging.debug(sys.path)

# UUIDs used to look up each phone's time series, grouped by platform.
iphone_ids = [UUID(uuid_str) for uuid_str in (
    "079e0f1a-c440-3d7c-b0e7-de160f748e35",
    "c76a0487-7e5a-3b17-a449-47be666b36f6",
    "c528bcd2-a88b-3e82-be62-ef4f2396967a",
    "95e70727-a04e-3e33-b7fe-34ab19194f8b",
)]
android_ids = [UUID(uuid_str) for uuid_str in (
    "e471711e-bd14-3dbe-80b6-9c7d92ecc296",
    "fd7b4c2e-2c8b-3bfa-94f0-d1e3ecbd5fb7",
    "86842c35-da28-32ed-a90e-2da6663c5c73",
    "3bc0f91f-7660-34a2-b005-5c399598a369",
)]

In [None]:
# Show the first five sensor-config entries (ordered by write timestamp) that
# carry a battery status field.
battery_config_query = {
    "metadata.key": "config/sensor_config",
    "data.battery_status": {"$exists": True},
}
battery_config_cursor = edb.get_timeseries_db().find(battery_config_query)
battery_config_cursor = battery_config_cursor.sort("metadata.write_ts").limit(5)
list(battery_config_cursor)

In [None]:
# One time series handle per phone, in the same order as the id lists.
# Later cells index these (e.g. android_ts[0]), so they are built as lists.
iphone_ts = [esta.TimeSeries.get_time_series(phone_uuid) for phone_uuid in iphone_ids]
android_ts = [esta.TimeSeries.get_time_series(phone_uuid) for phone_uuid in android_ids]

In [None]:
# Load the time ranges that the cells below iterate over.
# error_bad_lines=False asks pandas to drop malformed rows instead of aborting
# the read, so the resulting frame may hold fewer rows than the file has lines.
ranges_df = pd.read_csv("../results_spring_2016/ranges.csv", error_bad_lines=False)

In [None]:
# Preview the first rows of the loaded ranges.
ranges_df.head()

In [None]:
# Group the ranges by their state and by the android / iOS regime columns.
regime_columns = ['state', 'android_regime', 'ios_regime']
regime_grouped_ranges_df = ranges_df.groupby(by=regime_columns)

In [None]:
# Load the companion CSV of invalid entries from the same results directory.
# NOTE(review): its schema is not used anywhere in the visible cells -- confirm
# what the rows represent before relying on it.
invalid_df = pd.read_csv("../results_spring_2016/invalid.csv")

In [None]:
# Display the full contents of invalid.csv for reference.
invalid_df

In [None]:
def get_points_from_ts_for_range(ts, start_ts, end_ts):
    """
    Return the "background/location" entries of one time series for one range.

    :param ts: time series object exposing get_data_df(key, time_query)
    :param start_ts: start of the range, matched against 'metadata.write_ts'
    :param end_ts: end of the range, matched against 'metadata.write_ts'
    :return: whatever ts.get_data_df returns for that key and time query
    """
    # TimeQuery comes from the timequery module (estt), which is how every
    # other query in this notebook builds it.
    write_ts_query = estt.TimeQuery('metadata.write_ts', start_ts, end_ts)
    return ts.get_data_df("background/location", write_ts_query)

In [None]:
def get_points_for_range(ts, range_df):
    """
    Fetch the location points of one time series for every range in range_df.

    :param ts: time series to query
    :param range_df: dataframe with one row per range; the 'start_ts' and
        'end_ts' values of each row delimit the range
    :return: one get_points_from_ts_for_range result per row, in row order
    """
    return [get_points_from_ts_for_range(ts, range_row['start_ts'], range_row['end_ts'])
            for range_row in range_df.to_dict('records')]

In [None]:
def get_points_for_group(ts_list, range_df):
    """
    Fetch the per-range location points for each time series in a group.

    :param ts_list: the time series to query
    :param range_df: dataframe of ranges, passed unchanged to
        get_points_for_range for every time series
    :return: one get_points_for_range result per time series, in input order
    """
    return [get_points_for_range(phone_ts, range_df) for phone_ts in ts_list]

In [None]:
import emission.analysis.point_features as pf
import attrdict as ad

In [None]:
def calDistance(row):
    """
    Distance between the two positions stored in a single row.

    The row is read by position: elements 0 and 1 are used as the latitude and
    longitude of the first point, elements 3 and 4 as those of the second.
    NOTE(review): when applied to the rows of interp_df this presumably relies
    on the columns being ordered android_lat, android_lng, fmt_time, ios_lat,
    ios_lng, ts -- confirm that the frame is built in that order.

    :param row: indexable row holding the two positions at the offsets above
    :return: the result of pf.calDistance for the two points
    """
    first_point = ad.AttrDict({"latitude": row[0], "longitude": row[1]})
    second_point = ad.AttrDict({"latitude": row[3], "longitude": row[4]})
    return pf.calDistance(first_point, second_point)

In [None]:
def get_interp_distance_df(android_ts, ios_ts, start_ts, end_ts):
    """
    Compare the android and iOS location tracks second by second for one range.

    The "background/location" entries of both phones are linearly interpolated
    onto a common one-second grid covering [int(start_ts), int(end_ts)), and
    the distance between the two interpolated positions is computed at every
    grid point.

    :param android_ts: time series of the android phone
    :param ios_ts: time series of the iOS phone
    :param start_ts: start of the range; used for the 'metadata.write_ts'
        query and as the first grid point
    :param end_ts: end of the range; used for the query and as the (exclusive)
        end of the grid
    :return: None (after printing a message) when either phone has no location
        entries in the range. Otherwise a dataframe indexed by grid timestamp
        with the columns android_lat, android_lng, fmt_time, ios_lat, ios_lng,
        ts and same_point_distances. Grid points where either phone has no
        interpolated position are dropped, so the frame can be empty.
    """
    def _location_df(phone_ts):
        # Location entries of one phone whose write timestamp is in the range.
        return phone_ts.get_data_df(
            "background/location",
            estt.TimeQuery('metadata.write_ts', start_ts, end_ts))

    def _linear_interp(location_df, column):
        # bounds_error=False: timestamps outside the observed span evaluate to
        # NaN instead of raising, and are removed by the dropna() below.
        return spi.interp1d(x=location_df.ts, y=location_df[column],
                            kind='linear', bounds_error=False)

    android_df = _location_df(android_ts)
    ios_df = _location_df(ios_ts)
    if len(android_df) == 0 or len(ios_df) == 0:
        print("No data found for either android or iOS, exiting")
        return None

    android_latf = _linear_interp(android_df, "latitude")
    android_lngf = _linear_interp(android_df, "longitude")
    ios_latf = _linear_interp(ios_df, "latitude")
    ios_lngf = _linear_interp(ios_df, "longitude")

    ts_new = np.arange(int(start_ts), int(end_ts), 1)
    fmt_time_new = [pydt.datetime.fromtimestamp(ts).isoformat() for ts in ts_new]

    # calDistance reads its row by position (0, 1 -> first point; 3, 4 ->
    # second point), so the column order is pinned explicitly instead of being
    # left to however pandas happens to order the keys of a dict.
    column_order = ["android_lat", "android_lng", "fmt_time",
                    "ios_lat", "ios_lng", "ts"]
    interp_df = pd.DataFrame({"ts": ts_new, "fmt_time": fmt_time_new,
                              "android_lat": android_latf(ts_new),
                              "android_lng": android_lngf(ts_new),
                              "ios_lat": ios_latf(ts_new),
                              "ios_lng": ios_lngf(ts_new)},
                             index=ts_new, columns=column_order)
    interp_df = interp_df.dropna()

    if len(interp_df) == 0:
        # The two tracks never overlap on the grid: return an empty frame that
        # still has the distance column, without running a row-wise apply on
        # zero rows.
        interp_df["same_point_distances"] = np.array([], dtype=float)
        return interp_df

    interp_df["same_point_distances"] = np.abs(interp_df.apply(calDistance, axis=1))
    return interp_df

In [None]:
# For every usable range, measure how far apart the first android phone and the
# first iPhone are at each second, then record the upper quantiles of that
# distance -- once over all samples and once over the samples below
# DISTANCE_CUTOFF -- and draw the cumulative distribution of both.

# Ranges in these states are skipped entirely.
EXCLUDED_STATES = ('invalid', 'charging', 'charging-or-full')
QUANTILE_LEVELS = [0.8, 0.9, 0.95, 0.99]
# Samples at or above this distance are left out of the "filtered" statistics;
# it also caps the x ticks of both plots. Same unit as calDistance returns
# (presumably metres -- TODO confirm).
DISTANCE_CUTOFF = 100

quantiles_list = []
filtered_quantiles_list = []
for row in ranges_df.to_dict('records'):
    key = (row['state'], row['android_regime'], row['ios_regime'])
    if row['state'] in EXCLUDED_STATES:
        continue

    interp_distance_df_ha = get_interp_distance_df(android_ts[0], iphone_ts[0], row['start_ts'], row['end_ts'])
    if interp_distance_df_ha is None or len(interp_distance_df_ha.dropna()) == 0:
        continue

    all_distances = interp_distance_df_ha.same_point_distances
    filtered_distances = all_distances[all_distances < DISTANCE_CUTOFF]

    # Both quantile series are labelled with the state of their range, because
    # the cells that turn these lists into dataframes read the state back from
    # the series name.
    curr_quantiles = all_distances.quantile(QUANTILE_LEVELS)
    curr_quantiles.name = row['state']
    curr_filtered_quantiles = filtered_distances.quantile(QUANTILE_LEVELS)
    curr_filtered_quantiles.name = row['state']
    print ("for key %s, %s (%s) -> %s (%s), quantiles are %s, filtered quantiles are %s" %
            (key, row['start_fmt_time'], row['start_ts'], row['end_fmt_time'], row['end_ts'],
             curr_quantiles, curr_filtered_quantiles))
    # The two lists are appended together so that they stay index-aligned.
    quantiles_list.append(curr_quantiles)
    filtered_quantiles_list.append(curr_filtered_quantiles)

    # NOTE(review): newer matplotlib releases replace `normed` with `density`;
    # `normed` is kept for the matplotlib version this notebook was written for.
    plt.figure()
    ax = all_distances.plot(kind='hist', bins = 100, figsize=(12,4), cumulative=True, normed=True)
    ax.set_title("unfiltered: %s %s -> %s" % (key, row['start_fmt_time'], row['end_fmt_time']))
    ax.set_xticks(np.arange(0,min(all_distances.max(),DISTANCE_CUTOFF),2))

    if len(filtered_distances) == 0:
        # No sample is below the cutoff: the maximum is NaN, so the tick range
        # cannot be computed and there is nothing to draw.
        continue

    plt.figure()
    ax = filtered_distances.plot(kind='hist', bins = 100, figsize=(12,4), cumulative=True, normed=True)
    ax.set_title("filtered < %s, %s %s -> %s" % (DISTANCE_CUTOFF, key, row['start_fmt_time'], row['end_fmt_time']))
    ax.set_xticks(np.arange(0,min(filtered_distances.max(),DISTANCE_CUTOFF),2))

In [None]:
import emission.analysis.plotting.leaflet_osm.our_plotter as lo
import emission.analysis.plotting.geojson.geojson_feature_converter as gfc
import emission.analysis.plotting.leaflet_osm.ipython_helper as ipy

In [None]:
# Interpolated android/iOS distances for one hard-coded range, for a closer look.
interp_df = get_interp_distance_df(android_ts[0], iphone_ts[0], 1459448055.12, 1459456368.01)

In [None]:
# Distribution of the android/iOS distance over the range held in interp_df.
interp_df.same_point_distances.plot(kind='hist', bins=100)

In [None]:
# Distance over time: the x axis is the dataframe index, i.e. the grid timestamp.
interp_df.same_point_distances.plot()
# interp_df = interp_df[interp_df.same_point_distances <= 20]

In [None]:
# First 20 rows where the two interpolated positions are more than 1000 apart.
interp_df[interp_df.same_point_distances > 1000].head(n=20)

In [None]:
# Last 20 rows whose interpolated android latitude equals this exact value.
# NOTE(review): exact float equality only matches values identical to the
# literal -- confirm that is intended rather than a tolerance-based comparison.
interp_df[interp_df.android_lat == 37.394861].tail(n=20)

In [None]:
# Map the raw (non-interpolated) locations that each phone wrote between the
# two timestamps below; the iPhone maps come first, then the android ones.
iphone_raw_df = iphone_ts[0].get_data_df(
    "background/location", estt.TimeQuery('metadata.write_ts', 1459449109, 1459449156))
iphone_raw_maps = lo.get_maps_for_geojson_unsectioned(
    [gfc.get_feature_list_from_df(iphone_raw_df)])
android_raw_df = android_ts[0].get_data_df(
    "background/location", estt.TimeQuery('metadata.write_ts', 1459449109, 1459449156))
android_raw_maps = lo.get_maps_for_geojson_unsectioned(
    [gfc.get_feature_list_from_df(android_raw_df)])
ipy.inline_maps([iphone_raw_maps + android_raw_maps])

In [None]:
# Raw android location entries written between 1459449109 and 1459449156.
android_ts[0].get_data_df("background/location", estt.TimeQuery('metadata.write_ts', 1459449109, 1459449156))

In [None]:
# First 20 raw android location entries written between 1459440025.84 and 1459448055.125.
android_ts[0].get_data_df("background/location", estt.TimeQuery('metadata.write_ts', 1459440025.84, 1459448055.125)).head(n=20)

In [None]:
# Map the interpolated tracks held in interp_df: the iOS positions first, then
# the android positions.
ios_interp_features = gfc.get_feature_list_from_df(
    interp_df, ts="ts", latitude="ios_lat", longitude="ios_lng")
ios_interp_maps = lo.get_maps_for_geojson_unsectioned([ios_interp_features])
android_interp_features = gfc.get_feature_list_from_df(
    interp_df, ts="ts", latitude="android_lat", longitude="android_lng")
android_interp_maps = lo.get_maps_for_geojson_unsectioned([android_interp_features])
ipy.inline_maps([ios_interp_maps + android_interp_maps])

In [None]:
# No medium accuracy stuff yet
# ipy.inline_maps([lo.get_maps_for_geojson_unsectioned([gfc.get_feature_list_from_df(
#                     iphone_ts[0].get_data_df("background/location", estt.TimeQuery('metadata.write_ts', 1459387420.68, 1459403297.862)))]) + 
#                  lo.get_maps_for_geojson_unsectioned([gfc.get_feature_list_from_df(
#                     android_ts[0].get_data_df("background/location", estt.TimeQuery('metadata.write_ts', 1459387420.68, 1459403297.862)))])])

In [None]:
# No medium accuracy stuff yet
# interp_df = get_interp_distance_df(android_ts[0], iphone_ts[0], 1459387420.68, 1459403297.862)
# interp_df = interp_df[interp_df.same_point_distances <= 40]

In [None]:
# No medium accuracy stuff yet
# ipy.inline_maps([lo.get_maps_for_geojson_unsectioned([gfc.get_feature_list_from_df(
#                     interp_df, ts="ts", latitude="ios_lat", longitude="ios_lng")]) + 
#                  lo.get_maps_for_geojson_unsectioned([gfc.get_feature_list_from_df(
#                     interp_df, ts="ts", latitude="android_lat", longitude="android_lng")])])

In [None]:
# Rebinds interp_df to a different hard-coded range. Any earlier cell that is
# re-run after this one will operate on this range instead.
interp_df = get_interp_distance_df(android_ts[0], iphone_ts[0], 1459387420.68, 1459403297.862)

In [None]:
# interp_df.same_point_distances.plot(kind='hist', cumulative=True, normed=True, bins=100, figsize=(10,5))

In [None]:
# import statsmodels.api as sm

In [None]:
# ecdf_fn = sm.distributions.ECDF(interp_df.same_point_distances)
# x = np.linspace(0, max(interp_df.same_point_distances))
# plt.plot(x, ecdf_fn(x))

In [None]:
# quantiles_list

In [None]:
# filtered_quantiles_list

In [None]:
def to_state_int(state):
    """
    Map a state label to its integer code.

    :param state: one of 'stationary', 'loitering' or 'moving'
    :return: 1, 2 or 3 respectively; None for any other value
    """
    state_codes = (('stationary', 1), ('loitering', 2), ('moving', 3))
    for (label, code) in state_codes:
        if state == label:
            return code
    return None

In [None]:
# One record per range: the state label carried in the series name, plus the
# four filtered quantiles keyed by their level written as a string.
dict_list = [{'state': filtered_q.name,
              '0.8': filtered_q[0.8], '0.9': filtered_q[0.9],
              '0.95': filtered_q[0.95], '0.99': filtered_q[0.99]}
             for filtered_q in filtered_quantiles_list]

In [None]:
# One row per range, with columns 'state', '0.8', '0.9', '0.95' and '0.99'.
filtered_quantiles_df = pd.DataFrame(dict_list)

In [None]:
# Preview the first rows of the filtered quantiles.
filtered_quantiles_df.head()

In [None]:
# The filtered 0.8 quantile of every range.
filtered_quantiles_df['0.8']

In [None]:
# Histogram of the filtered 0.8 quantile across ranges, one panel per state.
# Each panel is titled with the state it actually plots.
(fig, axes) = plt.subplots(nrows=1, ncols=3, figsize=(12,3))
state_colors = [('moving', 'blue'), ('loitering', 'red'), ('stationary', 'purple')]
for (panel, (state, color)) in zip(axes, state_colors):
    state_p80 = filtered_quantiles_df[filtered_quantiles_df.state == state]['0.8']
    state_p80.plot(kind='hist', color=color, title=state, ax=panel)

In [None]:
# One row per range with the unfiltered quantiles, keyed by their level written
# as a string.
# NOTE(review): 'state' is read from each series' name -- verify that the cell
# which fills quantiles_list sets that name to the state of the range.
quantile_records = [{'state': unfiltered_q.name,
                     '0.8': unfiltered_q[0.8], '0.9': unfiltered_q[0.9],
                     '0.95': unfiltered_q[0.95], '0.99': unfiltered_q[0.99]}
                    for unfiltered_q in quantiles_list]
quantiles_df = pd.DataFrame(quantile_records)

In [None]:
# Histogram of the unfiltered 0.8 quantile across ranges, one panel per state.
# The rows of quantiles_df are selected with the state column of
# filtered_quantiles_df, so this relies on the two frames being row-aligned.
(fig, axes) = plt.subplots(nrows=1, ncols=3, figsize=(12,3))
range_state = filtered_quantiles_df.state
(moving_ax, loitering_ax, stationary_ax) = (axes[0], axes[1], axes[2])
quantiles_df[range_state == 'moving']['0.8'].plot(
    kind='hist', color='blue', title='moving', ax=moving_ax)
quantiles_df[range_state == 'loitering']['0.8'].plot(
    kind='hist', color='red', title='loitering', ax=loitering_ax)
quantiles_df[range_state == 'stationary']['0.8'].plot(
    kind='hist', color='purple', title='stationary', ax=stationary_ax)