# Find last tracking point of lost bikes

---

In [195]:
import os
import pickle

import pandas as pd
from google.cloud import storage
# Connect to Google Cloud Storage and open the bucket the tracking files are read from.
GCP_PROJECT = 'fleets-production'
RAW_DATA_BUCKET = 'noa-kraken-production'

gs = storage.Client(project=GCP_PROJECT)
bucket = gs.bucket(RAW_DATA_BUCKET)

In [196]:
import quickavro
from io import BytesIO
from datetime import datetime, timedelta

def get_trackings_day(bucket, import_date):
    """Load one day's tracking records from the bucket into a DataFrame.

    Downloads ``trackings/<import_date>.avro`` from ``bucket``, decodes every
    Avro record in it and returns the records as rows, keeping only the first
    row seen for each ``uuid``.
    """
    # Fetch the day's Avro file as raw bytes.
    avro_path = 'trackings/{import_date}.avro'.format(import_date=import_date)
    raw_bytes = bucket.blob(avro_path).download_as_string()

    # Decode all records, then build the frame in one go.
    with quickavro.FileReader(BytesIO(raw_bytes)) as reader:
        rows = list(reader.records())

    frame = pd.DataFrame(rows)
    return frame.drop_duplicates('uuid')

In [197]:
# Build the list of "lost" bikes: bicycle UUIDs that emitted at least one
# tracking on the first day but none on the second day.
# TODO: this two-day comparison is only a rough heuristic; something more
# elaborate is still needed to identify lost bikes.
trackings1 = get_trackings_day(bucket, '2017-10-11')
trackings2 = get_trackings_day(bucket, '2017-11-30')

# `columns` is given as a list: a set literal has no defined order and newer
# pandas versions refuse a set for `columns`.
bikes1 = pd.DataFrame(trackings1.bicycle_uuid.unique().tolist(), columns=['bicycle_uuid'])
bikes2 = pd.DataFrame(trackings2.bicycle_uuid.unique().tolist(), columns=['bicycle_uuid'])

# UUIDs present on the first day and absent on the second.
Bikeslost = list(set(bikes1['bicycle_uuid'].tolist()) - set(bikes2['bicycle_uuid'].tolist()))

In [198]:
# Load the trackings of every day in the date range, one DataFrame per day.
list_of_dfs = []
skipped_days = []  # days for which get_trackings_day raised ValueError
for d in pd.date_range('2017-10-11', '2017-11-30'):
    try:
        list_of_dfs.append(get_trackings_day(bucket, d.date()))
    except ValueError as err:
        # Best effort: keep going, but record and report the gap instead of
        # hiding it, so the counts computed later can be interpreted correctly.
        skipped_days.append(d.date())
        print('Skipped {day}: {err}'.format(day=d.date(), err=err))

# pd.concat fails with an unhelpful message on an empty list; say what happened.
if not list_of_dfs:
    raise ValueError('No trackings could be loaded for the requested date range')

# Concatenate the daily trackings into one DataFrame.
Df_Trackings = pd.concat(list_of_dfs, axis=0)

# Keep only the trackings of one organization (in this case Google).
Df_Trackings_Google = Df_Trackings[Df_Trackings.organization_uuid == '87d12b17-628a-425d-8070-4898f4d395be']

# Keep only the trackings of the lost bikes. The explicit .copy() makes this
# an independent frame, so the column assignment below modifies it directly
# and no longer triggers pandas' SettingWithCopyWarning.
Tracking_Bikeslost = Df_Trackings_Google[Df_Trackings_Google.bicycle_uuid.isin(Bikeslost)].copy()

# Convert 'timestamp' to datetimes; the raw values are read as microseconds
# since the epoch (unit='us').
Tracking_Bikeslost['timestamp'] = pd.to_datetime(Tracking_Bikeslost['timestamp'], unit='us')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [199]:
# Renumber the rows of the lost-bike trackings from 0, discarding the old index.
Tracking_Bikeslost = Tracking_Bikeslost.reset_index(drop=True)

In [200]:
# Count the distinct lost bikes that have at least one tracking strictly after
# the given date (here the start of the loaded date range).
after_start = Tracking_Bikeslost['timestamp'] > '2017-10-11 00:00:00.000'
len(Tracking_Bikeslost.loc[after_start, 'bicycle_uuid'].unique())

133

In [201]:
# For each lost bike, find the latest timestamp at which a tracking carried
# both a latitude and a longitude. Result: one row per bike with the columns
# 'bicycle_uuid' and 'timestamp'.
has_location = Tracking_Bikeslost['latitude'].notnull() & Tracking_Bikeslost['longitude'].notnull()
Last_track = (
    Tracking_Bikeslost[has_location]
    .groupby(['bicycle_uuid'])['timestamp']
    .max()
    .to_frame()
    .reset_index()
)

In [202]:
# Number of lost bikes with at least one tracking that carried non-null
# coordinates (Last_track holds one row per such bike). Bikes whose trackings
# all have null coordinates are absent from Last_track.
Last_track.shape[0]

102

In [203]:
# Attach the full tracking rows (coordinates included) to each bike's last
# located timestamp.
Last_track_coord = pd.merge(Last_track, Tracking_Bikeslost, on='timestamp', how='left')

# The join key is the timestamp only, so the merge also picks up trackings that
# *other* bikes emitted at the very same timestamp. Keep only the rows where the
# tracking belongs to the bike the timestamp was computed for. Filtering after
# the merge (rather than joining on both keys) preserves the suffixed
# 'bicycle_uuid_x' / 'bicycle_uuid_y' column names.
Last_track_coord = Last_track_coord[
    Last_track_coord['bicycle_uuid_x'] == Last_track_coord['bicycle_uuid_y']
]

In [204]:
# Share of lost bikes that have a last tracking with a location, relative to
# all lost bikes seen after the start date.
# When the date range changes, update the start date below to match it.
trackings_after_start = Tracking_Bikeslost[Tracking_Bikeslost['timestamp'] > '2017-10-11 00:00:00.000']
n_lost_bikes = len(trackings_after_start.bicycle_uuid.unique())
len(Last_track) / n_lost_bikes

0.7669172932330827

In [205]:
# Double check: drop merged rows that still lack a latitude or a longitude
# (some coordinates turned out to be null after the merge step).
has_coords = Last_track_coord['latitude'].notnull() & Last_track_coord['longitude'].notnull()

# drop=True renumbers the rows without inserting the old index as a stray
# 'index' column, matching how Tracking_Bikeslost is re-indexed.
Last_track_coord = Last_track_coord[has_coords].reset_index(drop=True)

In [206]:
import plotly.plotly as py
from plotly.graph_objs import *

# Mapbox token used to render the base map. Prefer supplying it through the
# MAPBOX_ACCESS_TOKEN environment variable; the literal is kept only as a
# fallback so the notebook still runs unchanged.
# NOTE(review): a token committed in the notebook is readable by everyone with
# access to the file - consider rotating it and removing the fallback.
mapbox_access_token = os.environ.get(
    'MAPBOX_ACCESS_TOKEN',
    'pk.eyJ1IjoiYWxleC1ub2EiLCJhIjoiY2o3cWNtMXVyM3dlNjMzbWw5dnZncWJybSJ9.xz2KJBUshA8Enty1rmasqA',
)

# One marker per last located tracking, with the bike's UUID as marker text.
# A plain list and dict replace the deprecated Data([...]) and Marker(...)
# wrappers; the resulting figure description is the same.
data = [
    Scattermapbox(
        lat=Last_track_coord['latitude'].tolist(),
        lon=Last_track_coord['longitude'].tolist(),
        mode='markers',
        marker=dict(size=9),
        text=Last_track_coord['bicycle_uuid_x'].tolist(),
    )
]

# Initial map view: fixed center and zoom level, no rotation or tilt.
layout = Layout(
    autosize=True,
    hovermode='closest',
    mapbox=dict(
        accesstoken=mapbox_access_token,
        bearing=0,
        center=dict(
            lat=37.423059,
            lon=-122.094755
        ),
        pitch=0,
        zoom=10
    ),
)

# Send the figure to the plotly account and display it inline.
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='MultipleMapbox')

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~laurannek/0 or inside your plot.ly account where it is named 'MultipleMapbox'


### Notes
1. 
2. 