# Find endpoint of last trip of lost bikes

---

In [58]:
import pandas as pd
from google.cloud import storage
import pickle
# get raw data from gs
gs = storage.Client(project='fleets-production')
bucket = gs.bucket('noa-kraken-production')

In [59]:
import quickavro
from io import BytesIO
from datetime import datetime, timedelta

def get_trips_day(bucket, import_date):
    """Get avro file of trips of a single day and return as pandas DF."""
    # get raw data from gs
    file_path = 'trips/{import_date}.avro'.format(import_date=import_date)
    blob = bucket.blob(file_path)
    trips_day = blob.download_as_string()

    # convert to pandas dataframe
    with quickavro.FileReader(BytesIO(trips_day)) as reader:
        records = []
        for record in reader.records():
            records.append(record)

    return pd.DataFrame(records).drop_duplicates('uuid')


In [60]:
#Make a list of lost bikes - bikes uuid that were emitting signals on date 1 and that were not emitting signals anymore on date 2
# ! Have to code ! something more elaborate to identify lost bikes
trackings1= get_trackings_day(bucket, '2017-10-11')
trackings2= get_trackings_day(bucket, '2017-11-30')
bikes1= pd.DataFrame(trackings1.bicycle_uuid.unique().tolist(), columns={'bicycle_uuid'})
bikes2= pd.DataFrame(trackings2.bicycle_uuid.unique().tolist(), columns={'bicycle_uuid'})
Bikeslost=list(set([b[0] for b in bikes1.values.tolist()]) - set([b[0] for b in bikes2.values.tolist()]))

In [61]:
#Get Trips between certain date range 
list_of_dfs_trips = []
for d in pd.date_range('2017-10-11', '2017-11-30'):
    try:
        list_of_dfs_trips.append(get_trips_day(bucket, d.date()))
    except ValueError:
        pass
    
#Concatenate Trackings in one Dataframe
Df_Trips=pd.concat(list_of_dfs_trips, axis=0)

#Filter the Organization (In this case = Google)
Df_Trips_Google=Df_Trips[Df_Trips.organization_uuid=='87d12b17-628a-425d-8070-4898f4d395be']

#Get the trackings of bikes lost
Df_Trips_Google_lost_bikes= Df_Trips_Google[Df_Trips_Google.bicycle_uuid.isin(Bikeslost)]

#Change the timestamp format of 'ended'
Df_Trips_Google_lost_bikes['ended']= pd.to_datetime(Df_Trips_Google_lost_bikes['ended'], unit='us')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [62]:
#Reset Index - Trips of lost bikes within selected time range
Df_Trips_Google_lost_bikes=Df_Trips_Google_lost_bikes.reset_index(drop=True)

In [63]:
len(Bikeslost)

165

In [64]:
#Number of lost bikes after a certain *date* within the defined ## date range ##
len(Df_Trips_Google_lost_bikes[Df_Trips_Google_lost_bikes['ended']> '2017-10-11 00:00:00.000'].bicycle_uuid.unique())

107

In [65]:
#Last trip for each of the lost bikes 
Last_trip= Df_Trips_Google_lost_bikes.groupby(['bicycle_uuid'])['ended'].max().to_frame().reset_index()

In [66]:
#Last trips of lost bikes dataframe
last_trips_lost_bikes= pd.merge(Last_trip, Df_Trips_Google_lost_bikes, on='ended', how='left')

In [67]:
#Get the locations of the endpoint of the last trip of each lost bike
last_loc=[]
for i in range(len(last_trips_lost_bikes.route)):
          last_trip= last_trips_lost_bikes['route'].iloc[i]
          last_loc.append(last_trip[-1:])

In [68]:
#Make 2 separate lists for latitude and longitude to integrate them in plotly
latitude_list=[]
longitude_list=[]
for i in range(len(last_loc)):
    if len(last_loc[i]) ==0:
        pass
    else:
        lati=last_loc[i][0].get('latitude')
        latitude_list.append(lati)
        long=last_loc[i][0].get('longitude')
        longitude_list.append(long)

In [69]:
#Map the locations of the endpoints of the last trip of lost bikes
import plotly.plotly as py
from plotly.graph_objs import *

mapbox_access_token = 'pk.eyJ1IjoiYWxleC1ub2EiLCJhIjoiY2o3cWNtMXVyM3dlNjMzbWw5dnZncWJybSJ9.xz2KJBUshA8Enty1rmasqA'

data = Data([
    Scattermapbox(
        lat=latitude_list,
        lon=longitude_list,
        mode='markers',
        marker=Marker(
            size=9
        ) #,
        #text=Df_Trackings_Google[Df_Trackings_Google.bicycle_uuid.isin(Bikeslost)]['bicycle_uuid'],
    )
])
layout = Layout(
    autosize=True,
    hovermode='closest',
    mapbox=dict(
        accesstoken=mapbox_access_token,
        bearing=0,
        center=dict(
            lat=37.423059,
            lon=-122.094755
        ),
        pitch=0,
        zoom=10
    ),
)

fig = dict(data=data, layout=layout)
py.iplot(fig, filename='MultipleMapbox')

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~laurannek/0 or inside your plot.ly account where it is named 'MultipleMapbox'
