In [25]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import pickle

# The 'seaborn-whitegrid' style was renamed to 'seaborn-v0_8-whitegrid' in
# matplotlib 3.6 and the old name was removed in later releases. Try the new
# name first and fall back to the old one so this cell runs on both older and
# newer matplotlib versions (an unknown style name raises OSError).
try:
    plt.style.use('seaborn-v0_8-whitegrid')
except OSError:
    plt.style.use('seaborn-whitegrid')

import warnings
# NOTE(review): this silences every warning for the rest of the notebook,
# including library deprecation warnings that announce upcoming breakage.
warnings.filterwarnings('ignore')


In [39]:
# Load the two fitted models from disk. The files are opened in `with` blocks
# so the handles are closed as soon as unpickling finishes.
# NOTE: pickle.load can execute arbitrary code while loading; only use model
# files that come from a trusted source.
with open("prize_model.sav", 'rb') as price_model_file:
    price_model = pickle.load(price_model_file)

with open("length_model.sav", 'rb') as time_model_file:
    time_model = pickle.load(time_model_file)

### Price model

In [29]:
# Helper functions used for the price prediction
def prepare_time_features(df):
    """Parse `pickup_datetime` and derive calendar features from it.

    The `pickup_datetime` column must hold strings. Only the first 16
    characters of each value are kept (the 'YYYY-MM-DD HH:MM' part expected
    by the parse format), so seconds and any trailing text are discarded.
    The column is then replaced by UTC-aware timestamps.

    The frame is modified in place: `pickup_datetime` is overwritten and the
    columns `hour_of_day`, `month`, `year` and `weekday` are added.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string column named `pickup_datetime`.

    Returns
    -------
    pandas.DataFrame
        The same `df` object that was passed in.
    """
    minute_resolution = df['pickup_datetime'].str.slice(0, 16)
    df['pickup_datetime'] = pd.to_datetime(minute_resolution, utc=True, format='%Y-%m-%d %H:%M')

    # Map each new column to the datetime accessor attribute that fills it.
    stamps = df['pickup_datetime'].dt
    derived_columns = (
        ('hour_of_day', 'hour'),
        ('month', 'month'),
        ('year', 'year'),
        ('weekday', 'weekday'),
    )
    for column_name, accessor_attr in derived_columns:
        df[column_name] = getattr(stamps, accessor_attr)
    return df

def transform(data):
    """Add the distance from each pickup point to three nearby airports.

    For every airport in the table below, `distance` is called with the
    airport's latitude/longitude and the frame's `pickup_latitude` /
    `pickup_longitude` columns, and the result is stored in a new
    `distance_to_<code>` column. The frame is modified in place.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with `pickup_latitude` and `pickup_longitude` columns.

    Returns
    -------
    pandas.DataFrame
        The same `data` object, with `distance_to_jfk`, `distance_to_ewr`
        and `distance_to_lgr` added (in that order).
    """
    # Airport coordinates stored as (longitude, latitude).
    airports = {
        'jfk': (-73.7781, 40.6413),
        'ewr': (-74.1745, 40.6895),
        'lgr': (-73.8740, 40.7769),
    }

    for code, (airport_lon, airport_lat) in airports.items():
        data['distance_to_' + code] = distance(
            airport_lat, airport_lon,
            data['pickup_latitude'], data['pickup_longitude'],
        )

    return data

def distance(lat1, lon1, lat2, lon2):
    """Great-circle distance between two points, using the haversine formula.

    All four arguments are coordinates in degrees. Scalars, NumPy arrays and
    pandas Series are accepted and follow the usual broadcasting rules.

    Parameters
    ----------
    lat1, lon1 : latitude and longitude of the first point(s), in degrees.
    lat2, lon2 : latitude and longitude of the second point(s), in degrees.

    Returns
    -------
    The distance, scaled by 12742 (presumably Earth's mean diameter in km)
    and by 0.6213712 (the km-to-miles factor), i.e. a value in miles.
    """
    deg_to_rad = 0.017453292519943295  # Pi/180

    # Haversine term, written with cosines: (1 - cos(x)) / 2 == sin(x / 2) ** 2
    lat_part = 0.5 - np.cos((lat2 - lat1) * deg_to_rad) / 2
    lon_part = (
        np.cos(lat1 * deg_to_rad)
        * np.cos(lat2 * deg_to_rad)
        * (1 - np.cos((lon2 - lon1) * deg_to_rad))
        / 2
    )
    haversine = lat_part + lon_part

    return 0.6213712 * 12742 * np.arcsin(np.sqrt(haversine))

In [56]:
def get_price_pipeline(timestamp, n_passengers, pickup_latitude, dropoff_longitude, dropoff_latitude, pickup_longitude):
    """Predict the fare for a single trip with `price_model`.

    Builds a one-row DataFrame from the arguments, adds the time features
    (`prepare_time_features`), the airport distances (`transform`) and the
    pickup-to-dropoff distance (`distance`), then passes the columns to
    `price_model.predict` in a fixed order.

    Note the positional order of the coordinate arguments:
    pickup latitude, dropoff longitude, dropoff latitude, pickup longitude.

    Parameters
    ----------
    timestamp : str
        Pickup time as a string starting with 'YYYY-MM-DD HH:MM'.
    n_passengers : passenger count for the trip.
    pickup_latitude, dropoff_longitude, dropoff_latitude, pickup_longitude :
        Trip coordinates in degrees.

    Returns
    -------
    The first (and only) element of the model's prediction.
    """
    # Column order handed to the model.
    feature_order = ['pickup_longitude', 'pickup_latitude', 'dropoff_longitude',
                     'dropoff_latitude', 'passenger_count', 'hour_of_day', 'month', 'year',
                     'weekday', 'distance_miles', 'distance_to_jfk', 'distance_to_ewr',
                     'distance_to_lgr']

    trip = pd.DataFrame({
        'pickup_datetime': [timestamp],
        'passenger_count': [n_passengers],
        'pickup_latitude': [pickup_latitude],
        'dropoff_longitude': [dropoff_longitude],
        'dropoff_latitude': [dropoff_latitude],
        'pickup_longitude': [pickup_longitude],
    })

    # Both helpers add their columns to `trip` in place.
    prepare_time_features(trip)
    transform(trip)
    trip['distance_miles'] = distance(trip['pickup_latitude'], trip['pickup_longitude'],
                                      trip['dropoff_latitude'], trip['dropoff_longitude'])

    # Selecting the feature columns also leaves out the raw pickup_datetime column.
    model_input = trip[feature_order]

    return price_model.predict(model_input)[0]
    

In [57]:
# Let's test our pipeline.
# Positional order of the arguments: timestamp, n_passengers, pickup_latitude,
# dropoff_longitude, dropoff_latitude, pickup_longitude.
get_price_pipeline('2020-01-01 13:12:11', 1, 40.763805, -73.98369, 40.743835, -73.97332)

9.4

### Time model

In [64]:
def get_time_pipeline(timestamp, n_passengers, pickup_latitude, dropoff_longitude, dropoff_latitude, pickup_longitude):
    """Predict the trip duration for a single trip with `time_model`.

    Builds a one-row DataFrame from the arguments, derives calendar features
    from the pickup timestamp, and passes the columns to `time_model.predict`
    in a fixed order. The model output is exponentiated before it is returned
    (presumably the model was fitted on the log of the duration; verify
    against the training notebook).

    Note the positional order of the coordinate arguments:
    pickup latitude, dropoff longitude, dropoff latitude, pickup longitude.

    Parameters
    ----------
    timestamp : str
        Pickup time in a format `pandas.to_datetime` can parse.
    n_passengers : passenger count for the trip.
    pickup_latitude, dropoff_longitude, dropoff_latitude, pickup_longitude :
        Trip coordinates in degrees.

    Returns
    -------
    `exp` of the first (and only) element of the model's prediction.
    """
    d = {'pickup_datetime': [timestamp], 'passenger_count': [n_passengers],  'pickup_latitude' : [pickup_latitude], 'dropoff_longitude' : [dropoff_longitude], 'dropoff_latitude' : [dropoff_latitude],'pickup_longitude' : [pickup_longitude]}
    manual_df = pd.DataFrame(data=d)
    manual_df['pickup_datetime'] = pd.to_datetime(manual_df.pickup_datetime)
    pickup = manual_df['pickup_datetime'].dt

    manual_df['month'] = pickup.month
    # `Series.dt.week` was deprecated in pandas 1.1 and removed in pandas 2.0.
    # `isocalendar().week` yields the same ISO week number; it is cast to
    # int64 because that is the dtype `dt.week` used to return.
    if hasattr(pickup, 'isocalendar'):
        manual_df['week'] = pickup.isocalendar().week.astype('int64')
    else:
        # pandas < 1.1 has no isocalendar(); the old accessor still exists there.
        manual_df['week'] = pickup.week
    manual_df['weekday'] = pickup.weekday
    manual_df['hour'] = pickup.hour
    # Minutes elapsed since midnight.
    manual_df['minute_oftheday'] = manual_df['hour'] * 60 + pickup.minute

    # Column order handed to the model.
    col_order = ['passenger_count', 'pickup_longitude', 'pickup_latitude',
       'dropoff_longitude', 'dropoff_latitude', 'month', 'week', 'weekday',
       'hour', 'minute_oftheday']

    predictions = np.exp(time_model.predict(manual_df[col_order]))

    return predictions[0]

In [65]:
# Try the time pipeline on one example trip.
# Positional order of the arguments: timestamp, n_passengers, pickup_latitude,
# dropoff_longitude, dropoff_latitude, pickup_longitude.
get_time_pipeline('2020-01-01 13:12:11', 1, 40.763805, -73.98369, 40.743835, -73.97332)

750.4026977686557

### Overall, neater pipeline

In [66]:
def prediction_pipeline(timestamp, n_passengers, pickup_latitude, dropoff_longitude, dropoff_latitude, pickup_longitude):
    """Run both the price and the time pipeline for one trip.

    The arguments are forwarded unchanged, in the same positional order, to
    `get_price_pipeline` and then to `get_time_pipeline`. A one-line summary
    of both predictions is printed.

    Parameters
    ----------
    timestamp : pickup time string accepted by both pipelines.
    n_passengers : passenger count for the trip.
    pickup_latitude, dropoff_longitude, dropoff_latitude, pickup_longitude :
        Trip coordinates in degrees.

    Returns
    -------
    tuple
        `(fare, time)` as returned by the two pipelines.
    """
    trip_args = (timestamp, n_passengers, pickup_latitude,
                 dropoff_longitude, dropoff_latitude, pickup_longitude)

    predicted_fare = get_price_pipeline(*trip_args)
    predicted_duration = get_time_pipeline(*trip_args)

    print("A cab here will cost you" , predicted_fare, "dollars, and will take", predicted_duration, "to get to your destination")
    return predicted_fare, predicted_duration

In [67]:
# Run the combined pipeline on one example trip; it prints a summary line and
# returns the (fare, time) tuple.
# Positional order of the arguments: timestamp, n_passengers, pickup_latitude,
# dropoff_longitude, dropoff_latitude, pickup_longitude.
prediction_pipeline('2020-01-01 13:12:11', 1, 40.763805, -73.98369, 40.743835, -73.97332)

A cab here will cost you 9.4 dollars, and will take 750.4026977686557 to get to your destination


(9.4, 750.4026977686557)