In [1]:
#import

import numpy as np
import pandas as pd
import math
import warnings
warnings.filterwarnings('ignore')
import dask
import dask.dataframe as dd
from dask.distributed import Client, progress
client = Client()
import seaborn as sns
import folium
from folium.plugins import HeatMap
import matplotlib.pyplot as plt
%matplotlib inline

import os 
print(os.listdir("../input/new-york-city-taxi-fare-prediction"))

['sample_submission.csv', 'GCP-Coupons-Instructions.rtf', 'train.csv', 'test.csv']


In [2]:
#input file path

# Locations of the Kaggle training and test CSV files, relative to the notebook.
data_train_file_path, data_test_file_path = (
    '../input/new-york-city-taxi-fare-prediction/train.csv',
    '../input/new-york-city-taxi-fare-prediction/test.csv',
)

In [3]:
client

0,1
Client  Scheduler: tcp://127.0.0.1:41631  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 2  Cores: 2  Memory: 13.00 GiB


In [4]:
#row count

# Count the lines of the training CSV by streaming over the file handle.
# readlines() would first build a Python list holding every line of the file
# (tens of millions of strings) only to take its length.
with open(data_train_file_path) as f:
    db_rows_count = sum(1 for _ in f)   # every line is counted, header line included
print("no. of rows in the training data : {0}\n".format(db_rows_count))

no. of rows in the training data : 55423857



### Data Analysis

In [5]:
# training data - Set columns to most suitable type to optimize for memory usage and speed-up the loading

# Coordinate columns are common to the training and the test file; they are read
# as float32 to keep memory usage low and loading fast.
coordinate_types = {'pickup_longitude' : 'float32',
                    'pickup_latitude'  : 'float32',
                    'dropoff_longitude': 'float32',
                    'dropoff_latitude' : 'float32'}

# training data - column name -> dtype handed to the CSV reader
train_types = {'fare_amount'    : 'float32',
               'pickup_datetime': 'str',
               **coordinate_types}

# test data - column name -> dtype handed to the CSV reader
test_types = {'pickup_datetime': 'str',
              'key'            : 'str',
              **coordinate_types}

# only the columns listed above are loaded (dict order is preserved)
train_cols = [*train_types]
test_cols = [*test_types]

# NY city bounding box, laid out as (lon_min, lon_max, lat_min, lat_max) -
# the order select_within_boundingbox indexes it in
BB = (-74.5, -72.8, 40.5, 41.8)

# fraction of the data to load from db
# NOTE(review): nothing in the visible notebook reads `frac`; load_data samples
# 4% of the rows on its own.
frac = 0.00050
    
# select within the bounding box
def select_within_boundingbox(df, BB):
    """Boolean mask of trips whose pickup AND dropoff both lie inside a box.

    Parameters
    ----------
    df : frame exposing pickup_longitude, pickup_latitude, dropoff_longitude
        and dropoff_latitude columns.
    BB : sequence indexed as (lon_min, lon_max, lat_min, lat_max); all four
        bounds are inclusive.

    Returns
    -------
    Element-wise boolean mask aligned with ``df``.
    """
    lon_min, lon_max, lat_min, lat_max = BB[0], BB[1], BB[2], BB[3]

    mask = None
    for endpoint in ("pickup", "dropoff"):
        lon = df[endpoint + "_longitude"]
        lat = df[endpoint + "_latitude"]
        inside = (lon >= lon_min) & (lon <= lon_max) & (lat >= lat_min) & (lat <= lat_max)
        # both endpoints must be inside, so the per-endpoint masks are AND-ed
        mask = inside if mask is None else mask & inside
    return mask

In [6]:
def load_data(data_file_path, train_data='Y', sample_frac=0.04, random_state=None):
    """Load a taxi-fare CSV with dask, clean it and add pickup date/time features.

    Parameters
    ----------
    data_file_path : path of the CSV file to read.
    train_data : 'Y' for the training file (sampled and filtered) or 'N' for the
        test file (loaded as-is, no rows removed).
    sample_frac : fraction of training rows kept by the random sample
        (default 0.04, the value this function previously hard-coded).
    random_state : optional seed for the sample; pass an int to make the
        sampled rows reproducible between runs.

    Returns
    -------
    A dask DataFrame persisted on the cluster with the extra integer columns
    pickup_year, pickup_month, pickup_dd, pickup_weekday (Monday = 0),
    pickup_hour and pickup_minute.

    Raises
    ------
    ValueError : if ``train_data`` is neither 'Y' nor 'N'.
    """
    if train_data not in ('Y', 'N'):
        # previously an unexpected flag fell through both branches and failed
        # later with an UnboundLocalError on `df`
        raise ValueError("train_data must be 'Y' or 'N', got {0!r}".format(train_data))

    if train_data == 'Y':
        df = dd.read_csv(data_file_path, usecols=train_cols, dtype=train_types)  # data load, dask

        # columns in which a zero marks a missing/invalid record
        column_names = ["fare_amount",
                        "pickup_longitude",
                        "pickup_latitude",
                        "dropoff_longitude",
                        "dropoff_latitude"]
        df = df.sample(frac=sample_frac, random_state=random_state)  # rows to load
        df = df.dropna(how="any", subset=train_cols)                 # remove rows with null values
        df = df[(df[column_names] != 0).all(axis=1)]                 # remove rows with a zero fare or coordinate
        # keep fares strictly between 0 and 100 and drop trips whose pickup and
        # dropoff coordinates are identical
        df = df.loc[(df.fare_amount > 0) & (df.fare_amount < 100) &
            ~(((df.pickup_longitude - df.dropoff_longitude) == 0) &
             ((df.pickup_latitude - df.dropoff_latitude) == 0))]
        df = df[select_within_boundingbox(df, BB)]                   # remove the coordinates not within new york city
    else:
        df = dd.read_csv(data_file_path, usecols=test_cols, dtype=test_types)  # data load, dask

    # Extract the date parts with the vectorised .dt accessor instead of building
    # one pandas Series per row; the int64 cast keeps the dtype the row-wise
    # version produced.
    pickup_ts = dd.to_datetime(df['pickup_datetime'])
    df = df.assign(pickup_year=pickup_ts.dt.year.astype('int64'),
                   pickup_month=pickup_ts.dt.month.astype('int64'),
                   pickup_dd=pickup_ts.dt.day.astype('int64'),
                   pickup_weekday=pickup_ts.dt.weekday.astype('int64'),
                   pickup_hour=pickup_ts.dt.hour.astype('int64'),
                   pickup_minute=pickup_ts.dt.minute.astype('int64'))   # extract features
    df = client.persist(df)
    return df

In [7]:
#call the subroutine to load the data 
df = load_data(data_train_file_path, 'Y')
progress(df)

VBox()

In [8]:
# number of rows from db and after applying the filters
# len(df) counts the rows of the dask frame returned by load_data.
# NOTE: "dropped rows" is the line count of the CSV minus the rows kept, so it
# includes the rows discarded by the random sample taken in load_data, not only
# the rows removed by the quality filters.
after = len(df)
print('# of rows in training data \n\t actual : {0}  \n\t after applying filters : {1}  \n\t dropped rows: {2} '.format(db_rows_count, after, db_rows_count-after))   # before and after filter rows count 

# of rows in training data 
	 actual : 55423857  
	 after applying filters : 2145753  
	 dropped rows: 53278104 


In [9]:
#viewing top 10 rows

df.head(10)

Unnamed: 0,fare_amount,pickup_datetime,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,pickup_year,pickup_month,pickup_dd,pickup_weekday,pickup_hour,pickup_minute
344475,21.5,2014-03-24 00:05:48 UTC,-73.984596,40.7286,-73.956017,40.804779,2014,3,24,0,0,5
234772,8.5,2013-01-12 21:11:07 UTC,-73.989792,40.736919,-74.008423,40.731777,2013,1,12,5,21,11
273815,4.5,2012-09-03 15:39:00 UTC,-73.949997,40.776058,-73.960083,40.770561,2012,9,3,0,15,39
434546,5.7,2010-05-25 16:23:22 UTC,-73.983414,40.755898,-73.973061,40.761459,2010,5,25,1,16,23
446856,6.9,2011-10-23 10:15:30 UTC,-73.981606,40.746891,-73.978317,40.730923,2011,10,23,6,10,15
424988,8.5,2014-06-04 20:18:00 UTC,-73.992714,40.743141,-73.987679,40.728611,2014,6,4,2,20,18
570117,6.5,2014-12-24 18:21:39 UTC,-73.995041,40.760368,-73.989799,40.776016,2014,12,24,2,18,21
249572,5.5,2015-03-27 22:08:21 UTC,-73.987892,40.724045,-73.985016,40.734943,2015,3,27,4,22,8
266503,9.5,2014-08-23 11:04:40 UTC,-73.995483,40.717117,-74.003868,40.729797,2014,8,23,5,11,4
20715,6.1,2009-08-03 19:46:22 UTC,-73.950325,40.771465,-73.971954,40.782234,2009,8,3,0,19,46


#### Row count, null check, and data type

In [10]:
# Per-column null counts for the whole frame, gathered in ONE dask computation
# (the result is a pandas Series indexed by column name) instead of launching a
# separate compute for every column.
null_counts = df.isnull().sum().compute()

print(f'# of rows processing : {len(df)}')
print("\033[4m\nColumn Name\tisnull_counts\tdata_types\033[0m")   # \033[4m ... \033[0m underlines the header
for columns in df.columns:
    print(f'{columns.ljust(17)}\t{null_counts[columns]:>5}\t{(df[columns].dtype)}')
print('\t')

# of rows processing : 2145753
[4m
Column Name	isnull_counts	data_types[0m
fare_amount      	    0	float32
pickup_datetime  	    0	object
pickup_longitude 	    0	float32
pickup_latitude  	    0	float32
dropoff_longitude	    0	float32
dropoff_latitude 	    0	float32
pickup_year      	    0	int64
pickup_month     	    0	int64
pickup_dd        	    0	int64
pickup_weekday   	    0	int64
pickup_hour      	    0	int64
pickup_minute    	    0	int64
	


#### Distance Calculation

In [11]:
# distance calculation in miles (haversine great-circle distance; the km result is converted with the 0.62137 factor)
from math import cos, asin, sqrt

def distance_haversine(lon1, lat1, lon2, lat2):
    """Great-circle (haversine) distance between two points, in MILES.

    Parameters
    ----------
    lon1, lat1 : longitude and latitude of the first point, in degrees.
    lon2, lat2 : longitude and latitude of the second point, in degrees.

    Returns
    -------
    float : distance in miles (12742 km Earth diameter, converted with
    0.62137 miles per kilometre).
    """
    p = 0.017453292519943295     # Pi/180, converts degrees to radians
    a = 0.5 - cos((lat2 - lat1) * p)/2 + cos(lat1 * p) * cos(lat2 * p) * (1 - cos((lon2 - lon1) * p)) / 2
    # `a` lies in [0, 1] mathematically, but rounding can push it marginally
    # outside, which would make sqrt()/asin() raise a math domain error.
    # Plain comparisons are used so that a NaN input still yields NaN.
    if a < 0.0:
        a = 0.0
    elif a > 1.0:
        a = 1.0
    return 12742 * asin(sqrt(a)) * 0.62137

In [12]:
def distance_rows(p_lon, p_lat, d_lon, d_lat):
    """Distance features for one trip.

    Parameters
    ----------
    p_lon, p_lat : pickup longitude and latitude in degrees.
    d_lon, d_lat : dropoff longitude and latitude in degrees.

    Returns
    -------
    list of 10 values, in this order: pickup-dropoff distance, pickup-JFK,
    dropoff-JFK, pickup-EWR, dropoff-EWR, pickup-LGA, dropoff-LGA,
    city-centre-pickup (all as returned by distance_haversine), then the
    raw longitude difference and latitude difference (pickup minus dropoff).
    """
    # reference points are stored as (latitude, longitude) while
    # distance_haversine takes (longitude, latitude): hence the [1], [0] order
    nyc_coord = (40.7141667, -74.0063889,)      # ny city center coordinates
    jfk_coord = (40.639722, -73.778889)         #John F. Kennedy International Airport coordinates
    ewr_coord = (40.6925, -74.168611)           #Newark Liberty International Airport coordinates
    lga_coord = (40.77725, -73.872611)          #LaGuardia Airport coordinates

    distance_between_pickup_dropoff = distance_haversine(p_lon, p_lat, d_lon, d_lat)                    # distance between pickup and dropff
    distance_between_pickup_jfk     = distance_haversine(p_lon, p_lat, jfk_coord[1], jfk_coord[0])      # distance between pickup and jfk airport
    distance_between_dropoff_jfk    = distance_haversine(jfk_coord[1], jfk_coord[0], d_lon, d_lat)      # distance between dropoff and jfk airport
    distance_between_pickup_ewr     = distance_haversine(p_lon, p_lat, ewr_coord[1], ewr_coord[0])      # distance between pickup and ewr airport
    distance_between_dropoff_ewr    = distance_haversine(ewr_coord[1], ewr_coord[0], d_lon, d_lat)      # distance between dropoff and ewr airport
    distance_between_pickup_lga     = distance_haversine(p_lon, p_lat, lga_coord[1], lga_coord[0])      # distance between pickup and lga airport
    distance_between_dropoff_lga    = distance_haversine(lga_coord[1], lga_coord[0], d_lon, d_lat)      # distance between dropoff and lga airport
    # fixed: the city centre used to be passed as (lat, lon), i.e. with its
    # longitude and latitude swapped relative to every other call
    distance_between_citycenter_pickup = distance_haversine(nyc_coord[1], nyc_coord[0], p_lon, p_lat)   # distance between pickup and city center
    longitude_diff                     = p_lon - d_lon
    latitude_diff                      = p_lat - d_lat

    return [distance_between_pickup_dropoff,
            distance_between_pickup_jfk,
            distance_between_dropoff_jfk,
            distance_between_pickup_ewr,
            distance_between_dropoff_ewr,
            distance_between_pickup_lga,
            distance_between_dropoff_lga,
            distance_between_citycenter_pickup,
            longitude_diff,
            latitude_diff]

def calculate_coordinates_distance(df, train_data='Y'):
    """Append the distance features computed by distance_rows to a dask frame.

    Parameters
    ----------
    df : dask DataFrame holding the four pickup/dropoff coordinate columns
        (and fare_amount when ``train_data`` is 'Y').
    train_data : 'Y' additionally drops trips of 0.05 or less between pickup and
        dropoff and adds a fare_per_mile column; any other value skips that step.

    Returns
    -------
    The enlarged dask DataFrame, index reset and persisted on the cluster.
    """
    coordinate_columns = ["pickup_longitude", "pickup_latitude",
                          "dropoff_longitude", "dropoff_latitude"]

    # names of the values returned by distance_rows, in the same order
    column_names = ['distance_between_pickup_dropoff',
                    'distance_between_pickup_jfk',
                    'distance_between_dropoff_jfk',
                    'distance_between_pickup_ewr',
                    'distance_between_dropoff_ewr',
                    'distance_between_pickup_lga',
                    'distance_between_dropoff_lga',
                    'distance_between_citycenter_pickup',
                    'longitude_diff',
                    'latitude_diff']

    def _row_distances(row):
        # one labelled Series per trip, so apply() expands it into columns
        return pd.Series(distance_rows(*row), index=column_names)

    # row-wise evaluation on the underlying pandas partitions, driven by dask
    distances = df[coordinate_columns].apply(_row_distances, axis=1)
    df = dd.concat([df, distances], axis=1)

    # fare per mile is only defined for the training data (it needs fare_amount)
    if (train_data == 'Y'):
        # very short trips are removed before dividing by the distance
        df = df.loc[df.distance_between_pickup_dropoff > 0.05]
        df = df.assign(fare_per_mile=df.fare_amount / df.distance_between_pickup_dropoff)

    df = df.reset_index(drop=True)   # discard the index left over from filtering
    df = client.persist(df)
    return df

In [13]:
# calculate the distance between the coordinates
df = calculate_coordinates_distance(df, train_data='Y')
progress(df)

VBox()

In [None]:
df.compute().info()

### Folium map plotting

(with limited data)

In [None]:
#create a map
this_map = folium.Map(location=[40.741895, -73.989308],
                      zoom_start=11
)

def plotDot(point):
    '''input: series that contains a numeric named pickup_latitude and a numeric named pickup_longitude
    this function creates a CircleMarker and adds it to the module-level this_map'''
    folium.CircleMarker(location=[point.pickup_latitude, point.pickup_longitude],
                        radius=2,color='#3186cc', fill=True,fill_color='#3186cc',
                       weight=0).add_to(this_map)

# Only the first 5000 rows are plotted. head(..., npartitions=-1) collects just
# those rows (searching every partition if needed) instead of materialising the
# complete frame with compute() and then discarding almost all of it.
df.head(5000, npartitions=-1).apply(plotDot, axis = 1)

#Set the zoom to the maximum possible 
#this_map.fit_bounds(this_map.get_bounds())
    
this_map

### Heatmap Plotting

(to recognise high activity zones)

In [None]:
#create a map
this_map = folium.Map(location=[40.741895, -73.989308])

# [latitude, longitude] pairs for every pickup. Only the two needed columns are
# computed and converted in one vectorised step; iterating the whole frame with
# iterrows() builds a pandas Series per row and is far slower.
heat_data = df[['pickup_latitude', 'pickup_longitude']].compute().values.tolist()

# Plot it on the map
HeatMap(heat_data).add_to(this_map)

#Set the zoom to the maximum possible
this_map.fit_bounds(this_map.get_bounds())
    
this_map

#### Delayed function

In [None]:
@dask.delayed
def round_decimals(x, x_decimals=2):
    """Lazily round ``x`` to ``x_decimals`` places by calling ``x.round``.

    Wrapped in dask.delayed, so calling it returns a delayed object rather than
    the rounded value. Not called anywhere else in the visible notebook.
    """
    return x.round(x_decimals)

@dask.delayed
def math_sqrt(x):
    """Lazily take the square root of ``x`` with ``math.sqrt``.

    Wrapped in dask.delayed, so calling it returns a delayed object; the value
    is only produced when ``.compute()`` is called on that object.
    """
    return math.sqrt(x)

### Fare amount: mean and standard deviation

In [None]:
# Both statistics are built lazily here and only evaluated by the .compute()
# calls inside the print statements below.
fare_amount_mean = df["fare_amount"].mean()
# square root of the mean squared deviation from the mean, i.e. the population
# standard deviation (no n-1 correction); math_sqrt is the delayed wrapper
fare_amount_standard_deviation = math_sqrt(((df["fare_amount"] - fare_amount_mean) ** 2).mean())

print("average fair amount (mean) : ${0:.2f}".format(fare_amount_mean.compute()))
print("fare amount standard deviation : ${0:.2f}\n".format(fare_amount_standard_deviation.compute()))

#### Fare Analysis

In [None]:
# plot histogram of fare: 15 equal-width bins, counts only (no density curve)

plt.figure(figsize=(25,10))
sns.set(color_codes=True)
# NOTE(review): the dask Series is passed to seaborn directly; distplot is
# deprecated in newer seaborn releases - confirm the installed version.
ax = sns.distplot(df.fare_amount, bins=15, kde=False)
plt.xlabel('fare $USD', fontsize=20)
plt.ylabel('frequency', fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
    
plt.title('fare amount Histogram', fontsize=25)
plt.show()

#### Hourly Fare Analysis

In [None]:
sns.set(style="darkgrid")
plt.figure(figsize=(20,12))

# Fare per mile by pickup hour, one line per pickup year.
# df.compute() materialises the whole dask frame as a pandas DataFrame for seaborn.

sns.lineplot(x="pickup_hour", y="fare_per_mile",
            hue="pickup_year", style="pickup_year",  dashes=False, 
             data=df.compute())
plt.show()

#### Fare Per Mile Analysis on different timings

In [None]:
# time-of-day segments: day 06:00-15:59, peak 16:00-19:59, night everything else
df_day              = df.loc[(df.pickup_hour >=6) & (df.pickup_hour <16)]
df_peak_hours       = df.loc[((df.pickup_hour >=16) & (df.pickup_hour <20))]
df_night            = df.loc[~((df.pickup_hour >=6) & (df.pickup_hour <20))]

# a trip end counts as "at an airport" when it is within 2 (distance units of
# distance_haversine) of JFK, EWR or LGA; the masks are built once and reused
near_airport_pickup  = ((df.distance_between_pickup_jfk < 2)  | (df.distance_between_pickup_ewr < 2)  | (df.distance_between_pickup_lga < 2))
near_airport_dropoff = ((df.distance_between_dropoff_jfk < 2) | (df.distance_between_dropoff_lga < 2) | (df.distance_between_dropoff_ewr < 2))

df_between_airports = df.loc[near_airport_pickup & near_airport_dropoff]
df_airport_pickup   = df.loc[near_airport_pickup]
df_airport_dropoff  = df.loc[near_airport_dropoff]

# trips between JFK and Manhattan (either direction)
# Fixed: the box must use the (lon_min, lon_max, lat_min, lat_max) layout of BB;
# it was written as (lon_min, lat_min, lon_max, lat_max), which left the
# longitude without an upper bound and the latitude without a lower bound.
BB_manhattan = (-74.025, -73.925, 40.7, 40.8)

# select_within_boundingbox requires BOTH ends inside the box, which can never
# hold for a JFK trip, so each end is tested on its own here.
pickup_in_manhattan  = ((df.pickup_longitude >= BB_manhattan[0]) & (df.pickup_longitude <= BB_manhattan[1]) &
                        (df.pickup_latitude >= BB_manhattan[2]) & (df.pickup_latitude <= BB_manhattan[3]))
dropoff_in_manhattan = ((df.dropoff_longitude >= BB_manhattan[0]) & (df.dropoff_longitude <= BB_manhattan[1]) &
                        (df.dropoff_latitude >= BB_manhattan[2]) & (df.dropoff_latitude <= BB_manhattan[3]))
df_jfk_manhattan = df[(pickup_in_manhattan & (df.distance_between_dropoff_jfk < 2)) |
                      (dropoff_in_manhattan & (df.distance_between_pickup_jfk < 2))]

#reset the index

df = df.reset_index(drop=True)

In [None]:
def _mean_fare_per_mile(frame):
    """Mean of the fare_per_mile column of a dask frame, computed and rounded to 2 decimals."""
    return frame.fare_per_mile.mean().compute().round(2)

# overall and time-of-day averages
fare_amount_per_mile                  = _mean_fare_per_mile(df)

fare_amount_per_mile_day              = _mean_fare_per_mile(df_day)
fare_amount_per_mile_peak_hours       = _mean_fare_per_mile(df_peak_hours)
fare_amount_per_mile_night            = _mean_fare_per_mile(df_night)

# airport related averages
fare_amount_per_mile_between_airports = _mean_fare_per_mile(df_between_airports)
fare_amount_per_mile_airport_pickup   = _mean_fare_per_mile(df_airport_pickup)
fare_amount_per_mile_airport_dropoff  = _mean_fare_per_mile(df_airport_dropoff)
fare_amount_per_mile_jfk_manhattan    = _mean_fare_per_mile(df_jfk_manhattan)

# pickup_weekday 0-4 is treated as week day, 5-6 as week end
fare_amount_per_mile_weekday          = _mean_fare_per_mile(df.loc[df.pickup_weekday<=4])
fare_amount_per_mile_weekend          = _mean_fare_per_mile(df.loc[df.pickup_weekday>=5])

# (row label, average) pairs in the order they appear in the summary table
segment_averages = [
    ('between_airports',  fare_amount_per_mile_between_airports),
    ('jfk_manhattan',     fare_amount_per_mile_jfk_manhattan),
    ('airport_pickup',    fare_amount_per_mile_airport_pickup),
    ('airport_dropoff',   fare_amount_per_mile_airport_dropoff),
    ('week end',          fare_amount_per_mile_weekend),
    ('week day',          fare_amount_per_mile_weekday),
    ('peak hours(4-8pm)', fare_amount_per_mile_peak_hours),
    ('night_ride',        fare_amount_per_mile_night),
    ('day_ride',          fare_amount_per_mile_day),
    ('all_day',           fare_amount_per_mile),
]

# segments without any trip have a NaN mean and are dropped from the table
avg_data = pd.DataFrame(
    {'fare': [average for _, average in segment_averages]},
    index=[label for label, _ in segment_averages],
).dropna()

In [None]:
# average fare per mile for each segment in avg_data, as a horizontal bar chart
sns.set_style("white")
plt.figure(figsize=(20,8))
plt.barh(avg_data.index, avg_data.fare, height = .4, align='center',  color="b")
plt.title("fare per mile - trip average", fontsize=20)
plt.xlabel('fare $ USD', fontsize=12)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

# write each value at the end of its bar (x = bar length, y = bar position)
for i, v in enumerate(avg_data.fare):
    plt.text(v,i-.1, '$' + str(v), fontsize=12)

The plot above shows:
* Fare per mile is higher for trips between airports
* Daytime rides cost more per mile than night rides
* Weekday rides cost more per mile than weekend rides

#### Fare per mile year by year analysis

In [None]:
def _yearly_mean_fare_per_mile(frame):
    """Mean fare_per_mile per pickup_year for a dask frame, as a pandas Series indexed by year."""
    return frame.groupby('pickup_year')['fare_per_mile'].mean().compute()

fare_per_mile_yr                  = _yearly_mean_fare_per_mile(df)
fare_per_mile_day_yr              = _yearly_mean_fare_per_mile(df_day)
fare_per_mile_peak_hours_yr       = _yearly_mean_fare_per_mile(df_peak_hours)
fare_per_mile_night_yr            = _yearly_mean_fare_per_mile(df_night)
fare_per_mile_weekday_yr          = _yearly_mean_fare_per_mile(df.loc[df.pickup_weekday<=4])
fare_per_mile_weekend_yr          = _yearly_mean_fare_per_mile(df.loc[df.pickup_weekday>=5])
fare_per_mile_airport_pickup_yr   = _yearly_mean_fare_per_mile(df_airport_pickup)
fare_per_mile_airport_dropoff_yr  = _yearly_mean_fare_per_mile(df_airport_dropoff)
fare_per_mile_between_airports_yr = _yearly_mean_fare_per_mile(df_between_airports)
fare_per_mile_jfk_manhattan_yr    = _yearly_mean_fare_per_mile(df_jfk_manhattan)

# (type label, yearly averages) pairs in plotting order.
# Fixed: the weekday averages used to be labelled 'weekend' and the weekend
# averages 'weekday'; each label is now paired with its own series.
yearly_segments = [
    ('overall',          fare_per_mile_yr),
    ('day_time',         fare_per_mile_day_yr),
    ('peak_hours',       fare_per_mile_peak_hours_yr),
    ('night_time',       fare_per_mile_night_yr),
    ('weekend',          fare_per_mile_weekend_yr),
    ('weekday',          fare_per_mile_weekday_yr),
    ('airport_pickup',   fare_per_mile_airport_pickup_yr),
    ('airport_dropoff',  fare_per_mile_airport_dropoff_yr),
    ('jfk_manhattan',    fare_per_mile_jfk_manhattan_yr),
    ('between_airports', fare_per_mile_between_airports_yr),
]

# long-format table: one row per (year, segment type)
fare_mile = pd.concat([
            pd.DataFrame({'year': yearly.index, 'avg_fare': yearly.values, 'type': label})
            for label, yearly in yearly_segments
            ]).reset_index(drop = True)

In [None]:
# grouped bars: one group per segment type, one bar per year
plt.figure(figsize=(20,12)) 
ax = sns.barplot(x="type", y="avg_fare", hue="year", data=fare_mile, palette="Blues")
plt.title("fare per mile - trip average", fontsize=16)
plt.ylabel('fare $ USD', fontsize=11)
plt.xlabel('')
plt.xticks(fontsize=11, rotation =90)
plt.yticks(fontsize=11)

# label every bar with its value, written vertically 1.5 units below the bar top
for p in ax.patches:
    height = p.get_height()
    ax.text(p.get_x()+p.get_width()/2.,
            height-1.5,
            '${:1.2f}'.format(height),
            ha="center", rotation=90) 

### Model: XGBoost

The features selected for model training:

pickup_longitude | pickup_latitude | dropoff_longitude | dropoff_latitude | pickup_year | pickup_month | pickup_dd | pickup_weekday | pickup_hour | pickup_minute

distance_between_pickup_dropoff
distance_between_pickup_jfk
distance_between_dropoff_jfk
distance_between_pickup_ewr
distance_between_dropoff_ewr
distance_between_pickup_lga
distance_between_dropoff_lga
distance_between_citycenter_pickup

longitude_diff | latitude_diff

In [None]:
df.compute().info()

In [None]:
df.head(10)

#### Train/Test Data Split

In [None]:
import xgboost as xgb
import dask_xgboost as dxgb
from sklearn.metrics import mean_squared_error

# columns that are not model inputs: the target, a quantity derived from the
# target, and the raw timestamp string
non_feature_columns = ['fare_amount', 'fare_per_mile', 'pickup_datetime']

X =  df.drop(non_feature_columns, axis=1)
y =  df.fare_amount

# Split the rows ONCE and derive features and target from the same pieces.
# Splitting X and y with two separate random_split calls only lines up as long
# as both calls happen to use the same seed and partitioning.
train_rows, test_rows = df.random_split([0.7, 0.3], random_state=0)

X_train, X_test = train_rows.drop(non_feature_columns, axis=1), test_rows.drop(non_feature_columns, axis=1)
y_train, y_test = train_rows.fare_amount, test_rows.fare_amount

In [None]:
# model definition and training the model 

def dxgb_evaluate(num_boost_round=100):
    """Train a distributed XGBoost regressor on X_train / y_train.

    Parameters
    ----------
    num_boost_round : number of boosting rounds (default 100, the value the
        notebook intended to use).

    Returns
    -------
    The trained booster returned by ``dxgb.train``.
    """
    # Booster parameters only. Removed from this dict:
    #   * 'num_boost_round' - a training-call argument, not a booster
    #     parameter; inside the dict it did not control the number of rounds
    #   * 'nfold' and 'boosting_type' - not booster parameters of xgboost
    #   * the second, duplicate 'seed' entry
    params = {'eval_metric'        : 'rmse'
              ,'max_depth'         : 7
              ,'seed'              : 0
              ,'subsample'         : 0.8
              ,'silent'            : True
              ,'gamma'             : 1
              ,'colsample_bytree'  : 0.9
         }

    # extra keyword arguments of dxgb.train are forwarded to the xgboost
    # training call, which is where the number of rounds is read from
    bst = dxgb.train(client, params, X_train, y_train, num_boost_round=num_boost_round)
    return bst

In [None]:
#train the model

bst = dxgb_evaluate()

In [None]:
# predictions of the trained booster on both parts of the 70/30 split

# predictions on the 70% training split (X_train)
X_train_predictions = dxgb.predict(client, bst, X_train)

# predictions on the 30% held-out split (X_test)
X_test_predictions = dxgb.predict(client, bst, X_test)

In [None]:
# RMSE scores on the two parts of the training-data split

# Report testing and training RMSE (square root of sklearn's mean squared error);
# the \033[...m sequences are ANSI terminal colour codes
print("\033[1;37;40m\033[2;37:40mdata category \t\t\trmse-score\033[0m")
# "train test split" is the 30% held-out part (y_test / X_test_predictions)
print('train test split \t\t\033[0;37;41m  {0:.2f}  \033[0m'.format(np.sqrt(mean_squared_error(y_test, X_test_predictions))))
# "train split" is the 70% part the model was fitted on
print('train split \t\t\t\033[0;37;41m  {0:.2f}  \033[0m\n'.format(np.sqrt(mean_squared_error(y_train, X_train_predictions))))

### Feature Importance Plotting

In [None]:
# feature importance of the trained booster (at most 20 features)
fig, ax = plt.subplots(figsize=(12, 8))
ax = xgb.plot_importance(bst, ax=ax, height=0.8, max_num_features=20, color='b')
# hide the horizontal grid lines; the string "off" is truthy and therefore
# switches the grid ON in current Matplotlib, so a real boolean is passed
ax.grid(False, axis="y")

In [None]:
#for i in range(100):
#    print(s[i], y_pred[i].round(1))

### Test Data Predictions

In [None]:
# load the Kaggle test file ('N' = no sampling, no row filtering, no fare_per_mile)
# and add the same date and distance features as for the training data
df_test = load_data(data_test_file_path, train_data='N')
df_test = calculate_coordinates_distance(df_test, train_data='N')

# keep the row keys for the submission file, then drop the columns that were
# not part of the training features
df_test_key = df_test.key
df_test     = df_test.drop(['key', 'pickup_datetime'], axis=1)

In [None]:
df_test.compute().info()

In [None]:
df_test.head(5)

In [None]:
# predictions for the Kaggle test set (df_test), using the trained booster
test_predictions = dxgb.predict(client, bst, df_test)

### Submitting the Predictions

In [None]:
# materialise the lazily evaluated keys and predictions, pair them up and write
# the two-column submission file (no index column)
submission_predictions  = pd.DataFrame({'key': df_test_key.compute(), 'fare_amount': test_predictions.compute()})
submission_predictions.to_csv('submission.csv', index=False)

In [None]:
submission_predictions