**Load the data**

In this project, we will be working with Uber and Lyft data. The data can be found here:

https://www.kaggle.com/datasets/brllrb/uber-and-lyft-dataset-boston-ma

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import seaborn as sns

In [5]:
uber_data = pd.read_csv("rideshare_kaggle.csv")
uber_data.head()

Unnamed: 0,id,timestamp,hour,day,month,datetime,timezone,source,destination,cab_type,...,precipIntensityMax,uvIndexTime,temperatureMin,temperatureMinTime,temperatureMax,temperatureMaxTime,apparentTemperatureMin,apparentTemperatureMinTime,apparentTemperatureMax,apparentTemperatureMaxTime
0,424553bb-7174-41ea-aeb4-fe06d4f4b9d7,1544953000.0,9,16,12,2018-12-16 09:30:07,America/New_York,Haymarket Square,North Station,Lyft,...,0.1276,1544979600,39.89,1545012000,43.68,1544968800,33.73,1545012000,38.07,1544958000
1,4bd23055-6827-41c6-b23b-3c491f24e74d,1543284000.0,2,27,11,2018-11-27 02:00:23,America/New_York,Haymarket Square,North Station,Lyft,...,0.13,1543251600,40.49,1543233600,47.3,1543251600,36.2,1543291200,43.92,1543251600
2,981a3613-77af-4620-a42a-0c0866077d1e,1543367000.0,1,28,11,2018-11-28 01:00:22,America/New_York,Haymarket Square,North Station,Lyft,...,0.1064,1543338000,35.36,1543377600,47.55,1543320000,31.04,1543377600,44.12,1543320000
3,c2d88af2-d278-4bfd-a8d0-29ca77cc5512,1543554000.0,4,30,11,2018-11-30 04:53:02,America/New_York,Haymarket Square,North Station,Lyft,...,0.0,1543507200,34.67,1543550400,45.03,1543510800,30.3,1543550400,38.53,1543510800
4,e0126e1f-8ca9-4f2e-82b3-50505a09db9a,1543463000.0,3,29,11,2018-11-29 03:49:20,America/New_York,Haymarket Square,North Station,Lyft,...,0.0001,1543420800,33.1,1543402800,42.18,1543420800,29.11,1543392000,35.75,1543420800


In [7]:
uber_data.shape

(693071, 57)

In [11]:
uber_data.dtypes

id                              object
timestamp                      float64
hour                             int64
day                              int64
month                            int64
datetime                        object
timezone                        object
source                          object
destination                     object
cab_type                        object
product_id                      object
name                            object
price                          float64
distance                       float64
surge_multiplier               float64
latitude                       float64
longitude                      float64
temperature                    float64
apparentTemperature            float64
short_summary                   object
long_summary                    object
precipIntensity                float64
precipProbability              float64
humidity                       float64
windSpeed                      float64
windGust                 

In [13]:
extra_columns = [ 'apparentTemperature','short_summary'                   object
'long_summary'                    object
'precipIntensity'                float64
'precipProbability'              float64
'humidity'                       float64
'windSpeed'                      float64
'windGust'                       float64
'windGustTime'                     int64
'visibility'                     float64
temperatureHigh                float64
temperatureHighTime              int64
temperatureLow                 float64
temperatureLowTime               int64
apparentTemperatureHigh        float64
apparentTemperatureHighTime      int64
apparentTemperatureLow         float64
apparentTemperatureLowTime       int64
icon                            object
dewPoint                       float64
pressure                       float64
windBearing                      int64
cloudCover                     float64
uvIndex                          int64
visibility.1                   float64
ozone                          float64
sunriseTime                      int64
sunsetTime                       int64
moonPhase                      float64
precipIntensityMax             float64
uvIndexTime                      int64
temperatureMin                 float64
temperatureMinTime               int64
temperatureMax                 float64
temperatureMaxTime               int64
apparentTemperatureMin         float64
apparentTemperatureMinTime       int64
apparentTemperatureMax         float64
apparentTemperatureMaxTime       int64 ]

uber_data = uber_data.drop(extra_columns, axis=1)

In [15]:
uber_data

Unnamed: 0,id,timestamp,hour,day,month,datetime,timezone,source,destination,cab_type,...,long_summary,precipProbability,windGust,temperatureHigh,temperatureLow,temperatureLowTime,icon,pressure,temperatureMin,temperatureMax
0,424553bb-7174-41ea-aeb4-fe06d4f4b9d7,1.544953e+09,9,16,12,2018-12-16 09:30:07,America/New_York,Haymarket Square,North Station,Lyft,...,Rain throughout the day.,0.0,9.17,43.68,34.19,1545048000,partly-cloudy-night,1021.98,39.89,43.68
1,4bd23055-6827-41c6-b23b-3c491f24e74d,1.543284e+09,2,27,11,2018-11-27 02:00:23,America/New_York,Haymarket Square,North Station,Lyft,...,"Rain until morning, starting again in the eve...",1.0,11.98,47.30,42.10,1543298400,rain,1003.97,40.49,47.30
2,981a3613-77af-4620-a42a-0c0866077d1e,1.543367e+09,1,28,11,2018-11-28 01:00:22,America/New_York,Haymarket Square,North Station,Lyft,...,Light rain in the morning.,0.0,7.33,47.55,33.10,1543402800,clear-night,992.28,35.36,47.55
3,c2d88af2-d278-4bfd-a8d0-29ca77cc5512,1.543554e+09,4,30,11,2018-11-30 04:53:02,America/New_York,Haymarket Square,North Station,Lyft,...,Partly cloudy throughout the day.,0.0,5.28,45.03,28.90,1543579200,clear-night,1013.73,34.67,45.03
4,e0126e1f-8ca9-4f2e-82b3-50505a09db9a,1.543463e+09,3,29,11,2018-11-29 03:49:20,America/New_York,Haymarket Square,North Station,Lyft,...,Mostly cloudy throughout the day.,0.0,9.14,42.18,36.71,1543478400,partly-cloudy-night,998.36,33.10,42.18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
693066,616d3611-1820-450a-9845-a9ff304a4842,1.543708e+09,23,1,12,2018-12-01 23:53:05,America/New_York,West End,North End,Uber,...,Light rain in the morning and overnight.,0.0,2.87,44.76,34.83,1543712400,partly-cloudy-night,1023.57,31.42,44.76
693067,633a3fc3-1f86-4b9e-9d48-2b7132112341,1.543708e+09,23,1,12,2018-12-01 23:53:05,America/New_York,West End,North End,Uber,...,Light rain in the morning and overnight.,0.0,2.87,44.76,34.83,1543712400,partly-cloudy-night,1023.57,31.42,44.76
693068,64d451d0-639f-47a4-9b7c-6fd92fbd264f,1.543708e+09,23,1,12,2018-12-01 23:53:05,America/New_York,West End,North End,Uber,...,Light rain in the morning and overnight.,0.0,2.87,44.76,34.83,1543712400,partly-cloudy-night,1023.57,31.42,44.76
693069,727e5f07-a96b-4ad1-a2c7-9abc3ad55b4e,1.543708e+09,23,1,12,2018-12-01 23:53:05,America/New_York,West End,North End,Uber,...,Light rain in the morning and overnight.,0.0,2.87,44.76,34.83,1543712400,partly-cloudy-night,1023.57,31.42,44.76
