# Setup

Packages for data processing and visualization

In [2]:
import pandas as pd
import numpy as np
import os

import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

Mount Google Drive

In [3]:
from google.colab import drive

# Mount point inside the Colab filesystem; every data path below starts here.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [4]:
# Folder on the mounted drive that holds the processed trip CSV files.
dc_data_directory = (
    "/content/drive/MyDrive/DC project 2"
    "/Trip data w taxi codes"
)

# Read Data

In [5]:
# Load the processed Uber trips; "datetime" and "date" are parsed into
# datetime64 columns so they can be grouped and plotted as dates.
uber_csv_path = os.path.join(
    dc_data_directory, "uber_all_processed_date_time_split.csv"
)
uber = pd.read_csv(uber_csv_path, parse_dates=["datetime", "date"])

In [6]:
# Show the first five trips as a quick look at the loaded columns.
uber.head(n=5)

Unnamed: 0,datetime,lat,lon,base,locationID,borough,zone,date,time
0,2014-04-01 00:00:00,40.7188,-73.9863,B02598,232,Manhattan,Two Bridges/Seward Park,2014-04-01,00:00:00
1,2014-04-01 00:00:00,40.7637,-73.96,B02598,141,Manhattan,Lenox Hill West,2014-04-01,00:00:00
2,2014-04-01 00:00:00,40.7215,-73.9952,B02682,144,Manhattan,Little Italy/NoLiTa,2014-04-01,00:00:00
3,2014-04-01 00:01:00,40.7355,-73.9966,B02617,113,Manhattan,Greenwich Village North,2014-04-01,00:01:00
4,2014-04-01 00:02:00,40.7184,-73.9601,B02682,255,Brooklyn,Williamsburg (North Side),2014-04-01,00:02:00


# Analysis by borough

Trips by borough

In [7]:
# Total number of trips per borough, largest first.
borough_column = uber["borough"]
borough_column.value_counts()

Manhattan        13814497
Brooklyn          2915605
Queens            1693861
Bronx              251728
EWR                 31462
Staten Island        7993
Name: borough, dtype: int64

Number of trips by day and borough

In [8]:
# Count the trips in every (borough, date) group and flatten the result into
# three plain columns: borough, date, count.
trips_by_day_zone = (
    uber.groupby(["borough", "date"])
    .size()
    .reset_index(name="count")
)
trips_by_day_zone

Unnamed: 0,borough,date,count
0,Bronx,2014-04-01,64
1,Bronx,2014-04-02,77
2,Bronx,2014-04-03,84
3,Bronx,2014-04-04,79
4,Bronx,2014-04-05,107
...,...,...,...
2073,Staten Island,2015-06-26,65
2074,Staten Island,2015-06-27,89
2075,Staten Island,2015-06-28,42
2076,Staten Island,2015-06-29,31


In [9]:
# One marker per (borough, date) row, coloured by borough.
fig = px.scatter(
    trips_by_day_zone,
    x="date",
    y="count",
    color="borough",
)

# Title centred over the plotting area, plus axis and legend labels.
layout_options = dict(
    title=dict(text="Daily Ridership by Borough", x=0.5, xref="paper"),
    xaxis_title="Date",
    yaxis_title="Number of Trips",
    legend_title="Borough",
    font_size=15,
)
fig.update_layout(**layout_options)

fig.update_traces(marker=dict(size=6))

# Thin black spike line on the x axis, drawn across the plot and snapped to
# the data points.
fig.update_xaxes(
    showspikes=True,
    spikemode="across",
    spikesnap="data",
    spikecolor="black",
    spikethickness=1,
)

fig.show()

In [10]:
# Export step, currently disabled: uncomment to write the borough scatterplot
# held in `fig` to an HTML file on the mounted drive.
# fig.write_html("/content/drive/MyDrive/DC project 2/plotly_scatterplots/scatterplot_zones.html")

# Incorporate Weather Data

## Load weather data

In [11]:
# Daily weather flags. The "Unnamed: 0" column is dropped right after loading
# (presumably a row index saved with the CSV — verify against the file).
weather_csv_path = "/content/drive/MyDrive/DC project 2/weather_data_daily.csv"
weather_data = (
    pd.read_csv(weather_csv_path, parse_dates=["date"])
    .drop(columns=["Unnamed: 0"])
)

In [12]:
# Show the first five days of weather flags.
weather_data.head(n=5)

Unnamed: 0,date,hot,cold,is_raining
0,2014-04-01,False,True,False
1,2014-04-02,False,False,True
2,2014-04-03,False,False,False
3,2014-04-04,False,False,True
4,2014-04-05,False,False,True


## Total trips per day

In [13]:
# City-wide trip total for each calendar date, as two columns: date, count.
uber_daily = (
    uber.groupby("date")
    .size()
    .reset_index(name="count")
)
uber_daily.head()

Unnamed: 0,date,count
0,2014-04-01,14254
1,2014-04-02,17179
2,2014-04-03,20363
3,2014-04-04,26294
4,2014-04-05,19071


## Combine trip and weather data

In [14]:
# Attach the daily weather flags to the daily trip totals.
# The left join keeps every trip date even when no weather row matches it
# (those flags come back as NaN). `validate="one_to_one"` makes pandas raise a
# MergeError if either side has duplicated dates, instead of silently
# duplicating rows and distorting the plots below.
uber_weather_daily = pd.merge(
    uber_daily,
    weather_data,
    how="left",
    on="date",
    validate="one_to_one",
)
uber_weather_daily

Unnamed: 0,date,count,hot,cold,is_raining
0,2014-04-01,14254,False,True,False
1,2014-04-02,17179,False,False,True
2,2014-04-03,20363,False,False,False
3,2014-04-04,26294,False,False,True
4,2014-04-05,19071,False,False,True
...,...,...,...,...,...
359,2015-06-26,99524,False,False,False
360,2015-06-27,136201,False,False,True
361,2015-06-28,93139,False,False,True
362,2015-06-29,74453,False,False,True


Impact of rain on ridership

In [15]:
# Daily trip totals, coloured by the `is_raining` flag.
fig = px.scatter(
    uber_weather_daily,
    x="date",
    y="count",
    color="is_raining",
)

# Title centred over the plotting area, axis and legend labels, and the
# legend entries listed in reverse trace order.
fig.update_layout(
    title=dict(text="Impact of Rain on Daily Ridership ", x=0.5, xref="paper"),
    xaxis_title="Date",
    yaxis_title="Number of Trips",
    legend_title="Is Raining",
    legend_traceorder="reversed",
    font_size=15,
)

fig.update_traces(marker=dict(size=6))

fig.show()

In [16]:
# Export step, currently disabled: uncomment to write the rain scatterplot
# held in `fig` to an HTML file on the mounted drive.
# fig.write_html("/content/drive/MyDrive/DC project 2/plotly_scatterplots/scatterplot_rain.html")

Impact of cold on ridership

In [18]:
# Daily trip totals, coloured by the `cold` flag.
fig = px.scatter(
    uber_weather_daily,
    x="date",
    y="count",
    color="cold",
)

# Title centred over the plotting area, plus axis and legend labels.
# NOTE(review): unlike the rain and heat plots, the legend order is not
# reversed here — presumably because the first row already has cold=True, so
# the True entry is listed first anyway; confirm this is intentional.
fig.update_layout(
    title=dict(
        text="Impact of Wind Chill Advisory on Daily Ridership ",
        x=0.5,
        xref="paper",
    ),
    xaxis_title="Date",
    yaxis_title="Number of Trips",
    legend_title="Wind Chill Advisory",
    font_size=15,
)

fig.update_traces(marker=dict(size=6))

fig.show()

In [20]:
# Export step, currently disabled: uncomment to write the cold scatterplot
# held in `fig` to an HTML file on the mounted drive.
# fig.write_html("/content/drive/MyDrive/DC project 2/plotly_scatterplots/scatterplot_cold.html")

Impact of heat on ridership

In [None]:
# Daily trip totals, coloured by the `hot` flag.
fig = px.scatter(
    uber_weather_daily,
    x="date",
    y="count",
    color="hot",
)

# Title centred over the plotting area, axis and legend labels, and the
# legend entries listed in reverse trace order.
fig.update_layout(
    title=dict(text="Impact of Heat on Daily Ridership ", x=0.5, xref="paper"),
    xaxis_title="Date",
    yaxis_title="Number of Trips",
    legend_title="Is Hot",
    legend_traceorder="reversed",
    font_size=15,
)

fig.update_traces(marker=dict(size=6))

fig.show()

In [None]:
# Write the heat scatterplot held in `fig` to an HTML file on the mounted drive.
hot_plot_html_path = "/content/drive/MyDrive/DC project 2/plotly_scatterplots/scatterplot_hot.html"
fig.write_html(hot_plot_html_path)