In [1]:
#Importing Libraries
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from datetime import datetime as dt
from streamlit_keplergl import keplergl_static

## Create a bar chart using Plotly showing the top 20 bike-sharing stations by number of trips

In [2]:
# Load the cleaned ride-level dataset from the prepared-data pickle and preview it.
# NOTE(review): this is an absolute, machine-specific path; the notebook only runs
# where this exact folder exists. Only unpickle files from a trusted source.
df = pd.read_pickle(
    filepath_or_buffer=r"C:\Users\karen\NYC Bike Sharing Project\02 Data\Prepared Data\dataset_cleaned.pkl"
)
df.head()

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual,average_temp,bike_rides_daily,value
0,C401E951D3CE9CF1,classic_bike,2022-01-01 02:13:26,2022-01-01 02:14:57,Greenwich St & Perry St,5922.04,Perry St & Bleecker St,5922.07,40.734982,-74.006973,40.735355,-74.004829,member,11.6,20895,1
1,008C583994B6DC9E,docked_bike,2022-01-01 14:05:37,2022-01-01 14:24:46,Columbus Ave & W 72 St,7175.05,5 Ave & E 78 St,7161.08,40.777058,-73.978989,40.776321,-73.964272,casual,11.6,20895,1
2,2AFE5C9C23A7251A,classic_bike,2022-01-01 10:46:52,2022-01-01 11:05:27,E 11 St & 1 Ave,5746.14,Division St & Bowery,5270.08,40.729538,-73.984268,40.714191,-73.996735,member,11.6,20895,1
3,4723CAEF032C4D35,classic_bike,2022-01-01 00:12:08,2022-01-01 00:25:18,Hicks St & Montague St,4645.09,Henry St & Degraw St,4380.08,40.695129,-73.995949,40.68475,-73.999176,casual,11.6,20895,1
4,6E12358CBFCB35D0,electric_bike,2022-01-01 14:16:58,2022-01-01 14:23:32,Central Park West & W 72 St,7141.07,Central Park W & W 91 St,7453.01,40.775795,-73.976204,40.788666,-73.966797,member,11.6,20895,1


In [3]:
# Grouping rides by start station and summing the per-ride "value" column
# gives the number of trips that started at each station.
df_groupby_bar = df.groupby('start_station_name', as_index=False)['value'].sum()

# Keep the 20 stations with the highest trip totals.
top20 = df_groupby_bar.nlargest(n=20, columns='value')

In [4]:
# Display the table of busiest start stations to check the aggregation result
top20

Unnamed: 0,start_station_name,value
1396,W 21 St & 6 Ave,20649
8,1 Ave & E 68 St,18828
661,E 17 St & Broadway,17219
417,Broadway & W 58 St,16494
387,Broadway & E 21 St,16238
1326,University Pl & E 14 St,15975
692,E 33 St & 1 Ave,15769
240,6 Ave & W 33 St,15385
265,8 Ave & W 33 St,14883
386,Broadway & E 14 St,14466


In [5]:
# Rename the "value" column to "trips".
# The renamed copy is assigned back to top20 instead of using inplace=True,
# so the rename is an explicit rebinding rather than a hidden mutation.
top20 = top20.rename(columns={'value': 'trips'})

In [6]:
# Write the top-20 table to top20.csv in the current working directory.
# index=True is the pandas default: the row index is written as the first,
# unnamed column of the file.
top20.to_csv('top20.csv', index=True)

In [7]:
# Plotly bar chart: one bar per start station in top20, bar height and colour
# both driven by the "trips" column, using the 'ice_r' continuous colour scale.
fig = px.bar(
    top20,
    x='start_station_name',
    y='trips',
    color='trips',
    color_continuous_scale='ice_r',
)

# Bold chart and axis titles; x-axis tick labels at font size 10.
fig.update_layout(
    title={
        'text': 'Top 20 Bike-Sharing Facilities Operated by Citi Bike',
        'font': {'weight': 'bold'},
    },
    xaxis={
        'title': {'text': 'Start Station Name', 'font': {'weight': 'bold'}},
        'tickfont': {'size': 10},
    },
    yaxis={
        'title': {'text': 'Number of Trips', 'font': {'weight': 'bold'}},
    },
)
fig.show()

In [8]:
# Work on an independent (deep) copy so the loaded DataFrame `df` stays untouched
df_copy = df.copy(deep=True)

# Preview the first five rows of the copy
df_copy.head()

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual,average_temp,bike_rides_daily,value
0,C401E951D3CE9CF1,classic_bike,2022-01-01 02:13:26,2022-01-01 02:14:57,Greenwich St & Perry St,5922.04,Perry St & Bleecker St,5922.07,40.734982,-74.006973,40.735355,-74.004829,member,11.6,20895,1
1,008C583994B6DC9E,docked_bike,2022-01-01 14:05:37,2022-01-01 14:24:46,Columbus Ave & W 72 St,7175.05,5 Ave & E 78 St,7161.08,40.777058,-73.978989,40.776321,-73.964272,casual,11.6,20895,1
2,2AFE5C9C23A7251A,classic_bike,2022-01-01 10:46:52,2022-01-01 11:05:27,E 11 St & 1 Ave,5746.14,Division St & Bowery,5270.08,40.729538,-73.984268,40.714191,-73.996735,member,11.6,20895,1
3,4723CAEF032C4D35,classic_bike,2022-01-01 00:12:08,2022-01-01 00:25:18,Hicks St & Montague St,4645.09,Henry St & Degraw St,4380.08,40.695129,-73.995949,40.68475,-73.999176,casual,11.6,20895,1
4,6E12358CBFCB35D0,electric_bike,2022-01-01 14:16:58,2022-01-01 14:23:32,Central Park West & W 72 St,7141.07,Central Park W & W 91 St,7453.01,40.775795,-73.976204,40.788666,-73.966797,member,11.6,20895,1


In [9]:
# Reduce "started_at" from a full timestamp to its calendar date:
# parse the column with pd.to_datetime, then keep only the date part.
df_copy['started_at'] = df_copy['started_at'].pipe(pd.to_datetime).dt.date

In [10]:
# Preview the first five rows to confirm "started_at" now holds dates only
df_copy.head()

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual,average_temp,bike_rides_daily,value
0,C401E951D3CE9CF1,classic_bike,2022-01-01,2022-01-01 02:14:57,Greenwich St & Perry St,5922.04,Perry St & Bleecker St,5922.07,40.734982,-74.006973,40.735355,-74.004829,member,11.6,20895,1
1,008C583994B6DC9E,docked_bike,2022-01-01,2022-01-01 14:24:46,Columbus Ave & W 72 St,7175.05,5 Ave & E 78 St,7161.08,40.777058,-73.978989,40.776321,-73.964272,casual,11.6,20895,1
2,2AFE5C9C23A7251A,classic_bike,2022-01-01,2022-01-01 11:05:27,E 11 St & 1 Ave,5746.14,Division St & Bowery,5270.08,40.729538,-73.984268,40.714191,-73.996735,member,11.6,20895,1
3,4723CAEF032C4D35,classic_bike,2022-01-01,2022-01-01 00:25:18,Hicks St & Montague St,4645.09,Henry St & Degraw St,4380.08,40.695129,-73.995949,40.68475,-73.999176,casual,11.6,20895,1
4,6E12358CBFCB35D0,electric_bike,2022-01-01,2022-01-01 14:23:32,Central Park West & W 72 St,7141.07,Central Park W & W 91 St,7453.01,40.775795,-73.976204,40.788666,-73.966797,member,11.6,20895,1


In [11]:
# Drop the columns that are not needed downstream to shrink the dataset
# (ride/station identifiers, the end timestamp and the per-ride "value" counter).
df_final = df_copy.drop(
    columns=['ride_id', 'ended_at', 'start_station_id', 'end_station_id', 'value']
)
df_final.head()


Unnamed: 0,rideable_type,started_at,start_station_name,end_station_name,start_lat,start_lng,end_lat,end_lng,member_casual,average_temp,bike_rides_daily
0,classic_bike,2022-01-01,Greenwich St & Perry St,Perry St & Bleecker St,40.734982,-74.006973,40.735355,-74.004829,member,11.6,20895
1,docked_bike,2022-01-01,Columbus Ave & W 72 St,5 Ave & E 78 St,40.777058,-73.978989,40.776321,-73.964272,casual,11.6,20895
2,classic_bike,2022-01-01,E 11 St & 1 Ave,Division St & Bowery,40.729538,-73.984268,40.714191,-73.996735,member,11.6,20895
3,classic_bike,2022-01-01,Hicks St & Montague St,Henry St & Degraw St,40.695129,-73.995949,40.68475,-73.999176,casual,11.6,20895
4,electric_bike,2022-01-01,Central Park West & W 72 St,Central Park W & W 91 St,40.775795,-73.976204,40.788666,-73.966797,member,11.6,20895


In [12]:
# Write the reduced dataset to df_final.csv in the current working directory.
# index=True is the pandas default: the row index is written as the first,
# unnamed column of the file.
df_final.to_csv('df_final.csv', index=True)

## Create a dual-axis chart with the number of daily bike rides and average temperature. 

In [None]:

# Collapse the ride-level table to one row per calendar day before plotting.
# df_final holds one row per ride, and bike_rides_daily / average_temp are
# repeated on every ride of the same day (all 2022-01-01 rows in the previews
# above carry 20895 and 11.6). Plotting the raw columns would push every ride
# row into the figure, which likely explains why the rendered output was too
# large to keep. Sorting by date also guarantees the lines are drawn left to right.
# Assumes both values are constant within a day; the first row per date is kept.
df_daily = (
    df_final[['started_at', 'bike_rides_daily', 'average_temp']]
    .drop_duplicates(subset='started_at')
    .sort_values('started_at')
)

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# First trace: daily ride count on the primary (left) y-axis
fig.add_trace(
    go.Scatter(
        x=df_daily['started_at'],
        y=df_daily['bike_rides_daily'],
        name='Daily bike rides',
        line=dict(color='blue')
    ),
    secondary_y=False
)

# Second trace: daily average temperature on the secondary (right) y-axis
fig.add_trace(
    go.Scatter(
        x=df_daily['started_at'],
        y=df_daily['average_temp'],
        name='Daily temperature',
        line=dict(color='red')
    ),
    secondary_y=True
)

# Setting titles for the chart and the axes
fig.update_layout(
    title_text="Daily Bike Rides and Temperature",
    xaxis_title="Date",
    yaxis_title="Bike Rides",
    yaxis2_title="Temperature"
)

# Show the plot
fig.show()


# The output below has been deleted because it greatly increases the size of the file.