# Creating Dashboards with Streamlit

## 01. Import Libraries
## 02. Import Data
## 03. Data Wrangling/Preprocessing
## 04. Create Charts with plotly
#### 04.1 Bar Chart
#### 04.2 Dual-Axis Line Chart
#### 04.3 The Map

## 01. Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import streamlit as st
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from datetime import datetime as dt
from streamlit_keplergl import keplergl_static

#Install the requirements **streamlit**
#conda install -c conda-forge streamlit
#pip install plotly streamlit streamlit_keplergl numerize pillow

  _set_context_ca_bundle_path(ca_bundle_path)


In [2]:
## 02. Import Data

In [3]:
#Load the ride data from a CSV file located in the working directory
df = pd.read_csv(filepath_or_buffer='SeabornViz.csv')

In [4]:
df.head(3)

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual,avgTemp,daily_bike_rides,_merge,value
0,B42E6C17AF007611,classic_bike,2022-01-01,2022-01-01 00:25:03,Hoboken Terminal - River St & Hudson Pl,HB102,Adams St & 2 St,HB407,40.736068,-74.029127,40.739814,-74.036904,member,11.6,592,both,1
1,D5C9BBDBC66CEBF6,classic_bike,2022-01-01,2022-01-01 13:57:13,14 St Ferry - 14 St & Shipyard Ln,HB202,Hoboken Terminal - River St & Hudson Pl,HB102,40.752961,-74.024353,40.736068,-74.029127,member,11.6,592,both,1
2,F7D69E45F52F0ECA,electric_bike,2022-01-01,2022-01-02 00:34:32,South Waterfront Walkway - Sinatra Dr & 1 St,HB103,Brunswick & 6th,JC081,40.736982,-74.027781,40.726012,-74.050389,casual,11.6,592,both,1


In [5]:
df.dtypes

ride_id                object
rideable_type          object
started_at             object
ended_at               object
start_station_name     object
start_station_id       object
end_station_name       object
end_station_id         object
start_lat             float64
start_lng             float64
end_lat               float64
end_lng               float64
member_casual          object
avgTemp               float64
daily_bike_rides        int64
_merge                 object
value                   int64
dtype: object

## 03. Data Wrangling/Preprocessing

In [6]:
#Parse the 'started_at' strings (YYYY-MM-DD) into a datetime 'date' column
started_at = df['started_at']
df['date'] = pd.to_datetime(started_at, format='%Y-%m-%d')

In [7]:
#Extract the calendar month number from the parsed 'date' column
df['month'] = df['date'].dt.month

In [8]:
#Cast the month numbers to the default integer dtype
df['month'] = df['month'].astype(int)

In [9]:
df.head(3)

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual,avgTemp,daily_bike_rides,_merge,value,date,month
0,B42E6C17AF007611,classic_bike,2022-01-01,2022-01-01 00:25:03,Hoboken Terminal - River St & Hudson Pl,HB102,Adams St & 2 St,HB407,40.736068,-74.029127,40.739814,-74.036904,member,11.6,592,both,1,2022-01-01,1
1,D5C9BBDBC66CEBF6,classic_bike,2022-01-01,2022-01-01 13:57:13,14 St Ferry - 14 St & Shipyard Ln,HB202,Hoboken Terminal - River St & Hudson Pl,HB102,40.752961,-74.024353,40.736068,-74.029127,member,11.6,592,both,1,2022-01-01,1
2,F7D69E45F52F0ECA,electric_bike,2022-01-01,2022-01-02 00:34:32,South Waterfront Walkway - Sinatra Dr & 1 St,HB103,Brunswick & 6th,JC081,40.736982,-74.027781,40.726012,-74.050389,casual,11.6,592,both,1,2022-01-01,1


In [10]:
df.dtypes

ride_id                       object
rideable_type                 object
started_at                    object
ended_at                      object
start_station_name            object
start_station_id              object
end_station_name              object
end_station_id                object
start_lat                    float64
start_lng                    float64
end_lat                      float64
end_lng                      float64
member_casual                 object
avgTemp                      float64
daily_bike_rides               int64
_merge                        object
value                          int64
date                  datetime64[ns]
month                          int64
dtype: object

In [11]:
#Deriving variable - Create the season column
#An explicit month -> season lookup is used so that every calendar month (1-12) is covered.
#The earlier chained conditional tested `4 < month <= 5` for spring, so April (4) matched
#no branch and fell through to the default "fall".
#NOTE(review): March stays in "winter", as in the original condition (1 <= month <= 3);
#confirm whether it should be "spring" instead.
season_by_month = {
    12: "winter", 1: "winter", 2: "winter", 3: "winter",
    4: "spring", 5: "spring",
    6: "summer", 7: "summer", 8: "summer",
    9: "fall", 10: "fall", 11: "fall",
}

#Vectorised lookup; a month value outside 1-12 would yield NaN instead of a season label
df['season'] = df['month'].map(season_by_month)

In [12]:
df.head(3)

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual,avgTemp,daily_bike_rides,_merge,value,date,month,season
0,B42E6C17AF007611,classic_bike,2022-01-01,2022-01-01 00:25:03,Hoboken Terminal - River St & Hudson Pl,HB102,Adams St & 2 St,HB407,40.736068,-74.029127,40.739814,-74.036904,member,11.6,592,both,1,2022-01-01,1,winter
1,D5C9BBDBC66CEBF6,classic_bike,2022-01-01,2022-01-01 13:57:13,14 St Ferry - 14 St & Shipyard Ln,HB202,Hoboken Terminal - River St & Hudson Pl,HB102,40.752961,-74.024353,40.736068,-74.029127,member,11.6,592,both,1,2022-01-01,1,winter
2,F7D69E45F52F0ECA,electric_bike,2022-01-01,2022-01-02 00:34:32,South Waterfront Walkway - Sinatra Dr & 1 St,HB103,Brunswick & 6th,JC081,40.736982,-74.027781,40.726012,-74.050389,casual,11.6,592,both,1,2022-01-01,1,winter


In [13]:
df.shape

(895485, 20)

In [14]:
df.columns

Index(['ride_id', 'rideable_type', 'started_at', 'ended_at',
       'start_station_name', 'start_station_id', 'end_station_name',
       'end_station_id', 'start_lat', 'start_lng', 'end_lat', 'end_lng',
       'member_casual', 'avgTemp', 'daily_bike_rides', '_merge', 'value',
       'date', 'month', 'season'],
      dtype='object')

## 04. Create Charts with plotly

## 04.1 Bar Chart

In [15]:
#Sum the 'value' column per 'start_station_name' (used as the trip frequency/count per station)
trips_by_station = df.groupby('start_station_name', as_index=False)
df_groupby_bar = trips_by_station['value'].sum()

In [16]:
df_groupby_bar.head(3)

Unnamed: 0,start_station_name,value
0,11 St & Washington St,15502
1,12 St & Sinatra Dr N,17137
2,14 St Ferry - 14 St & Shipyard Ln,17066


In [17]:
#Keep the 20 stations with the largest 'value' totals
top_20 = df_groupby_bar.nlargest(n=20, columns='value')

In [18]:
top_20.shape

(20, 2)

In [19]:
#Build a bar trace for the top 20 stations and wrap it in a figure object
station_bars = go.Bar(x=top_20['start_station_name'], y=top_20['value'])
fig = go.Figure(data=station_bars)

In [None]:
fig

In [None]:
#Rebuild the bar chart with customised settings: each bar is shaded by its 'value' on the 'Blues' colour scale
bar_marker = {'color': top_20['value'], 'colorscale': 'Blues'}
station_bars = go.Bar(
    x=top_20['start_station_name'],
    y=top_20['value'],
    marker=bar_marker,
)
fig = go.Figure(data=station_bars)
fig.show()

In [84]:
##Update the bar chart layout directly: title, axis labels and figure size
bar_layout = dict(
    title='Top 20 most popular bike stations in New York',
    xaxis_title='Start stations',
    yaxis_title='Sum of trips',
    width=900,
    height=600,
)
fig.update_layout(**bar_layout)

#Render the plotly figure through Streamlit
st.plotly_chart(fig, use_container_width=True)



DeltaGenerator()

In [85]:
#Write the top 20 stations table to a CSV file in the working directory
top_20.to_csv(path_or_buf='top20.csv')

## 04.2 Dual-Axis Line Chart

In [20]:
#Create the figure for the line chart: a single subplot that has a secondary y-axis (dual axis)
dual_axis_spec = {"secondary_y": True}
fig2 = make_subplots(specs=[[dual_axis_spec]])

In [21]:
#fig
df.columns

Index(['ride_id', 'rideable_type', 'started_at', 'ended_at',
       'start_station_name', 'start_station_id', 'end_station_name',
       'end_station_id', 'start_lat', 'start_lng', 'end_lat', 'end_lng',
       'member_casual', 'avgTemp', 'daily_bike_rides', '_merge', 'value',
       'date', 'month', 'season'],
      dtype='object')

In [22]:
df.head(3)

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual,avgTemp,daily_bike_rides,_merge,value,date,month,season
0,B42E6C17AF007611,classic_bike,2022-01-01,2022-01-01 00:25:03,Hoboken Terminal - River St & Hudson Pl,HB102,Adams St & 2 St,HB407,40.736068,-74.029127,40.739814,-74.036904,member,11.6,592,both,1,2022-01-01,1,winter
1,D5C9BBDBC66CEBF6,classic_bike,2022-01-01,2022-01-01 13:57:13,14 St Ferry - 14 St & Shipyard Ln,HB202,Hoboken Terminal - River St & Hudson Pl,HB102,40.752961,-74.024353,40.736068,-74.029127,member,11.6,592,both,1,2022-01-01,1,winter
2,F7D69E45F52F0ECA,electric_bike,2022-01-01,2022-01-02 00:34:32,South Waterfront Walkway - Sinatra Dr & 1 St,HB103,Brunswick & 6th,JC081,40.736982,-74.027781,40.726012,-74.050389,casual,11.6,592,both,1,2022-01-01,1,winter


In [None]:
#Add two line traces to fig2: bike rides on the primary y-axis, temperature on the secondary y-axis
rides_trace = go.Scatter(x=df['date'], y=df['daily_bike_rides'], name='Daily bike rides')
temperature_trace = go.Scatter(x=df['date'], y=df['avgTemp'], name='Daily temperature')

fig2.add_trace(rides_trace, secondary_y=False)  #primary axis
fig2.add_trace(temperature_trace, secondary_y=True)  #secondary axis

In [None]:
#Rebuild the dual-axis line chart with explicit colours for the two lines.
#The figure is created afresh in this cell: calling add_trace again on an fig2 that
#already holds the two traces would stack a second pair on top of them, duplicating
#every line and legend entry.
fig2 = make_subplots(specs = [[{"secondary_y": True}]])

#Each marker dict carries a single 'color' key. The earlier version listed 'color' twice
#in the same dict literal, so the first value was silently discarded and only the
#named colour took effect.
fig2.add_trace(
    go.Scatter(x = df['date'], y = df['daily_bike_rides'], name = 'Daily bike rides',
               marker = {'color': 'blue'}),
    secondary_y = False) #primary axis

fig2.add_trace(
    go.Scatter(x = df['date'], y = df['avgTemp'], name = 'Daily temperature',
               marker = {'color': 'red'}),
    secondary_y = True) #secondary axis

In [None]:
#Update the line chart layout directly: title, x-axis label and figure size
line_layout = dict(
    title='Daily bike rides and temperature in 2022 - New York CitiBike',
    xaxis_title='2022',
    width=900,
    height=600,
)
fig2.update_layout(**line_layout)

#Render the dual-axis figure through Streamlit
st.plotly_chart(fig2, use_container_width=True)

In [24]:
#Release some unused memory
import gc #standard-library garbage collector interface
gc.collect() #run a collection now; the integer it returns is displayed as the cell output

288

## 04.3 The Map

In [98]:
#Path to the exported map HTML file
path_to_html = "NewYork_CitiBike_Trips_Aggregated.html"

#Read the whole file into a string.
#The encoding is passed explicitly so the read does not depend on the platform default
#(e.g. cp1252 on Windows), which can raise UnicodeDecodeError on non-ASCII characters.
#NOTE(review): assumes the exported HTML is UTF-8 encoded - confirm against the export step.
with open(path_to_html, 'r', encoding = 'utf-8') as f:
    html_data = f.read()

#Add header
st.header("NewYork_CitiBike_Trips_Aggregated")

#Render the HTML on the dashboard (shown in the web page)
st.components.v1.html(html_data,height = 1000)



DeltaGenerator()