### 2.6 Creating Dashboards with Python

#### Import Libraries & Data

In [1]:
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from datetime import datetime as dt
from streamlit_keplergl import keplergl_static

In [2]:
# Load NY_data_sample.csv, using its first column as the row index
df = pd.read_csv('NY_data_sample.csv', index_col=0)

#### Wrangle Data

In [3]:
# Inspect the dtype of every column in the loaded frame
df.dtypes

ride_id                object
rideable_type          object
started_at             object
ended_at               object
start_station_name     object
end_station_name       object
start_lat             float64
start_lng             float64
end_lat               float64
end_lng               float64
member_casual          object
date                   object
avgTemp               float64
value                   int64
bike_rides_daily        int64
trip_duration         float64
dtype: object

In [4]:
# Parse 'date' (YYYY-MM-DD strings) and derive an integer month number from it

df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')
df['month'] = df['date'].dt.month.astype('int')

In [5]:
# Create the season column
#
# Months are mapped to meteorological (Northern Hemisphere) seasons:
# Dec-Feb winter, Mar-May spring, Jun-Aug summer, Sep-Nov fall.
# Note that March and April count as spring and September as fall.

season_by_month = {
    12: "winter", 1: "winter", 2: "winter",
    3: "spring", 4: "spring", 5: "spring",
    6: "summer", 7: "summer", 8: "summer",
    9: "fall", 10: "fall", 11: "fall",
}

# Vectorised lookup on the integer 'month' column
df['season'] = df['month'].map(season_by_month)

In [6]:
# Check the (rows, columns) size of the frame
df.shape

(298382, 18)

In [7]:
# List the column names currently in the frame
df.columns

Index(['ride_id', 'rideable_type', 'started_at', 'ended_at',
       'start_station_name', 'end_station_name', 'start_lat', 'start_lng',
       'end_lat', 'end_lng', 'member_casual', 'date', 'avgTemp', 'value',
       'bike_rides_daily', 'trip_duration', 'month', 'season'],
      dtype='object')

#### Use Plotly to produce a bar chart of the most popular start stations in New York

In [8]:
## Count rows per start station and keep the 20 largest counts

df['value'] = 1  # constant 1 per row, so summing 'value' counts the rows in each group
df_groupby_bar = (
    df.groupby('start_station_name', as_index=False)['value']
    .sum()
)
top20 = df_groupby_bar.nlargest(20, 'value')

In [None]:
# Plain bar chart: one bar per station in top20, bar height = summed 'value'
bar_trace = go.Bar(x=top20['start_station_name'], y=top20['value'])
fig = go.Figure(data=bar_trace)
fig.show()

In [None]:
# Same bar chart, with each bar shaded on the 'Blues' colorscale by its 'value'
fig = go.Figure(
    go.Bar(
        x=top20['start_station_name'],
        y=top20['value'],
        marker=dict(color=top20['value'], colorscale='Blues'),
    )
)
fig.show()

In [None]:
## Bar chart: set the title, axis labels and figure size on the current `fig`

fig.update_layout(
    title='Top 20 most popular bike stations in New York',
    xaxis_title='Start stations',
    yaxis_title='Sum of trips',
    width=1200,
    height=700,
)

In [17]:
# Write the top20 frame to top20.csv

top20.to_csv(path_or_buf='top20.csv')

#### Create a dual-axis line chart of the daily bike trips and temperatures in Plotly

In [None]:
# Make sure 'date' is datetime64 so it can serve as the resampling key
# (harmless if the column has already been parsed).
df['date'] = pd.to_datetime(df['date'])

# Daily means of the numeric columns. The frame also holds text columns
# (ride_id, station names, season, ...); taking .mean() over those raises a
# TypeError on pandas >= 2.0, so only numeric columns are resampled.
df_sample = (
    df.set_index('date')
    .select_dtypes(include='number')
    .resample('1D')
    .mean()
    .reset_index()
)

# Line chart with dual y-axes: rides on the left axis, temperature on the right
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Scatter(x=df_sample['date'], y=df_sample['bike_rides_daily'], name='Daily Bike Rides',
               line=dict(color='blue', width=2), opacity=0.7),
    secondary_y=False
)

fig.add_trace(
    go.Scatter(x=df_sample['date'], y=df_sample['avgTemp'], name='Daily Temperatures',
               line=dict(color='red', width=2), opacity=0.7),
    secondary_y=True
)

# Add figure title
fig.update_layout(
    title_text="Daily Bike Rides and Temperature"
)

# Set x-axis title
fig.update_xaxes(title_text="Date", showgrid=True)

# Set y-axes titles
fig.update_yaxes(title_text="Bike Rides", secondary_y=False, showgrid=True)
fig.update_yaxes(title_text="Temperature (°C)", secondary_y=True)

fig.show()



In [28]:
# Review the available columns before choosing which ones to drop
df.columns

Index(['ride_id', 'rideable_type', 'started_at', 'ended_at',
       'start_station_name', 'end_station_name', 'start_lat', 'start_lng',
       'end_lat', 'end_lng', 'member_casual', 'date', 'avgTemp', 'value',
       'bike_rides_daily', 'trip_duration', 'month', 'season'],
      dtype='object')

#### Drop columns not needed for the dashboard

In [32]:
# Build df_1: a new frame without the columns listed below (df itself is unchanged)

unused_columns = [
    'ride_id', 'started_at', 'ended_at',
    'start_lat', 'end_lat', 'start_lng', 'end_lng',
    'trip_duration', 'member_casual',
]
df_1 = df.drop(columns=unused_columns)

In [33]:
# Confirm which columns remain in the reduced frame
df_1.columns

Index(['rideable_type', 'start_station_name', 'end_station_name', 'date',
       'avgTemp', 'value', 'bike_rides_daily', 'month', 'season'],
      dtype='object')

In [34]:
# Write the reduced frame to reduced_data_to_plot.csv
df_1.to_csv(path_or_buf='reduced_data_to_plot.csv')