# Building dashboards with Streamlit

In [44]:
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from datetime import datetime as dt
from streamlit_keplergl import keplergl_static

### Wrangle data

In [46]:
# Load the Citi Bike CSV; its first column is used as the DataFrame index
df = pd.read_csv(
    r'/Users/yasersouri/Desktop/newyork_Data/NewYork_CitiBike_data2.csv',
    index_col=0,
)

In [47]:
# Inspect the dtype of every column right after loading
df.dtypes

ride_id                object
rideable_type          object
started_at             object
ended_at               object
start_station_name     object
start_station_id       object
end_station_name       object
end_station_id         object
start_lat             float64
start_lng             float64
end_lat               float64
end_lng               float64
member_casual          object
date                   object
avgTemp               float64
bike_rides_daily        int64
_merge                 object
tripduration          float64
dtype: object

In [50]:
# Parse the date strings, then derive an integer month column from them

df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')
df['month'] = df['date'].dt.month.astype('int')

In [52]:
# Create the season column
#
# Uses meteorological seasons for the Northern Hemisphere:
#   winter = Dec-Feb, spring = Mar-May, summer = Jun-Aug, fall = Sep-Nov.
# The previous mapping labelled March and April as winter, left May as the
# only spring month and counted September as summer.

df['season'] = [
    "winter" if month in (12, 1, 2)
    else "spring" if 3 <= month <= 5
    else "summer" if 6 <= month <= 8
    else "fall"
    for month in df['month']
]

In [15]:
# Row and column count of the working frame
df.shape

(895485, 20)

In [17]:
# List the column names (the new month and season columns should be present)
df.columns

Index(['ride_id', 'rideable_type', 'started_at', 'ended_at',
       'start_station_name', 'start_station_id', 'end_station_name',
       'end_station_id', 'start_lat', 'start_lng', 'end_lat', 'end_lng',
       'member_casual', 'date', 'avgTemp', 'bike_rides_daily', '_merge',
       'tripduration', 'month', 'season'],
      dtype='object')

In [54]:
# Shorten the timestamp column names; done in place on df
df.rename(
    columns={
        'started_at': 'start_time',
        'ended_at': 'end_time',
    },
    inplace=True,
)

## Create the Plotly charts

In [56]:
## Count rows per start station and keep the 20 busiest

# A constant 1 per row turns the per-station sum into a row count
# (presumably one row per ride, given the ride_id column).
df['value'] = 1
df_groupby_bar = df.groupby('start_station_name', as_index=False)['value'].sum()
top20 = df_groupby_bar.nlargest(n=20, columns='value')

In [None]:
# Bar chart of the top-20 start stations; bar colour follows the trip count
# on the 'Blues' colour scale.
fig = go.Figure(
    data=[
        go.Bar(
            x=top20['start_station_name'],
            y=top20['value'],
            marker=dict(color=top20['value'], colorscale='Blues'),
        )
    ]
)
fig.show()

In [None]:
## Bar chart layout

# The source file is the New York Citi Bike dataset, so the title names
# New York (it previously said Chicago).
fig.update_layout(
    title = 'Top 20 most popular bike stations in New York',
    xaxis_title = 'Start stations',
    yaxis_title ='Sum of trips',
    width = 900, height = 600
)

### Dual-Axis Line Charts in Plotly

In [None]:
# Line chart
# Drop rows with missing values in the plotted columns
df = df.dropna(subset=['date', 'bike_rides_daily', 'avgTemp'])

# Sort by date
df = df.sort_values('date')

# df has hundreds of thousands of rows, while bike_rides_daily and avgTemp
# appear to be day-level values repeated on every row of that day
# (NOTE(review): confirm they are constant within a date). Plotting one row
# per date draws the same lines with a far smaller figure payload.
df_daily = df.drop_duplicates(subset='date')

fig2 = make_subplots(specs=[[{"secondary_y": True}]])

# Primary (left) axis: daily ride count
fig2.add_trace(
    go.Scatter(x=df_daily['date'], y=df_daily['bike_rides_daily'], mode='lines', name='Daily bike rides', line=dict(color='blue')),
    secondary_y=False
)

# Secondary (right) axis: daily average temperature
fig2.add_trace(
    go.Scatter(x=df_daily['date'], y=df_daily['avgTemp'], mode='lines', name='Daily temperature', line=dict(color='red')),
    secondary_y=True
)

# The y-axis titles are set per axis below, so only the shared settings go here
fig2.update_layout(
    title='Daily Bike Rides and Temperature Over Time',
    xaxis_title='Date',
    legend=dict(x=0.01, y=0.99)
)

fig2.update_yaxes(title_text="Daily bike rides", secondary_y=False)
fig2.update_yaxes(title_text="Temperature (°C)", secondary_y=True)

fig2.show()

In [59]:
# Run a garbage-collection pass to free memory held by unreachable objects;
# the number shown as the cell output is the value returned by gc.collect().
import gc # standard-library garbage-collector interface
gc.collect()

422

In [60]:
# Export the top-20 station counts to CSV (the index is written as the first column)

top20.to_csv(
    r'/Users/yasersouri/Desktop/newyork_Data/top20.csv'
)

In [61]:
# Re-check the column names before trimming the frame
df.columns

Index(['ride_id', 'rideable_type', 'start_time', 'end_time',
       'start_station_name', 'start_station_id', 'end_station_name',
       'end_station_id', 'start_lat', 'start_lng', 'end_lat', 'end_lng',
       'member_casual', 'date', 'avgTemp', 'bike_rides_daily', '_merge',
       'tripduration', 'month', 'season', 'value'],
      dtype='object')

### Reduce the row and column count

In [71]:
# Build a slimmer copy of df without the columns listed below

df_1 = df.drop(
    columns=[
        'ride_id',
        'start_time',
        'end_time',
        'tripduration',
        'start_station_id',
        'end_station_id',
        'member_casual',
        'month',
        '_merge',
    ]
)

In [73]:
# Verify which columns remain in the reduced copy
df_1.columns

Index(['rideable_type', 'start_station_name', 'end_station_name', 'start_lat',
       'start_lng', 'end_lat', 'end_lng', 'date', 'avgTemp',
       'bike_rides_daily', 'season', 'value'],
      dtype='object')

##### Create a random split

In [75]:
# Seeded boolean mask over the rows of df_1: True where the uniform draw is
# <= 0.92 (about 92% of rows), False for the remainder.
np.random.seed(32)
red = np.random.rand(df_1.shape[0]) <= 0.92

In [80]:
# Keep only the rows where the mask is False (the roughly 8% sample)
small = df_1.loc[~red]

In [82]:
# Row and column count of the sampled subset
small.shape

(71391, 12)

In [84]:
# Export the sampled subset without the index column
small.to_csv(
    r'/Users/yasersouri/Desktop/newyork_Data/reduced_data_to_plot_7.csv',
    index=False,
)

In [85]:
# Export the column-reduced frame.
# NOTE(review): this writes every row of df_1 (not the sampled subset) and,
# unlike the sampled export, includes the index column - confirm both are intended.
df_1.to_csv(
    r'/Users/yasersouri/Desktop/newyork_Data/reduced_data_to_plot.csv'
)