## Building a dashboard with Streamlit

In [None]:
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from datetime import datetime as dt
from streamlit_keplergl import keplergl_static

Wrangle data

In [None]:
# Load the source data; the first CSV column becomes the DataFrame index.
df = pd.read_csv('new_york_data_1.csv', index_col = 0)

In [None]:
# Inspect the column dtypes as loaded (before 'date' is parsed below).
df.dtypes

In [None]:
# Parse the ISO-formatted 'date' strings once, then derive an integer
# 'month' column (1-12) from the parsed values.

parsed_dates = pd.to_datetime(df['date'], format='%Y-%m-%d')
df['date'] = parsed_dates
df['month'] = parsed_dates.dt.month.astype('int')

In [None]:
# Label every row with a season derived from its integer 'month' value.
# Months 12 and 1-4 are "winter", 5 is "spring", 6-9 are "summer"; anything
# not listed in the table (i.e. 10 and 11) falls through to "fall".
# NOTE(review): winter spanning Dec-Apr and spring being May only is unusual;
# confirm these boundaries are intended before relying on the labels.

season_by_month = {
    12: "winter", 1: "winter", 2: "winter", 3: "winter", 4: "winter",
    5: "spring",
    6: "summer", 7: "summer", 8: "summer", 9: "summer",
}
df['season'] = [season_by_month.get(month, "fall") for month in df['month']]

In [None]:
# Check the (rows, columns) size after adding the 'month' and 'season' columns.
df.shape

In [None]:
# List the available column names before building the charts.
df.columns

## Create the Plotly charts

In [None]:
## Groupby

# Give every row a constant weight of 1 so that the per-station sum of
# 'value' equals the number of rows recorded for that start station.
df['value'] = 1
df_groupby_bar = df.groupby('start_station_name', as_index=False)['value'].sum()

# Keep only the 20 stations with the highest totals.
top20 = df_groupby_bar.nlargest(n=20, columns='value')

In [None]:
# Plain bar chart: one bar per top-20 start station, height = summed 'value'.
station_bars = go.Bar(
    x=top20['start_station_name'],
    y=top20['value'],
)
fig = go.Figure(data=station_bars)
fig.show()

In [None]:
# Same bar chart, but with each bar shaded by its own total on the
# 'Blues' colour scale.
bar_marker = {'color': top20['value'], 'colorscale': 'Blues'}
coloured_bars = go.Bar(
    x=top20['start_station_name'],
    y=top20['value'],
    marker=bar_marker,
)
fig = go.Figure(data=coloured_bars)
fig.show()

In [None]:
## Bar chart

# Add a title, axis labels and a fixed 900x600 size to the current `fig`
# (the coloured top-20 bar chart). The data is loaded from
# 'new_york_data_1.csv', so the title names New York.
fig.update_layout(
    title = 'Top 20 most popular bike stations in New York',
    xaxis_title = 'Start stations',
    yaxis_title ='Sum of trips',
    width = 900, height = 600
)

In [None]:
# Line chart

# Dual-axis figure: 'bike_rides_daily' on the primary y-axis and 'avgTemp'
# on the secondary y-axis, both plotted against 'date'.
# NOTE(review): the traces use every row of df as-is; if df holds many rows
# per date this draws a very large number of points - confirm that is intended.
fig = make_subplots(specs=[[{"secondary_y": True}]])

trace_specs = (
    # (y column, legend label, use secondary y-axis?)
    ('bike_rides_daily', 'Daily bike rides', False),
    ('avgTemp', 'Daily temperature', True),
)
for y_column, legend_label, on_secondary_axis in trace_specs:
    fig.add_trace(
        go.Scatter(x=df['date'], y=df[y_column], name=legend_label),
        secondary_y=on_secondary_axis,
    )

fig.show()

In [None]:
import gc # standard-library interface to Python's garbage collector
gc.collect() # force a collection now; presumably to release memory after the large figure - TODO confirm

In [None]:
# Save the top 20 stations as a CSV file.
# The DataFrame index is written as the first column (to_csv default).

top20.to_csv('top20.csv')

In [None]:
# Re-list the columns to decide which ones can be dropped below.
df.columns

Reduce the row and column count

In [None]:
# Build a slimmer copy of df without the columns listed below;
# df itself is left unchanged.

columns_to_drop = [
    'ride_id',
    'started_at',
    'ended_at',
    'start_station_id',
    'end_station_id',
    'member_casual',
    'merge_flag',
    'month',
]
df_1 = df.drop(columns=columns_to_drop)

Create a random split

In [None]:
# Fix the seed so the same rows are picked on every run, then draw one
# uniform number in [0, 1) per row of df_1. `red` is True where the draw is
# <= 0.98 (about 98% of rows) and False for the remaining rows.
np.random.seed(10)
row_count = len(df_1)
random_draws = np.random.rand(row_count)
red = random_draws <= 0.98

In [None]:
# Keep only the rows where the mask is False (the ~2% minority sample).
keep_mask = np.logical_not(red)
small = df_1.loc[keep_mask]

In [None]:
# Check how many rows and columns the sampled subset contains.
small.shape

In [None]:
# Write the sampled subset to disk without the DataFrame index column.
small.to_csv('reduced_data_to_plot_7.csv',index = False)

In [None]:
# Write the column-reduced frame (all rows of df_1) to disk, index included.
# NOTE(review): unlike the sampled export, this keeps the index column and is
# not row-reduced despite the file name - confirm both are intended.
df_1.to_csv('reduced_data_to_plot.csv')