# Creating Dashboards with Streamlit - Part 2

## 01. Import Libraries
## 02. Import Data
## 03. Dataset Final Formatting
## 04. Export Data

## 01. Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import streamlit as st
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from datetime import datetime as dt
from streamlit_keplergl import keplergl_static

# Install the requirements (Streamlit and plotting packages) before running this notebook:
# conda install -c conda-forge streamlit
# pip install plotly streamlit streamlit_keplergl numerize pillow

  _set_context_ca_bundle_path(ca_bundle_path)


## 02. Import Data

In [2]:
# The main dataframe with the derived column.
# The path is relative, so the CSV must sit in the notebook's working directory.
df = pd.read_csv('df_final_1.csv')

In [3]:
# Preview the first three rows of the freshly loaded frame
df.head(3)

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual,avgTemp,daily_bike_rides,_merge,value,date,month,season
0,B42E6C17AF007611,classic_bike,2022-01-01,2022-01-01 00:25:03,Hoboken Terminal - River St & Hudson Pl,HB102,Adams St & 2 St,HB407,40.736068,-74.029127,40.739814,-74.036904,member,11.6,592,both,1,2022-01-01,1,winter
1,D5C9BBDBC66CEBF6,classic_bike,2022-01-01,2022-01-01 13:57:13,14 St Ferry - 14 St & Shipyard Ln,HB202,Hoboken Terminal - River St & Hudson Pl,HB102,40.752961,-74.024353,40.736068,-74.029127,member,11.6,592,both,1,2022-01-01,1,winter
2,F7D69E45F52F0ECA,electric_bike,2022-01-01,2022-01-02 00:34:32,South Waterfront Walkway - Sinatra Dr & 1 St,HB103,Brunswick & 6th,JC081,40.736982,-74.027781,40.726012,-74.050389,casual,11.6,592,both,1,2022-01-01,1,winter


In [4]:
# Inspect the dtypes as read from the CSV, before any conversion is applied
df.dtypes

ride_id                object
rideable_type          object
started_at             object
ended_at               object
start_station_name     object
start_station_id       object
end_station_name       object
end_station_id         object
start_lat             float64
start_lng             float64
end_lat               float64
end_lng               float64
member_casual          object
avgTemp               float64
daily_bike_rides        int64
_merge                 object
value                   int64
date                   object
month                   int64
season                 object
dtype: object

In [5]:
# Row and column count of the loaded frame
df.shape

(895485, 20)

In [6]:
# Parse started_at (expected to hold 'YYYY-MM-DD' strings, per the format below) into a
# datetime `date` column. This overwrites the object-typed `date` column read from the CSV.
# The month number is derived from this column in the following cell.
df['date'] = pd.to_datetime(df['started_at'], format = '%Y-%m-%d')

In [7]:
# Derive the calendar month number from the parsed date
# (replaces the `month` column that was read from the CSV)
df['month'] = df['date'].dt.month

In [8]:
# Cast month to a fixed-width 64-bit integer. The plain 'int' alias resolves to the
# platform's default integer (32-bit on Windows with NumPy < 2), so naming int64
# explicitly keeps the dtype identical on every platform.
df['month'] = df['month'].astype('int64')

In [9]:
# Preview the frame again after the date and month columns were re-derived
df.head(3)

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual,avgTemp,daily_bike_rides,_merge,value,date,month,season
0,B42E6C17AF007611,classic_bike,2022-01-01,2022-01-01 00:25:03,Hoboken Terminal - River St & Hudson Pl,HB102,Adams St & 2 St,HB407,40.736068,-74.029127,40.739814,-74.036904,member,11.6,592,both,1,2022-01-01,1,winter
1,D5C9BBDBC66CEBF6,classic_bike,2022-01-01,2022-01-01 13:57:13,14 St Ferry - 14 St & Shipyard Ln,HB202,Hoboken Terminal - River St & Hudson Pl,HB102,40.752961,-74.024353,40.736068,-74.029127,member,11.6,592,both,1,2022-01-01,1,winter
2,F7D69E45F52F0ECA,electric_bike,2022-01-01,2022-01-02 00:34:32,South Waterfront Walkway - Sinatra Dr & 1 St,HB103,Brunswick & 6th,JC081,40.736982,-74.027781,40.726012,-74.050389,casual,11.6,592,both,1,2022-01-01,1,winter


In [10]:
# Confirm that `date` is now a datetime column and `month` is an integer column
df.dtypes

ride_id                       object
rideable_type                 object
started_at                    object
ended_at                      object
start_station_name            object
start_station_id              object
end_station_name              object
end_station_id                object
start_lat                    float64
start_lng                    float64
end_lat                      float64
end_lng                      float64
member_casual                 object
avgTemp                      float64
daily_bike_rides               int64
_merge                        object
value                          int64
date                  datetime64[ns]
month                          int64
season                        object
dtype: object

In [11]:
# The shape should be unchanged: the cells above only overwrote existing columns
df.shape

(895485, 20)

In [12]:
# Top 20 stations dataframe; the first CSV column is used as the row index
top20 = pd.read_csv('top20.csv', index_col = 0)

In [13]:
# Preview the first three rows of the top-20 stations frame
top20.head(3)

Unnamed: 0,start_station_name,value
37,Grove St PATH,42556
75,South Waterfront Walkway - Sinatra Dr & 1 St,34245
44,Hoboken Terminal - River St & Hudson Pl,33020


In [14]:
# Row and column count of the top-20 stations frame
top20.shape

(20, 2)

## 03. Dataset Final Formatting

In [15]:
# List the column names of the top-20 stations frame
top20.columns

Index(['start_station_name', 'value'], dtype='object')

In [16]:
# List the columns of the main frame to decide which ones to drop for the reduced copy
df.columns

Index(['ride_id', 'rideable_type', 'started_at', 'ended_at',
       'start_station_name', 'start_station_id', 'end_station_name',
       'end_station_id', 'start_lat', 'start_lng', 'end_lat', 'end_lng',
       'member_casual', 'avgTemp', 'daily_bike_rides', '_merge', 'value',
       'date', 'month', 'season'],
      dtype='object')

In [17]:
# Create a copy of the main dataframe with fewer columns to shrink the exported file.
# Only columns are removed here; the row count is reduced later by random sampling.
# The labels are passed as a list (not a set literal) so their order is fixed and the
# call matches the "single label or list-like" form that DataFrame.drop documents.
df_reduced = df.drop(columns = ['ride_id', 'rideable_type', 'started_at', 'ended_at',
                                'start_station_id', 'end_station_id', 'member_casual', '_merge', 'month'])

In [18]:
# Preview the reduced frame to verify which columns remain
df_reduced.head(3)

Unnamed: 0,start_station_name,end_station_name,start_lat,start_lng,end_lat,end_lng,avgTemp,daily_bike_rides,value,date,season
0,Hoboken Terminal - River St & Hudson Pl,Adams St & 2 St,40.736068,-74.029127,40.739814,-74.036904,11.6,592,1,2022-01-01,winter
1,14 St Ferry - 14 St & Shipyard Ln,Hoboken Terminal - River St & Hudson Pl,40.752961,-74.024353,40.736068,-74.029127,11.6,592,1,2022-01-01,winter
2,South Waterfront Walkway - Sinatra Dr & 1 St,Brunswick & 6th,40.736982,-74.027781,40.726012,-74.050389,11.6,592,1,2022-01-01,winter


In [19]:
# drop(columns=...) only removes columns, so the row count is expected to match df
df_reduced.shape

(895485, 11)

In [20]:
# Fix the global NumPy seed (32) so the random selection below is reproducible
np.random.seed(32) # Must run right before the draw; re-running only the draw without reseeding gives a different mask

# Draw one uniform [0, 1) value per row of df_reduced and flag the rows whose value is <= 0.92.
# The mask is True for roughly 92% of the rows; the complement (~8%) is what is kept as the
# small sample further down, intended to keep the working file under 25MB.
rand_reduced = np.random.rand(len(df_reduced)) <= 0.92

In [21]:
# Sanity check: the selection mask should be a boolean array
rand_reduced.dtype

dtype('bool')

In [22]:
# Build the small sample: keep only the rows of df_reduced whose mask entry is False,
# i.e. the rows that were NOT flagged by the rand_reduced condition
df_small = df_reduced.loc[~rand_reduced]

In [23]:
# Preview the sampled rows; the original row labels are retained, so the index has gaps
df_small.head(3)

Unnamed: 0,start_station_name,end_station_name,start_lat,start_lng,end_lat,end_lng,avgTemp,daily_bike_rides,value,date,season
3,Columbus Drive,Hilltop,40.718355,-74.038914,40.731169,-74.057574,11.6,592,1,2022-01-01,winter
7,Church Sq Park - 5 St & Park Ave,Willow Ave & 12 St,40.742659,-74.032233,40.751867,-74.030377,11.6,592,1,2022-01-01,winter
25,Washington St,Newport PATH,40.724294,-74.035483,40.727224,-74.033759,11.6,592,1,2022-01-01,winter


In [24]:
# Row and column count of the sampled frame
df_small.shape

(71391, 11)

## 04. Export Data

In [25]:
# Export the small random sample without the row index.
# NOTE(review): the full export in the next cell is written WITH the index — confirm that
# each file matches how its consumer reads it (e.g. with or without index_col=0).
df_small.to_csv('rand_reduced_data_for_plot.csv',index = False)

In [26]:
# Export the full reduced dataframe (all rows, fewer columns).
# NOTE(review): index=False is not passed here, so the row index is written as an unnamed
# first column, unlike the sample export in the previous cell — confirm the consumer
# reads this file with index_col=0.
df_reduced.to_csv('reduced_data_to_plot.csv')