In [None]:

#importing libraries 
import pandas as pd
import altair as alt 

# Data source: https://archive.ics.uci.edu/ml/datasets/Seoul+Bike+Sharing+Demand
# Read the CSV file with pandas. Column 0 is parsed as dates while loading,
# interpreting ambiguous values with the day first.
df = pd.read_csv(
    '/content/SeoulBikeData.csv',
    encoding='latin-1',
    header=0,
    index_col=None,
    parse_dates=[0],
    dayfirst=True,
)

# Preview the first 10 rows to see what the data looks like.
df.head(10)


Unnamed: 0,Date,Rented Bike Count,Hour,Temperature(°C),Humidity(%),Wind speed (m/s),Visibility (10m),Dew point temperature(°C),Solar Radiation (MJ/m2),Rainfall(mm),Snowfall (cm),Seasons,Holiday,Functioning Day
0,2017-12-01,254,0,-5.2,37,2.2,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes
1,2017-12-01,204,1,-5.5,38,0.8,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes
2,2017-12-01,173,2,-6.0,39,1.0,2000,-17.7,0.0,0.0,0.0,Winter,No Holiday,Yes
3,2017-12-01,107,3,-6.2,40,0.9,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes
4,2017-12-01,78,4,-6.0,36,2.3,2000,-18.6,0.0,0.0,0.0,Winter,No Holiday,Yes
5,2017-12-01,100,5,-6.4,37,1.5,2000,-18.7,0.0,0.0,0.0,Winter,No Holiday,Yes
6,2017-12-01,181,6,-6.6,35,1.3,2000,-19.5,0.0,0.0,0.0,Winter,No Holiday,Yes
7,2017-12-01,460,7,-7.4,38,0.9,2000,-19.3,0.0,0.0,0.0,Winter,No Holiday,Yes
8,2017-12-01,930,8,-7.6,37,1.1,2000,-19.8,0.01,0.0,0.0,Winter,No Holiday,Yes
9,2017-12-01,490,9,-6.5,27,0.5,1928,-22.4,0.23,0.0,0.0,Winter,No Holiday,Yes


In [None]:
# Check the size of the data as a (rows, columns) tuple.
print((len(df.index), len(df.columns)))


(8760, 14)


In [None]:
# Altair only accepts up to 5000 rows by default, so the number of rows has to
# be reduced without losing important data.
# Instead of keeping a row for every hour (1, 2, 3, 4, ...), keep only every
# second hour (1, 3, 5, 7, ...).

# Keep the rows whose 'Hour' value is not an even number.
df = df.loc[df['Hour'] % 2 != 0]

# Check that the extraction worked: only odd hours should remain.
df.head(5)


Unnamed: 0,Date,Rented Bike Count,Hour,Temperature(°C),Humidity(%),Wind speed (m/s),Visibility (10m),Dew point temperature(°C),Solar Radiation (MJ/m2),Rainfall(mm),Snowfall (cm),Seasons,Holiday,Functioning Day
1,2017-12-01,204,1,-5.5,38,0.8,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes
3,2017-12-01,107,3,-6.2,40,0.9,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes
5,2017-12-01,100,5,-6.4,37,1.5,2000,-18.7,0.0,0.0,0.0,Winter,No Holiday,Yes
7,2017-12-01,460,7,-7.4,38,0.9,2000,-19.3,0.0,0.0,0.0,Winter,No Holiday,Yes
9,2017-12-01,490,9,-6.5,27,0.5,1928,-22.4,0.23,0.0,0.0,Winter,No Holiday,Yes


In [None]:
# Single-value selection keyed on the 'Seasons' field. It is initialised to
# "Spring" so that Spring is the default box plot.
dateClick = alt.selection_single(fields=['Seasons'], init={'Seasons': 'Spring'})

# Point chart of rented bike counts by calendar month (ordinal x axis),
# colored by season. The tooltip shows the season, the full date and the
# rented bike count. The dateClick selection is attached to this chart.
scatter_plot = (
    alt.Chart(df)
    .mark_point()
    .encode(
        alt.X('month(Date):O'),
        alt.Y('Rented Bike Count'),
        alt.Color('Seasons'),
        tooltip=[
            alt.Tooltip('Seasons:N', title='Seasons'),
            alt.Tooltip('yearmonthdate(Date):O', title='Date'),
            alt.Tooltip('Rented Bike Count:Q', title='Rented Bike Count'),
        ],
    )
    .properties(width=500, height=500)
    .add_selection(dateClick)
)





To move beyond a simple field name, we use `alt.X()` with `bin=True` for the x encoding, which bins `Rented Bike Count`, and we plot `Temperature(°C)` on the y encoding:


In [None]:
# Box plots (whiskers spanning min to max) of temperature against binned
# rented bike counts. The data is filtered by the dateClick selection, colored
# by holiday status and split into one column per season.
bar_by_seasons = (
    alt.Chart(df)
    .transform_filter(dateClick)
    .mark_boxplot(extent='min-max')
    .encode(
        alt.X('Rented Bike Count', bin=True),
        alt.Y('Temperature(°C)'),
        alt.Color('Holiday'),
        alt.Column('Seasons'),
    )
)



In [None]:
# Plot all charts together: the scatter plot stacked above the box plots,
# with each chart resolving its color scale independently.
(scatter_plot & bar_by_seasons).resolve_scale(color='independent')

# Hover over and click on points in the scatter plot for further filtering.
# Hover over the box plot for more detailed information as well.