In [None]:
"""Name:       Kyle Wolff
Library:    Altair
URL:        https://altair-viz.github.io/
Description: This library allows users to create clear, interactive visualizations in Python.
It integrates seamlessly with Pandas DataFrames and is great for quick, clean data exploration."""

In [None]:
# Install Altair 
!pip install altair

In [3]:
import os
from pathlib import Path

import altair as alt
import pandas as pd

In [5]:
# Load the dataset.
# The CSV location can be overridden with the BIKESHARE_CSV environment variable so the
# notebook is runnable on machines other than the author's; when the variable is unset
# the original location is used unchanged.
DEFAULT_BIKESHARE_CSV = "C:/Users/kylew/OneDrive - Babson College/Desktop/OIM7502_SP25/data/Bikeshare.csv"
bikeshare_csv = Path(os.environ.get("BIKESHARE_CSV", DEFAULT_BIKESHARE_CSV))

# Fail early with a message that says how to fix the path (same exception type
# pandas would raise for a missing file).
if not bikeshare_csv.is_file():
    raise FileNotFoundError(
        f"Bikeshare CSV not found at {bikeshare_csv}. "
        "Set the BIKESHARE_CSV environment variable to the file's location."
    )

df = pd.read_csv(bikeshare_csv)

In [7]:
# Preview the first rows (column headers plus a few sample values).
# A bare last expression uses the notebook's rich DataFrame display instead of
# the plain-text rendering produced by print().
df.head()

   Unnamed: 0  season mnth  day  hr  holiday  weekday  workingday weathersit  \
0           1       1  Jan    1   0        0        6           0      clear   
1           2       1  Jan    1   1        0        6           0      clear   
2           3       1  Jan    1   2        0        6           0      clear   
3           4       1  Jan    1   3        0        6           0      clear   
4           5       1  Jan    1   4        0        6           0      clear   

   temp   atemp   hum  windspeed  casual  registered  bikers  
0  0.24  0.2879  0.81        0.0       3          13      16  
1  0.22  0.2727  0.80        0.0       8          32      40  
2  0.22  0.2727  0.80        0.0       5          27      32  
3  0.24  0.2879  0.75        0.0       3          10      13  
4  0.24  0.2879  0.75        0.0       0           1       1  


In [9]:
# Disable row limit
# Lifts Altair's max-rows guard so later cells can pass the full `df` straight to alt.Chart.
# The call's return value is echoed as this cell's output.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [11]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [13]:
# Line chart of total bikers by hour
# Shows how bike usage varies across the hours of the day.
# `df` holds many records per hour value, so the biker counts are summed per hour
# inside the encoding; without the aggregate the line mark would run through every
# individual row instead of showing one total per hour.
alt.Chart(df).mark_line().encode(
    x=alt.X('hr:O', title='Hour of Day'),            # x-axis: hour
    y=alt.Y('sum(bikers):Q', title='Total Bikers')   # y-axis: bikers summed over all rows for that hour
).properties(
    title='Total Bikers by Hour'
)

In [34]:
# Bar chart: mean biker count for each value of `day`
# NOTE(review): the title says "Day of Month", but the preview only shows day = 1 for
# January rows -- confirm whether `day` is a day-of-month or a day-of-year value.

# One row per `day`, holding the mean of `bikers`
df_avg_day = df[['day', 'bikers']].groupby('day', as_index=False).mean()

# Bars: ordinal day on x, mean bikers on y
alt.Chart(df_avg_day, title='Average Bikers by Day of Month').mark_bar().encode(
    alt.X('day:O'),
    alt.Y('bikers:Q'),
)

In [15]:
# Heatmap of mean ridership for every (hour, weather condition) pair

# One row per hour / weather combination with the mean of `bikers`
heat_df = (
    df[['hr', 'weathersit', 'bikers']]
    .groupby(['hr', 'weathersit'], as_index=False)
    .mean()
)

# Rect marks: hour across, weather condition down, colour = mean bikers
alt.Chart(heat_df, title='Average Bikers by Hour and Weather').mark_rect().encode(
    alt.X('hr:O'),
    alt.Y('weathersit:N'),
    alt.Color('bikers:Q'),
)

In [17]:
# Mean biker count at each temperature value, one line per weather condition

df_temp_weather = (
    df[['temp', 'weathersit', 'bikers']]
    .groupby(['temp', 'weathersit'], as_index=False)
    .mean()
)

alt.Chart(
    df_temp_weather,
    title='Average Bikers by Temperature and Weather',
).mark_line().encode(
    alt.X('temp:Q'),
    alt.Y('bikers:Q'),
    alt.Color('weathersit:N'),                 # one coloured line per weather condition
    tooltip=['temp', 'bikers', 'weathersit'],  # values shown on hover
).interactive()  # zoom and pan

In [31]:
# Line charts for each season: mean bikers per hour, one facet per season

# Numeric season code -> label. The dict's insertion order is the chronological
# order used to sort the facets and the legend below.
SEASON_LABELS = {1: 'Winter', 2: 'Spring', 3: 'Summer', 4: 'Fall'}
SEASON_ORDER = list(SEASON_LABELS.values())

# Work on a relabelled copy so the numeric `season` column in `df` stays untouched
df_copy = df.assign(season=df['season'].map(SEASON_LABELS))

# Group data by hour and season, then calculate average bikers
df_hour_season = df_copy.groupby(['hr', 'season'], as_index=False)['bikers'].mean()

# Create the faceted line chart.
# An explicit sort is passed because nominal fields are otherwise ordered
# alphabetically (Fall, Spring, Summer, Winter) rather than chronologically.
alt.Chart(df_hour_season).mark_line().encode(
    x='hr:O',                                            # x-axis: hour
    y='bikers:Q',                                        # y-axis: average bikers
    color=alt.Color('season:N', sort=SEASON_ORDER),      # color based on season
    tooltip=['hr', 'bikers', 'season']
).facet(
    column=alt.Column('season:N', sort=SEASON_ORDER)     # separate chart for each season
).properties(
    title='Average Bikers by Hour Across Seasons'
)

In [25]:
# Interactive stacked bar chart that lets the user highlight either registered or casual riders

# Mean registered / casual counts for each hour
df_stacked = df.groupby('hr', as_index=False)[['registered', 'casual']].mean()

# Melt the dataframe into long format (one row per hour and user type) for Altair
df_melt = df_stacked.melt(id_vars='hr',
                          value_vars=['registered', 'casual'],
                          var_name='User Type',
                          value_name='Count')

# Selection bound to the legend: clicking a legend entry selects that user type.
# Altair 5 renamed selection_single -> selection_point and add_selection -> add_params
# and deprecated the old names, so the new API is preferred and the old one is only
# used when the new one is not available (older Altair releases).
if hasattr(alt, 'selection_point'):
    highlight = alt.selection_point(fields=['User Type'], bind='legend')
else:
    highlight = alt.selection_single(fields=['User Type'], bind='legend')

# Bar chart whose opacity depends on the selection
rider_bars = alt.Chart(df_melt).mark_bar().encode(
    x='hr:O',
    y='Count:Q',
    color='User Type:N',
    opacity=alt.condition(highlight, alt.value(1), alt.value(0.2)),  # dim the unselected group
    tooltip=['hr', 'User Type', 'Count']
).properties(
    title='Highlight Casual or Registered Riders by Hour'
)

# Attach the selection; the resulting chart is the cell's displayed value
rider_bars.add_params(highlight) if hasattr(rider_bars, 'add_params') else rider_bars.add_selection(highlight)



In [29]:
# Box plot: spread of biker counts under each weather condition
alt.Chart(df, title='Distribution of Biker Counts by Weather').mark_boxplot().encode(
    alt.X('weathersit:N', title='Weather Condition'),
    alt.Y('bikers:Q', title='Biker Count'),
    alt.Color('weathersit:N', legend=None),   # colour per condition, legend suppressed
    tooltip=['weathersit', 'bikers'],
)