In [80]:
import numpy as np
import pandas as pd
import plotly.express as px

# Show every row and every column when a DataFrame is displayed
pd.options.display.max_rows = None
pd.options.display.max_columns = None

# Load the cleaned bike-rental and weather CSV files
bike_data = pd.read_csv('data/target/cleaned_bike_data.csv')
weather_data = pd.read_csv('data/target/cleaned_weather_data.csv')

# Record the initial row counts as a (bike, weather) tuple so the final cell
# can report whether any records were dropped along the way
original_len = (len(bike_data), len(weather_data))

In [81]:
# Timestamps come back from CSV as plain strings, so convert the start/end
# columns of both frames to datetime
for frame in (bike_data, weather_data):
    for time_column in ('start_time', 'end_time'):
        frame[time_column] = pd.to_datetime(frame[time_column])

# Print the dtypes of both frames to confirm the conversion
print(bike_data.dtypes, weather_data.dtypes)

trip_id                      object
start_time           datetime64[ns]
end_time             datetime64[ns]
trip_duration                 int64
from_station_id               int64
from_station_name            object
to_station_id                 int64
to_station_name              object
user_type                    object
birth_year                    int64
name                         object
email                        object
dtype: object Type                  object
Severity              object
start_time    datetime64[ns]
end_time      datetime64[ns]
dtype: object


In [82]:
# Create a boolean mask flagging bike rentals that started during a weather
# event, i.e. whose start_time lies inside at least one
# [weather start_time, weather end_time] window (both bounds inclusive).
# The event columns are looked up once, outside the per-rental lambda.
event_starts = weather_data['start_time']
event_ends = weather_data['end_time']
start_mask = bike_data['start_time'].apply(
    # Series.any() reduces the comparison in one vectorised call; the builtin
    # any() would walk the comparison result element by element in Python.
    lambda x: ((x >= event_starts) & (x <= event_ends)).any()
)

# Filter the bike_rentals DataFrame using the mask.
# `~` is the element-wise NOT for boolean masks; unary `-` only works because
# pandas special-cases it for bool Series (NumPy rejects it outright).
rentals_started_during_bad_weather = bike_data[start_mask]
rentals_started_during_good_weather = bike_data[~start_mask]

# Get the number of rows
weather_len = rentals_started_during_bad_weather.shape[0]
non_weather_len = rentals_started_during_good_weather.shape[0]

print(f"{weather_len} rentals started during wet weather; {non_weather_len} started during dry weather")

rentals_started_during_bad_weather.head(20)

3410 rentals started during wet weather; 36590 started during dry weather


Unnamed: 0,trip_id,start_time,end_time,trip_duration,from_station_id,from_station_name,to_station_id,to_station_name,user_type,birth_year,name,email
0,22014544,2022-03-08 04:19:43,2022-03-08 04:26:42,6,456,2112 W Peterson Ave,458,Broadway & Thorndale Ave,Subscriber,1992,Frances Clayton,griddled@curtis.com
6,25953758,2022-12-29 04:45:24,2022-12-29 04:49:14,3,456,2112 W Peterson Ave,455,Maplewood Ave & Peterson Ave,Subscriber,1998,Tricia Hayes,tricia_hayes@flores-mooney.com
28,34B5E119EB5F0BB4,2023-09-09 05:26:00,2023-09-09 05:38:00,12,101,63rd St Beach,247,Shore Dr & 55th St,Subscriber,2001,Andrew Petersen,andrew_petersen@yahoo.com
36,21969317,2022-02-26 10:01:31,2022-02-26 10:06:57,5,109,900 W Harrison St,320,Loomis St & Lexington St,Subscriber,1990,Billy Hall,billy_hall@howard.info
37,22043134,2022-03-13 06:02:09,2022-03-13 06:05:32,3,109,900 W Harrison St,107,Desplaines St & Jackson Blvd,Subscriber,1982,Sylvia Davis,sylviadavis@gmail.com
39,21850668,2022-01-23 09:36:43,2022-01-23 09:43:32,6,109,900 W Harrison St,73,Jefferson St & Monroe St,Subscriber,1994,Megan Walker,meganwalker@colon.org
41,21989442,2022-03-02 06:50:17,2022-03-02 06:56:15,5,109,900 W Harrison St,282,Halsted St & Maxwell St,Subscriber,1995,Madison Bridges,madison.bridges@yahoo.com
42,22092740,2022-03-20 04:21:42,2022-03-20 04:31:06,9,109,900 W Harrison St,346,Ada St & Washington Blvd,Subscriber,1985,Brian Baker,bakerbrian@yahoo.com
66,25607003,2022-10-30 11:34:05,2022-10-30 12:12:19,38,109,900 W Harrison St,258,Logan Blvd & Elston Ave,Subscriber,1980,Lori Hoover,lori_hoover@holland.com
70,25790286,2022-11-27 05:42:01,2022-11-27 05:45:30,3,109,900 W Harrison St,107,Desplaines St & Jackson Blvd,Subscriber,1971,Edward Lee,edward_lee@yahoo.com


In [83]:
# Create a boolean mask flagging bike rentals that ended during a weather
# event, i.e. whose end_time lies inside at least one
# [weather start_time, weather end_time] window (both bounds inclusive).
# The event columns are looked up once, outside the per-rental lambda.
event_starts = weather_data['start_time']
event_ends = weather_data['end_time']
end_mask = bike_data['end_time'].apply(
    # Series.any() reduces the comparison in one vectorised call; the builtin
    # any() would walk the comparison result element by element in Python.
    lambda x: ((x >= event_starts) & (x <= event_ends)).any()
)

# Filter the bike_rentals DataFrame using the mask.
# `~` is the element-wise NOT for boolean masks; unary `-` only works because
# pandas special-cases it for bool Series (NumPy rejects it outright).
rentals_ended_during_bad_weather = bike_data[end_mask]
rentals_ended_during_good_weather = bike_data[~end_mask]

# Get the number of rows
weather_len = rentals_ended_during_bad_weather.shape[0]
non_weather_len = rentals_ended_during_good_weather.shape[0]

print(f"{weather_len} rentals ended during wet weather; {non_weather_len} ended during dry weather")

rentals_ended_during_bad_weather.head(20)

3336 rentals ended during wet weather; 36664 ended during dry weather


Unnamed: 0,trip_id,start_time,end_time,trip_duration,from_station_id,from_station_name,to_station_id,to_station_name,user_type,birth_year,name,email
0,22014544,2022-03-08 04:19:43,2022-03-08 04:26:42,6,456,2112 W Peterson Ave,458,Broadway & Thorndale Ave,Subscriber,1992,Frances Clayton,griddled@curtis.com
6,25953758,2022-12-29 04:45:24,2022-12-29 04:49:14,3,456,2112 W Peterson Ave,455,Maplewood Ave & Peterson Ave,Subscriber,1998,Tricia Hayes,tricia_hayes@flores-mooney.com
28,34B5E119EB5F0BB4,2023-09-09 05:26:00,2023-09-09 05:38:00,12,101,63rd St Beach,247,Shore Dr & 55th St,Subscriber,2001,Andrew Petersen,andrew_petersen@yahoo.com
36,21969317,2022-02-26 10:01:31,2022-02-26 10:06:57,5,109,900 W Harrison St,320,Loomis St & Lexington St,Subscriber,1990,Billy Hall,billy_hall@howard.info
37,22043134,2022-03-13 06:02:09,2022-03-13 06:05:32,3,109,900 W Harrison St,107,Desplaines St & Jackson Blvd,Subscriber,1982,Sylvia Davis,sylviadavis@gmail.com
39,21850668,2022-01-23 09:36:43,2022-01-23 09:43:32,6,109,900 W Harrison St,73,Jefferson St & Monroe St,Subscriber,1994,Megan Walker,meganwalker@colon.org
41,21989442,2022-03-02 06:50:17,2022-03-02 06:56:15,5,109,900 W Harrison St,282,Halsted St & Maxwell St,Subscriber,1995,Madison Bridges,madison.bridges@yahoo.com
42,22092740,2022-03-20 04:21:42,2022-03-20 04:31:06,9,109,900 W Harrison St,346,Ada St & Washington Blvd,Subscriber,1985,Brian Baker,bakerbrian@yahoo.com
66,25607003,2022-10-30 11:34:05,2022-10-30 12:12:19,38,109,900 W Harrison St,258,Logan Blvd & Elston Ave,Subscriber,1980,Lori Hoover,lori_hoover@holland.com
70,25790286,2022-11-27 05:42:01,2022-11-27 05:45:30,3,109,900 W Harrison St,107,Desplaines St & Jackson Blvd,Subscriber,1971,Edward Lee,edward_lee@yahoo.com


In [84]:
# Mask for rentals that did NOT start inside a weather event but did end in one
end_weather_bad_start_weather_good_mask = ~start_mask & end_mask

# Select the matching rows of bike_data
rentals_ended_in_bad_weather_having_started_in_good = bike_data.loc[end_weather_bad_start_weather_good_mask]

# Count them
rained_off_rentals = len(rentals_ended_in_bad_weather_having_started_in_good)

print(f"{rained_off_rentals} rentals ended during wet weather, having started in dry weather")

rentals_ended_in_bad_weather_having_started_in_good.head(50)

250 rentals ended during wet weather, having started in dry weather


Unnamed: 0,trip_id,start_time,end_time,trip_duration,from_station_id,from_station_name,to_station_id,to_station_name,user_type,birth_year,name,email
329,5D303CF64569A309,2023-11-26 08:17:00,2023-11-26 08:48:00,31,80,Aberdeen St & Monroe St,508,Central Park Ave & North Ave,Subscriber,1980,Cheyenne Edwards,edwardscheyenne@hotmail.com
548,22569560,2022-05-07 07:18:44,2022-05-07 07:40:25,21,341,Adler Planetarium,97,Field Museum,Customer,0,Unknown,Unknown
575,24438839,2022-08-17 11:12:12,2022-08-17 11:41:05,28,341,Adler Planetarium,341,Adler Planetarium,Customer,0,Unknown,Unknown
576,24035112,2022-07-29 04:47:01,2022-07-29 09:44:21,297,341,Adler Planetarium,85,Michigan Ave & Oak St,Customer,0,Unknown,Unknown
637,969D9C85929CF987,2023-10-16 00:02:00,2023-10-16 00:21:00,19,341,Adler Planetarium,38,Clark St & Lake St,Subscriber,1992,Clarence Berg,clarenceberg@hotmail.com
652,22041753,2022-03-13 02:05:12,2022-03-13 02:21:18,16,511,Albany Ave & Bloomingdale Ave,375,Sacramento Blvd & Franklin Blvd,Subscriber,1999,Raymond Burton,burtonraymond@simpson.com
793,07C56C3B442F4AE1,2023-07-12 12:03:00,2023-07-12 12:29:00,26,368,Ashland Ave & Archer Ave,208,Laflin St & Cullerton St,Subscriber,1992,Holly Terry,hollyterry@yahoo.com
892,25747721,2022-11-21 02:17:48,2022-11-21 02:28:33,10,333,Ashland Ave & Blackhawk St,364,Larrabee St & Oak St,Subscriber,1995,Kyle Taylor,kyle_taylor@warren.biz
1104,3F1008C25D33F868,2023-01-28 00:09:00,2023-01-28 00:41:00,32,210,Ashland Ave & Division St,59,Wabash Ave & Roosevelt Rd,Subscriber,1973,Paula Castro,paulacastro@richardson-howard.com
1298,23880947,2022-07-21 13:10:24,2022-07-21 13:26:13,15,119,Ashland Ave & Lake St,134,Peoria St & Jackson Blvd,Subscriber,1990,Brandon Flynn,brandon_flynn@holmes.com


In [85]:
# Mask for rentals that both started and ended inside a weather event
ended_in_weather_started_in_weather_mask = start_mask & end_mask

# Select the matching rows of bike_data
bike_rentals_ended_during_weather_having_started = bike_data.loc[ended_in_weather_started_in_weather_mask]

# Count them
wet_rentals = len(bike_rentals_ended_during_weather_having_started)

print(f"{wet_rentals} rentals ended during wet weather, having started in wet weather")

bike_rentals_ended_during_weather_having_started.head(50)

3086 rentals ended during wet weather, having started in wet weather


Unnamed: 0,trip_id,start_time,end_time,trip_duration,from_station_id,from_station_name,to_station_id,to_station_name,user_type,birth_year,name,email
0,22014544,2022-03-08 04:19:43,2022-03-08 04:26:42,6,456,2112 W Peterson Ave,458,Broadway & Thorndale Ave,Subscriber,1992,Frances Clayton,griddled@curtis.com
6,25953758,2022-12-29 04:45:24,2022-12-29 04:49:14,3,456,2112 W Peterson Ave,455,Maplewood Ave & Peterson Ave,Subscriber,1998,Tricia Hayes,tricia_hayes@flores-mooney.com
28,34B5E119EB5F0BB4,2023-09-09 05:26:00,2023-09-09 05:38:00,12,101,63rd St Beach,247,Shore Dr & 55th St,Subscriber,2001,Andrew Petersen,andrew_petersen@yahoo.com
36,21969317,2022-02-26 10:01:31,2022-02-26 10:06:57,5,109,900 W Harrison St,320,Loomis St & Lexington St,Subscriber,1990,Billy Hall,billy_hall@howard.info
37,22043134,2022-03-13 06:02:09,2022-03-13 06:05:32,3,109,900 W Harrison St,107,Desplaines St & Jackson Blvd,Subscriber,1982,Sylvia Davis,sylviadavis@gmail.com
39,21850668,2022-01-23 09:36:43,2022-01-23 09:43:32,6,109,900 W Harrison St,73,Jefferson St & Monroe St,Subscriber,1994,Megan Walker,meganwalker@colon.org
41,21989442,2022-03-02 06:50:17,2022-03-02 06:56:15,5,109,900 W Harrison St,282,Halsted St & Maxwell St,Subscriber,1995,Madison Bridges,madison.bridges@yahoo.com
42,22092740,2022-03-20 04:21:42,2022-03-20 04:31:06,9,109,900 W Harrison St,346,Ada St & Washington Blvd,Subscriber,1985,Brian Baker,bakerbrian@yahoo.com
66,25607003,2022-10-30 11:34:05,2022-10-30 12:12:19,38,109,900 W Harrison St,258,Logan Blvd & Elston Ave,Subscriber,1980,Lori Hoover,lori_hoover@holland.com
70,25790286,2022-11-27 05:42:01,2022-11-27 05:45:30,3,109,900 W Harrison St,107,Desplaines St & Jackson Blvd,Subscriber,1971,Edward Lee,edward_lee@yahoo.com


In [86]:
# Group the data by generation

# Generation label -> (first birth year, last birth year), both inclusive
generations = dict([
    ('Baby Boomer Generation', (1946, 1964)),
    ('Generation X', (1965, 1979)),
    ('Millennials', (1980, 1994)),
    ('Generation Z', (1995, 2012)),
])

# Keep Subscriber rentals only; .copy() yields an independent frame so that
# columns added to it do not touch bike_data
subscriber_bike_data = bike_data.loc[bike_data['user_type'].eq('Subscriber')].copy()

# Helper that maps a birth year to the label for the 'generation' column
def get_generation(birth_year):
    """Return the generation label whose year range contains ``birth_year``.

    Ranges come from the module-level ``generations`` mapping and are
    inclusive at both ends. The first matching label (in mapping order) is
    returned; 'Unknown' is returned when no range matches.
    """
    return next(
        (
            label
            for label, (first_year, last_year) in generations.items()
            if first_year <= birth_year <= last_year
        ),
        'Unknown',
    )

# Label every subscriber rental with the rider's generation
subscriber_bike_data['generation'] = subscriber_bike_data['birth_year'].map(get_generation)

# GroupBy over the generation label, reused by the cells that follow
generation_groups = subscriber_bike_data.groupby(by='generation')


In [87]:
# Number of Subscriber rentals by generation, smallest group first
group_sizes = generation_groups.size().sort_values()
print(group_sizes)

# Bar chart with one bar per generation
fig = px.bar(
    x=group_sizes.index,
    y=group_sizes.values,
    title='Subscriber Rentals by Generation',
)

# Axis titles, plus x tick labels rotated by 45 degrees
fig.update_layout(
    xaxis_title='Generation',
    yaxis_title='Number of Subscriber Rentals',
    xaxis_tickangle=-45,
)

# Display the chart
fig.show()

generation
Baby Boomer Generation     1474
Generation X               2106
Millennials               12641
Generation Z              13239
dtype: int64


In [98]:
# Average length of a Subscriber rental by generation (non-winsorized)
avg_trip_duration_non_winsorized = generation_groups['trip_duration'].mean()
print("Non-winsorized average trip duration by generation:")
print(avg_trip_duration_non_winsorized)

# Winsorize 'trip_duration': values below the 1st percentile or above the 99th
# are capped at those percentiles (outliers are limited, not dropped)
trip_duration = subscriber_bike_data['trip_duration']
lower_cap, upper_cap = trip_duration.quantile([0.01, 0.99])
subscriber_bike_data['trip_duration_winsorized'] = trip_duration.clip(lower=lower_cap, upper=upper_cap)

# Average length of a Subscriber rental by generation (winsorized).
# Group again from the frame: `generation_groups` was created before the
# winsorized column existed, so reading the new column through it only works
# as long as that GroupBy still points at this exact, mutated DataFrame.
avg_trip_duration_winsorized = (
    subscriber_bike_data.groupby('generation')['trip_duration_winsorized'].mean()
)
print("\nWinsorized average trip duration by generation:")
print(avg_trip_duration_winsorized)

# Group the data by birth year and calculate the mean winsorized trip duration
birth_year_groups = subscriber_bike_data.groupby('birth_year')['trip_duration_winsorized'].mean().reset_index()

# Create the scatterplot with an ordinary-least-squares trendline
fig = px.scatter(birth_year_groups, x='birth_year', y='trip_duration_winsorized',
                 title='Average Trip Duration by Subscriber Birth Year',
                 labels={'birth_year': 'Birth Year', 'trip_duration_winsorized': 'Average Trip Duration (Minutes)'},
                 trendline='ols')

# Show the plot
fig.show()

Non-winsorized average trip duration by generation:
generation
Baby Boomer Generation    13.191316
Generation X              13.282051
Generation Z              14.491880
Millennials               19.065343
Name: trip_duration, dtype: float64

Winsorized average trip duration by generation:
generation
Baby Boomer Generation    11.960651
Generation X              12.279677
Generation Z              12.504041
Millennials               12.505735
Name: trip_duration_winsorized, dtype: float64


In [89]:
# Percentage of Subscriber rentals, subdivided by duration in each age group

# Bin edges and their labels; pd.cut closes each bin on the right and
# include_lowest=True makes the first bin include 0 as well
duration_bins = [0, 25, 35, 45, np.inf]
duration_labels = ['Up to 25 min', '25-35 min', '35-45 min', '45 min and over']

# Assign every rental to a duration category
subscriber_bike_data['duration_category'] = pd.cut(
    subscriber_bike_data['trip_duration'],
    bins=duration_bins,
    labels=duration_labels,
    include_lowest=True,
)

# Share of each generation's rentals that falls into each duration category
rentals_per_category = subscriber_bike_data.groupby(['generation', 'duration_category']).size()
rentals_per_generation = subscriber_bike_data.groupby('generation').size()
duration_percentages = rentals_per_category / rentals_per_generation * 100

# Print the result for the first set of bin labels, one column per category
print("Results for the first set of bin labels:")
print(duration_percentages.unstack(level=1))


Results for the first set of bin labels:
duration_category       Up to 25 min  25-35 min  35-45 min  45 min and over
generation                                                                 
Baby Boomer Generation     90.705563   5.970149   2.170963         1.153324
Generation X               89.696106   6.647673   2.231719         1.424501
Generation Z               89.326988   6.782990   2.666365         1.185890
Millennials                89.106874   6.882367   2.721304         1.202437






In [90]:
# Repeat the breakdown with the bins adjusted towards shorter trips
duration_bins = [0, 5, 15, 25, np.inf]
duration_labels = ['Up to 5 min', '5-15 min', '15-25 min', '25 min and over']

# Bin the 'trip_duration' column into the defined categories
# (this overwrites any existing 'duration_category' column)
subscriber_bike_data['duration_category'] = pd.cut(subscriber_bike_data['trip_duration'], bins=duration_bins, labels=duration_labels, include_lowest=True)

# Calculate the percentage of rentals in each duration category for each generation group
duration_percentages = (
    subscriber_bike_data
    .groupby(['generation', 'duration_category'])
    .size() / subscriber_bike_data.groupby('generation').size() * 100
)

# Print the result for the second set of bin labels
# (the heading previously said "first", copied over from the earlier cell)
print("Results for the second set of bin labels:")
print(duration_percentages.unstack(level=1))

Results for the first set of bin labels:
duration_category       Up to 5 min   5-15 min  15-25 min  25 min and over
generation                                                                
Baby Boomer Generation    25.915875  49.660787  15.128901         9.294437
Generation X              25.830959  47.245964  16.619183        10.303894
Generation Z              23.967067  48.961402  16.398520        10.635244
Millennials               24.341429  48.034175  16.731271        10.806107






In [91]:
# Number of rentals by user_type

# The GroupBy object is kept in a variable so the mean-duration cell can reuse it
user_type_groups = bike_data.groupby(by='user_type')
print(user_type_groups.size())

# Preview the first 50 rentals
bike_data.head(n=50)

user_type
Customer      10540
Subscriber    29460
dtype: int64


Unnamed: 0,trip_id,start_time,end_time,trip_duration,from_station_id,from_station_name,to_station_id,to_station_name,user_type,birth_year,name,email
0,22014544,2022-03-08 04:19:43,2022-03-08 04:26:42,6,456,2112 W Peterson Ave,458,Broadway & Thorndale Ave,Subscriber,1992,Frances Clayton,griddled@curtis.com
1,22112934,2022-03-22 15:37:12,2022-03-22 15:41:36,4,456,2112 W Peterson Ave,457,Clark St & Elmdale Ave,Subscriber,1998,Beth Harris,beth.harris@hotmail.com
2,21757938,2022-01-04 04:18:55,2022-01-04 04:25:27,6,456,2112 W Peterson Ave,458,Broadway & Thorndale Ave,Subscriber,2001,Andrew Petersen,andrew_petersen@yahoo.com
3,22119398,2022-03-23 11:12:23,2022-03-23 11:35:50,23,456,2112 W Peterson Ave,447,Glenwood Ave & Morse Ave,Subscriber,2003,Tracy Morgan,lealness@yoder.com
4,23503206,2022-07-02 00:45:25,2022-07-02 00:56:50,11,456,2112 W Peterson Ave,344,Ravenswood Ave & Lawrence Ave,Subscriber,1983,Courtney Howell,courtney_howell@yahoo.com
5,25297896,2022-10-06 03:52:31,2022-10-06 04:09:39,17,456,2112 W Peterson Ave,463,Clark St & Berwyn Ave,Subscriber,1991,Chelsea Horton,chorton@hotmail.com
6,25953758,2022-12-29 04:45:24,2022-12-29 04:49:14,3,456,2112 W Peterson Ave,455,Maplewood Ave & Peterson Ave,Subscriber,1998,Tricia Hayes,tricia_hayes@flores-mooney.com
7,065A2049EE4C26C3,2023-02-03 12:24:00,2023-02-03 12:36:00,12,456,2112 W Peterson Ave,467,Western Ave & Lunt Ave,Subscriber,2006,Carlos Adkins,carlosadkins@gmail.com
8,067CE96BAC83029A,2023-05-19 12:02:00,2023-05-19 12:20:00,18,456,2112 W Peterson Ave,475,Washtenaw Ave & Lawrence Ave,Subscriber,1999,Mary Moore,mary.moore@carr.com
9,68C2D846EEC8AEA1,2023-08-05 07:50:00,2023-08-05 08:08:00,18,456,2112 W Peterson Ave,520,Greenview Ave & Jarvis Ave,Subscriber,1996,Daniel Hartman,daniel_hartman@yahoo.com


In [92]:
# Mean trip_duration for each user_type
avg_trip_duration = user_type_groups['trip_duration'].agg('mean')
print(avg_trip_duration)



user_type
Customer      60.155408
Subscriber    16.302749
Name: trip_duration, dtype: float64


In [93]:
# Kiosk usage: number of rentals departing from each station (all users)
from_station_groups = bike_data.groupby(by='from_station_name')
print(from_station_groups.size())

# Subscriber departures counted per generation, then per departure station
dept_station_generation_groups = subscriber_bike_data.groupby(
    by=['generation', 'from_station_name']
)
print(dept_station_generation_groups.size())

# Subscriber arrivals counted per arrival station, then per generation
arr_station_generation_groups = subscriber_bike_data.groupby(
    by=['to_station_name', 'generation']
)
print(arr_station_generation_groups.size())

# TODO: Recommendations on which kiosks and age categories should be chosen to trial targeted advertisements.
# TODO: A list of recommendations on how the data quality can be improved, and any other information or insights you think is relevant.

from_station_name
2112 W Peterson Ave                             13
63rd St Beach                                   20
900 W Harrison St                               74
Aberdeen St & Jackson Blvd                     125
Aberdeen St & Monroe St                        105
Aberdeen St & Randolph St                       96
Ada St & Washington Blvd                        83
Adler Planetarium                              136
Albany Ave & Bloomingdale Ave                   37
Albany Ave & Montrose Ave                       14
Archer (Damen) Ave & 37th St                     6
Artesian Ave & Hubbard St                       51
Ashland Ave & 13th St                           16
Ashland Ave & 21st St                            7
Ashland Ave & 50th St                            1
Ashland Ave & 63rd St                            2
Ashland Ave & 66th St                            2
Ashland Ave & 74th St                            1
Ashland Ave & 78th St                            1
Ashland Ave &

In [94]:
# Flatten the per-(arrival station, generation) counts into a regular DataFrame.
# `arr_station_generation_groups` is the grouping defined in the kiosk-analysis
# cell; the name `station_generation_groups` used here before is not defined
# anywhere in the notebook and raises NameError on a fresh kernel.
# reset_index() puts the unnamed size() values in a column labelled 0.
ungrouped_data = arr_station_generation_groups.size().reset_index()

# Create a scatter plot: one point per (station, generation), coloured by generation
fig = px.scatter(ungrouped_data, x="to_station_name", y=0, color="generation",
                 labels={"to_station_name": "Station", "generation": "Generation"})

# Customize the layout. The y-axis title is set here because the count column
# is named 0, so the former labels key "size" never matched any column.
fig.update_layout(
    title="Scatter Plot of Arrivals by Station and Generation",
    yaxis_title="Number of Arrivals",
)

# Display the plot
fig.show()

In [95]:
# Heatmap over all generations: stations on x, generations on y, and the
# count column (named 0) as the z value
fig = px.density_heatmap(ungrouped_data, x='to_station_name', y='generation', z=0, nbinsx=4)
fig.update_layout(
    title='Destination Popularity by Passenger Generation',
    xaxis_title='Station',
    yaxis_title='Generation',
)
fig.show()


def _generation_station_heatmap(arrivals, title):
    """Build a heatmap with generations on x and arrival stations on y.

    ``arrivals`` must provide the 'generation', 'to_station_name' and 0
    (count) columns; ``title`` becomes the figure title.
    """
    heatmap = px.density_heatmap(arrivals, x='generation', y='to_station_name', z=0, nbinsx=2)
    heatmap.update_layout(
        title=title,
        xaxis_title='Generation',
        yaxis_title='Station',
    )
    return heatmap


# Younger cohort: Generation Z and Millennials
younger_gens = ungrouped_data.loc[ungrouped_data['generation'].isin(['Generation Z', 'Millennials'])]
fig1 = _generation_station_heatmap(
    younger_gens, 'Destination Popularity for Younger Passenger Generations'
)

# Older cohort: Baby Boomers and Generation X
older_gens = ungrouped_data.loc[ungrouped_data['generation'].isin(['Baby Boomer Generation', 'Generation X'])]
fig2 = _generation_station_heatmap(
    older_gens, 'Destination Popularity for Older Passenger Generations'
)

# Display the two cohort plots
fig1.show()
fig2.show()

In [96]:
# Most popular destinations: number of rentals ending at each station
to_station_groups = bike_data.groupby(by='to_station_name')
print(to_station_groups.size())

to_station_name
2112 W Peterson Ave                              8
63rd St Beach                                   28
900 W Harrison St                               62
Aberdeen St & Jackson Blvd                     127
Aberdeen St & Monroe St                         94
Aberdeen St & Randolph St                       84
Ada St & Washington Blvd                        68
Adler Planetarium                              132
Albany Ave & 26th St                             3
Albany Ave & Bloomingdale Ave                   36
Albany Ave & Montrose Ave                       12
Archer (Damen) Ave & 37th St                     3
Archer (Damen) Ave & 37th St (*)                 1
Artesian Ave & Hubbard St                       32
Ashland Ave & 13th St                           31
Ashland Ave & 21st St                           14
Ashland Ave & 50th St                            2
Ashland Ave & 63rd St                            1
Ashland Ave & 66th St                            2
Ashland Ave & 6

In [97]:
# Compare the current row counts with the counts recorded right after loading
original_len_bike = original_len[0]
original_len_weather = original_len[1]
print(f"{original_len_bike - len(bike_data)} bike records and {original_len_weather - len(weather_data)} weather records have been removed")

0 bike records and 0 weather records have been removed
