## Name: Tridib Paul TUrjo
## Student ID: 202059523

## Name: Md Jawad Ul Tazwar
## Student ID: 202057543

## Presentation Video link: https://youtu.be/P8wDpish9nA?si=wxdhndjjy2GaoU51

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
import numpy as np
from sklearn.linear_model import LinearRegression

In [2]:
# Load the open-gym CSV into the shared `df` frame that the trend and
# demographic plots below read from.
df = pd.read_csv('open_gym.csv')

# GymSight: Activity Viewer

## Seasonal & Annual Gym Trends Snapshot

In [3]:
# Parse the session start column to datetimes, then derive the quarterly and
# yearly period each session falls in (used as group-by keys by the plots).
df['open_gym_start'] = pd.to_datetime(df['open_gym_start'])
start_accessor = df['open_gym_start'].dt
df['quarter'] = start_accessor.to_period('Q')
df['year'] = start_accessor.to_period('Y')

In [4]:
def plot_trend(period='Y', facility='All'):
    """Plot total open-gym participants per year or per quarter.

    Parameters
    ----------
    period : str
        'Y' draws yearly totals as a line. Any other value (the widget passes
        'Q') draws quarterly totals as colour-coded points joined by a dashed
        grey line.
    facility : str
        A ``facility_title`` value, or 'All' to use every row.

    Reads the module-level ``df``, which must already carry the ``year`` and
    ``quarter`` period columns.
    """
    filtered_df = df if facility == 'All' else df[df['facility_title'] == facility]

    plt.figure(figsize=(10, 6))

    if period == 'Y':
        period_totals = filtered_df.groupby('year')['total'].sum().reset_index()
        # Periods are converted to timestamps so matplotlib gets a date axis.
        period_totals['year'] = period_totals['year'].dt.to_timestamp()
        sns.lineplot(data=period_totals, x='year', y='total', marker='o', linestyle='-', color='b')
        plt.title(f'Yearly Total Participants in Open Gym Sessions ({facility})')
        plt.xlabel('Year', size = 12)
    else:
        period_totals = filtered_df.groupby('quarter')['total'].sum().reset_index()
        period_totals['quarter'] = period_totals['quarter'].dt.to_timestamp()

        quarter_mapping = {1: 'Q1', 2: 'Q2', 3: 'Q3', 4: 'Q4'}
        period_totals['quarter_name'] = period_totals['quarter'].dt.quarter.map(quarter_mapping)

        # Fixed quarter -> colour mapping. A plain colour list is handed out in
        # order of appearance, so the same quarter would change colour between
        # facilities whose data starts in different quarters.
        quarter_colors = {'Q1': 'red', 'Q2': 'green', 'Q3': 'blue', 'Q4': 'orange'}
        quarters_present = set(period_totals['quarter_name'])
        legend_order = [name for name in quarter_colors if name in quarters_present]

        sns.scatterplot(
            data=period_totals,
            x='quarter',
            y='total',
            hue='quarter_name',
            hue_order=legend_order,
            palette=quarter_colors,
            legend='full',
            s=100,
        )
        sns.lineplot(data=period_totals, x='quarter', y='total', color='grey', linestyle='--')

        plt.title(f'Quarterly Total Participants in Open Gym Sessions ({facility})')
        plt.xlabel('Quarter', size = 12)

    plt.gca().set_facecolor('#f0f0f0')
    plt.gcf().set_facecolor('#e0e0e0')
    plt.ylabel('Total Participants', size = 12)
    plt.grid(False)
    plt.tight_layout()
    plt.show()

# Controls for the trend chart: a yearly/quarterly toggle and a facility picker
# whose first entry, 'All', disables the facility filter.
period_widget = widgets.RadioButtons(
    description='Period:',
    options=[('Yearly', 'Y'), ('Quarterly', 'Q')],
    value='Y',
    layout={'width': 'max-content'},
)

facility_choices = ['All', *df['facility_title'].unique()]
facilities_dropdown = widgets.Dropdown(
    description='Facility:',
    options=facility_choices,
    value='All',
)

dropdowns_hbox = widgets.HBox([period_widget, facilities_dropdown])
# Left as the final expression so the notebook renders the interactive widget.
widgets.interactive(plot_trend, period=period_widget, facility=facilities_dropdown)


interactive(children=(RadioButtons(description='Period:', layout=Layout(width='max-content'), options=(('Yearl…

## Demographic Dynamics

In [5]:
# Parse the start column to datetimes and build the facility choices for the
# demographic widgets, with 'All' listed first.
df['open_gym_start'] = pd.to_datetime(df['open_gym_start'])
facilities = ['All', *df['facility_title'].unique()]
def plot_gender_by_facility(facility):
    """Draw paired yearly bars of female and male participant totals.

    ``facility`` is a ``facility_title`` value, or 'All' to keep every row of
    the module-level ``df``. The ``year`` column is read through its ``.dt``
    accessor to label the x axis.
    """
    subset = df if facility == 'All' else df[df['facility_title'] == facility]
    yearly = subset.groupby('year')[['total_females', 'total_males']].sum().reset_index()

    bar_width = 0.4
    positions = np.arange(len(yearly))

    plt.figure(figsize=(10, 6))
    plt.bar(positions, yearly['total_females'], bar_width,
            label='Females', color='lightcoral', edgecolor='black')
    plt.bar(positions + bar_width, yearly['total_males'], bar_width,
            label='Males', color='lightblue', edgecolor='black')

    plt.ylabel('Total Participants', size = 12)
    plt.xlabel('Year', labelpad=16, size = 12)
    plt.title(f'Participants by Gender Over the Years ({facility})')
    # Each tick sits midway between the two bars of a year.
    plt.xticks(positions + bar_width / 2, yearly['year'].dt.year, rotation=45)
    plt.legend(loc='best')
    plt.grid(True, axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.show()

def plot_residency_by_facility(facility):
    """Draw paired yearly bars of resident and non-resident participant totals.

    ``facility`` is a ``facility_title`` value, or 'All' to keep every row of
    the module-level ``df``. The ``year`` column is read through its ``.dt``
    accessor to label the x axis.
    """
    subset = df if facility == 'All' else df[df['facility_title'] == facility]
    yearly = subset.groupby('year')[['total_residents', 'total_non_residents']].sum().reset_index()

    bar_width = 0.4
    positions = np.arange(len(yearly))

    plt.figure(figsize=(10, 6))
    plt.bar(positions, yearly['total_residents'], bar_width,
            label='Residents', color='cadetblue', edgecolor='black')
    plt.bar(positions + bar_width, yearly['total_non_residents'], bar_width,
            label='Non-Residents', color='powderblue', edgecolor='black')

    plt.ylabel('Total Participants', size = 12)
    plt.xlabel('Year', labelpad=16, size = 12)
    plt.title(f'Participants by Residency Over the Years ({facility})')
    # Each tick sits midway between the two bars of a year.
    plt.xticks(positions + bar_width / 2, yearly['year'].dt.year, rotation=45)
    plt.legend(loc='best')
    plt.grid(True, axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.show()

def display_plots(facility, plot_type):
    """Render the gender chart for 'Gender', otherwise the residency chart."""
    plotter = plot_gender_by_facility if plot_type == 'Gender' else plot_residency_by_facility
    plotter(facility)

# Controls for the demographic charts: facility picker plus a gender/residency toggle.
facility_dropdown = widgets.Dropdown(
    description='Facility:',
    options=facilities,
    value='All',
)
plot_type = widgets.RadioButtons(
    description='Type:',
    options=['Gender', 'Residency'],
    value='Gender',
    orientation='horizontal',
)

# Left as the final expression so the notebook renders the interactive widget.
widgets.interactive(display_plots, facility=facility_dropdown, plot_type=plot_type)


interactive(children=(Dropdown(description='Facility:', options=('All', 'Bond Park Community Center', 'Herbert…

## Weekly Gym Traffic

In [6]:
# Load a separate copy of the CSV for the heatmap and derive the hour of day
# and weekday name of every session start.
df2 = pd.read_csv('open_gym.csv')
df2['open_gym_start'] = pd.to_datetime(df2['open_gym_start'])
df2['gym_hour'] = df2['open_gym_start'].dt.hour
df2['weekday'] = df2['open_gym_start'].dt.day_name()

# Participant totals per (hour, weekday, facility); plot_heatmap filters this frame.
grouped_data = df2.groupby(['gym_hour', 'weekday', 'facility_title'])['total'].sum().reset_index()
pivot_data = grouped_data.pivot_table(index="gym_hour", columns="weekday", values="total", aggfunc='sum', fill_value=0)
# reindex rather than [] selection: a weekday with no sessions becomes a zero
# column instead of raising KeyError.
pivot_data = pivot_data.reindex(
    columns=["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"],
    fill_value=0,
)

hours_to_include = list(range(8, 20))
# reindex returns a new frame (safe to add a row to) and zero-fills any hour
# between 8 and 19 that never occurs in the data.
pivot_data_filtered = pivot_data.reindex(hours_to_include, fill_value=0)
# Collapse the late-evening hours into a single summary row.
pivot_data_filtered.loc['20-23'] = pivot_data.loc[20:23].sum()

hour_labels = [f"{hour%12 or 12} {'AM' if hour < 12 else 'PM'}" for hour in hours_to_include]
hour_labels.append("8 PM - 12 AM")
pivot_data_filtered.index = hour_labels
pivot_data_filtered = pivot_data_filtered.fillna(0)
def plot_heatmap(facility='All'):
    """Draw an hour-of-day by weekday heatmap of participant totals.

    Parameters
    ----------
    facility : str
        A ``facility_title`` value, or 'All' to aggregate every facility.

    Reads the module-level ``grouped_data``. Hours 8 through 19 each get a
    row, hours 20 through 23 are summed into one "8 PM - 12 AM" row, and
    earlier hours are not shown.
    """
    weekday_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

    if facility == 'All':
        source = grouped_data
    else:
        source = grouped_data[grouped_data['facility_title'] == facility]
    heatmap_data = source.groupby(['gym_hour', 'weekday'])['total'].sum().reset_index()

    pivot_data = heatmap_data.pivot(index="gym_hour", columns="weekday", values="total")
    # Reindex both axes: a facility with no sessions at some hour or on some
    # weekday gets zeros there instead of a KeyError from label selection.
    pivot_data = pivot_data.reindex(index=range(0, 24), columns=weekday_order).fillna(0)

    hours_to_include = list(range(8, 20))  # from 8 AM to 7 PM
    # Explicit copy so the summary row is added to an independent frame.
    pivot_data_filtered = pivot_data.loc[hours_to_include].copy()
    pivot_data_filtered.loc['20-23'] = pivot_data.loc[20:23].sum()

    hour_labels = [f"{hour%12 or 12} {'AM' if hour < 12 else 'PM'}" for hour in hours_to_include]
    hour_labels.append("8 PM - 12 AM")
    pivot_data_filtered.index = hour_labels

    cubehelix_cmap = sns.cubehelix_palette(start=.5, rot=-.5, as_cmap=True)

    plt.figure(figsize=(12, 7))
    sns.heatmap(data=pivot_data_filtered, cmap= cubehelix_cmap, cbar_kws={'label': 'Activity Level'}, linewidths=.5)

    plt.title(f'Gym Usage Heatmap by Hour and Day of Week ({facility})')
    plt.ylabel('Hour of Day', size = 12)
    plt.yticks(ticks=range(len(pivot_data_filtered.index)), labels=pivot_data_filtered.index, va="top")
    plt.xlabel('Day of Week', labelpad=18, size = 12)
    plt.show()


# Facility picker for the heatmap; 'All' aggregates every facility.
heatmap_facility_choices = ['All', *df2['facility_title'].unique()]
facilities_dropdown = widgets.Dropdown(
    description='Facility:',
    options=heatmap_facility_choices,
    value='All',
)

# Left as the final expression so the notebook renders the interactive widget.
widgets.interactive(plot_heatmap, facility=facilities_dropdown)


interactive(children=(Dropdown(description='Facility:', options=('All', 'Bond Park Community Center', 'Herbert…

## Activity Trends Timeline

In [7]:

# Separate copy of the CSV for the activity timeline: here `year` is a plain
# integer (not a Period), which the regression in plot_data consumes.
df1 = pd.read_csv('open_gym.csv')
df1['open_gym_start'] = pd.to_datetime(df1['open_gym_start'])
df1['year'] = df1['open_gym_start'].dt.year
# One row per (activity, year) with the summed participant total.
grouped = df1.groupby(['open_gym_activity', 'year'], as_index=False)['total'].sum()


def plot_data(activity):
    """Scatter yearly attendance for one activity with a fitted linear trend.

    Parameters
    ----------
    activity : str
        An ``open_gym_activity`` value present in the module-level ``grouped``
        frame.

    Fits ``LinearRegression`` on year -> total and overlays the fitted values
    as a dashed red line.
    """
    filtered_data = grouped[grouped['open_gym_activity'] == activity]
    years = filtered_data['year'].astype(int)
    X = years.values.reshape(-1, 1)
    y = filtered_data['total'].values
    y_pred = LinearRegression().fit(X, y).predict(X)

    # Apply the darkgrid style and coolwarm palette only while this figure is
    # built. The global sns.set_* calls would otherwise restyle every other
    # chart in the notebook the next time its widget re-renders.
    with sns.axes_style("darkgrid"), sns.color_palette("coolwarm"):
        plt.figure(figsize=(10, 6))

        sns.scatterplot(data=filtered_data, x='year', y='total', label='Actual Attendance', s=100)
        plt.plot(filtered_data['year'], y_pred, color='red', label='Trend Line', linestyle='--')
        plt.title(f'Trends in {activity} Attendance Over Time', fontsize=16)
        plt.xlabel('Year', fontsize=12)
        plt.ylabel('Total Attendance', fontsize=12)
        # Tick exactly on the observed years so the axis never shows
        # fractional values such as 2018.5.
        plt.xticks(years)
        plt.legend(title='Legend', fontsize=9, title_fontsize=12, loc='upper left')
        plt.tight_layout()
        plt.show()

# Alphabetical activity choices; the first one is preselected.
activity_options = sorted(grouped['open_gym_activity'].unique())
activity_dropdown = widgets.Dropdown(
    description='Activity:',
    options=activity_options,
    value=activity_options[0],
    disabled=False,
)

# Output area that holds the activity chart so it can be cleared and redrawn.
plot_output = widgets.Output()


def update_plot(change):
    """Redraw the activity chart when the dropdown value changes.

    ``change`` is the observer payload; only its ``'new'`` entry (the newly
    selected activity) is used.
    """
    with plot_output:
        # wait=True defers clearing until new output is ready.
        plot_output.clear_output(wait=True)
        plot_data(change['new'])


activity_dropdown.observe(update_plot, names='value')
display(activity_dropdown, plot_output)

# Draw the initial chart for the preselected activity.
with plot_output:
    plot_data(activity_dropdown.value)

Dropdown(description='Activity:', options=('Badminton', 'Basketball', 'Homeschool', 'Open Gym', 'Open Studio',…

Output()

## Attribution
- https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.dt.to_period.html
- https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.pivot.html
- https://seaborn.pydata.org/tutorial/introduction.html
- https://ipywidgets.readthedocs.io/en/stable/examples/Widget%20Basics.html
- ChatGPT for syntax finding and debugging