In [1]:
import pandas as pd
# Load the reservation CSV; the path is relative to the notebook's working directory.
data = pd.read_csv(filepath_or_buffer="csxl_data.csv")

In [2]:
# Parse both reservation boundary columns into pandas datetimes.
for boundary in ('start', 'end'):
    data[boundary] = pd.to_datetime(data[boundary])

# Length of each reservation in days (seconds -> hours -> days).
elapsed = data['end'] - data['start']
data['reservation_length'] = elapsed.dt.total_seconds() / 3600 / 24

# Drop implausibly long reservations (e.g. start and end a month apart, as
# seen for user 385) by keeping only rows shorter than 5 days.
# TODO: Confirm threshold for reservation length
within_threshold = data['reservation_length'] < 5
filtered_data = data[within_threshold]

# Total time each user spent in the XL (according to reservations), as a
# two-column frame: user_id, total_time.
total_user_reservation_times = (
    filtered_data
    .groupby('user_id')['reservation_length']
    .sum()
    .rename('total_time')
    .reset_index()
)

In [3]:
import altair as alt

In [4]:
# Leaderboard input: the ten users with the largest total reserved time,
# highest first.
top_10_users = (
    total_user_reservation_times
    .sort_values(by='total_time', ascending=False)
    .head(10)
)

# Note: Used ChatGPT to help add the medals
# Prefix the top three user IDs with a medal. The labels are built as a fresh
# list of strings and attached with .assign() instead of writing through
# top_10_users['user_id'].iloc[i] = ...: that chained form assigns into a
# temporary Series (SettingWithCopyWarning today, a silent no-op under pandas
# Copy-on-Write) and mixes strings into the ID column. Building the list also
# handles fewer than three users without raising IndexError, and leaves the
# column uniformly string-typed.
MEDALS = ("🥇", "🥈", "🥉")
medal_labels = [
    MEDALS[rank] + " " + str(user_id) if rank < len(MEDALS) else str(user_id)
    for rank, user_id in enumerate(top_10_users['user_id'])
]
top_10_users = top_10_users.assign(user_id=medal_labels)

# Create a horizontal bar chart leaderboard. The totals exclude reservations
# of 5 days or longer (reservation_length is measured in days).
chart = alt.Chart(top_10_users).mark_bar().encode(
    # sort='-x' orders the bars by descending total_time.
    y=alt.Y('user_id:O', title='User ID', sort='-x'),
    x=alt.X('total_time:Q', title='Total Time (Days)'),
    tooltip=alt.Tooltip('total_time:Q', format='.2f')
).properties(
    title='Total Time per User'
)

chart.show()