In [None]:
import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the 2023 enrollment counts and discard the "Company Name" column in a
# single chained expression, then preview the first rows.
enrollment_csv = 'asmbly_enrollment_counts_2023.csv'
df = pd.read_csv(enrollment_csv).drop(columns=["Company Name"])

df.head()

In [None]:
# Parse both membership date columns into pandas datetimes so they can be
# compared against dates and queried through the .dt accessor.
for date_column in ("Membership Start Date", "Membership Expiration Date"):
    df[date_column] = pd.to_datetime(df[date_column])


In [None]:
# Identify accounts that held a membership for all of 2023. Every case below
# produces a one-column ("Account ID") frame. Rows are selected with a boolean
# mask through .loc so Account ID keeps its original dtype; df.where() would
# NaN-fill every non-matching row and upcast integer IDs to float.
#
# The date comparisons use pd.Timestamp, so the two membership date columns
# must already be datetimes (comparing unparsed strings raises instead of
# silently comparing text).

# Memberships costing more than this are treated as yearly, less than this as
# monthly. A cost of exactly this value matches neither group.
ANNUAL_COST_THRESHOLD = 200

is_annual = df["Membership Cost"] > ANNUAL_COST_THRESHOLD
is_monthly = df["Membership Cost"] < ANNUAL_COST_THRESHOLD
enrollments_2022 = df["2022 Membership Enrollment Count"]
enrollments_2023 = df["2023 Membership Enrollment Count"]
start_date = df["Membership Start Date"]
expiration_date = df["Membership Expiration Date"]


def _account_ids(mask):
    """Return the "Account ID" column for rows where ``mask`` is True, without missing IDs."""
    return df.loc[mask, ["Account ID"]].dropna()


# case where member had monthly memberships all year
full_year_monthlies = _account_ids(is_monthly & (enrollments_2023 >= 12))

# case where member had one yearly membership spanning the whole year
# (start date between 2022-12-01 and 2023-01-31, both inclusive)
full_year_yearlies = _account_ids(
    is_annual
    & start_date.between(pd.Timestamp("2022-12-01"), pd.Timestamp("2023-01-31"))
)

# cases where member had a mix of yearly and monthly memberships spanning 2023
# 1. Member had annual membership that started in 2022 then continued on
#    monthly memberships the rest of the year.
#    NOTE(review): only the expiration *month* is used and the expiration year
#    is never checked, so a membership that expired before 2023 is measured
#    against the same month arithmetic -- confirm this is intended.
mixed_annual_monthlies_1 = _account_ids(
    is_annual
    & (expiration_date <= pd.Timestamp("2023-11-30"))
    & (enrollments_2023 > 12 - expiration_date.dt.month)
)

# 2. Member had monthly memberships for part of 2023, then started an annual
#    membership for the rest of 2023
mixed_annual_monthlies_2 = _account_ids(
    is_annual
    & (expiration_date >= pd.Timestamp("2024-01-31"))
    & (enrollments_2023 > start_date.dt.month - 1)
)

# case where member had two yearly memberships spanning 2022 and 2023
two_yearlies = _account_ids(
    is_annual & (enrollments_2023 == 1) & (enrollments_2022 >= 1)
)

# Stack every case. An account matching more than one case appears once per
# matching case, and the original row labels of df are kept.
final = pd.concat([
    full_year_monthlies,
    full_year_yearlies,
    mixed_annual_monthlies_1,
    mixed_annual_monthlies_2,
    two_yearlies,
])

In [None]:
# Total number of rows matched across the five full-year cases.
# Using `+` on the DataFrames themselves performs index-aligned arithmetic on
# the Account ID values (NaN wherever a row label is not present in every
# frame), which is not a count -- so add up the row counts instead.
# An account that matches several cases is counted once per case.
total = sum(
    len(case_frame)
    for case_frame in (
        full_year_monthlies,
        full_year_yearlies,
        mixed_annual_monthlies_1,
        mixed_annual_monthlies_2,
        two_yearlies,
    )
)
print(total)

In [None]:
# Preview the first rows of `final`, the concatenation of the per-case
# "Account ID" frames.
final.head()

In [None]:
# Number of cases each account matched, most-matched first.
# `final` only has the "Account ID" column, so grouping by it and calling
# .count() leaves no columns to count and returns an empty frame; .size()
# reports the rows per group regardless of the remaining columns.
final.groupby("Account ID").size().rename("Matched Cases").sort_values(ascending=False)

In [None]:
# Read all_events_2023.csv into `events_df`; later cells group it by
# "Event Category Name" and count "Event ID".
events_df = pd.read_csv("all_events_2023.csv")

In [None]:
# Count the non-null Event IDs within each event category; the result is a
# one-column ("Event ID") frame indexed by category name.
events_by_category = events_df.groupby("Event Category Name")
category_counts = events_by_category.agg({"Event ID": "count"})
category_counts.head()

In [None]:
# Fold the woodshop sub-categories into "Woodworking", strip the leading
# underscore from "_3D Printing", and label the count column "Event Count".
#
# Relabel the index and re-aggregate instead of adding/dropping rows in place:
# the cell can then be re-run safely (the in-place version raised KeyError on
# a second run because the sub-category rows were already dropped) and it does
# not fail when one of the sub-categories is absent from the data.
CATEGORY_ALIASES = {
    "Woodshop Safety": "Woodworking",
    "Woodshop Mentor Series": "Woodworking",
    "_3D Printing": "3D Printing",
}

category_counts = (
    category_counts
    .rename(index=CATEGORY_ALIASES)
    # sort=False keeps the categories in their existing order
    .groupby(level=0, sort=False)
    .sum()
    .rename(columns={"Event ID": "Event Count"})
)

category_counts.head(10)



In [None]:
# Write `category_counts` to category_counts.csv; with to_csv's defaults the
# index (the category names) is written as the first column.
category_counts.to_csv("category_counts.csv")

In [None]:
# Read all_event_registrations_2023.csv into `registrations_df`; a later cell
# groups it by "Event Category Name" and counts "Account ID".
registrations_df = pd.read_csv("all_event_registrations_2023.csv")

In [None]:
# Preview the first rows of the registrations data.
registrations_df.head()


In [None]:
# Registrations per event category, with the woodshop sub-categories folded
# into "Woodworking" and "_3D Printing" relabelled "3D Printing".
#
# Relabelling the index and re-aggregating avoids the KeyError that the
# add-then-drop approach raises whenever one of the sub-categories has no
# registrations in the data, and avoids mutating the frame in place.
REGISTRATION_CATEGORY_ALIASES = {
    "Woodshop Safety": "Woodworking",
    "Woodshop Mentor Series": "Woodworking",
    "_3D Printing": "3D Printing",
}

counts = (
    registrations_df
    .groupby("Event Category Name")[["Account ID"]]
    .count()  # non-null Account IDs per category
    .rename(columns={"Account ID": "Registration Count"})
    .rename(index=REGISTRATION_CATEGORY_ALIASES)
    # sort=False keeps the categories in their existing order
    .groupby(level=0, sort=False)
    .sum()
)

# Export the categories ordered from most to fewest registrations; `counts`
# itself is left unsorted.
counts.sort_values(by="Registration Count", ascending=False).to_csv("category_registrations.csv")

In [None]:
# Column-wise sum of `counts`, i.e. the total across all categories.
counts.sum()