In [4]:
import pandas as pd
from collections import Counter
from datetime import datetime

def create_membership_data() -> dict[str, list]:
    """Build the sample membership records as a column-oriented dictionary.

    The person-level columns (name, age, balance, tier, newsletter flag) hold
    the same seven entries twice in a row, while each of the fourteen rows
    has its own membership date.

    Returns:
        Mapping of column name to a list of fourteen values.
    """
    # Each seven-element pattern is repeated once to give fourteen rows.
    names = ["John", "Mary", "Peter", "Jeff", "Bill", "Lisa", "Jill"] * 2
    ages = [23, 78, 22, 19, 45, 33, 20] * 2
    balances = [1000, 970, 3000, 500, 0, -23, 1230] * 2
    tiers = ["Basic", "Basic", "Premium", "Advanced", "Advanced", "Premium", "Basic"] * 2
    subscribed = [True, False, True, False, False, True, True] * 2

    # (year, month, day) of joining, one triple per row.
    joined_on = [
        (2020, 1, 1), (2021, 2, 15), (2020, 3, 10),
        (2019, 4, 20), (2018, 5, 25), (2022, 6, 30),
        (2021, 7, 14), (2020, 8, 22), (2019, 9, 5),
        (2022, 10, 11), (2018, 11, 29), (2021, 12, 7),
        (2020, 2, 14), (2019, 1, 19),
    ]

    return {
        "Names": names,
        "Ages": ages,
        "Account_balance": balances,
        "Membership": tiers,
        "Newsletter_subscription": subscribed,
        "Membership_date": [datetime(*ymd) for ymd in joined_on],
    }

# Build the sample records and load them into a DataFrame built from the column dictionary.
membership_data: dict[str, list] = create_membership_data()
membership_df = pd.DataFrame(membership_data)
# Plain-text dump of the whole frame.
print(membership_df)

    Names  Ages  Account_balance Membership  Newsletter_subscription  \
0    John    23             1000      Basic                     True   
1    Mary    78              970      Basic                    False   
2   Peter    22             3000    Premium                     True   
3    Jeff    19              500   Advanced                    False   
4    Bill    45                0   Advanced                    False   
5    Lisa    33              -23    Premium                     True   
6    Jill    20             1230      Basic                     True   
7    John    23             1000      Basic                     True   
8    Mary    78              970      Basic                    False   
9   Peter    22             3000    Premium                     True   
10   Jeff    19              500   Advanced                    False   
11   Bill    45                0   Advanced                    False   
12   Lisa    33              -23    Premium                     

In [57]:
# Select a single column by its label; the cell displays the resulting Series.
membership_df["Names"]

0      John
1      Mary
2     Peter
3      Jeff
4      Bill
5      Lisa
6      Jill
7      John
8      Mary
9     Peter
10     Jeff
11     Bill
12     Lisa
13     Jill
Name: Names, dtype: object

In [59]:
# Select several columns at once by passing a list of column labels.
membership_df[["Names", "Ages"]]

Unnamed: 0,Names,Ages
0,John,23
1,Mary,78
2,Peter,22
3,Jeff,19
4,Bill,45
5,Lisa,33
6,Jill,20
7,John,23
8,Mary,78
9,Peter,22


In [60]:
# Take the "Names" column, then read the entry stored under index label 0.
print(membership_df["Names"][0])

John


In [22]:
def average_age(df: pd.DataFrame) -> float:
    """Return the arithmetic mean of the ``Ages`` column of ``df``."""
    ages = df["Ages"]
    return ages.mean()

def average_account_balance(df: pd.DataFrame) -> float:
    """Return the arithmetic mean of the ``Account_balance`` column of ``df``."""
    balances = df["Account_balance"]
    return balances.mean()

def average_account_balance_by_membership(df: pd.DataFrame) -> pd.Series:
    """Compute the mean account balance for each membership tier.

    Args:
        df: Frame containing ``Membership`` and ``Account_balance`` columns.

    Returns:
        A Series indexed by the distinct ``Membership`` values whose entries
        are the mean ``Account_balance`` of the rows in that tier.
    """
    # Selecting a single column from the groupby yields a Series, not a DataFrame.
    return df.groupby("Membership")["Account_balance"].mean()

def most_common_membership(df: pd.DataFrame) -> str:
    """Return the ``Membership`` value that occurs in the most rows of ``df``."""
    tier_counts = df["Membership"].value_counts()
    return tier_counts.idxmax()

def most_common_membership_by_age(df: pd.DataFrame) -> pd.Series:
    """Compute the mean age of the members in each membership tier.

    Despite its name, this function does not pick a "most common" tier: it
    returns one average age per tier.

    Args:
        df: Frame containing ``Membership`` and ``Ages`` columns.

    Returns:
        A Series indexed by the distinct ``Membership`` values whose entries
        are the mean of ``Ages`` for the rows in that tier.
    """
    # Selecting a single column from the groupby yields a Series, not a DataFrame.
    return df.groupby("Membership")["Ages"].mean()

def most_popular_membership_type(df: pd.DataFrame) -> str:
    """Return the membership tier with the largest count of ``Names`` entries."""
    names_per_tier = df.groupby("Membership")["Names"].count()
    return names_per_tier.idxmax()

def most_popular_membership_type_v2(df: pd.DataFrame) -> str:
    """Return the most frequent ``Membership`` value, using ``value_counts``."""
    membership_column = df['Membership']
    return membership_column.value_counts().idxmax()

def most_popular_membership_type_v3(df: pd.DataFrame) -> str:
    """Return the most frequent ``Membership`` value, tallied with ``Counter``."""
    top_entries = Counter(df["Membership"]).most_common(1)
    # most_common(1) yields a one-element list of (value, count) pairs.
    label, _count = top_entries[0]
    return label

def percentage_newsletter_subscribed(df: pd.DataFrame) -> float:
    """Return the percentage (0-100) of rows subscribed to the newsletter.

    The result is scaled to 0-100 so that it matches the function's name and
    agrees with ``percentage_newsletter_subscribed_v2``.

    Args:
        df: Frame containing a boolean ``Newsletter_subscription`` column.

    Returns:
        Share of ``True`` values as a percentage; ``0.0`` when no row is
        subscribed or the frame has no rows.
    """
    shares = df["Newsletter_subscription"].value_counts(normalize=True)
    # .get() avoids the KeyError that shares[True] raises when True never occurs.
    return float(shares.get(True, 0.0)) * 100

def percentage_newsletter_subscribed_v2(df: pd.DataFrame) -> float:
    """Return the percentage (0-100) of rows whose newsletter flag is set.

    The flags are summed (each ``True`` counts as 1), divided by the number
    of rows in ``df``, and scaled by 100.
    """
    flags = df["Newsletter_subscription"]
    return flags.sum() / len(df) * 100

In [39]:
# Performing the analysis
# Only the three averages below are computed and printed by this cell.
avg_age = average_age(membership_df)
avg_account_balance = average_account_balance(membership_df)
avg_account_balance_by_membership = average_account_balance_by_membership(membership_df)
# NOTE(review): the remaining analyses are disabled. As written, each assignment would
# rebind the function's own name to its result, so a second run of this cell would fail;
# use distinct result names (e.g. an `_result` suffix) before re-enabling them.
# most_common_membership = most_common_membership(membership_df)
# most_common_membership_by_age = most_common_membership_by_age(membership_df)
# NOTE(review): most_common_membership_by_account_balance is not defined in the visible cells — confirm it exists.
# most_common_membership_by_account_balance = most_common_membership_by_account_balance(membership_df)
# most_popular_membership_type = most_popular_membership_type(membership_df)
# most_popular_membership_type_v2 = most_popular_membership_type_v2(membership_df)
# most_popular_membership_type_v3 = most_popular_membership_type_v3(membership_df)
# percentage_newsletter_subscribed = percentage_newsletter_subscribed(membership_df)
# percentage_newsletter_subscribed_v2 = percentage_newsletter_subscribed_v2(membership_df)

# Report the computed averages; the per-membership Series is printed on its own lines.
print(f"Average age: {avg_age}")
print(f"Average account balance: {avg_account_balance}")
print(f"Average account balance by membership: \n{avg_account_balance_by_membership}")

Average age: 34.285714285714285
Average account balance: 953.8571428571429
Average account balance by membership: 
Membership
Advanced     250.000000
Basic       1066.666667
Premium     1488.500000
Name: Account_balance, dtype: float64
