In [None]:
import pandas as pd


# Exercise 1

In [None]:
# This code is provided for you. Please read it and understand what it's doing
def payment_df_to_cohort_df(payment_df):
    """Pivot raw payments into a cohort-by-payment-period table of summed amounts.

    Args:
        payment_df: DataFrame with the columns ``customer_id``, ``payment_date``
            (datetime dtype, because the ``.dt`` accessor is used) and ``amount``.
            The caller's frame is not modified; all derived columns are added to
            the merged copy.

    Returns:
        DataFrame whose index holds each cohort's month-start timestamp and whose
        columns are integer payment periods (months elapsed since the cohort
        month). Each cell is the sum of ``amount`` for that cohort and period;
        combinations with no payments are NaN. Both axes are left unnamed.
    """
    def _month_start(dates):
        # Truncate (not round) each timestamp to the first day of its month,
        # keeping a datetime dtype rather than a Period dtype.
        return dates.dt.to_period('M').dt.to_timestamp()

    # A customer's cohort is the month of their earliest payment.
    first_payment = payment_df.groupby('customer_id').payment_date.min().reset_index()
    first_payment['cohort'] = _month_start(first_payment['payment_date'])

    # The merge returns a new frame, so the columns added below never touch the input.
    enriched = payment_df.merge(first_payment[['customer_id', 'cohort']], on='customer_id')
    enriched['payment_month'] = _month_start(enriched['payment_date'])

    # Whole months between the cohort month and the payment month, as an integer.
    year_gap = enriched['payment_month'].dt.year - enriched['cohort'].dt.year
    month_gap = enriched['payment_month'].dt.month - enriched['cohort'].dt.month
    enriched['payment_period'] = year_gap * 12 + month_gap

    totals = enriched.groupby(["cohort", "payment_period"])["amount"].sum().reset_index()
    cohort_table = totals.pivot(index="cohort", columns="payment_period", values="amount")
    return cohort_table.rename_axis(index=None, columns=None)




In [None]:
# Sample payments for the exercises: customers 1 and 2 each pay in two
# consecutive months, customer 3 pays once.
test_payment_df = pd.DataFrame(
    dict(
        customer_id=[1, 1, 2, 2, 3],
        payment_date=pd.to_datetime([
            '2020-01-01', '2020-02-01',  # customer 1
            '2020-02-01', '2020-03-01',  # customer 2
            '2020-04-01',                # customer 3
        ]),
        amount=[200, 100, 400, 300, 500],
    )
)


In [None]:
# Build the cohort table from the sample payments. With this data the result has
# one row per cohort month (2020-01-01, 2020-02-01, 2020-04-01) and one column per
# payment period (0 and 1); the 2020-04-01 cohort has no period-1 payment, so that
# cell is NaN. Later cells reuse this frame as their input.
EXERCISE_1_COHORT_DF = payment_df_to_cohort_df(test_payment_df)
# Bare last expression so the notebook renders the frame.
EXERCISE_1_COHORT_DF

In [None]:
# Example spend table: one row per cohort start month, each with $1000 of spend.
# The index is named "cohort_start_at" and its timestamps match the cohort months
# of the sample payments. Not used until exercise 3.
EXERCISE_SPEND_DF = pd.DataFrame(
    {
        "cohort_start_at": list(map(pd.Timestamp, (
            '2020-01-01 00:00:00',
            '2020-02-01 00:00:00',
            '2020-04-01 00:00:00',
        ))),
        "spend": [1000.0] * 3,
    }
).set_index("cohort_start_at")
EXERCISE_SPEND_DF

# Exercise 2

In [None]:
def apply_predictions_list(m0: float, churn: float, actual_values: list[float] = None, n_months: int = 6) -> list:
    """Extend a series of observed monthly values with a geometric-decay forecast.

    Observed values are kept as-is. Every missing month up to ``n_months`` is
    filled with ``anchor * (1 - churn) ** k`` rounded to 2 decimals, where
    ``anchor`` is the last observed value (``k`` = months after it) or, when
    there are no observed values, ``m0`` itself (``k`` = month index, so month 0
    equals ``m0``). Rounding is applied to each forecast value independently and
    never fed back into the compounding, which is what the notebook's expected
    series ``[0.4, 0.36, 0.32, 0.29, 0.26, 0.24, ...]`` requires.

    Args:
        m0: Starting value for month 0; only used when ``actual_values`` is empty.
        churn: Fraction lost each month; every forecast month is ``(1 - churn)``
            times the previous one.
        actual_values: Observed values for months 0..k. ``None`` (the default)
            means no observations. NOTE(review): values are assumed to be real
            numbers with no NaN gaps — callers should drop NaNs first.
        n_months: Desired total length of the returned series.

    Returns:
        A new list holding the observed values followed by the forecast. If there
        are already ``n_months`` or more observed values they are returned
        unchanged (observed data is never truncated). The input list is not
        mutated.
    """
    # Copy so the caller's list is never mutated; None stands in for "no data"
    # (avoids both a mutable default and the original type-object default).
    observed = list(actual_values) if actual_values is not None else []

    n_missing = n_months - len(observed)
    if n_missing <= 0:
        return observed

    retention = 1 - churn
    if observed:
        # Continue from the last real data point: the first forecast is one step after it.
        anchor, first_step = observed[-1], 1
    else:
        # No data at all: m0 itself is the month-0 value.
        anchor, first_step = m0, 0

    forecast = [
        round(anchor * retention ** step, 2)
        for step in range(first_step, first_step + n_missing)
    ]
    return observed + forecast

In [None]:
def apply_predictions_to_cohort_df(predictions_dict: dict, cohort_df: pd.DataFrame) -> pd.DataFrame:
    """Exercise 2 placeholder: extend a cohort table with predicted payment periods.

    Not implemented yet — the function currently returns ``None``.

    Args:
        predictions_dict: Prediction parameters; the example dict in this notebook
            carries the keys ``"m0"`` and ``"churn"``.
        cohort_df: Cohort-by-payment-period table.

    Returns:
        Nothing yet. NOTE(review): judging by the expected table in this notebook,
        the intended result keeps each cohort's observed values and fills later
        periods with the previous value times ``(1 - churn)`` — confirm before
        implementing. The number of periods to project is not part of this
        signature — TODO decide where it comes from.

    NOTE(review): call this with keyword arguments. The declared order puts
    ``predictions_dict`` first, unlike the cohort-first order of the other
    exercise functions, so positional calls are easy to get backwards.
    """
    return None

In [None]:
# Prediction parameters used to extend the exercise 1 cohort dataframe (best case).
EXERCISE_2_BEST_PREDICTIONS_DICT = dict(m0=0.4, churn=0.1)

# Other scenarios, kept for reference to show how the parameters differ per case:
#   EXERCISE_2_AVERAGE_PREDICTIONS_DICT = {"m0": 0.3, "churn": 0.2}
#   EXERCISE_2_WORST_PREDICTIONS_DICT = {"m0": 0.1, "churn": 0.3}

In [None]:
# Compare your results to these expected values
"""
                0      1      2      3       4       5
2020-01-01  200.0  100.0   90.0   81.0   72.90   65.61
2020-02-01  400.0  300.0  270.0  243.0  218.70  196.83
2020-04-01  500.0  450.0  405.0  364.5  328.05  295.25
"""
# With no actual values the series starts at m0 and decays by `churn` each month;
# every entry is rounded to 2 decimals.
EXPECTED_PREDICTED_SERIES = [0.4, 0.36, 0.32, 0.29, 0.26, 0.24, 0.21, 0.19, 0.17, 0.15, 0.14, 0.13]
assert apply_predictions_list(
    EXERCISE_2_BEST_PREDICTIONS_DICT["m0"],
    EXERCISE_2_BEST_PREDICTIONS_DICT["churn"],
    [],
    12,
) == EXPECTED_PREDICTED_SERIES


# Passed by keyword: the function is declared as (predictions_dict, cohort_df), so
# the previous positional call (cohort frame first) bound each argument to the
# wrong parameter.
apply_predictions_to_cohort_df(
    cohort_df=EXERCISE_1_COHORT_DF,
    predictions_dict=EXERCISE_2_BEST_PREDICTIONS_DICT,
)


# Exercise 3

In [None]:
def apply_threshold_to_cohort_df(cohort_df: pd.DataFrame, spend_df: pd.DataFrame, threshold: list[dict]) -> pd.DataFrame:
    """Exercise 3 placeholder: flag which cohort/payment-period cells meet the thresholds.

    Not implemented yet — the function currently returns ``None``.

    Args:
        cohort_df: Cohort-by-payment-period table of payment amounts.
        spend_df: Spend per cohort; the example frame in this notebook is indexed
            by ``cohort_start_at`` with a single ``spend`` column.
        threshold: List of threshold rules; the example rules carry the keys
            ``"payment_period_month"`` and ``"minimum_payment_percent"``.

    Returns:
        Nothing yet. NOTE(review): the expected output shown in this notebook is a
        boolean frame with the same shape as ``cohort_df``. Presumably a cell is
        True when no rule covers its period or when payment / spend is at least
        ``minimum_payment_percent``, and False otherwise (including missing
        payments) — TODO confirm against the exercise brief.
    """
    return None

In [None]:
# Despite the name this is a list of threshold rules (one dict per rule).
# The single rule here targets payment period 1 with a minimum of 25%.
EXAMPLE_THRESHOLD_DICT = [
    dict(payment_period_month=1, minimum_payment_percent=0.25),
]

In [None]:
""" The above should return a threshold_df that looks like this
                0     1
2020-01-01   True  False
2020-02-01   True  True
2020-04-01   True  False
"""
# Apply the single period-1 rule to the exercise 1 cohort table and spend table.
threshold_df = apply_threshold_to_cohort_df(EXERCISE_1_COHORT_DF, EXERCISE_SPEND_DF, EXAMPLE_THRESHOLD_DICT)
# Bare last expression so the notebook renders the result for comparison with
# the expected table in the string above.
threshold_df


# Exercise 4

In [None]:
def get_cvf_cashflows_df(cohort_df: pd.DataFrame, spend_df: pd.DataFrame, threshold: list[dict], trade_list: list[dict]) -> pd.DataFrame:
    """Exercise 4 placeholder: turn cohort payments into per-month cashflows.

    Not implemented yet — the function currently returns ``None``.

    Args:
        cohort_df: Cohort-by-payment-period table of payment amounts (actual or
            predicted).
        spend_df: Spend per cohort.
        threshold: List of threshold rules (dicts), as used in exercise 3.
        trade_list: List of trades, one dict per cohort. The example trades in this
            notebook carry the keys ``"cohort_start"``, ``"sharing_percentage"``
            and ``"cash_cap"``.

    Returns:
        Nothing yet. NOTE(review): the expected outputs shown in this notebook are
        frames indexed by cohort with one column per calendar month — confirm the
        exact layout before implementing.
    """
    # Intended steps (from the exercise brief):
    # Go through each trade in trade_list and apply the correct sharing_percentage per cohort
    # Apply the cash_cap to the cohort_df (cut off after the sum reaches the cash_cap)
    # Apply the payment_delay (add months to the payment_periods)
    # NOTE(review): none of the example trades define a payment_delay key —
    # TODO confirm where that value is meant to come from.
    return None

In [None]:
"""This represents the final output of the function when run only on the cohort_df.

            2020-01-01  2020-02-01  2020-03-01  2020-04-01  2020-05-01
# We have not reached the cash_cap, but we are collecting 100% payments in month 1
2020-01-01       160.0       100.0         0.0         0.0         0.0
# We have reached the cash_cap
2020-02-01         0.0       320.0        80.0         0.0         0.0
# We have not reached the cash_cap
2020-04-01         0.0         0.0         0.0       250.0         0.0
"""
# One trade per cohort: (cohort start, sharing percentage, cash cap).
EXAMPLE_TRADE_LIST_1 = [
    {"cohort_start": pd.Timestamp(start), "sharing_percentage": share, "cash_cap": cap}
    for start, share, cap in (
        ('2020-01-01 00:00:00', 0.8, 500),
        ('2020-02-01 00:00:00', 0.8, 400),
        ('2020-04-01 00:00:00', 0.5, 500),
    )
]



# Run on the actual (un-predicted) cohort table with the period-1 threshold rule
# and the first example trade list.
cvf_cashflows_df = get_cvf_cashflows_df(EXERCISE_1_COHORT_DF, EXERCISE_SPEND_DF, EXAMPLE_THRESHOLD_DICT, EXAMPLE_TRADE_LIST_1)
# Bare last expression so the notebook renders the result for comparison with
# the expected table in the string above.
cvf_cashflows_df


In [None]:

"""This represents the final output of the function when run on the best predicted cohort_df

            2020-01-01  2020-02-01  2020-03-01  2020-04-01  2020-05-01  \
# We do not hit our cash cap, even after collecting 100% payments in months 0 and 2
2020-01-01       200.0        80.0        90.0        64.8       58.32
# We hit our cash cap, after collecting 100% payments in month 2
2020-02-01         0.0       320.0       240.0       270.0      194.40
# We hit our cash cap, without collecting any payments at 100%
2020-04-01         0.0         0.0         0.0       250.0      225.00

            2020-06-01  2020-07-01  2020-08-01  2020-09-01  2020-10-01  \
2020-01-01       52.49       47.24       42.51       38.26       34.44
2020-02-01       75.60        0.00        0.00        0.00        0.00
2020-04-01      202.50      182.25      164.02       76.23        0.00

            2020-11-01  2020-12-01  2021-01-01  2021-02-01  2021-03-01
2020-01-01       30.99        27.9         0.0         0.0         0.0
2020-02-01        0.00         0.0         0.0         0.0         0.0
2020-04-01        0.00         0.0         0.0         0.0         0.0
"""
# Same cohorts and sharing percentages as the first trade list, but every trade
# has a cash cap of 1100.
EXAMPLE_TRADE_LIST_2 = [
    {"cohort_start": pd.Timestamp(start), "sharing_percentage": share, "cash_cap": 1100}
    for start, share in (
        ('2020-01-01 00:00:00', 0.8),
        ('2020-02-01 00:00:00', 0.8),
        ('2020-04-01 00:00:00', 0.5),
    )
]
# Two threshold rules, for payment periods 0 and 2, each with a 30% minimum.
EXAMPLE_THRESHOLD_DICT_2 = [
    {"payment_period_month": period, "minimum_payment_percent": 0.3}
    for period in (0, 2)
]


# Passed by keyword: apply_predictions_to_cohort_df is declared as
# (predictions_dict, cohort_df), so the previous positional call (cohort frame
# first) bound each argument to the wrong parameter.
predicted_cohort_df = apply_predictions_to_cohort_df(
    cohort_df=EXERCISE_1_COHORT_DF,
    predictions_dict=EXERCISE_2_BEST_PREDICTIONS_DICT,
)
predicted_cvf_cashflows_df = get_cvf_cashflows_df(predicted_cohort_df, EXERCISE_SPEND_DF, EXAMPLE_THRESHOLD_DICT_2, EXAMPLE_TRADE_LIST_2)
predicted_cvf_cashflows_df