# Churn target optimisation

In [None]:
from datetime import timedelta
import os
from pathlib import Path

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from src.modeling import run_modeling
from config.config import THRESHOLD

In [None]:
# Move the working directory one level up so the relative "data/..." paths
# used by later cells resolve from there.
# NOTE(review): this is not idempotent - running the cell again moves up one
# more directory.
os.chdir(os.pardir)

## Loading relationship quality

In [None]:
# Load the client relationship dataset (read relative to the working directory).
relationship_csv = "data/sales_client_relationship_dataset.csv"
df_add = pd.read_csv(relationship_csv)

In [None]:
# Show the column labels of the relationship dataset.
df_add.columns

In [None]:
# Count how many rows fall into each "quali_relation" category.
df_add["quali_relation"].value_counts()

## Loading model predictions

In [None]:
# Run the project's modeling routine; its three return values are unpacked as
# train, test and clf (clf is later scored with predict_proba on test).
# NOTE(review): folds=1 presumably means a single split - confirm in src.modeling.
train, test, clf = run_modeling(folds=1)

In [None]:
# predict and add probabilites to test
# Predict probabilities and add them to the test set.
# "client_id" and "churn" are removed before scoring. A "preds" column left
# over from an earlier run of this cell is removed as well, so re-executing
# the cell does not pass the previous predictions to the model as a feature.
features = test.drop(columns=["client_id", "churn"]).drop(
    columns="preds", errors="ignore"
)
# Column 1 of predict_proba is kept as the score.
# NOTE(review): presumably the positive (churn) class - confirm via clf.classes_.
preds = clf.predict_proba(features)[:, 1]
test["preds"] = preds

## Churn Distributions

In [None]:
# Attach the relationship data to every scored test row; the left join keeps
# test rows that have no match in df_add.
opt_df = pd.merge(test, df_add, how="left", on="client_id")

In [None]:
# Load the semicolon-separated transactions and parse the order date column.
df = pd.read_csv("data/transactions_dataset.csv", sep=";").assign(
    date_order=lambda frame: pd.to_datetime(frame["date_order"])
)

In [None]:
# Cut-off date: THRESHOLD days before the most recent order in the data.
latest_order = df["date_order"].max()
test_stamp = latest_order - timedelta(days=THRESHOLD)
# Keep only transactions strictly before the cut-off.
df = df.loc[df["date_order"] < test_stamp]

In [None]:
# Restrict the transactions to clients present in opt_df.
scored_client_ids = opt_df["client_id"].unique()
df = df.loc[df["client_id"].isin(scored_client_ids)]

In [None]:
# Start of the trailing 365-day window, measured from the latest remaining order.
year_cap = df["date_order"].max() - timedelta(days=365)

In [None]:
# Days between each client's first order and the latest order in df,
# capped at 365 and with 0 replaced by 1 (the value is used as a divisor later).
first_order = df.groupby("client_id")["date_order"].min()
tenure_days = (df["date_order"].max() - first_order).dt.days
tenure_days = tenure_days.clip(upper=365)
tenure_days = tenure_days.mask(tenure_days == 0, 1)
time_since_client = tenure_days.rename("nr_days_client").reset_index()

In [None]:
# Keep only the transactions on or after year_cap.
df = df.loc[df["date_order"] >= year_cap]

In [None]:
# Total net sales per client over the remaining (last-year) transactions.
sales_value = (
    df.groupby("client_id")["sales_net"]
    .sum()
    .rename("sales_last_year")
    .reset_index()
)

In [None]:
# Add last-year sales (left join: clients without sales keep a missing value)
# and the capped tenure in days (default inner join: clients without a tenure
# row are dropped).
opt_df = opt_df.merge(sales_value, on="client_id", how="left").merge(
    time_since_client, on="client_id"
)

In [None]:
# Potential revenue = average daily sales over the client's (capped) tenure,
# scaled by 365 / (avg_time_purchase + 1); the +1 keeps the divisor non-zero
# when avg_time_purchase is 0.
daily_sales = opt_df["sales_last_year"] / opt_df["nr_days_client"]
purchase_scale = 365 / (opt_df["avg_time_purchase"] + 1)
opt_df["potential_revenue"] = daily_sales * purchase_scale

In [None]:
# Predicted churn probability vs. potential revenue for all clients,
# coloured by relationship quality.
ax = plt.figure(figsize=(10, 5)).add_subplot()
sns.scatterplot(
    data=opt_df,
    x="preds",
    y="potential_revenue",
    hue="quali_relation",
    ax=ax,
)

In [None]:
# Same scatter plot, restricted to clients with a predicted probability above 0.2.
high_risk = opt_df.loc[opt_df["preds"] > 0.2]
ax = plt.figure(figsize=(10, 5)).add_subplot()
sns.scatterplot(
    data=high_risk,
    x="preds",
    y="potential_revenue",
    hue="quali_relation",
    ax=ax,
)

In [None]:
# Potential revenue per relationship quality for clients with a predicted
# probability above 0.2, coloured by that probability.
high_risk = opt_df.loc[opt_df["preds"] > 0.2]
ax = plt.figure(figsize=(10, 5)).add_subplot()
sns.scatterplot(
    data=high_risk,
    x="quali_relation",
    y="potential_revenue",
    hue="preds",
    ax=ax,
)

## Churn Optimisation

In [None]:
# 80th percentile of potential revenue within each relationship quality.
opt_df.groupby("quali_relation")["potential_revenue"].quantile(0.8)

In [None]:
# assign cost based on quantile and quality of relation
# Assign an intervention cost from relationship quality and a revenue cut-off.
# NOTE(review): the cut-offs presumably come from the per-group quantiles - confirm.
# Rows matching no rule (e.g. "Difficult client" below 9800, or rows with a
# missing relation/revenue) get NaN, which marks them as not targeted.
relation = opt_df["quali_relation"]
revenue = opt_df["potential_revenue"]
cost_rules = [
    ((relation == "Agreeable client") & (revenue < 10000), 25),
    ((relation == "Agreeable client") & (revenue >= 10000), 100),
    ((relation == "Demanding client") & (revenue < 9500), 100),
    ((relation == "Demanding client") & (revenue >= 9500), 250),
    ((relation == "Difficult client") & (revenue >= 9800), 400),
]
# The rules are mutually exclusive, so at most one applies per row.
opt_df["cost_intervention"] = np.select(
    [rule for rule, _ in cost_rules],
    [cost for _, cost in cost_rules],
    default=np.nan,
)

In [None]:
# assign client rank based on quantile and quality of relation
# Assign a client rank label using the same relation/revenue rules as the
# intervention cost; rows matching no rule get an empty string.
relation = opt_df["quali_relation"]
revenue = opt_df["potential_revenue"]
rank_rules = [
    ((relation == "Agreeable client") & (revenue < 10000), "Low Hanging Fruits"),
    ((relation == "Agreeable client") & (revenue >= 10000), "Prime"),
    ((relation == "Demanding client") & (revenue < 9500), "Nice to Have"),
    ((relation == "Demanding client") & (revenue >= 9500), "Valuable"),
    ((relation == "Difficult client") & (revenue >= 9800), "High Efforts & Return"),
]
# The rules are mutually exclusive, so at most one applies per row.
opt_df["client_rank"] = np.select(
    [rule for rule, _ in rank_rules],
    [label for _, label in rank_rules],
    default="",
)

In [None]:
# remove the not targeted group
# Keep only the targeted clients, i.e. rows that received an intervention cost.
focus_df = opt_df.dropna(subset=["cost_intervention"]).copy()

# Overwrite potential revenue with 0.5 * predicted probability * potential revenue.
# NOTE(review): 0.5 looks like an assumed intervention success rate - confirm.
weighted_revenue = focus_df["preds"].mul(0.5).mul(focus_df["potential_revenue"])
focus_df["potential_revenue"] = weighted_revenue

# Expected value = weighted revenue minus (cost / 0.78).
# NOTE(review): the meaning of the 0.78 divisor is not documented here - confirm.
scaled_cost = focus_df["cost_intervention"] / 0.78
focus_df["expected_value"] = focus_df["potential_revenue"] - scaled_cost

# Expected value obtained per unit of intervention cost.
focus_df["return_per_cost"] = focus_df["expected_value"].div(
    focus_df["cost_intervention"]
)

In [None]:
# sort by maximal return over cost to sum expected value of most optimal efforts first
# Order clients by return per cost (best first) and accumulate cost, expected
# value and return per cost along that ordering.
focus_df = focus_df.sort_values(by="return_per_cost", ascending=False).assign(
    cost_sum=lambda ranked: ranked["cost_intervention"].cumsum(),
    expected_value_sum=lambda ranked: ranked["expected_value"].cumsum(),
    return_per_cost_sum=lambda ranked: ranked["return_per_cost"].cumsum(),
)

In [None]:
# get list of clients we would care for with a sales budget of 100k
# Clients whose cumulative intervention cost stays below a 100k sales budget,
# and how they are distributed over the client ranks.
within_budget = focus_df["cost_sum"] < 100_000
contact_clients = focus_df.loc[within_budget]
contact_clients["client_rank"].value_counts()

In [None]:
# plot optimisation of expected value
# Plot cumulative expected value against cumulative cost (sales budget).
plt.figure(figsize=(15, 6))
sns.lineplot(
    x="cost_sum", y="expected_value_sum", data=focus_df, color="#29BA74"
)

# Vertical marker at the 100k budget, reaching up to the cumulative expected
# value of the last client that still fits below that budget.
curve_floor = focus_df["expected_value_sum"].min()
value_at_budget = focus_df.loc[
    focus_df["cost_sum"] < 100_000, "expected_value_sum"
].iloc[-1]
plt.vlines(x=100_000, ymin=curve_floor, ymax=value_at_budget, color="grey")

# Vertical marker at the budget where the cumulative return per cost peaks.
peak_mask = (
    focus_df["return_per_cost_sum"] == focus_df["return_per_cost_sum"].max()
)
plt.vlines(
    x=focus_df.loc[peak_mask, "cost_sum"],
    ymin=focus_df["expected_value_sum"].min(),
    ymax=focus_df["expected_value_sum"].max(),
    color="grey",
)

plt.xlabel("Sales budget")
plt.ylabel("Sum of expected value")

# Make sure the output directory exists, then save with a transparent background.
output_dir = Path("results")
output_dir.mkdir(parents=True, exist_ok=True)
plt.savefig(output_dir / "churn_optimisation.png", transparent=True)

In [None]:
# expeted value at 100k sales budget
# Expected value at a 100k sales budget: cumulative expected value of the last
# client whose cumulative cost is still below 100k.
focus_df.loc[focus_df["cost_sum"] < 100_000, "expected_value_sum"].iloc[-1]

In [None]:
# optimal expected value
# Highest cumulative expected value reached along the ranking.
focus_df["expected_value_sum"].max()

In [None]:
# sales budget at optimal expected value
# Cumulative cost (sales budget) at the row(s) where the cumulative expected
# value reaches its maximum.
at_optimum = (
    focus_df["expected_value_sum"] == focus_df["expected_value_sum"].max()
)
focus_df.loc[at_optimum, "cost_sum"]