# Churn target optimisation

In [None]:
from datetime import timedelta
import os

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from src.modeling import run_modeling
from config.config import THRESHOLD

In [None]:
# Move from the notebook folder to its parent so the relative "data/..." paths
# used below resolve. Guarded on the presence of a "data" directory so that
# re-running this cell does not climb one level higher each time.
if not os.path.isdir("data"):
    os.chdir("..")

## Loading relationship quality

In [None]:
# Client relationship dataset; read with pandas' default CSV settings.
relationship_csv = "data/sales_client_relationship_dataset.csv"
df_add = pd.read_csv(relationship_csv)

In [None]:
# Display the column labels of the relationship dataset.
df_add.columns

In [None]:
# Frequency of each relationship-quality label (missing values are not
# counted, which is the value_counts default).
df_add.quali_relation.value_counts()

## Loading model predictions

In [None]:
# run_modeling is a project helper imported from src.modeling; its three
# return values are unpacked here. Further down, `test` is used as a DataFrame
# with `client_id` and `churn` columns and `clf` as a classifier exposing
# `predict_proba`.
# NOTE(review): folds=1 presumably requests a single train/test split —
# confirm against src.modeling.
train, test, clf = run_modeling(folds=1)

In [None]:
# Score the test set and store the probabilities on it.
# The identifier and the label column are dropped before scoring; column 1 of
# predict_proba is kept as the prediction.
# NOTE(review): assumes column 1 corresponds to the churn class — confirm
# against clf.classes_.
features = test.drop(columns=["client_id", "churn"])
preds = clf.predict_proba(features)[:, 1]
test["preds"] = preds

## Target optimisation

In [None]:
# Attach the relationship columns to every scored test row. A left join keeps
# all test rows, including clients absent from the relationship dataset.
opt_df = pd.merge(test, df_add, how="left", on="client_id")

In [None]:
# Load the semicolon-separated transactions file and convert the order date
# to datetimes so it can be compared against timestamps below.
df = pd.read_csv("data/transactions_dataset.csv", sep=";").assign(
    date_order=lambda frame: pd.to_datetime(frame["date_order"])
)

In [None]:
# Keep only transactions strictly older than THRESHOLD days before the most
# recent order in the file.
# NOTE(review): THRESHOLD comes from config.config and is presumably the churn
# horizon in days — confirm.
latest_order = df["date_order"].max()
test_stamp = latest_order - timedelta(days=THRESHOLD)
df = df.loc[df["date_order"] < test_stamp]

In [None]:
# Restrict the transactions to clients that appear in opt_df.
in_scope = df["client_id"].isin(opt_df["client_id"])
df = df.loc[in_scope]

In [None]:
# Keep the trailing 365 days of orders. The window is anchored on the latest
# order date still present in df after the filters above.
window_start = df["date_order"].max() - timedelta(days=365)
df = df.loc[df["date_order"] >= window_start]

In [None]:
# Total net sales per client over the retained window, as a two-column frame
# (client_id, sales_last_year).
sales_value = (
    df.groupby("client_id")["sales_net"]
    .sum()
    .reset_index()
    .rename(columns={"sales_net": "sales_last_year"})
)

In [None]:
# Add last-year sales to the optimisation frame. The left join leaves
# sales_last_year as NaN for clients with no order in the window.
opt_df = pd.merge(opt_df, sales_value, how="left", on="client_id")

In [None]:
# Predicted churn probability against last-year net sales, coloured by
# relationship quality.
_fig, ax = plt.subplots(figsize=(10, 5))
sns.scatterplot(
    x="preds",
    y="sales_last_year",
    hue="quali_relation",
    data=opt_df,
    ax=ax,
)

In [None]:
# Same data restricted to clients whose predicted churn probability exceeds
# 0.2: relationship quality against last-year net sales, coloured by the
# prediction.
at_risk = opt_df.loc[opt_df["preds"] > 0.2]
_fig, ax = plt.subplots(figsize=(10, 5))
sns.scatterplot(
    x="quali_relation",
    y="sales_last_year",
    hue="preds",
    data=at_risk,
    ax=ax,
)