In [11]:
# Load kedro environment (not needed in .py)
from pathlib import Path
from kedro.framework.context import load_context

# Load a context to be able to work in the notebook
# NOTE(review): the notebook directory is hard-coded to an absolute path, so this
# cell only works where that path exists; the commented-out Path.cwd() line below
# is the portable alternative.
#current_dir = Path.cwd()
current_dir = Path("/u01/share/cesar/aa_engine_uy/notebooks/")
# The kedro project root is taken to be the parent of the notebooks directory.
proj_path = current_dir.parent
context = load_context(proj_path)
catalog = context.catalog
# Configuration entries matched by glob pattern through the kedro config loader.
# NOTE(review): `credentials` is not referenced by any cell visible in this notebook.
credentials = context.config_loader.get("credentials*","credentials*/**")
parameters = context.config_loader.get("parameters*","parameters*/**")

# NOTE(review): later cells use pd, np, os, dt, timedelta, initialize_logger,
# get_previous_month, vars_to_string, ... without importing them — presumably they
# all arrive through this star import; confirm and prefer explicit imports.
from aa_engine_pkg.assets.utils import *
from aa_engine_pkg.assets.core.data.kedro.catalog_expansion.partitioned_sql import SQLPartitionedDataSet

In [13]:
# Datasets declared in the kedro catalog (each one is logged as a SQLPartitionedDataSet).
arpu = catalog.load("arpu_quality")
prod_basico = catalog.load("eop")
campanas = catalog.load("campanas")
cliente_activo = catalog.load("cliente_activo")

# Reference date of the analysis, written as yyyymmdd.
date = "20210405"

2021-05-21 15:03:28,776 - kedro.io.data_catalog - INFO - Loading data from `arpu_quality` (SQLPartitionedDataSet)...
2021-05-21 15:03:28,778 - kedro.io.data_catalog - INFO - Loading data from `eop` (SQLPartitionedDataSet)...
2021-05-21 15:03:28,779 - kedro.io.data_catalog - INFO - Loading data from `campanas` (SQLPartitionedDataSet)...
2021-05-21 15:03:28,780 - kedro.io.data_catalog - INFO - Loading data from `cliente_activo` (SQLPartitionedDataSet)...


In [14]:
# Customers returned by the cliente_activo dataset for the month before `date`.
# NOTE(review): a create_cliente_activo definition appears further down in this
# notebook (cell In [4]); on a fresh kernel that cell has to run before this one,
# unless the star import already provides the function — confirm.
cliente_activo_df= create_cliente_activo(cliente_activo,date)

2021-05-21 15:03:31,706 - aa_engine_pkg.assets.utils.utilities - INFO - Creating cliente_activo...
select distinct CUSTOMER_ID from stg_uy_eop_customer where PRC_TIPO_ID = 3 and DATE_EXP = 202103


- 6003 - RET - IN - $350 x 6 meses	1017	1%
- 493-RET- BONIF BASICO PROP S/CARGO	831	1%
- 6004 - RET - IN - $350 x 12 meses	820	1%
- 5995 - RET - IN - 670 x 9 meses	665	0%
- 7321 - RET IN - ADICIONALES - $91 x 12 Meses	651	0%

In [15]:
# Ids of the retention offers under analysis. The tuple is interpolated as-is into the
# SQL "ID in (...)" clause of the campanas query, so it must hold at least two ids
# (a one-element tuple would render as "(x,)").
offre_id = (6003, 493, 6004, 5995, 7321)
offre_id

(6003, 493, 6004, 5995, 7321)

In [16]:
# Initialize logger
log = initialize_logger()

# Periods to load: the last `look_back_months` periods relative to `date`
look_back_months = 3
periods_to_load = get_last_k_periods(date, look_back_months)
start_date = periods_to_load[-1]
periods_to_load = tuple(periods_to_load)
period_to_load = get_previous_month(date)

# Get arpu_quality table for every period in the look-back window
log.info("Loading arpu")
df_arpu = arpu.filter_by_period(date=periods_to_load).drop_duplicates()
log.info(f"Read arpu {df_arpu.shape[0]} clientes")

# Get eop table for the month before `date`
date_exp = get_previous_month(date)
# Fix: this message was missing the f-prefix, so the log showed the literal "{date_exp}".
log.info(f"Loading eop for {date_exp}")
df_basico = prod_basico.filter_by_period(date=date_exp).drop_duplicates()
log.info(f"Read eop {df_basico.shape[0]} clientes")

# Get the campaigns of the selected offers that are in force on `date`
# (START_DATE <= date < END_DATE).
# NOTE(review): `date` and `offre_id` are interpolated directly into the SQL text; that is
# fine for the notebook constants used here but must not be fed with untrusted values.
log.info("Loading campanas...")
campanas_query = (
    f"select CUSTOMER_ID,ID, DESCRIPTION, START_DATE,END_DATE from stg_uy_campana "
    f"where START_DATE<=to_date({date}, 'yyyymmdd') "
    f"AND END_DATE>to_date({date}, 'yyyymmdd') "
    f"AND ID in {offre_id}"
)
df_campanas = campanas.filter_by_query(query=campanas_query).drop_duplicates()
log.info(f"Read campanas {df_campanas.shape[0]} clientes")

2021-05-21 15:06:12,832 - aa_engine_pkg.assets.utils.utilities - INFO - Loading arpu
select * from stg_uy_arpu_quality where CHARGES_YYYYMM in ('202104', '202103', '202102')
2021-05-21 15:07:08,266 - aa_engine_pkg.assets.utils.utilities - INFO - Read arpu 348414 clientes
2021-05-21 15:07:08,269 - aa_engine_pkg.assets.utils.utilities - INFO - Loading eop for {date_exp}
select DATE_EXP, CUSTOMER_ID, PRC_CODIGO, PRODUCTO, PRC_TIPO_ID, TEC_ID, MOP, TENURE from stg_uy_eop_customer where DATE_EXP = 202103
2021-05-21 15:07:44,068 - aa_engine_pkg.assets.utils.utilities - INFO - Read eop 393146 clientes
2021-05-21 15:07:44,070 - aa_engine_pkg.assets.utils.utilities - INFO - Loading campanas...
2021-05-21 15:07:45,270 - aa_engine_pkg.assets.utils.utilities - INFO - Read campanas 7227 clientes


In [17]:
# Mean ARPU per customer across the loaded periods: one row per CUSTOMER_ID.
df_arpu_agrupado = (
    df_arpu
    .groupby("CUSTOMER_ID")["ARPU"]
    .mean()
    .rename("ARPU_MEAN")
    .reset_index()
)

In [18]:
# Keep eop rows with PRC_TIPO_ID == 3 whose customer is in the active-customer list,
# then attach the mean ARPU (left join, so customers without ARPU are kept).
is_tipo_3 = df_basico.PRC_TIPO_ID == 3
is_active_customer = df_basico.CUSTOMER_ID.isin(cliente_activo_df.CUSTOMER_ID)
df_prd_arpu = (
    df_basico.loc[is_tipo_3 & is_active_customer]
    .merge(df_arpu_agrupado[["CUSTOMER_ID", "ARPU_MEAN"]], on="CUSTOMER_ID", how="left")
    .drop_duplicates()
)

In [19]:
# Number of rows per CUSTOMER_ID; any count above 1 would reveal a duplicated customer.
df_prd_arpu.CUSTOMER_ID.value_counts()

56102910    1
56053892    1
879366      1
53330850    1
854794      1
           ..
220990      1
52258374    1
821061      1
56139590    1
56098816    1
Name: CUSTOMER_ID, Length: 115470, dtype: int64

In [20]:
# One row per customer and one 0/1 column per offer description, plus the offer count.
df_campanas["val"] = 1
df_offers = df_campanas.pivot_table(
    index="CUSTOMER_ID",
    columns="DESCRIPTION",
    values="val",
).fillna(0)
df_offers["n_offers"] = df_offers.sum(axis=1)
df_offers = df_offers.reset_index()

In [21]:
# Attach the offer flags to every customer; customers without offers get 0 everywhere.
analysis_columns = ["DATE_EXP", "CUSTOMER_ID", "PRODUCTO", "ARPU_MEAN"]
data = (
    df_prd_arpu[analysis_columns]
    .merge(df_offers, on="CUSTOMER_ID", how="left")
    .fillna(0)
)

In [22]:
# Preview the first rows of the analysis table.
data.head()

Unnamed: 0,DATE_EXP,CUSTOMER_ID,PRODUCTO,ARPU_MEAN,493-RET- BONIF BASICO PROP S/CARGO,5995 - RET - IN - 670 x 9 meses,6003 - RET - IN - $350 x 6 meses,6004 - RET - IN - $350 x 12 meses,7321 - RET IN - ADICIONALES - $91 x 12 Meses,n_offers
0,202103,1842700,DIRECTV ORO MIX,2662.003333,0.0,0.0,0.0,0.0,0.0,0.0
1,202103,1843500,DIRECTV PLATA,2048.36,0.0,0.0,0.0,0.0,0.0,0.0
2,202103,1845000,DIRECTV PLATA,1394.523333,0.0,0.0,0.0,0.0,0.0,0.0
3,202103,1846300,DIRECTV ORO MIX,1722.52,0.0,0.0,0.0,0.0,0.0,0.0
4,202103,1847600,DIRECTV ORO MIX,1477.44,0.0,0.0,0.0,0.0,0.0,0.0


In [123]:
# Export the analysis table as a semicolon-separated CSV with decimal commas.
# NOTE(review): the destination is an absolute path on a shared host.
output_path = f"/u01/share/cesar/arpu_analisi_offer_uy/analisis_{date}.csv"
data.to_csv(output_path, sep=";", decimal=",")

In [4]:
def create_cliente_activo(cliente_activo: SQLPartitionedDataSet,
                          date: str) -> pd.DataFrame:
    """Load the active customers for the month that precedes ``date``.

    Parameters
    ----------
    cliente_activo:
        dataset defined in ``catalog.yml`` - list of active customers at EoP for the given period
    date:
        reference date; the period that is loaded is ``get_previous_month(date)``

    Returns
    -------
        Whatever ``cliente_activo.filter_by`` returns for that period (annotated as a
        pandas dataframe)
    """
    logger = initialize_logger()
    logger.info("Creating cliente_activo...")

    # The dataset is filtered by the previous month, not by `date` itself
    return cliente_activo.filter_by(date=get_previous_month(date))

In [None]:
# Same call as in cell In [14]: rebuilds the active-customer frame for `date`.
cliente_activo_df= create_cliente_activo(cliente_activo,date)

In [None]:
# Initialize logger
log = initialize_logger()

# Settings of the xsell target, read from the project parameters
table_name = "target_xsell"
write_to_parquet = parameters["write_to_parquet"]
target_parameters = parameters["targets"][table_name]
overwrite = target_parameters["overwrite"]
end_date = str(parameters["end_date"])

# Longest configured window (any setting whose name ends in "window") added to `date`,
# and the Sunday before today, used to decide whether the target can be created
max_window = max(value for name, value in target_parameters.items() if name.endswith("window"))
upper_bound = (pd.to_datetime(date) + timedelta(days=max_window)).strftime("%Y%m%d")
previous_sunday = dt.today() - timedelta(days=dt.today().weekday() + 1)

In [None]:
# Files already written for this target whose name contains the processed date
path = f"{parameters['paths']['target_path']}{table_name}/"
os.makedirs(path, exist_ok=True)
processed_dates = os.listdir(path)
match = [file_name for file_name in processed_dates if str(date) in file_name]

In [None]:
# Event window starts on `date`; activations are read for `calculation_window` days and
# cancelations for `activation_window` days.
window_settings = parameters["targets"][table_name]
reference_day = pd.to_datetime(date)

start_date = date
end_date = (reference_day + timedelta(days=window_settings["calculation_window"])).strftime("%Y%m%d")
cancel_end_date = (reference_day + timedelta(days=window_settings["activation_window"])).strftime("%Y%m%d")

In [None]:
# Show the three window boundaries (all formatted as yyyymmdd strings).
print(start_date,end_date,cancel_end_date)

In [None]:
# Load data for required period
df_activaciones = activaciones_premium.filter_by(date=[start_date,
                                                       end_date], target=True)
log.info(f"Read {df_activaciones.shape[0]} activations")
df_reconexiones = reconexiones_basicos.filter_by(date=[start_date,
                                                       end_date], target=True)
log.info(f"Read {df_reconexiones.shape[0]} reconnections")
df_cancelaciones = cancelaciones_premium.filter_by(date=[start_date,
                                                         cancel_end_date], target=True)
log.info(f"Read {df_cancelaciones.shape[0]} cancelations")

# get EoP active clients from previous period to exclude new clients
prev_period = get_previous_month(start_date)
df_clientes = cliente_activo_df

In [None]:
log.info(f"Read {df_clientes.shape[0]} clients")

# Cast the `vars_to_string` columns of the three event frames to str
for events_frame in (df_activaciones, df_reconexiones, df_cancelaciones):
    events_frame[vars_to_string] = events_frame[vars_to_string].astype(str)

In [None]:
# Preview the first rows of the activations frame.
df_activaciones.head()

In [None]:
# Preview the first rows of the cancelations frame.
df_cancelaciones.head()

In [None]:
# Preview the first rows of the reconnections frame.
df_reconexiones.head()

In [None]:
# Drop the time of day from FECHA so events are compared at day granularity
for events_frame in (df_activaciones, df_reconexiones):
    events_frame["FECHA"] = events_frame["FECHA"].dt.normalize()

# Cancelation dates are only normalized when the cancelation window ends after end_date
cancel_window_is_longer = pd.to_datetime(cancel_end_date) > pd.to_datetime(end_date)
if cancel_window_is_longer:
    df_cancelaciones["FECHA"] = df_cancelaciones["FECHA"].dt.normalize()

In [None]:
# Preview the activations again, now with FECHA normalized to midnight.
df_activaciones.head()

In [None]:
# 1. Calculate premium product activations in current period
# merge and keep outer join
cp_xsells_multi = pd.merge(df_activaciones,
                           df_reconexiones,
                           on=vars_to_merge,
                           how="left"
                           )


In [None]:
# Preview the activations joined with reconnections.
cp_xsells_multi.head()

In [None]:
# keep only customer that are not in both
cp_xsells_multi["FLAG_ACTIVATION_PREMIUM"] = np.where(cp_xsells_multi["DATE_EXP_y"].isna(), 1, 0)
cp_xsells_multi = cp_xsells_multi[cp_xsells_multi["FLAG_ACTIVATION_PREMIUM"] == 1]

In [None]:
# Preview the rows kept after filtering on FLAG_ACTIVATION_PREMIUM == 1.
cp_xsells_multi.head()

In [None]:
# Clean up the merge suffixes through the project helper.
# NOTE(review): judging by its argument names it drops the "_y" columns and strips "_x"
# from the remaining ones — confirm against the helper's implementation.
merge_suffix_options = dict(suffix_extra="_y", suffix_remaining="_x", suffix_new_name="")
cp_xsells_multi = drop_extra_rename_remaining(cp_xsells_multi, **merge_suffix_options)

In [None]:
# Preview the frame after the suffix clean-up.
cp_xsells_multi.head()

In [None]:
# Shows which branch the cancelation check takes: True means cancelations are compared
# against the activation window, False means every remaining activation is a positive.
pd.to_datetime(cancel_end_date) > pd.to_datetime(end_date)

In [None]:
# keep only last event of xsell in period of interest
df_cp_xsells = cp_xsells_multi.sort_values(["CUSTOMER_ID", "PRODUCTO_ID", "FECHA"]
                                           ).drop_duplicates(subset=["CUSTOMER_ID", "PRODUCTO_ID"],
                                                             keep="last")


In [None]:
# keep only last event of xsell in period of interest
df_cp_xsells = cp_xsells_multi.sort_values(["CUSTOMER_ID", "PRODUCTO_ID", "FECHA"]
                                           ).drop_duplicates(subset=["CUSTOMER_ID", "PRODUCTO_ID"],
                                                             keep="last")
if pd.to_datetime(cancel_end_date) > pd.to_datetime(end_date):
    df_cp_xsells_cancels = pd.merge(df_cp_xsells,
                                    df_cancelaciones,
                                    on=["CUSTOMER_ID", "PRODUCTO_ID"],
                                    how="left",
                                    validate="1:m"
                                    )

    # check time difference between xsell and product cancelation
    df_cp_xsells_cancels["FECHA_DIFF"] = (df_cp_xsells_cancels["FECHA_y"] - df_cp_xsells_cancels[
        "FECHA_x"]) / np.timedelta64(1, "D")
    df_cp_xsells_cancels = drop_extra_rename_remaining(df_cp_xsells_cancels,
                                                       suffix_extra="_y",
                                                       suffix_remaining="_x",
                                                       suffix_new_name=""
                                                       )
    mask_cancels_before_buying = (df_cp_xsells_cancels["FECHA_DIFF"] < 0)
    mask_cancels_before_activation_window = (df_cp_xsells_cancels["FECHA_DIFF"] >= 0) & \
                                            (df_cp_xsells_cancels["FECHA_DIFF"] <=
                                             parameters["targets"]["target_xsell"]["activation_window"])
    df_cp_xsells_cancels["TARGET"] = np.where(
        mask_cancels_before_buying | mask_cancels_before_activation_window, 0, 1)
else:
    df_cp_xsells_cancels = df_cp_xsells.copy()
    df_cp_xsells_cancels["TARGET"] = np.where(df_cp_xsells_cancels["FLAG_ACTIVATION_PREMIUM"] == 1, 1, 0)

In [None]:
# Preview the xsell events together with their TARGET flag.
df_cp_xsells_cancels.head()

In [None]:
# group target products into super category (fox, hbo, adultos) to create target variable
cp_xsells_final = df_cp_xsells_cancels.loc[df_cp_xsells_cancels["TARGET"] == 1, \
                                           ["CUSTOMER_ID", "PRODUCTO_ID", "PRODUCTO", "TARGET", "FECHA"]]
condlist = [cp_xsells_final["PRODUCTO_ID"].isin(parameters["targets"][table_name]["xsell_products"]["fox"]),
            cp_xsells_final["PRODUCTO_ID"].isin(parameters["targets"][table_name]["xsell_products"]["hbo"]),
            cp_xsells_final["PRODUCTO_ID"].isin(parameters["targets"][table_name]["xsell_products"]["adultos"])]

# Impute product
choicelist = ["FOX", "HBO", "ADULTOS"]
cp_xsells_final["TARGET_PRODUCT"] = np.select(condlist, choicelist, default="error")

agg_dict = {"TARGET": "max",
            "FECHA": "max",
            "PRODUCTO_ID": "max"}
cp_xsells_final = cp_xsells_final.groupby(["CUSTOMER_ID", "TARGET_PRODUCT"]).agg(agg_dict).reset_index()

target = pd.merge(df_clientes,
                  cp_xsells_final,
                  on="CUSTOMER_ID",
                  how="outer",
                  validate="1:m",
                  indicator=True)

target["DATE_EXP"] = prev_period
target["TARGET"].fillna(0, inplace=True)
target["TARGET_PRODUCT"].fillna("NO_COMPRA", inplace=True)
target["PRODUCTO_ID"].fillna("NO_COMPRA", inplace=True)
target["TARGET"] = target["TARGET"].astype(np.int32)
target.rename({"FECHA": "FECHA_TARGET"}, inplace=True)
target["DATE_CALC"] = date

In [None]:
# Preview the first rows of the target table.
target.head()

In [None]:
def create_target_xsell(activaciones_premium: SQLPartitionedDataSet,
                        reconexiones_basicos: SQLPartitionedDataSet,
                        cancelaciones_premium: SQLPartitionedDataSet,
                        cliente_activo: pd.DataFrame,
                        parameters: Dict,
                        date: str) -> pd.DataFrame:
    """Build the target variable for the xsell model of Premium products.

    Products in scope (grouped through ``parameters["targets"]["target_xsell"]["xsell_products"]``):
        - FOX
        - HBO
        - Pack Adultos

    Target definition:
        - Existing customer acquires a Premium product
        - The product is not canceled before the purchase date nor within
          ``activation_window`` days after it

    Target methodology
        1. Load premium activations and basic-product reconnections between ``date`` and
           ``date + calculation_window`` days, and premium cancelations between ``date`` and
           ``date + activation_window`` days
        2. Left-join activations with reconnections on ``vars_to_merge`` and discard the
           activations that have a matching reconnection
        3. Keep the latest event per customer and product
        4. When the cancelation window ends after the calculation window, set the target to 0
           for events canceled before the purchase or inside the activation window
        5. Map product ids to FOX / HBO / ADULTOS and aggregate per customer and category
        6. Outer-join with the active customers; customers without purchase get TARGET 0 and
           TARGET_PRODUCT / PRODUCTO_ID "NO_COMPRA"

    Parameters
    ----------
    activaciones_premium:
        dataset defined in ``catalog_raw.yml`` with raw data information related to new activations of the set of
        Premium products defined above
    reconexiones_basicos:
        dataset defined in ``catalog_raw.yml`` with raw data information related to reconnections of programming
        services after a disconnection due to payment defaults
    cancelaciones_premium:
        dataset defined in ``catalog_raw.yml`` with raw data information related to product cancelations of the set of
        Premium products defined above
    cliente_activo:
        pandas dataframe with active customers for period
    parameters:
        set of project parameters defined in ``parameters.yml``
    date:
        period to process

    Returns
    -------
    pd.DataFrame
        pandas dataframe with xsell target for period. ``None`` is returned instead when
        ``date`` plus the longest configured window is later than the previous Sunday, i.e.
        there is not enough future information yet.
    """

    # Initialize logger
    log = initialize_logger()

    table_name = "target_xsell"
    write_to_parquet = parameters["write_to_parquet"]
    target_parameters = parameters["targets"][table_name]
    overwrite = target_parameters["overwrite"]

    # Check if target can be created: date + longest "*window" setting must not be later
    # than the Sunday before today
    max_window = max(value for key, value in target_parameters.items() if key.endswith("window"))
    upper_bound = (pd.to_datetime(date) + timedelta(days=max_window)).strftime("%Y%m%d")
    today = dt.today()
    previous_sunday = today - timedelta(days=today.weekday() + 1)

    if pd.to_datetime(upper_bound, format="%Y%m%d") > previous_sunday:
        log.info(f"Cannot create xsell target for {date}: Not enough future information")
        return None

    # Compare with what is already processed
    path = f"{parameters['paths']['target_path']}{table_name}/"
    os.makedirs(path, exist_ok=True)
    processed_dates = os.listdir(path)
    match = [file for file in processed_dates if str(date) in file]

    if len(match) > 0 and overwrite is False:
        # If table is found, read parquet:
        log.info(f"Reading {match[0]} table")
        target = pd.read_parquet(path + match[0], engine="pyarrow")

    else:
        start_date = date
        end_date = (pd.to_datetime(date) + timedelta(
            days=target_parameters["calculation_window"])).strftime("%Y%m%d")
        cancel_end_date = (pd.to_datetime(date) + timedelta(
            days=target_parameters["activation_window"])).strftime("%Y%m%d")
        # Cancelations only need to be inspected when their window ends after end_date
        check_cancelations = pd.to_datetime(cancel_end_date) > pd.to_datetime(end_date)

        # Load data for required period
        df_activaciones = activaciones_premium.filter_by(date=[start_date,
                                                               end_date], target=True)
        log.info(f"Read {df_activaciones.shape[0]} activations")
        df_reconexiones = reconexiones_basicos.filter_by(date=[start_date,
                                                               end_date], target=True)
        log.info(f"Read {df_reconexiones.shape[0]} reconnections")
        df_cancelaciones = cancelaciones_premium.filter_by(date=[start_date,
                                                                 cancel_end_date], target=True)
        log.info(f"Read {df_cancelaciones.shape[0]} cancelations")

        # get EoP active clients from previous period to exclude new clients
        prev_period = get_previous_month(start_date)
        df_clientes = cliente_activo

        log.info(f"Read {df_clientes.shape[0]} clients")

        df_activaciones[vars_to_string] = df_activaciones[vars_to_string].astype(str)
        df_reconexiones[vars_to_string] = df_reconexiones[vars_to_string].astype(str)
        df_cancelaciones[vars_to_string] = df_cancelaciones[vars_to_string].astype(str)

        # Drop the time of day so events are compared at day granularity
        df_activaciones["FECHA"] = df_activaciones["FECHA"].dt.normalize()
        df_reconexiones["FECHA"] = df_reconexiones["FECHA"].dt.normalize()

        if check_cancelations:
            df_cancelaciones["FECHA"] = df_cancelaciones["FECHA"].dt.normalize()

        # 1. Calculate premium product activations in current period
        # Left join: every activation is kept, reconnection columns (suffix _y) are only
        # filled where a reconnection matches on `vars_to_merge`
        cp_xsells_multi = pd.merge(df_activaciones,
                                   df_reconexiones,
                                   on=vars_to_merge,
                                   how="left"
                                   )

        # keep only activations without a matching reconnection
        cp_xsells_multi["FLAG_ACTIVATION_PREMIUM"] = np.where(cp_xsells_multi["DATE_EXP_y"].isna(), 1, 0)
        cp_xsells_multi = cp_xsells_multi[cp_xsells_multi["FLAG_ACTIVATION_PREMIUM"] == 1]
        cp_xsells_multi = drop_extra_rename_remaining(cp_xsells_multi,
                                                      suffix_extra="_y",
                                                      suffix_remaining="_x",
                                                      suffix_new_name=""
                                                      )

        # keep only last event of xsell in period of interest
        df_cp_xsells = cp_xsells_multi.sort_values(["CUSTOMER_ID", "PRODUCTO_ID", "FECHA"]
                                                   ).drop_duplicates(subset=["CUSTOMER_ID", "PRODUCTO_ID"],
                                                                     keep="last")
        if check_cancelations:
            df_cp_xsells_cancels = pd.merge(df_cp_xsells,
                                            df_cancelaciones,
                                            on=["CUSTOMER_ID", "PRODUCTO_ID"],
                                            how="left",
                                            validate="1:m"
                                            )

            # check time difference (in days) between xsell and product cancelation;
            # NaN when the product was never canceled
            df_cp_xsells_cancels["FECHA_DIFF"] = (df_cp_xsells_cancels["FECHA_y"] - df_cp_xsells_cancels[
                "FECHA_x"]) / np.timedelta64(1, "D")
            df_cp_xsells_cancels = drop_extra_rename_remaining(df_cp_xsells_cancels,
                                                               suffix_extra="_y",
                                                               suffix_remaining="_x",
                                                               suffix_new_name=""
                                                               )
            mask_cancels_before_buying = (df_cp_xsells_cancels["FECHA_DIFF"] < 0)
            mask_cancels_before_activation_window = (df_cp_xsells_cancels["FECHA_DIFF"] >= 0) & \
                                                    (df_cp_xsells_cancels["FECHA_DIFF"] <=
                                                     target_parameters["activation_window"])
            # rows without cancelation fail both masks and therefore get TARGET = 1
            df_cp_xsells_cancels["TARGET"] = np.where(
                mask_cancels_before_buying | mask_cancels_before_activation_window, 0, 1)
        else:
            df_cp_xsells_cancels = df_cp_xsells.copy()
            df_cp_xsells_cancels["TARGET"] = np.where(df_cp_xsells_cancels["FLAG_ACTIVATION_PREMIUM"] == 1, 1, 0)

        # group target products into super category (fox, hbo, adultos) to create target variable
        xsell_products = target_parameters["xsell_products"]
        # .copy(): a column is added to this slice below, so work on an independent frame
        cp_xsells_final = df_cp_xsells_cancels.loc[
            df_cp_xsells_cancels["TARGET"] == 1,
            ["CUSTOMER_ID", "PRODUCTO_ID", "PRODUCTO", "TARGET", "FECHA"]].copy()
        condlist = [cp_xsells_final["PRODUCTO_ID"].isin(xsell_products["fox"]),
                    cp_xsells_final["PRODUCTO_ID"].isin(xsell_products["hbo"]),
                    cp_xsells_final["PRODUCTO_ID"].isin(xsell_products["adultos"])]

        # Impute product; ids that belong to none of the three lists are labelled "error"
        choicelist = ["FOX", "HBO", "ADULTOS"]
        cp_xsells_final["TARGET_PRODUCT"] = np.select(condlist, choicelist, default="error")

        agg_dict = {"TARGET": "max",
                    "FECHA": "max",
                    "PRODUCTO_ID": "max"}
        cp_xsells_final = cp_xsells_final.groupby(["CUSTOMER_ID", "TARGET_PRODUCT"]).agg(agg_dict).reset_index()

        # Outer join keeps customers without purchases as well as buyers missing from
        # df_clientes; the `_merge` indicator column records the origin of each row
        target = pd.merge(df_clientes,
                          cp_xsells_final,
                          on="CUSTOMER_ID",
                          how="outer",
                          validate="1:m",
                          indicator=True)

        target["DATE_EXP"] = prev_period
        # Assign the filled columns back instead of calling fillna(inplace=True) on a column
        # selection, which is not guaranteed to update `target` in recent pandas versions
        target["TARGET"] = target["TARGET"].fillna(0).astype(np.int32)
        target["TARGET_PRODUCT"] = target["TARGET_PRODUCT"].fillna("NO_COMPRA")
        target["PRODUCTO_ID"] = target["PRODUCTO_ID"].fillna("NO_COMPRA")
        # Fix: rename() without `columns=` maps index labels, so FECHA was never renamed
        target = target.rename(columns={"FECHA": "FECHA_TARGET"})
        target["DATE_CALC"] = date

        if write_to_parquet:
            file = f"{parameters['paths']['target_path']}{table_name}/{table_name}_{date}.parquet"
            target.to_parquet(file, engine="pyarrow")

        # Share of customers with at least one positive target; guarded so an empty
        # result does not fail on a division by zero after the file was written
        n_customers = target["CUSTOMER_ID"].nunique()
        n_buyers = target[target["TARGET"] == 1]["CUSTOMER_ID"].nunique()
        rate = np.round(100 * n_buyers / n_customers, 2) if n_customers else 0.0
        log.info(f"Exporting target for period {start_date} and rate {rate}%")

    return target