In [105]:
# Load kedro environment (not needed in .py)
from pathlib import Path
from kedro.framework.context import load_context

# Load a context to be able to work in the notebook
#current_dir = Path.cwd()
current_dir = Path("/u01/share/cesar/aa_engine_uy/notebooks/")
proj_path = current_dir.parent
context = load_context(proj_path)
catalog = context.catalog
credentials = context.config_loader.get("credentials*","credentials*/**")
parameters = context.config_loader.get("parameters*","parameters*/**")
from aa_engine_pkg.assets.utils import *
from aa_engine_pkg.assets.core.data.kedro.catalog_expansion.partitioned_sql import SQLPartitionedDataSet

In [106]:
upgrades_basicos=catalog.load('upgrades_basicos')
eop=catalog.load("eop")#catalog.load("eop")
cliente_activo=catalog.load("cliente_activo")
agendas_basicos=catalog.load("agendas_basicos")

2021-03-22 15:19:05,687 - kedro.io.data_catalog - INFO - Loading data from `upgrades_basicos` (SQLPartitionedDataSet)...
2021-03-22 15:19:05,689 - kedro.io.data_catalog - INFO - Loading data from `eop` (SQLPartitionedDataSet)...
2021-03-22 15:19:05,690 - kedro.io.data_catalog - INFO - Loading data from `cliente_activo` (SQLPartitionedDataSet)...
2021-03-22 15:19:05,691 - kedro.io.data_catalog - INFO - Loading data from `agendas_basicos` (SQLPartitionedDataSet)...


In [93]:
def create_cliente_activo(cliente_activo: SQLPartitionedDataSet,
                          date: str) -> pd.DataFrame:
    """Creates master table with features related to EoP state of customers for one period of data

    Parameters
    ----------
    cliente_activo:
        dataset defined in ´catalog.yml´ - list of active customers at EoP for the given period
    date:
        period to process
    
    Returns
    -------
        Mastertable with information of clientes at EoP
    """

    # Initialize logger
    log = initialize_logger()

    # Load active clientes for period
    log.info(f"Creating cliente_activo...")
    period_to_load = get_previous_month(date)
    df_clientes_activos = cliente_activo.filter_by(date=period_to_load)

    # Return
    return df_clientes_activos

In [246]:
def create_target_upsell(upgrades_basicos: SQLPartitionedDataSet,
                         eop: SQLPartitionedDataSet,
                         agendas_basicos: SQLPartitionedDataSet,
                         cliente_activo: pd.DataFrame,
                         parameters: Dict,
                         date: str) -> pd.DataFrame:
    """Function that takes care of generating the target feature for the up-sell model
    Up-selling includes:
        - Switching to a higher level of service on the same tecnology (e.g., from Silver SD to Gold SD)

    Target definition:
        - Existing customer switches service.
        - Stays with new product for at least 84 days
        - Doesn't have an agenda in the lat 28 days

    Target methodology:
        0. Definition of product ranking (update as needed)
        1. Loading of customer base (e.g., all post-paid customers in Uruguay)
        2. Loading of upgrade events (108) for period of interest (calculation_window)
        3. Loading of events (107,108,133,142) to detect customers that switch products for period of
        interest (activation_window)
        4. Loading of event (100108) for period (date - 28 days, date)
        5. For customers that have an event (108) in the calculation_window, compare previous service to new one to determine if it
    is an upgrade using the product ranking.
        6. For said customers in (5), check if another event happens in the activation_window after the 108 event. If it does not happen, then the
    customers are target for the model.

    Parameters
    ----------
    upgrades_basicos: 
        dataset defined in ``catalog_raw.yml`` with raw data information related to upgrades of programming service products
    eop:
        dataset defined in ``catalog_raw.yml`` with raw data information related to the client's EoP state
    date:
        period to process
    parameters:
        set of project parameters defined in ``parameters.yml``
    
    Returns
    -------
    pd.DataFrame
        Master table with up-sell target feature for one period (date+1; date+calculation_window)
    """
# Initialize logger
    log = initialize_logger()
    
    table_name = "target_upsell"
    write_to_parquet = parameters["write_to_parquet"]
    overwrite = parameters["targets"][table_name]["overwrite"]
    end_date = str(parameters["end_date"])
    log.info(f"Start the process of create upsell target for {date}")
    
    # Check if target can be created (date + max window < previous sunday)
    target_parameters = parameters["targets"][table_name]
    max_window = max([target_parameters[x] for x in target_parameters.keys() if x.endswith("window")])
    upper_bound = (pd.to_datetime(date) + timedelta(days=max_window)).strftime("%Y%m%d")
    previous_sunday = dt.today() - timedelta(days=dt.today().weekday() + 1)
    
    if pd.to_datetime(upper_bound, format="%Y%m%d") > previous_sunday:
        log.info(f"Cannot create upsell target for {date}: Not enough future information")
        return None

    # Compare with what is already processed
    path = f"{parameters['paths']['target_path']}{table_name}/"
    os.makedirs(path, exist_ok=True)
    processed_dates = os.listdir(path)
    match = [file for file in processed_dates if str(date) in file]
    if len(match) > 0 and overwrite is False:
        # If table is found, read parquet:
        log.info(f"Reading {match[0]} table")
        df_final = pd.read_parquet(path + match[0], engine="pyarrow")

    else:
        product_rank = parameters["targets"]["target_upsell"]["upsell_products_rank"]
        products_allowed_to_move=parameters["targets"]["target_upsell"]["products_allowed_to_move"]
        product_tecnology=parameters["targets"]["target_upsell"]["product_and_tecnology"]
        product_tecnology = {value : key for (key, value) in product_tecnology.items()}

        start_date = date
        end_date = (pd.to_datetime(date) + timedelta(days=parameters["targets"]["target_upsell"]["calculation_window"])
                    ).strftime("%Y%m%d")
        cancel_date = (pd.to_datetime(date) + timedelta(days=parameters["targets"]["target_upsell"]["activation_window"])
                       ).strftime("%Y%m%d")

        end_date_upgrades = (pd.to_datetime(date) + timedelta(days=2*parameters["targets"]["target_upsell"]["calculation_window"])).strftime("%Y%m%d")

        start_date_agendas = (pd.to_datetime(start_date)-timedelta(days=28)).strftime("%Y%m%d")

        # Get EoP active clients from previous period to exclude new clients
        period_to_load = get_previous_month(start_date)
        df_clientes = eop.filter_by(condition=f"PRC_TIPO_ID = 3 AND PRC_CODIGO  IN {tuple(products_allowed_to_move)}",
                                     #base of customers that can made an upgrade
                                     date=period_to_load)
        
        # Get the user tecnology
        df_clientes["tecno_eop"]=df_clientes["PRC_CODIGO"].map(product_tecnology)
        df_clientes["tecno_eop"]=[y.split(" ")[2] for x,y in enumerate(df_clientes["tecno_eop"])]

        # Get data for target creation
        moves=tuple([ value[0] for (key,value) in product_rank.items()])
        df_upgrades = upgrades_basicos.filter_by(condition=f"EVENTO_ID = 108 AND PRODUCTO_ID IN {moves}",
                                                 date=[start_date, end_date_upgrades],
                                                 target=True)
        #Tecnology of the basic product after the movement.
        df_upgrades["tecno_up"]=[y.split(" ")[2] for x,y in enumerate(df_upgrades.PRODUCTO_ID.map(product_tecnology))]

        #CANCELATIONS
        df_cancelations = upgrades_basicos.filter_by(date=[start_date,
                                                           cancel_date],
                                                     target=True)

        # Keep only first move by CUSTOMER, PRODUCT
        df_cancelations.sort_values(["CUSTOMER_ID", "PRODUCTO_ID", "FECHA"], ascending=[False, False, True], inplace=True)
        df_cancelations.drop_duplicates(subset=["CUSTOMER_ID", "PRODUCTO_ID"], keep="last", inplace=True)

        #AGENDA
        df_agenda = agendas_basicos.filter_by(date=[start_date_agendas, end_date])
        df_agenda.rename(columns={"FECHA":"FECHA_AGENDA"},inplace=True)

        df_clientes_upgrades = pd.merge(
            df_clientes[["CUSTOMER_ID", "PRC_CODIGO","tecno_eop"]],
            df_upgrades[["CUSTOMER_ID", "PRODUCTO_ID", "FECHA","tecno_up"]],
            on=["CUSTOMER_ID"],
            how="inner",
            validate="1:m")

        df_clientes_upgrades.sort_values(["CUSTOMER_ID", "PRC_CODIGO", "FECHA"], ascending=[False, False, True],
                                                 inplace=True)
        df_clientes_upgrades.drop_duplicates(subset=["CUSTOMER_ID", "PRC_CODIGO"], keep="last", inplace=True)

        del df_upgrades;
        gc.collect()
        
        #RANKING products
        df_product_rank = pd.DataFrame(product_rank.items(), columns=["PRODUCTO_RANK_INI", "PRC_CODIGO"])
        df_product_rank = df_product_rank.explode("PRC_CODIGO")

        # Rank initial product (PRC_CODIGO) from EOP table
        df_clientes_upgrades_ranked = pd.merge(df_clientes_upgrades,
                                               df_product_rank,
                                               on="PRC_CODIGO",
                                               how="left",
                                               validate="m:1")

        del df_clientes_upgrades
        gc.collect()

        # Rank last product (PRODUCTO_ID) from plan_evento table
        df_product_rank.rename(columns={"PRC_CODIGO": "PRODUCTO_ID",
                                        "PRODUCTO_RANK_INI": "PRODUCTO_RANK_END"}, inplace=True)
        df_clientes_upgrades_ranked = pd.merge(df_clientes_upgrades_ranked,
                                               df_product_rank,
                                               on="PRODUCTO_ID",
                                               how="left",
                                               validate="m:1")

        #Solving the situation of MIX, this tecnology is SD
        df_clientes_upgrades_ranked["tecno_eop"]=np.where(df_clientes_upgrades_ranked.tecno_eop.isin(["MIX"]),"SD",df_clientes_upgrades_ranked.tecno_eop)
        df_clientes_upgrades_ranked["tecno_up"]=np.where(df_clientes_upgrades_ranked.tecno_up.isin(["MIX"]),"SD",df_clientes_upgrades_ranked.tecno_up)

        # Calculate target based on initial and end product plus tecnology
        df_clientes_upgrades_ranked["TARGET"] = np.where((df_clientes_upgrades_ranked["PRODUCTO_RANK_END"] > \
                                                         df_clientes_upgrades_ranked["PRODUCTO_RANK_INI"]) & ((df_clientes_upgrades_ranked["tecno_eop"] == \
                                                                                                               df_clientes_upgrades_ranked["tecno_up"])), 1, 0)


        # Merge with target df to check for activation period
        df_target = pd.merge(df_clientes_upgrades_ranked,
                             df_cancelations[["CUSTOMER_ID", "PRODUCTO_ID", "FECHA"]],
                             on=["CUSTOMER_ID", "PRODUCTO_ID"],
                             how="left",
                             validate="1:m")

        del df_clientes_upgrades_ranked, df_cancelations;
        gc.collect()

        # Compute time difference between events
        df_target["DATE_DIFF"] = (df_target["FECHA_y"] - df_target["FECHA_x"]) / np.timedelta64(1, "D")
        log.info(f" Number of events 108 ending as upgrades before product changes rule {df_target.TARGET.sum()}")

        df_target["TARGET"] = np.where((df_target["DATE_DIFF"] > 0) & \
                                       (df_target["DATE_DIFF"] <= parameters["targets"]["target_upsell"][
                                           "activation_window"]),
                                       0,
                                       df_target["TARGET"])
        df_target = drop_extra_rename_remaining(df_target)
        log.info(f" Number of events 108 ending as upgrades after tecnology changes rule {df_target.TARGET.sum()}")
        
        # quitar upgrades agendados el mes anterior
        df_final = pd.merge(df_target,
                        df_agenda,
                        left_on=['CUSTOMER_ID'],
                        right_on=['CUSTOMER_ID'],
                        how='left')
        # quitar agendas mes anterior
        upgrades_agendados_antes = df_final[(df_final.FECHA_AGENDA)<(pd.to_datetime(start_date)).strftime("%Y%m%d")]
        df_final = df_final.drop(upgrades_agendados_antes.index)
        log.info(f" Number of events 108 ending as upgrades after 100108 rule {df_final.TARGET.sum()}")
        
        # quitar upgrades mes futuro no agendados este mes
        upgrades_futuros = df_final[df_final.FECHA>(pd.to_datetime(start_date)+timedelta(days=28)).strftime("%Y%m%d")]
        df_final = df_final.drop(upgrades_futuros[upgrades_futuros.FECHA_AGENDA.isna()].index)

        df_final.sort_values(["CUSTOMER_ID", "TARGET"], ascending=False,inplace=True)
        df_final.drop_duplicates(subset=["CUSTOMER_ID"], keep="first", inplace=True)
        
        # Merge back to EOP
        df_final = pd.merge(cliente_activo[["CUSTOMER_ID"]],
                            df_final[["CUSTOMER_ID", "TARGET", "FECHA", "PRC_CODIGO"]],
                            on="CUSTOMER_ID",
                            how="left",
                            validate="1:1")
        
        target=df_final.loc[df_final.CUSTOMER_ID.isin(cliente_activo.CUSTOMER_ID.unique())]

        del df_target, df_final;
        gc.collect()

        target["TARGET"].fillna(0, inplace=True)
        target["TARGET"] =  target["TARGET"].astype(np.int32)
        target["DATE_EXP"] = period_to_load
        target["DATE_CALC"] = date
        target.rename({"FECHA": "FECHA_TARGET"}, inplace=True)
        log.info(f"should be equals| {len(target.CUSTOMER_ID.unique())}=={target.shape[0]}")
        if write_to_parquet:
            file = f"{parameters['paths']['target_path']}{table_name}/{table_name}_{date}.parquet"
            #file = f"/u01/share/cesar/others_uy/target_corregido/{table_name}_{date}.parquet"
            target.to_parquet(file, engine="pyarrow")

        # Return
        log.info(
            f"""Exporting target for period {start_date} and rate {
            np.round(100 * target[target['TARGET'] == 1]['CUSTOMER_ID'].nunique() / target['CUSTOMER_ID'].nunique(), 2)
            }%""")

    return target

In [247]:
periods=parameters['models']['upsell']['binary']['train_periods']+parameters['models']['upsell']['binary']['test_periods']+parameters['models']['upsell']['binary']['dev_periods']

In [248]:
for date in periods:
    cliente_activo_df=create_cliente_activo(cliente_activo,date)
    create_target_upsell(upgrades_basicos,
                         eop,
                         agendas_basicos,
                         cliente_activo_df,
                         parameters,
                         date)

2021-03-22 19:54:19,943 - aa_engine_pkg.assets.utils.utilities - INFO - Creating cliente_activo...
select distinct CUSTOMER_ID from stg_uy_customer_status where UPPER(STATUS) LIKE '%ACTIVO%' and DATE_EXP = 201811
2021-03-22 19:54:23,255 - aa_engine_pkg.assets.utils.utilities - INFO - Start the process of create upsell target for 20181203
select DATE_EXP, CUSTOMER_ID, PRC_CODIGO, PRODUCTO, PRC_TIPO_ID, TEC_ID, MOP, TENURE from stg_uy_eop_customer where DATE_EXP = 201811 and PRC_TIPO_ID = 3 AND PRC_CODIGO  IN (135, 216, 217, 147, 169, 132)
select * from stg_uy_plan_evento where PROD_CATEGORY_ID = 3 and EVENTO_ID IN (107,108,133,142) and FECHA > to_date('20181203235900', 'yyyymmddhh24miss') and FECHA <= to_date('20190128235900', 'yyyymmddhh24miss') and EVENTO_ID = 108 AND PRODUCTO_ID IN (135, 216, 217, 147, 169, 132, 139)
select * from stg_uy_plan_evento where PROD_CATEGORY_ID = 3 and EVENTO_ID IN (107,108,133,142) and FECHA > to_date('20181203235900', 'yyyymmddhh24miss') and FECHA <= to_

### check targets

In [193]:
df=list()
for date in test_periods:
    aux=pd.read_parquet(f"/u01/share/cesar/others_uy/target_corregido/target_upsell_{date}.parquet")
    df.append(aux)
new_target=pd.concat(df)


In [194]:
new_target.head()

Unnamed: 0,CUSTOMER_ID,TARGET,FECHA,PRC_CODIGO,DATE_EXP,DATE_CALC
0,2038798,0,NaT,,202005,20200615
1,2040174,0,NaT,,202005,20200615
2,2066350,0,NaT,,202005,20200615
3,2100151,0,NaT,,202005,20200615
4,2106243,0,NaT,,202005,20200615


In [195]:
#old target+master
model = "upsell"
model_type = "binary"
data=list()
for date in test_periods:
    file_master = f"{parameters['paths']['master_path']}master_total/master_total_{date}.parquet"
    df = pd.read_parquet(file_master)

    file_target = f"{parameters['paths']['target_path']}target_{model}/target_{model}_{date}.parquet"
    df_target = pd.read_parquet(file_target)

    data.append(df.merge(df_target[["CUSTOMER_ID","TARGET"]],on="CUSTOMER_ID",how="left"))
data=pd.concat(data)


In [196]:
data.head()

Unnamed: 0,CUSTOMER_ID,AQY_ARPU_nanmean_1,AQY_FLAG_DISCOUNT_nansum_1,AQY_PROP_DISCOUNT_nanmean_1,AQY_ARPU_nanmean_3,AQY_FLAG_DISCOUNT_nansum_3,AQY_PROP_DISCOUNT_nanmean_3,AQY_ARPU_nanmean_6,AQY_FLAG_DISCOUNT_nansum_6,AQY_PROP_DISCOUNT_nanmean_6,...,EVF_BALANCE_BROUGHT_FORWARD_nanmax_5,EVF_BALANCE_BROUGHT_FORWARD_nanmin_5,EVF_MONTO_nanmean_6,EVF_MONTO_nanmax_6,EVF_MONTO_nanmin_6,EVF_BALANCE_BROUGHT_FORWARD_nanmean_6,EVF_BALANCE_BROUGHT_FORWARD_nanmax_6,EVF_BALANCE_BROUGHT_FORWARD_nanmin_6,DATE_CALC,TARGET
0,53238376,0.524253,0.0,0.327524,0.506094,0.0,0.341268,0.486441,0.0,0.356955,...,0.45466,0.44583,0.450925,0.468702,0.444516,0.449618,0.45466,0.44583,20200615,
1,53240440,0.55448,0.0,0.309646,0.5346,0.0,0.325359,0.514009,0.0,0.342429,...,0.834607,0.458274,0.536389,0.835313,0.453753,0.609697,0.834607,0.458274,20200615,
2,53241310,0.188885,1.0,0.835124,0.162252,3.0,0.867505,0.149154,5.0,0.888875,...,0.408479,0.311271,0.342087,0.404609,0.323101,0.342377,0.408479,0.311271,20200615,
3,53327082,0.890481,0.0,0.090533,0.593946,2.0,0.532049,0.510334,4.0,0.670011,...,1.0,0.351329,0.640741,1.0,0.349069,0.573314,1.0,0.351329,20200615,
4,53327959,0.149078,1.0,0.754694,0.204858,3.0,0.674095,0.218532,5.0,0.672981,...,0.494082,0.360626,0.359101,0.366789,0.349304,0.391581,0.494082,0.360626,20200615,0.0


In [197]:
data=data.merge(new_target,on=["CUSTOMER_ID","DATE_CALC"], how="left")

In [198]:
data.head()

Unnamed: 0,CUSTOMER_ID,AQY_ARPU_nanmean_1,AQY_FLAG_DISCOUNT_nansum_1,AQY_PROP_DISCOUNT_nanmean_1,AQY_ARPU_nanmean_3,AQY_FLAG_DISCOUNT_nansum_3,AQY_PROP_DISCOUNT_nanmean_3,AQY_ARPU_nanmean_6,AQY_FLAG_DISCOUNT_nansum_6,AQY_PROP_DISCOUNT_nanmean_6,...,EVF_MONTO_nanmin_6,EVF_BALANCE_BROUGHT_FORWARD_nanmean_6,EVF_BALANCE_BROUGHT_FORWARD_nanmax_6,EVF_BALANCE_BROUGHT_FORWARD_nanmin_6,DATE_CALC,TARGET_x,TARGET_y,FECHA,PRC_CODIGO,DATE_EXP
0,53238376,0.524253,0.0,0.327524,0.506094,0.0,0.341268,0.486441,0.0,0.356955,...,0.444516,0.449618,0.45466,0.44583,20200615,,0.0,NaT,,202005
1,53240440,0.55448,0.0,0.309646,0.5346,0.0,0.325359,0.514009,0.0,0.342429,...,0.453753,0.609697,0.834607,0.458274,20200615,,0.0,NaT,,202005
2,53241310,0.188885,1.0,0.835124,0.162252,3.0,0.867505,0.149154,5.0,0.888875,...,0.323101,0.342377,0.408479,0.311271,20200615,,0.0,NaT,,202005
3,53327082,0.890481,0.0,0.090533,0.593946,2.0,0.532049,0.510334,4.0,0.670011,...,0.349069,0.573314,1.0,0.351329,20200615,,0.0,NaT,,202005
4,53327959,0.149078,1.0,0.754694,0.204858,3.0,0.674095,0.218532,5.0,0.672981,...,0.349304,0.391581,0.494082,0.360626,20200615,0.0,0.0,NaT,,202005


In [199]:
data.TARGET_x.fillna(0,inplace=True)
data.TARGET_y.fillna(0,inplace=True)

In [224]:
sum((data.TARGET_x==1) &(data.TARGET_y==0))

63

In [209]:
#Scoring
model_path = parameters["paths"]["model_path"]
model_format = parameters["models"][model][model_type]["model_to_load"]
model_folder = model_path + model_format + f"_{model}_{model_type}_model/"
lgbmodel = lgbm.Booster(model_file=model_folder + model_format + ".txt")
fill_na_with = np.nan
score_var = "SCORE"
vars_to_model = lgbmodel.feature_name()  # get features used in model training
scoring_data=list()
for date in test_periods:
    df_score = data.loc[data.DATE_CALC==date]
    df_score = add_selected_variables(df_score, vars_to_model, fill_na_with)
    df_score.loc[:, score_var] = lgbmodel.predict(df_score[vars_to_model])
    df_score.loc[:, score_var] = df_score[score_var] * 100
    df_score.loc[:,"decil"]=pd.qcut(df_score[score_var], 10, labels=sorted(np.arange(1, 10 + 1), reverse=True), precision=5,duplicates="drop")
    scoring_data.append(df_score)
scoring_data=pd.concat(scoring_data)

['EVE_CAT_COMBINED_100133_1_nansum_21', 'CAM_OFERTA_COMBINADA_RENT_OTROS_nansum_21', 'MAN_ESTADOWO_ID_AG_nansum_14', 'EVE_CAT_COMBINED_123_1_nansum_252', 'EVE_CAT_COMBINED_123_1_nansum_168', 'EVE_CAT_COMBINED_100133_1_nansum_336', 'CAM_OFERTA_COMBINADA_UPGR_BASICO_nansum_84', 'EVE_CAT_COMBINED_100133_1_nansum_168', 'CAM_OFERTA_COMBINADA_UPGR_BASICO_nansum_28', 'EVE_CAT_COMBINED_123_1_nansum_14', 'MAN_ESTADOWO_ID_AG_nansum_84', 'CAM_OFERTA_COMBINADA_UPGR_BASICO_nansum_21', 'CAM_OFERTA_COMBINADA_UPGR_BASICO_nansum_14', 'CAM_OFERTA_COMBINADA_RENT_OTROS_nansum_252', 'CAM_OFERTA_COMBINADA_RENT_OTROS_nansum_28', 'CAM_OFERTA_COMBINADA_UPGR_BASICO_nansum_252', 'CAM_OFERTA_COMBINADA_RENT_OTROS_nansum_336', 'EVE_CAT_COMBINED_123_1_nansum_21', 'EVE_CAT_COMBINED_100133_1_nansum_28', 'MAN_ESTADOWO_ID_AG_nansum_168', 'CAM_OFERTA_COMBINADA_UPGR_BASICO_nansum_336', 'EVE_CAT_COMBINED_123_1_nansum_84', 'MAN_ESTADOWO_ID_AG_nansum_28', 'CAM_OFERTA_COMBINADA_UPGR_BASICO_nansum_168', 'CAM_OFERTA_COMBINADA_R

In [210]:
scoring_data.head()

Unnamed: 0,CUSTOMER_ID,AQY_ARPU_nanmean_1,AQY_FLAG_DISCOUNT_nansum_1,AQY_PROP_DISCOUNT_nanmean_1,AQY_ARPU_nanmean_3,AQY_FLAG_DISCOUNT_nansum_3,AQY_PROP_DISCOUNT_nanmean_3,AQY_ARPU_nanmean_6,AQY_FLAG_DISCOUNT_nansum_6,AQY_PROP_DISCOUNT_nanmean_6,...,CAM_OFERTA_COMBINADA_RENT_OTROS_nansum_84,EVE_CAT_COMBINED_123_1_nansum_28,CAM_OFERTA_COMBINADA_RENT_OTROS_nansum_14,EVE_CAT_COMBINED_123_1_nansum_336,EVE_CAT_COMBINED_100133_1_nansum_84,CAM_OFERTA_COMBINADA_RENT_OTROS_nansum_168,EVE_CAT_COMBINED_100133_1_nansum_252,EVE_CAT_COMBINED_100133_1_nansum_14,SCORE,decil
0,53238376,0.524253,0.0,0.327524,0.506094,0.0,0.341268,0.486441,0.0,0.356955,...,,,,,,,,,0.225786,3
1,53240440,0.55448,0.0,0.309646,0.5346,0.0,0.325359,0.514009,0.0,0.342429,...,,,,,,,,,0.071455,8
2,53241310,0.188885,1.0,0.835124,0.162252,3.0,0.867505,0.149154,5.0,0.888875,...,,,,,,,,,0.134119,5
3,53327082,0.890481,0.0,0.090533,0.593946,2.0,0.532049,0.510334,4.0,0.670011,...,,,,,,,,,0.172675,4
4,53327959,0.149078,1.0,0.754694,0.204858,3.0,0.674095,0.218532,5.0,0.672981,...,,,,,,,,,0.040074,10


In [218]:
scoring_data["diff_target"]=np.where((scoring_data.TARGET_x==1) & (scoring_data.TARGET_y==0),1,0)

In [221]:
scoring_data.groupby(['decil']).agg({'CUSTOMER_ID':'count','TARGET_x':'sum','diff_target':'sum'})

Unnamed: 0_level_0,CUSTOMER_ID,TARGET_x,diff_target
decil,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10,46624,3.0,0
9,46620,4.0,0
8,46621,7.0,0
7,46620,11.0,1
6,46622,9.0,2
5,46620,13.0,2
4,46620,24.0,1
3,46621,39.0,1
2,46621,123.0,8
1,46623,422.0,48


In [95]:
# Initialize logger
log = initialize_logger()

table_name = "target_upsell"
write_to_parquet = parameters["write_to_parquet"]
overwrite = parameters["targets"][table_name]["overwrite"]
end_date = str(parameters["end_date"])
log.info(f"Start the process of create upsell target for {date}")

# Check if target can be created (date + max window < previous sunday)
target_parameters = parameters["targets"][table_name]
max_window = max([target_parameters[x] for x in target_parameters.keys() if x.endswith("window")])
upper_bound = (pd.to_datetime(date) + timedelta(days=max_window)).strftime("%Y%m%d")
previous_sunday = dt.today() - timedelta(days=dt.today().weekday() + 1)

2021-03-22 14:09:46,221 - aa_engine_pkg.assets.utils.utilities - INFO - Start the process of create upsell target for 20200101


In [96]:
if pd.to_datetime(upper_bound, format="%Y%m%d") > previous_sunday:
        log.info(f"Cannot create upsell target for {date}: Not enough future information")

In [97]:
# Compare with what is already processed
path = f"{parameters['paths']['target_path']}{table_name}/"
os.makedirs(path, exist_ok=True)
processed_dates = os.listdir(path)
match = [file for file in processed_dates if str(date) in file]
if len(match) > 0 and overwrite is False:
    # If table is found, read parquet:
    log.info(f"Reading {match[0]} table")
    df_final = pd.read_parquet(path + match[0], engine="pyarrow")

In [98]:
product_rank = parameters["targets"]["target_upsell"]["upsell_products_rank"]
products_allowed_to_move=parameters["targets"]["target_upsell"]["products_allowed_to_move"]
product_tecnology=parameters["targets"]["target_upsell"]["product_and_tecnology"]
product_tecnology = {value : key for (key, value) in product_tecnology.items()}

start_date = date
end_date = (pd.to_datetime(date) + timedelta(days=parameters["targets"]["target_upsell"]["calculation_window"])
            ).strftime("%Y%m%d")
cancel_date = (pd.to_datetime(date) + timedelta(days=parameters["targets"]["target_upsell"]["activation_window"])
               ).strftime("%Y%m%d")

end_date_upgrades = (pd.to_datetime(date) + timedelta(days=2*parameters["targets"]["target_upsell"]["calculation_window"])).strftime("%Y%m%d")

start_date_agendas = (pd.to_datetime(start_date)-timedelta(days=28)).strftime("%Y%m%d")

In [99]:
# Get EoP active clients from previous period to exclude new clients
period_to_load = get_previous_month(start_date)
df_clientes = eop.filter_by(condition=f"PRC_TIPO_ID = 3 AND PRC_CODIGO  IN {tuple(products_allowed_to_move)}",
                             #base of customers that can made an upgrade
                             date=period_to_load)

select DATE_EXP, CUSTOMER_ID, PRC_CODIGO, PRODUCTO, PRC_TIPO_ID, TEC_ID, MOP, TENURE from stg_uy_eop_customer where DATE_EXP = 201912 and PRC_TIPO_ID = 3 AND PRC_CODIGO  IN (135, 216, 217, 147, 169, 132)


In [100]:
# Get the user tecnology
df_clientes["tecno_eop"]=df_clientes["PRC_CODIGO"].map(product_tecnology)
df_clientes["tecno_eop"]=[y.split(" ")[2] for x,y in enumerate(df_clientes["tecno_eop"])]

# Get data for target creation
moves=tuple([ value[0] for (key,value) in product_rank.items()])
df_upgrades = upgrades_basicos.filter_by(condition=f"EVENTO_ID = 108 AND PRODUCTO_ID IN {moves}",
                                         date=[start_date, end_date_upgrades],
                                         target=True)
#Tecnology of the basic product after the movement.
df_upgrades["tecno_up"]=[y.split(" ")[2] for x,y in enumerate(df_upgrades.PRODUCTO_ID.map(product_tecnology))]

#CANCELATIONS
df_cancelations = upgrades_basicos.filter_by(date=[start_date,
                                                   cancel_date],
                                             target=True)

# Keep only first move by CUSTOMER, PRODUCT
df_cancelations.sort_values(["CUSTOMER_ID", "PRODUCTO_ID", "FECHA"], ascending=[False, False, True], inplace=True)
df_cancelations.drop_duplicates(subset=["CUSTOMER_ID", "PRODUCTO_ID"], keep="last", inplace=True)

#AGENDA
df_agenda = agendas_basicos.filter_by(date=[start_date_agendas, end_date])
df_agenda.rename(columns={"FECHA":"FECHA_AGENDA"},inplace=True)

df_clientes_upgrades = pd.merge(
    df_clientes[["CUSTOMER_ID", "PRC_CODIGO","tecno_eop"]],
    df_upgrades[["CUSTOMER_ID", "PRODUCTO_ID", "FECHA","tecno_up"]],
    on=["CUSTOMER_ID"],
    how="inner",
    validate="1:m")

df_clientes_upgrades.sort_values(["CUSTOMER_ID", "PRC_CODIGO", "FECHA"], ascending=[False, False, True],
                                         inplace=True)
df_clientes_upgrades.drop_duplicates(subset=["CUSTOMER_ID", "PRC_CODIGO"], keep="last", inplace=True)
    
del df_upgrades;
gc.collect()

select * from stg_uy_plan_evento where PROD_CATEGORY_ID = 3 and EVENTO_ID IN (107,108,133,142) and FECHA > to_date('20200101235900', 'yyyymmddhh24miss') and FECHA <= to_date('20200226235900', 'yyyymmddhh24miss') and EVENTO_ID = 108 AND PRODUCTO_ID IN (135, 216, 217, 147, 169, 132, 139)
select * from stg_uy_plan_evento where PROD_CATEGORY_ID = 3 and EVENTO_ID IN (107,108,133,142) and FECHA > to_date('20200101235900', 'yyyymmddhh24miss') and FECHA <= to_date('20200428235900', 'yyyymmddhh24miss')
select CUSTOMER_ID, PRODUCTO_ID, PRODUCTO, TRUNC(FECHA) FECHA_AGENDA from stg_uy_plan_evento where PROD_CATEGORY_ID = 3 and EVENTO_ID=100108 and FECHA >= to_date('20191204', 'yyyymmdd') and FECHA < to_date('20200129', 'yyyymmdd')


1999

In [101]:
df_product_rank = pd.DataFrame(product_rank.items(), columns=["PRODUCTO_RANK_INI", "PRC_CODIGO"])
df_product_rank = df_product_rank.explode("PRC_CODIGO")

# Rank initial product (PRC_CODIGO) from EOP table
df_clientes_upgrades_ranked = pd.merge(df_clientes_upgrades,
                                       df_product_rank,
                                       on="PRC_CODIGO",
                                       how="left",
                                       validate="m:1")

del df_clientes_upgrades
gc.collect()

# Rank last product (PRODUCTO_ID) from plan_evento table
df_product_rank.rename(columns={"PRC_CODIGO": "PRODUCTO_ID",
                                "PRODUCTO_RANK_INI": "PRODUCTO_RANK_END"}, inplace=True)
df_clientes_upgrades_ranked = pd.merge(df_clientes_upgrades_ranked,
                                       df_product_rank,
                                       on="PRODUCTO_ID",
                                       how="left",
                                       validate="m:1")

#Solving the situation of MIX, this tecnology is SD
df_clientes_upgrades_ranked["tecno_eop"]=np.where(df_clientes_upgrades_ranked.tecno_eop.isin(["MIX"]),"SD",df_clientes_upgrades_ranked.tecno_eop)
df_clientes_upgrades_ranked["tecno_up"]=np.where(df_clientes_upgrades_ranked.tecno_up.isin(["MIX"]),"SD",df_clientes_upgrades_ranked.tecno_up)

# Calculate target based on initial and end product plus tecnology
df_clientes_upgrades_ranked["TARGET"] = np.where((df_clientes_upgrades_ranked["PRODUCTO_RANK_END"] > \
                                                 df_clientes_upgrades_ranked["PRODUCTO_RANK_INI"]) & ((df_clientes_upgrades_ranked["tecno_eop"] == \
                                                                                                       df_clientes_upgrades_ranked["tecno_up"])), 1, 0)


# Merge with target df to check for activation period
df_target = pd.merge(df_clientes_upgrades_ranked,
                     df_cancelations[["CUSTOMER_ID", "PRODUCTO_ID", "FECHA"]],
                     on=["CUSTOMER_ID", "PRODUCTO_ID"],
                     how="left",
                     validate="1:m")

del df_clientes_upgrades_ranked, df_cancelations;
gc.collect()

# Compute time difference between events
df_target["DATE_DIFF"] = (df_target["FECHA_y"] - df_target["FECHA_x"]) / np.timedelta64(1, "D")
log.info(f" Number of events 108 ending as upgrades before product changes rule {df_target.TARGET.sum()}")

df_target["TARGET"] = np.where((df_target["DATE_DIFF"] > 0) & \
                               (df_target["DATE_DIFF"] <= parameters["targets"]["target_upsell"][
                                   "activation_window"]),
                               0,
                               df_target["TARGET"])
df_target = drop_extra_rename_remaining(df_target)
log.info(f" Number of events 108 ending as upgrades after tecnology changes rule {df_target.TARGET.sum()}")

2021-03-22 14:09:54,151 - aa_engine_pkg.assets.utils.utilities - INFO -  Number of events 108 ending as upgrades before product changes rule 289
2021-03-22 14:09:54,157 - aa_engine_pkg.assets.utils.utilities - INFO -  Number of events 108 ending as upgrades after tecnology changes rule 270


In [102]:
# quitar upgrades agendados el mes anterior
df_final = pd.merge(df_target,
                df_agenda,
                left_on=['CUSTOMER_ID'],
                right_on=['CUSTOMER_ID'],
                how='left')
# quitar agendas mes anterior
upgrades_agendados_antes = df_final[(df_final.FECHA_AGENDA)<(pd.to_datetime(start_date)).strftime("%Y%m%d")]
df_final = df_final.drop(upgrades_agendados_antes.index)
log.info(f" Number of events 108 ending as upgrades after 100108 rule {df_final.TARGET.sum()}")

2021-03-22 14:09:56,521 - aa_engine_pkg.assets.utils.utilities - INFO -  Number of events 108 ending as upgrades after 100108 rule 263


In [36]:
# quitar upgrades mes futuro no agendados este mes
upgrades_futuros = df_final[df_final.FECHA>(pd.to_datetime(start_date)+timedelta(days=28)).strftime("%Y%m%d")]
df_final = df_final.drop(upgrades_futuros[upgrades_futuros.FECHA_AGENDA.isna()].index)

df_final.sort_values(["CUSTOMER_ID", "TARGET"], ascending=False,inplace=True)
df_final.drop_duplicates(subset=["CUSTOMER_ID"], keep="first", inplace=True)

In [37]:
df_final.head()

Unnamed: 0,CUSTOMER_ID,PRC_CODIGO,tecno_eop,PRODUCTO_ID_x,FECHA,tecno_up,PRODUCTO_RANK_INI,PRODUCTO_RANK_END,TARGET,DATE_DIFF,PRODUCTO_ID_y,PRODUCTO,FECHA_AGENDA
1,55182592,169,HD,139,2020-01-15 11:12:33,HD,5,7,1,0.0,,,NaT
4,54559026,135,SD,132,2020-01-27 13:30:50,SD,1,6,1,0.0,,,NaT
9,54516639,135,SD,132,2020-01-06 16:12:28,SD,1,6,1,0.0,,,NaT
12,54506936,135,SD,132,2020-01-25 15:06:23,SD,1,6,1,0.0,,,NaT
16,54432172,169,HD,139,2020-01-03 08:36:28,HD,5,7,1,0.0,,,NaT


In [38]:
# Merge back to EOP
df_final = pd.merge(cliente_activo_df[["CUSTOMER_ID"]],
                    df_final[["CUSTOMER_ID", "TARGET", "FECHA", "PRC_CODIGO"]],
                    on="CUSTOMER_ID",
                    how="left",
                    validate="1:1")

In [39]:
df_final["TARGET"].fillna(0, inplace=True)

In [40]:
target=df_final.loc[df_final.CUSTOMER_ID.isin(cliente_activo_df.CUSTOMER_ID.unique())]

#del df_target, df_final;
#gc.collect()

target["TARGET"].fillna(0, inplace=True)
target["TARGET"] =  target["TARGET"].astype(np.int32)
target["DATE_EXP"] = period_to_load
target["DATE_CALC"] = date
target.rename({"FECHA": "FECHA_TARGET"}, inplace=True)

In [41]:
target.TARGET.value_counts()

0    117042
1       121
Name: TARGET, dtype: int64

In [8]:
def create_target_upsell(upgrades_basicos: SQLPartitionedDataSet,
                         eop: SQLPartitionedDataSet,
                         agendas_basicos: SQLPartitionedDataSet,
                         cliente_activo: pd.DataFrame,
                         parameters: Dict,
                         date: str) -> pd.DataFrame:
    """Function that takes care of generating the target feature for the up-sell model
    Up-selling includes:
        - Switching to a higher level of service on the same tecnology (e.g., from Silver SD to Gold SD)

    Target definition:
        - Existing customer switches service.
        - Stays with new product for at least 84 days

    Target methodology:
        0. Definition of product ranking (update as needed)
        1. Loading of customer base (e.g., all post-paid customers in Colombia)
        2. Loading of upgrade events (108) for period of interest (calculation_window)
        3. Loading of events (107,108,133,142) to detect customers that switch products for period of
        interest (activation_window)
        4. For customers that have an event (108) in the calculation_window, compare previous service to new one to determine if it
    is an upgrade using the product ranking.
        5. For said customers in (4), check if another event happens in the activation_window after the 108 event. If it does not happen, then the
    customers are target for the model.

    Parameters
    ----------
    upgrades_basicos: 
        dataset defined in ``catalog_raw.yml`` with raw data information related to upgrades of programming service products
    eop:
        dataset defined in ``catalog_raw.yml`` with raw data information related to the client's EoP state
    date:
        period to process
    parameters:
        set of project parameters defined in ``parameters.yml``
    
    Returns
    -------
    pd.DataFrame
        Master table with up-sell target feature for one period (date+1; date+calculation_window)
    """

    # Initialize logger
    log = initialize_logger()
    
    table_name = "target_upsell"
    write_to_parquet = parameters["write_to_parquet"]
    overwrite = parameters["targets"][table_name]["overwrite"]
    end_date = str(parameters["end_date"])
    log.info(f"Start the process of create upsell target for {date}")
    
    # Check if target can be created (date + max window < previous sunday)
    target_parameters = parameters["targets"][table_name]
    max_window = max([target_parameters[x] for x in target_parameters.keys() if x.endswith("window")])
    upper_bound = (pd.to_datetime(date) + timedelta(days=max_window)).strftime("%Y%m%d")
    previous_sunday = dt.today() - timedelta(days=dt.today().weekday() + 1)
    
    if pd.to_datetime(upper_bound, format="%Y%m%d") > previous_sunday:
        log.info(f"Cannot create upsell target for {date}: Not enough future information")
        return None

    # Compare with what is already processed
    path = f"{parameters['paths']['target_path']}{table_name}/"
    os.makedirs(path, exist_ok=True)
    processed_dates = os.listdir(path)
    match = [file for file in processed_dates if str(date) in file]
    if len(match) > 0 and overwrite is False:
        # If table is found, read parquet:
        log.info(f"Reading {match[0]} table")
        df_final = pd.read_parquet(path + match[0], engine="pyarrow")

    else:
        product_rank = parameters["targets"]["target_upsell"]["upsell_products_rank"]
        products_allowed_to_move=parameters["targets"]["target_upsell"]["products_allowed_to_move"]
        product_tecnology=parameters["targets"]["target_upsell"]["product_and_tecnology"]
        product_tecnology = {value : key for (key, value) in product_tecnology.items()}

        start_date = date
        end_date = (pd.to_datetime(date) + timedelta(days=parameters["targets"]["target_upsell"]["calculation_window"])).strftime("%Y%m%d")
        cancel_date = (pd.to_datetime(date) + timedelta(days=parameters["targets"]["target_upsell"]["activation_window"])).strftime("%Y%m%d")

        # Get EoP active clients from previous period to exclude new clients
        period_to_load = get_previous_month(start_date)
        df_clientes = eop.filter_by(condition=f"PRC_TIPO_ID = 3 AND PRC_CODIGO  IN {tuple(products_allowed_to_move)}",
                                     #base of customers that can made an upgrade
                                     date=period_to_load)

        # Get the user tecnology
        df_clientes["tecno_eop"]=df_clientes["PRC_CODIGO"].map(product_tecnology)
        df_clientes["tecno_eop"]=[y.split(" ")[2] for x,y in enumerate(df_clientes["tecno_eop"])]

        # Get data for target creation
        moves=tuple([ value[0] for (key,value) in product_rank.items()])
        df_upgrades = upgrades_basicos.filter_by(condition=f"EVENTO_ID = 108 AND PRODUCTO_ID IN {moves}",
                                                 date=[start_date, end_date],
                                                 target=True)
        #Tecnology of the basic product.
        df_upgrades["tecno_up"]=[y.split(" ")[2] for x,y in enumerate(df_upgrades.PRODUCTO_ID.map(product_tecnology))]

        df_cancelations = upgrades_basicos.filter_by(date=[start_date,
                                                           cancel_date],
                                                     target=True)

        df_clientes_upgrades = pd.merge(
            df_clientes[["CUSTOMER_ID", "PRC_CODIGO","tecno_eop"]],
            df_upgrades[["CUSTOMER_ID", "PRODUCTO_ID", "FECHA","tecno_up"]],
            on=["CUSTOMER_ID"],
            how="inner",
            validate="1:m")
        del df_upgrades;
        gc.collect()

        df_clientes_upgrades.sort_values(["CUSTOMER_ID", "PRC_CODIGO", "FECHA"], ascending=[False, False, True],
                                         inplace=True)
        df_clientes_upgrades.drop_duplicates(subset=["CUSTOMER_ID", "PRC_CODIGO"], keep="last", inplace=True)

        df_product_rank = pd.DataFrame(product_rank.items(), columns=["PRODUCTO_RANK_INI", "PRC_CODIGO"])
        df_product_rank = df_product_rank.explode("PRC_CODIGO")


            # Rank initial product (PRC_CODIGO) from EOP table
        df_clientes_upgrades_ranked = pd.merge(df_clientes_upgrades,
                                               df_product_rank,
                                               on="PRC_CODIGO",
                                               how="left",
                                               validate="m:1")

        del df_clientes_upgrades
        gc.collect()

        # Rank last product (PRODUCTO_ID) from plan_evento table
        df_product_rank.rename(columns={"PRC_CODIGO": "PRODUCTO_ID",
                                        "PRODUCTO_RANK_INI": "PRODUCTO_RANK_END"}, inplace=True)
        df_clientes_upgrades_ranked = pd.merge(df_clientes_upgrades_ranked,
                                               df_product_rank,
                                               on="PRODUCTO_ID",
                                               how="left",
                                               validate="m:1")

        #Solving the situation of MIX, this tecnology is SD
        df_clientes_upgrades_ranked["tecno_eop"]=np.where(df_clientes_upgrades_ranked.tecno_eop.isin(["MIX"]),"SD",df_clientes_upgrades_ranked.tecno_eop)
        df_clientes_upgrades_ranked["tecno_up"]=np.where(df_clientes_upgrades_ranked.tecno_up.isin(["MIX"]),"SD",df_clientes_upgrades_ranked.tecno_up)
        
        # Calculate target based on initial and end product plus tecnology
        df_clientes_upgrades_ranked["TARGET"] = np.where((df_clientes_upgrades_ranked["PRODUCTO_RANK_END"] > \
                                                         df_clientes_upgrades_ranked["PRODUCTO_RANK_INI"]) & ((df_clientes_upgrades_ranked["tecno_eop"] == \
                                                                                                               df_clientes_upgrades_ranked["tecno_up"])), 1, 0)

        # Keep only first move by CUSTOMER, PRODUCT
        df_cancelations.sort_values(["CUSTOMER_ID", "PRODUCTO_ID", "FECHA"], ascending=[False, False, True], inplace=True)
        df_cancelations.drop_duplicates(subset=["CUSTOMER_ID", "PRODUCTO_ID"], keep="last", inplace=True)

        # Merge with target df to check for activation period
        df_target = pd.merge(df_clientes_upgrades_ranked,
                             df_cancelations[["CUSTOMER_ID", "PRODUCTO_ID", "FECHA"]],
                             on=["CUSTOMER_ID", "PRODUCTO_ID"],
                             how="left",
                             validate="1:m")

        del df_clientes_upgrades_ranked, df_cancelations;
        gc.collect()

        # Compute time difference between events
        df_target["DATE_DIFF"] = (df_target["FECHA_y"] - df_target["FECHA_x"]) / np.timedelta64(1, "D")
        log.info(f" Number of events 108 ending as upgrades before product changes rule {df_target.TARGET.sum()}")

        df_target["TARGET"] = np.where((df_target["DATE_DIFF"] > 0) & \
                                       (df_target["DATE_DIFF"] <= parameters["targets"]["target_upsell"][
                                           "activation_window"]),
                                       0,
                                       df_target["TARGET"])
        df_target = drop_extra_rename_remaining(df_target)
        log.info(f" Number of events 108 ending as upgrades after product changes rule {df_target.TARGET.sum()}")

        # Merge back to EOP
        df_final = pd.merge(df_clientes[["CUSTOMER_ID", "PRC_CODIGO"]],
                            df_target[["CUSTOMER_ID", "TARGET", "FECHA", "PRODUCTO_ID"]],
                            on="CUSTOMER_ID",
                            how="left",
                            validate="1:1")
        
        target=df_final.loc[df_final.CUSTOMER_ID.isin(cliente_activo.CUSTOMER_ID.unique())]
        
        del df_target, df_final;
        gc.collect()

        target["TARGET"].fillna(0, inplace=True)
        target["TARGET"] =  target["TARGET"].astype(np.int32)
        target["DATE_EXP"] = period_to_load
        target["DATE_CALC"] = date
        target.rename({"FECHA": "FECHA_TARGET"}, inplace=True)
        
        if write_to_parquet:
            file = f"{parameters['paths']['target_path']}{table_name}/{table_name}_{date}.parquet"
            target.to_parquet(file, engine="pyarrow")

        # Return
        log.info(
            f"""Exporting target for period {start_date} and rate {
            np.round(100 * target[target['TARGET'] == 1]['CUSTOMER_ID'].nunique() / target['CUSTOMER_ID'].nunique(), 2)
            }%""")

    return target
    

In [226]:
dates=calculate_dates_to_process_for_target(parameters,'target_upsell_noagenda')
print(dates)

KeyError: 'target_upsell_noagenda'

In [6]:
for date in dates:
    print(date)
    cliente_activo_df= create_cliente_activo(cliente_activo,date)
    create_target_upsell(upgrades_basicos, eop, cliente_activo_df,parameters,date) 
    

20181203
2021-01-13 19:09:58,421 - aa_engine_pkg.assets.utils.utilities - INFO - Creating cliente_activo...
select distinct CUSTOMER_ID from stg_uy_customer_status where UPPER(STATUS) LIKE '%ACTIVO%' and DATE_EXP = 201811


  % ((self.server_version_info,))


2021-01-13 19:10:03,173 - aa_engine_pkg.assets.utils.utilities - INFO - Start the process of create upsell target for 20181203
select DATE_EXP, CUSTOMER_ID, PRC_CODIGO, PRODUCTO, PRC_TIPO_ID, TEC_ID, MOP, TENURE from stg_uy_eop_customer where DATE_EXP = 201811 and PRC_TIPO_ID = 3 AND PRC_CODIGO  IN (135, 216, 217, 147, 169, 132)
select * from stg_uy_plan_evento where PROD_CATEGORY_ID = 3 and EVENTO_ID IN (107,108,133,142) and FECHA > to_date('20181203235900', 'yyyymmddhh24miss') and FECHA <= to_date('20181231235900', 'yyyymmddhh24miss') and EVENTO_ID = 108 AND PRODUCTO_ID IN (135, 216, 217, 147, 169, 132, 139)
select * from stg_uy_plan_evento where PROD_CATEGORY_ID = 3 and EVENTO_ID IN (107,108,133,142) and FECHA > to_date('20181203235900', 'yyyymmddhh24miss') and FECHA <= to_date('20190331235900', 'yyyymmddhh24miss')
2021-01-13 19:10:07,406 - aa_engine_pkg.assets.utils.utilities - INFO -  Number of events 108 ending as upgrades before product changes rule 139
2021-01-13 19:10:07,412 -