In [1]:
# Load kedro environment (not needed in .py)
from pathlib import Path
from kedro.framework.context import load_context

# Load a Kedro context so the data catalog and configuration can be used from the notebook.
# NOTE(review): the notebooks directory is hard-coded to an absolute path on one host;
# the commented-out Path.cwd() line is the location-independent alternative.
#current_dir = Path.cwd()
current_dir = Path("/u01/share/cesar/aa_engine_uy/notebooks/")
# The project root passed to Kedro is the parent of the notebooks directory.
proj_path = current_dir.parent
context = load_context(proj_path)
catalog = context.catalog
# Read the configuration entries matching the credentials / parameters glob patterns.
# NOTE(review): `credentials` is not referenced by any later cell visible here.
credentials = context.config_loader.get("credentials*","credentials*/**")
parameters = context.config_loader.get("parameters*","parameters*/**")
from aa_engine_pkg.assets.utils import *
from aa_engine_pkg.assets.core.data.kedro.catalog_expansion.partitioned_sql import SQLPartitionedDataSet

In [2]:
# Fetch the four catalog entries used by the rest of the notebook
# (the catalog log reports each one as a SQLPartitionedDataSet).
upgrades_basicos = catalog.load("upgrades_basicos")
eop = catalog.load("eop")
cliente_activo = catalog.load("cliente_activo")
agendas_basicos = catalog.load("agendas_basicos")

2021-03-25 19:27:22,983 - kedro.io.data_catalog - INFO - Loading data from `upgrades_basicos` (SQLPartitionedDataSet)...
2021-03-25 19:27:22,986 - kedro.io.data_catalog - INFO - Loading data from `eop` (SQLPartitionedDataSet)...
2021-03-25 19:27:22,987 - kedro.io.data_catalog - INFO - Loading data from `cliente_activo` (SQLPartitionedDataSet)...
2021-03-25 19:27:22,988 - kedro.io.data_catalog - INFO - Loading data from `agendas_basicos` (SQLPartitionedDataSet)...


In [3]:
def create_cliente_activo(cliente_activo: SQLPartitionedDataSet,
                          date: str) -> pd.DataFrame:
    """Load the customers that were active at end of period (EoP) for the month before ``date``.

    Parameters
    ----------
    cliente_activo:
        dataset defined in `catalog.yml` - list of active customers at EoP;
        it is queried through its ``filter_by`` method
    date:
        reference period; the period actually loaded is ``get_previous_month(date)``

    Returns
    -------
        The result of ``cliente_activo.filter_by`` for the previous month
        (table with the customers active at EoP)
    """
    logger = initialize_logger()
    logger.info("Creating cliente_activo...")

    # The EoP snapshot of interest is the one from the month preceding `date`
    previous_period = get_previous_month(date)
    return cliente_activo.filter_by(date=previous_period)

In [4]:
# Reference date of the run, as a YYYYMMDD string
date = "20191007"
cliente_activo_df = create_cliente_activo(cliente_activo, date)

2021-03-25 19:27:25,480 - aa_engine_pkg.assets.utils.utilities - INFO - Creating cliente_activo...
select distinct CUSTOMER_ID from stg_uy_customer_status where UPPER(STATUS) LIKE '%ACTIVO%' and DATE_EXP = 201909


  % ((self.server_version_info,))


In [5]:
# Initialize the logger used by the notebook-level cells below
# (create_cliente_activo creates its own logger internally).
log = initialize_logger()

In [6]:
# Settings of the upsell target, read once from the Kedro parameters
upsell_params = parameters["targets"]["target_upsell"]
product_rank = upsell_params["upsell_products_rank"]
# Invert the configured mapping so that it is keyed by its original values
product_tecnology = {
    mapped: original
    for original, mapped in upsell_params["product_and_tecnology"].items()
}

# All date windows are offsets from the reference date, formatted as YYYYMMDD strings
date_format = "%Y%m%d"
reference_day = pd.to_datetime(date)
calculation_window = upsell_params["calculation_window"]
activation_window = upsell_params["activation_window"]

start_date = date
end_date = (reference_day + timedelta(days=calculation_window)).strftime(date_format)
cancel_date = (reference_day + timedelta(days=activation_window)).strftime(date_format)

# Upgrades are searched over twice the calculation window
end_date_upgrades = (reference_day + timedelta(days=2 * calculation_window)).strftime(date_format)

# Agendas are searched starting 28 days before the reference date
start_date_agendas = (reference_day - timedelta(days=28)).strftime(date_format)

In [7]:
# Get EoP active clients from previous period to exclude new clients.
# Only products whose configured name does not contain "ORO" are kept: they form
# the base of customers that can make an upgrade.
products_allowed_to_move = tuple(
    code for code, name in product_tecnology.items() if "ORO" not in name
)
# Render the IN list element by element. Interpolating the tuple itself produces
# "(135,)" when a single product qualifies, which is not valid SQL; for two or
# more products the text is identical to the tuple's own representation.
products_in_clause = "(" + ", ".join(repr(code) for code in products_allowed_to_move) + ")"
period_to_load = get_previous_month(start_date)
df_clientes = eop.filter_by(condition=f"PRC_TIPO_ID = 3 AND PRC_CODIGO  IN {products_in_clause}",
                            date=period_to_load)

# Get the user tecnology: the third space-separated token of the configured
# product name, with "MIX" reported as "SD".
product_names_eop = df_clientes["PRC_CODIGO"].map(product_tecnology)
df_clientes["tecno_eop"] = [name.split(" ")[2] for name in product_names_eop]
df_clientes["tecno_eop"] = np.where(df_clientes.tecno_eop == "MIX", "SD", df_clientes.tecno_eop)

select DATE_EXP, CUSTOMER_ID, PRC_CODIGO, PRODUCTO, PRC_TIPO_ID, TEC_ID, MOP, TENURE from stg_uy_eop_customer where DATE_EXP = 201909 and PRC_TIPO_ID = 3 AND PRC_CODIGO  IN (135, 216, 217, 147, 169)


In [8]:
# Sanity check: (rows, columns) of the EoP customer base
df_clientes.shape

(20628, 9)

In [9]:
# Get Upgrades for target creation.
# Render the IN list element by element: interpolating the tuple itself produces
# "(135,)" when a single product qualifies, which is not valid SQL; for two or
# more products the text is identical to the tuple's own representation.
upgrade_products_in_clause = "(" + ", ".join(repr(code) for code in products_allowed_to_move) + ")"
df_upgrades = upgrades_basicos.filter_by(
    condition=f"EVENTO_ID = 108 AND PRODUCTO_ID IN {upgrade_products_in_clause}",
    date=[start_date, end_date_upgrades],
    target=True)

# Tecnology of the basic product: the third space-separated token of the
# configured product name, with "MIX" reported as "SD".
product_names_up = df_upgrades["PRODUCTO_ID"].map(product_tecnology)
df_upgrades["tecno_up"] = [name.split(" ")[2] for name in product_names_up]
df_upgrades["tecno_up"] = np.where(df_upgrades.tecno_up == "MIX", "SD", df_upgrades.tecno_up)

select * from stg_uy_plan_evento where PROD_CATEGORY_ID = 3 and EVENTO_ID IN (107,108,133,142) and FECHA > to_date('20191007235900', 'yyyymmddhh24miss') and FECHA <= to_date('20191202235900', 'yyyymmddhh24miss') and EVENTO_ID = 108 AND PRODUCTO_ID IN (135, 216, 217, 147, 169)


In [10]:
# Sanity check: (rows, columns) of the upgrade events loaded for the target
df_upgrades.shape

(1680, 11)

In [11]:
# Get the agenda events between start_date_agendas and end_date,
# making sure their date column is called FECHA_AGENDA
df_agenda = agendas_basicos.filter_by(date=[start_date_agendas, end_date])
df_agenda = df_agenda.rename(columns={"FECHA": "FECHA_AGENDA"})

select CUSTOMER_ID, PRODUCTO_ID, PRODUCTO, TRUNC(FECHA) FECHA_AGENDA from stg_uy_plan_evento where PROD_CATEGORY_ID = 3 and EVENTO_ID=100108 and FECHA >= to_date('20190909', 'yyyymmdd') and FECHA < to_date('20191104', 'yyyymmdd')


In [12]:
# Get the events between start_date and cancel_date, used to detect product
# changes that follow an upgrade
df_cancelations = upgrades_basicos.filter_by(date=[start_date, cancel_date], target=True)

# Keep a single row per (CUSTOMER_ID, PRODUCTO_ID).
# NOTE(review): with FECHA sorted ascending and keep="last" the row retained is
# the one with the latest FECHA of each pair, not the first one - confirm that
# this is the intended event.
df_cancelations = (
    df_cancelations
    .sort_values(["CUSTOMER_ID", "PRODUCTO_ID", "FECHA"], ascending=[False, False, True])
    .drop_duplicates(subset=["CUSTOMER_ID", "PRODUCTO_ID"], keep="last")
)

select * from stg_uy_plan_evento where PROD_CATEGORY_ID = 3 and EVENTO_ID IN (107,108,133,142) and FECHA > to_date('20191007235900', 'yyyymmddhh24miss') and FECHA <= to_date('20200202235900', 'yyyymmddhh24miss')


In [13]:
# Attach the upgrade events to the EoP customer base; validate="1:m" makes
# pandas raise if a CUSTOMER_ID appears more than once on the EoP side
eop_columns = ["CUSTOMER_ID", "PRC_CODIGO", "tecno_eop"]
upgrade_columns = ["CUSTOMER_ID", "PRODUCTO_ID", "FECHA", "tecno_up"]
df_clientes_upgrades = df_clientes[eop_columns].merge(
    df_upgrades[upgrade_columns],
    on=["CUSTOMER_ID"],
    how="inner",
    validate="1:m",
)

# One row per (CUSTOMER_ID, PRC_CODIGO): FECHA is sorted ascending and the last
# row of each pair is kept, i.e. the event with the latest FECHA
df_clientes_upgrades = (
    df_clientes_upgrades
    .sort_values(["CUSTOMER_ID", "PRC_CODIGO", "FECHA"], ascending=[False, False, True])
    .drop_duplicates(subset=["CUSTOMER_ID", "PRC_CODIGO"], keep="last")
)

In [14]:
# Sanity check: (rows, columns) after matching EoP customers with their upgrade events
df_clientes_upgrades.shape

(241, 6)

In [15]:
# Lookup table with one row per product code and its rank
# (presumably product_rank maps each rank to a list of product codes - hence the explode)
df_product_rank = pd.DataFrame(
    product_rank.items(),
    columns=["PRODUCTO_RANK_INI", "PRC_CODIGO"],
).explode("PRC_CODIGO")

# Rank initial product (PRC_CODIGO) from EOP table
df_clientes_upgrades_ranked = df_clientes_upgrades.merge(
    df_product_rank,
    on="PRC_CODIGO",
    how="left",
    validate="m:1",
)

# Rank last product (PRODUCTO_ID) from plan_evento table, reusing the same
# lookup table under the column names of the upgrade side
df_product_rank = df_product_rank.rename(
    columns={"PRC_CODIGO": "PRODUCTO_ID", "PRODUCTO_RANK_INI": "PRODUCTO_RANK_END"}
)
df_clientes_upgrades_ranked = df_clientes_upgrades_ranked.merge(
    df_product_rank,
    on="PRODUCTO_ID",
    how="left",
    validate="m:1",
)

In [16]:
# Calculate target: an event counts as an upgrade when the end product ranks
# strictly higher than the initial one and the tecnology is unchanged
rank_increased = (
    df_clientes_upgrades_ranked["PRODUCTO_RANK_END"]
    > df_clientes_upgrades_ranked["PRODUCTO_RANK_INI"]
)
same_tecnology = (
    df_clientes_upgrades_ranked["tecno_eop"] == df_clientes_upgrades_ranked["tecno_up"]
)
mask = rank_increased & same_tecnology
df_clientes_upgrades_ranked["TARGET"] = np.where(mask, 1, 0)
log.info(f" Number of events 108 ending as upgrades {df_clientes_upgrades_ranked.TARGET.sum()}")

2021-03-25 19:27:42,076 - aa_engine_pkg.assets.utils.utilities - INFO -  Number of events 108 ending as upgrades 31


In [17]:
# Bring in the event recorded for the same customer and product to check the
# activation period; FECHA_x is the upgrade date, FECHA_y the date of that event
df_target = df_clientes_upgrades_ranked.merge(
    df_cancelations[["CUSTOMER_ID", "PRODUCTO_ID", "FECHA"]],
    on=["CUSTOMER_ID", "PRODUCTO_ID"],
    how="left",
)

# Days elapsed between the upgrade and the other event
df_target["DATE_DIFF"] = (df_target["FECHA_y"] - df_target["FECHA_x"]) / np.timedelta64(1, "D")

# An event strictly after the upgrade and within the activation window resets
# the target to 0; every other row keeps its current value
activation_window_days = parameters["targets"]["target_upsell"]["activation_window"]
changed_within_window = df_target["DATE_DIFF"].gt(0) & df_target["DATE_DIFF"].le(activation_window_days)
df_target["TARGET"] = np.where(changed_within_window, 0, df_target["TARGET"])
log.info(f" Number of events 108 ending as upgrades after product changes rule {df_target.TARGET.sum()}")
df_target = drop_extra_rename_remaining(df_target)

2021-03-25 19:27:43,250 - aa_engine_pkg.assets.utils.utilities - INFO -  Number of events 108 ending as upgrades after product changes rule 28


In [18]:
# Remove duplicates prioritizing upgrades: after the descending sort the first
# row of each customer is the one with the highest TARGET
df_target = (
    df_target
    .sort_values(["CUSTOMER_ID", "TARGET"], ascending=False)
    .drop_duplicates(subset=["CUSTOMER_ID"], keep="first")
)

# Attach the agenda events of the customer's initial product; they are used
# afterwards to remove upgrades that were scheduled during the previous month
df_final = df_target.merge(
    df_agenda,
    left_on=["CUSTOMER_ID", "PRC_CODIGO"],
    right_on=["CUSTOMER_ID", "PRODUCTO_ID"],
    how="left",
)

In [None]:
# Compare the date columns against Timestamp objects directly; formatting the
# dates back to strings would rely on pandas implicitly re-parsing them.
window_start = pd.to_datetime(start_date)
# NOTE(review): 28 days is hard-coded here - confirm whether it should follow
# the calculation_window parameter.
window_end = window_start + timedelta(days=28)

# Remove upgrades whose agenda was scheduled before the start date (previous month)
upgrades_agendados_antes = df_final[df_final.FECHA_AGENDA < window_start]
df_final = df_final.drop(upgrades_agendados_antes.index)

# Remove upgrades dated after the 28-day window that have no agenda
# (rows with an agenda date are kept)
upgrades_futuros = df_final[df_final.FECHA > window_end]
df_final = df_final.drop(upgrades_futuros[upgrades_futuros.FECHA_AGENDA.isna()].index)

# Keep one row per customer, prioritizing the highest TARGET
df_final = (
    df_final
    .sort_values(["CUSTOMER_ID", "TARGET"], ascending=False)
    .drop_duplicates(subset=["CUSTOMER_ID"], keep="first")
)

In [None]:
# Number of customers flagged as upgrades (TARGET = 1) after all exclusion rules
df_final.TARGET.sum()