In [None]:
# import global modules
import os
import re
import sys
import time
import json
import pickle
import numpy as np
import pandas as pd
from pathlib import Path
from yaml import safe_load

# Set global vars
# Project root = the part of the working directory path that precedes the first
# occurrence of 'notebooks' (the whole cwd if 'notebooks' does not appear in it).
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_utils = pth_project / 'utils'
pth_queries = pth_project / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Put the project root first on sys.path so the local `utils` imports resolve.
sys.path.insert(0, pth_project.as_posix())
# Read the YAML config inside a context manager so the file handle is closed
# deterministically instead of being left open for the life of the kernel.
with pth_creds.open() as f_config:
    d_config = safe_load(f_config)

# import local modules
from utils.gcp import connect_bq_services
from utils.extract import extract_bq_data
from utils.modeling import extract_stats

In [None]:
# Connect through connect_bq_services using the GCP project name stored in the
# project config; the returned object is later passed to extract_bq_data.
gcp_project_name = d_config['gcp-project-name']
bq_client = connect_bq_services(gcp_project_name)

In [None]:
# Enable IPython's autoreload extension. Mode 2 reloads all modules (except those
# excluded via %aimport) before executing code, so edits to imported modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Display the resolved queries folder to verify the project root was derived correctly.
pth_queries

#### Extract data

In [None]:
# Load the extraction query text from the queries folder and run it through
# extract_bq_data with the client created above.
pth_sql = pth_queries / 'extract_aia_predictions.sql'
sql = pth_sql.read_text()
df_results = extract_bq_data(bq_client, sql)

# Dimensions of the extracted result.
df_results.shape

In [None]:
# Scenario names listed in target-index order: the position in this tuple is the
# integer index assigned to the scenario.
l_target_names = (
    'sing_acquisition',
    'shs_acquisition',
    'tos_acquisition',
    'wifi_acquisition',
    'ttv_acquisition',
    'sws_acquisition',
    'hsic_acquisition',
    'lwc_acquisition',
    'hpro_acquisition',
    'whsia_acquisition',
)

# scenario name -> target index (0..9)
d_target_mapping = {name: idx for idx, name in enumerate(l_target_names)}

d_target_mapping

#### Process data

In [None]:
# Preview the first rows of the extracted data.
df_results.head()

In [None]:
# Prediction columns: every column whose name contains the calibrated-score marker.
pred_marker = '_predicted_score_calibrated'
l_pred_cols = [col for col in df_results.columns if pred_marker in col]

# Number of prediction columns found.
len(l_pred_cols)

In [None]:
# Label columns: every column whose name contains '_label'.
label_marker = '_label'
l_label_cols = [col for col in df_results.columns if label_marker in col]

# Number of label columns found.
len(l_label_cols)

In [None]:
# List the detected label columns for a visual check.
l_label_cols

In [None]:
# aggregate labels
# For each row, collect the scenario names ('<x>_label' -> '<x>_acquisition') of all
# label columns whose value is non-null and equal to 1.
# Implemented as one pass over the label matrix instead of DataFrame.apply(axis=1):
# no per-row Series is built, and an empty frame yields an empty column instead of
# failing on the assignment.
l_scenario_names = [label.replace('_label', '_acquisition') for label in l_label_cols]
label_matrix = df_results[l_label_cols].to_numpy()
df_results['model_scenario'] = pd.Series(
    [
        [
            scenario
            for scenario, value in zip(l_scenario_names, row_values)
            # the null check comes first so pd.NA never reaches the == comparison
            if pd.notnull(value) and value == 1
        ]
        for row_values in label_matrix
    ],
    index=df_results.index,
    dtype=object,
)

In [None]:
# Preview the frame again to check the newly added model_scenario column.
df_results.head()

In [None]:
#df_results[['model_scenario'] + l_label_cols]

In [None]:
# One row per (original row, scenario): expand the model_scenario lists.
# Note: pandas turns an empty list into a single NaN row, so rows without any
# positive label are kept with model_scenario = NaN.
df_res_exploded = df_results.explode(column='model_scenario')

In [None]:
# Row count per scenario. value_counts() drops NaN by default, so rows whose
# model_scenario is missing are not shown here.
df_res_exploded['model_scenario'].value_counts()

In [None]:
# Translate each scenario name into its integer target index; values that are not
# keys of d_target_mapping (including NaN) become NaN.
s_target = df_res_exploded['model_scenario'].map(d_target_mapping)
df_res_exploded['target'] = s_target

In [None]:
# Preview the exploded frame, including the new target column.
df_res_exploded.head()

#### Results

In [None]:
# Create the list of score columns in ascending target-index order.
# Sorting by the mapped index (rather than relying on dict insertion order) keeps
# the column positions aligned with the target values if the mapping is reordered.
l_pred_ordered = [
    label.replace('_acquisition', '_predicted_score_calibrated')
    for label, _ in sorted(d_target_mapping.items(), key=lambda item: item[1])
]

# Sanity check: number of ordered columns, and whether they are exactly the
# prediction columns detected in the data.
len(l_pred_ordered), set(l_pred_ordered) == set(l_pred_cols)

In [None]:
# Score matrix: one row per exploded record, one column per entry of l_pred_ordered.
probabilities = df_res_exploded.loc[:, l_pred_ordered].to_numpy()

# Column indexes per row, ordered from highest to lowest score.
results_ranked = (-probabilities).argsort(axis=1)

# Show the stats returned by extract_stats for n = 1, 2, 3.
for n in range(1, 4):
    stats_n = extract_stats(n, results_ranked, df_res_exploded['target'], d_target_mapping)
    display(stats_n)

#### Naive approach

In [None]:
# Naive baselines, reported with the same n values (1, 2, 3) as the Results cell.
# NOTE(review): `naive_model_predict_proba` is not imported in the visible cells -
# presumably it lives in utils.modeling next to extract_stats; confirm and add it
# to the import cell, otherwise this cell fails on a fresh kernel.
# NOTE(review): the 'random_*' score types presumably draw random numbers and no
# seed is set in the visible cells - confirm whether results are reproducible.
for model_type in ('volume_only', 'random_only', 'random_weighted'):
    print(model_type)

    probabilities = naive_model_predict_proba(
        df_res_exploded, 'target', d_target_mapping, score_type=model_type,
        eligible_rule=False, existing_prod_rule=False,
    )
    # Column indexes per row, ordered from highest to lowest score.
    results_ranked = np.argsort(-probabilities, axis=1)
    # Loop over n explicitly instead of reusing whatever value of `n` an earlier
    # cell's loop left in the kernel namespace.
    for n in (1, 2, 3):
        display(extract_stats(n, results_ranked, df_res_exploded['target'], d_target_mapping))