In [None]:
import pandas as pd

In [None]:
from joblib import dump, load
from tempfile import TemporaryFile

In [None]:
from google.cloud import storage, bigquery as bq

## Retrieve and load the latest model

In [None]:
# Module-level Cloud Storage client, constructed with no explicit arguments;
# retrieve_and_load_model reads this global to reach the model bucket.
storage_client = storage.Client()

In [None]:
# The default call fetches 'boost_target_model_latest.joblib', so the bucket must always hold an object under that name
def retrieve_and_load_model(model_name = 'boost_target_model_latest', bucket_name = 'jupiter_models_staging'):
    """Download a joblib-serialised model from Cloud Storage and deserialise it.

    Args:
        model_name: Object name without the ``.joblib`` extension. Defaults to
            ``'boost_target_model_latest'``.
        bucket_name: Bucket holding the model object. Defaults to the staging
            bucket that was previously hard-coded, so existing calls are unchanged.

    Returns:
        The object that joblib's ``load`` reconstructs from the downloaded bytes.
    """
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(f'{model_name}.joblib')

    # The bytes are staged in an anonymous temporary file that is discarded
    # when the `with` block exits.
    with TemporaryFile() as temp_file:
        print('Downloading to temporary file')
        blob.download_to_file(temp_file)
        print('Fetched, about to load')
        # Writing moved the file position forward; rewind so load() reads from the start.
        temp_file.seek(0)
        # NOTE(review): joblib.load unpickles its input, which can execute arbitrary
        # code -- only point this at buckets whose contents are trusted.
        model = load(temp_file)
        print('Model loaded: ', model)
        return model

In [None]:
# Fetch and deserialise the default model ('boost_target_model_latest') once, up front.
inference_model = retrieve_and_load_model()

## Get features for users

In [None]:
# Module-level BigQuery client, constructed with no explicit arguments;
# enrich_users_from_bq reads this global to run its queries.
bq_client = bq.Client()

In [None]:
def enrich_users_from_bq(user_ids):
    """Look up saving and boost-redemption history for the given users in BigQuery.

    Two queries are run with the same ``@user_ids`` array parameter:

    * ``ops.user_behaviour`` (rows with ``transaction_type = 'SAVING_EVENT'``)
      supplies the save count plus the earliest/latest save timestamps and the
      hours elapsed since each.
    * ``ops.all_user_events`` (rows with ``event_type = 'BOOST_REDEEMED'``)
      supplies the number of prior boost redemptions.

    Args:
        user_ids: List of user id strings, bound to the queries as a STRING array.

    Returns:
        DataFrame with columns ``user_id``, ``prior_save_count``,
        ``latest_save_date``, ``hours_since_latest``, ``earliest_save_date``,
        ``hours_since_earliest`` and ``number_boost_redeems``.

        NOTE: the save query is the left side of the join, so a user with no
        saving events does not appear in the result at all; the frame can have
        fewer rows than ``user_ids``.
    """
    job_config = bq.QueryJobConfig(
        query_parameters=[
            bq.ArrayQueryParameter("user_ids", "STRING", user_ids)
        ]
    )

    save_query = """
        with save_data as (select user_id, 
            count(*) as prior_save_count,
            max(TIMESTAMP_MILLIS(time_transaction_occurred)) as latest_save_date,
            min(TIMESTAMP_MILLIS(time_transaction_occurred)) as earliest_save_date
        from ops.user_behaviour
            where transaction_type = 'SAVING_EVENT'
            and user_id in UNNEST(@user_ids) group by user_id) 

        select user_id, prior_save_count, 
            latest_save_date, TIMESTAMP_DIFF(current_timestamp, latest_save_date, HOUR) as hours_since_latest, 
            earliest_save_date, TIMESTAMP_DIFF(current_timestamp, earliest_save_date, HOUR) as hours_since_earliest, 
        from save_data
    """

    save_df = bq_client.query(save_query, job_config=job_config).to_dataframe()

    boost_query = """
    select user_id, count(*) as number_boost_redeems 
    from ops.all_user_events where event_type = 'BOOST_REDEEMED' and user_id in UNNEST(@user_ids)
    group by user_id, event_type
    """

    boost_df = bq_client.query(boost_query, job_config=job_config).to_dataframe()

    # Join explicitly on user_id rather than on whatever columns the two frames
    # happen to share, so a future column added to both queries cannot silently
    # change the join key.
    enriched = save_df.merge(boost_df, how='left', on='user_id')

    # Only the redeem count can be missing because of the left join (user never
    # redeemed a boost). Restricting the fill to that column avoids writing an
    # integer 0 into a timestamp/hour column should one ever come back null.
    enriched = enriched.fillna({'number_boost_redeems': 0})

    return enriched

In [None]:
def add_one_hots(df, boost_type_category):
    """Return a copy of ``df`` extended with one indicator column per boost category.

    For every known category a column named ``boost_type_category_<category>``
    is added; it holds 1 on every row when the category equals
    ``boost_type_category`` and 0 otherwise. A value that is not in the known
    list therefore yields all-zero indicator columns.

    Args:
        df: Input frame; it is not modified.
        boost_type_category: Category of the boost being scored.

    Returns:
        New DataFrame with the indicator columns appended in the fixed order below.
    """
    # Order matters: it determines the order of the appended columns.
    known_categories = (
        'GAME::CHASE_ARROW',
        'GAME::DESTROY_IMAGE',
        'GAME::TAP_SCREEN',
        'SIMPLE::ROUND_UP',
        'SIMPLE::SIMPLE_SAVE',
        'SIMPLE::TIME_LIMITED',
        'SIMPLE::TARGET_BALANCE',
        'SOCIAL::FRIENDS_ADDED',
        'SOCIAL::NUMBER_FRIENDS',
        'WITHDRAWAL::CANCEL_WITHDRAWAL',
    )

    indicator_columns = {
        f'boost_type_category_{name}': int(name == boost_type_category)
        for name in known_categories
    }

    return df.assign(**indicator_columns)

In [None]:
def assemble_input_rows(boost_data, user_ids):
    """Build one model-input row per user by combining user and boost features.

    The previous body could not run: it called ``pd.merge()`` with no arguments
    and a helper (``retrieve_features_for_model``) that is not defined in this
    notebook. It now uses the helpers that are defined here.

    Args:
        boost_data: Dict describing the boost. Must contain
            ``'boost_type_category'``; every other entry is added as a constant
            column (assumes scalar values, e.g. ``'boost_amount_whole_currency'``).
        user_ids: List of user id strings to fetch features for.

    Returns:
        DataFrame with the columns from ``enrich_users_from_bq``, the remaining
        boost attributes, and the one-hot category columns from ``add_one_hots``.

    Raises:
        KeyError: If ``boost_data`` has no ``'boost_type_category'`` entry.
    """
    # NOTE(review): assumes enrich_users_from_bq is the intended source of the
    # per-user features -- confirm against the feature set the model was trained on.
    user_id_features = enrich_users_from_bq(user_ids)

    # The category is expanded into one-hot columns below, so it is excluded here.
    boost_features = {
        key: value for key, value in boost_data.items() if key != 'boost_type_category'
    }

    combined_rows = add_one_hots(
        user_id_features.assign(**boost_features),
        boost_data['boost_type_category'],
    )
    return combined_rows

In [None]:
# Example boost definition for trying the pipeline by hand: a category (one of the
# values add_one_hots knows about) and an amount in whole currency units.
sample_boost = { 'boost_type_category': 'SIMPLE::SIMPLE_SAVE', 'boost_amount_whole_currency': 10 }

In [None]:
# Sample user ids for exercising the enrichment query; the commented-out line
# is the single-user variant of the same list.
# sample_user_ids = ['b0041713-9326-4cd3-9dfe-8f022afaad73']
sample_user_ids = ['b0041713-9326-4cd3-9dfe-8f022afaad73', '27b00e1c-4f32-4631-a67b-88aaf5a01d0c']

In [None]:
# Run both BigQuery lookups for the sample users and keep the merged frame for inspection.
bq_df = enrich_users_from_bq(sample_user_ids)

In [None]:
# Preview the first rows of the enriched user features.
bq_df.head()

In [None]:
# Row count check: the save query only returns users with at least one
# SAVING_EVENT, so this can be smaller than len(sample_user_ids).
len(bq_df)

In [None]:
# Scratch expression: displays a frame with one row per sample user and
# should_offer=False on every row. The result is not assigned to anything.
# NOTE(review): looks like a sketch of a default answer for users lacking features -- confirm intent.
pd.DataFrame({ 'user_ids': sample_user_ids, 'should_offer': False })

In [None]:
# Show which user ids actually came back from the enrichment query.
bq_df['user_id']