# Testing dict -> query embedding functions in Merlin
This will be used to create a custom prediction routine for the query tower

In [7]:
import json 

# Sample request for the query tower: one playlist described only by its
# query-side features. Keys are listed in the order they are serialised.
_query_instance = {
    'collaborative': 'false',
    'album_name_pl': [
        "There's Really A Wolf",
        'Late Nights: The Album',
        'American Teen',
        'Crazy In Love',
        'Pony',
    ],
    'artist_genres_pl': [
        "'hawaiian hip hop', 'rap'",
        "'chicago rap', 'dance pop', 'pop', 'pop rap', 'r&b', 'southern hip hop', 'trap', 'urban contemporary'",
        "'pop', 'pop r&b'",
        "'dance pop', 'pop', 'r&b'",
        "'chill r&b', 'pop', 'pop r&b', 'r&b', 'urban contemporary'",
    ],
    # NOTE(review): '\xc3\xa9' is the UTF-8 byte pair for 'é' written as two
    # separate code points, so this entry is 'BeyoncÃ©' at runtime — confirm
    # that is the spelling the trained vocabulary expects.
    'artist_name_pl': [
        'Russ',
        'Jeremih',
        'Khalid',
        'Beyonc\xc3\xa9',
        'William Singe',
    ],
    'description_pl': '',
    'n_songs_pl': 8.0,
    'name': 'Lit Tunes ',
    'num_albums_pl': 8.0,
    'num_artists_pl': 8.0,
    'track_name_pl': [
        'Losin Control',
        'Paradise',
        'Location',
        'Crazy In Love - Remix',
        'Pony',
    ],
    'duration_ms_seed_pl': 51023.1,
    'pid': 1,
    'track_uri_pl': [
        'spotify:track:4cxMGhkinTocPSVVKWIw0d',
        'spotify:track:1wNEBPo3nsbGCZRryI832I',
        'spotify:track:152lZdxL1OR0ZMW6KquMif',
        'spotify:track:2f4IuijXLxYOeBncS60GUD',
        'spotify:track:4Lj8paMFwyKTGfILLELVxt',
    ],
}

# Fields deliberately left out of the payload (sample values kept for reference):
#   'album_uri_can': 'spotify:album:5l83t3mbVgCrIe1VU9uJZR',
#   'artist_followers_can': 4339757.0,
#   'artist_genres_can': "'hawaiian hip hop', 'rap'",
#   'artist_name_can': 'Russ',
#   'artist_pop_can': 82.0,
#   'artist_pop_pl': [82., 80., 90., 87., 65.],
#   'artist_uri_can': 'spotify:artist:1z7b1Pr1rSlvWRzsW3HOrS',
#   'artists_followers_pl': [ 4339757.,  5611842., 15046756., 30713126.,   603837.],
#   'duration_ms_can': 237322.0,
#   'duration_ms_songs_pl': [237506., 217200., 219080., 226400., 121739.],
#   'track_name_can': 'We Just Havent Met Yet',
#   'track_pop_can': 57.0,
#   'track_pop_pl': [79., 58., 83., 71., 57.],
#   'track_uri_can': 'spotify:track:0VzDv4wiuZsLsNOmfaUy2W',

# The prediction code consumes JSON text, so serialise the dict once here.
TEST_INSTANCE = json.dumps(_query_instance)

In [8]:
TEST_INSTANCE

'{"collaborative": "false", "album_name_pl": ["There\'s Really A Wolf", "Late Nights: The Album", "American Teen", "Crazy In Love", "Pony"], "artist_genres_pl": ["\'hawaiian hip hop\', \'rap\'", "\'chicago rap\', \'dance pop\', \'pop\', \'pop rap\', \'r&b\', \'southern hip hop\', \'trap\', \'urban contemporary\'", "\'pop\', \'pop r&b\'", "\'dance pop\', \'pop\', \'r&b\'", "\'chill r&b\', \'pop\', \'pop r&b\', \'r&b\', \'urban contemporary\'"], "artist_name_pl": ["Russ", "Jeremih", "Khalid", "Beyonc\\u00c3\\u00a9", "William Singe"], "description_pl": "", "n_songs_pl": 8.0, "name": "Lit Tunes ", "num_albums_pl": 8.0, "num_artists_pl": 8.0, "track_name_pl": ["Losin Control", "Paradise", "Location", "Crazy In Love - Remix", "Pony"], "duration_ms_seed_pl": 51023.1, "pid": 1, "track_uri_pl": ["spotify:track:4cxMGhkinTocPSVVKWIw0d", "spotify:track:1wNEBPo3nsbGCZRryI832I", "spotify:track:152lZdxL1OR0ZMW6KquMif", "spotify:track:2f4IuijXLxYOeBncS60GUD", "spotify:track:4Lj8paMFwyKTGfILLELVxt"]}'

### Create the prediction functions - we will use these in our Custom Prediction Routine in the 01-z notebook

In [10]:
# Imports and one-time setup for the prediction functions defined in this cell.
import nvtabular as nvt
# NOTE(review): `dd` is not referenced anywhere in the visible cells.
import dask.dataframe as dd
import pandas as pd
import os
# Bucket holding the saved artifacts; the cell that calls load() assigns the
# same value again.
BUCKET = 'gs://spotify-beam-v3'
import merlin.models.tf as mm
from nvtabular.loader.tf_utils import configure_tensorflow
# `json` was already imported in the first code cell; repeated here so this
# cell does not depend on it.
import json
# Called before the direct `import tensorflow` below.
# NOTE(review): presumably this sets TensorFlow's GPU/memory options and the
# ordering is intentional — confirm against the NVTabular docs before moving it.
configure_tensorflow()

import tensorflow as tf

def load(artifacts_uri):
    """Fetch the saved query model and its NVTabular workflow.

    Args:
        artifacts_uri (str):
            Required. The value of the environment variable AIP_STORAGE_URI;
            both artifacts are looked up underneath it.
    Returns:
        A ``(model, workflow)`` tuple: the Keras model read from
        ``<artifacts_uri>/query_model_merlin`` and the workflow read from
        ``<artifacts_uri>/workflow/2t-spotify-workflow`` after the columns
        listed below have been removed from its inputs.
    """
    model_path = os.path.join(artifacts_uri, "query_model_merlin")
    workflow_path = os.path.join(artifacts_uri, "workflow/2t-spotify-workflow")

    # Input columns stripped from the workflow before it is used for serving.
    # NOTE(review): 'artist_name_can' is listed twice; the duplicate is kept
    # so the call receives exactly the same list as before.
    dropped_inputs = [
        'track_pop_can',
        'track_uri_can',
        'duration_ms_can',
        'track_name_can',
        'artist_name_can',
        'album_name_can',
        'album_uri_can',
        'artist_followers_can',
        'artist_genres_can',
        'artist_name_can',
        'artist_pop_can',
        'artist_pop_pl',
        'artist_uri_can',
        'artists_followers_pl',
    ]

    query_model = tf.keras.models.load_model(model_path)
    serving_workflow = nvt.Workflow.load(workflow_path)
    serving_workflow.remove_inputs(dropped_inputs)

    return query_model, serving_workflow

def preprocess(prediction_input, model, workflow):
    """Turns a JSON prediction request into the model's output for it.

    Despite the name, this covers the whole path: parse the request, run the
    instances through the NVTabular workflow, build a single batch from the
    result, and call the model on that batch.

    Args:
        prediction_input (str):
            Required. JSON text holding either one instance (an object mapping
            feature name to value) or a list of such instances.
        model:
            Required. Callable applied to the sampled batch, e.g. the model
            returned by ``load``.
        workflow:
            Required. Object whose ``transform`` is applied to the
            ``nvt.Dataset`` built from the instances, e.g. the workflow
            returned by ``load``.
    Returns:
        The result of calling ``model`` on a batch built from every instance
        in the request.
    Raises:
        ValueError: If the request holds no instances, or an instance is not
            a JSON object.
    """
    parsed = json.loads(prediction_input)
    # Normalise to a list so single- and multi-instance requests share one path.
    instances = parsed if isinstance(parsed, list) else [parsed]
    if not instances:
        raise ValueError("prediction_input must contain at least one instance")
    if not all(isinstance(instance, dict) for instance in instances):
        raise ValueError("every instance must be a JSON object of feature name -> value")

    # Exactly one row per instance; list-valued features stay as a single cell.
    # Building the frame from the list of records avoids re-adding the first
    # instance, does not rely on the removed DataFrame.append, and does not
    # depend on which key happens to come first in the JSON object.
    pandas_instance = pd.DataFrame(instances)

    transformed_inputs = nvt.Dataset(pandas_instance)
    transformed_instance = workflow.transform(transformed_inputs)
    # Size the batch by row count: len() of a single parsed instance would be
    # its number of keys, not the number of instances.
    batch = mm.sample_batch(
        transformed_instance,
        batch_size=len(pandas_instance),
        include_targets=False,
    )

    return model(batch)

In [11]:
# Same bucket value as in the imports cell; re-assigned so this cell can be
# edited on its own.
BUCKET = 'gs://spotify-beam-v3'
# Folder that load() joins "query_model_merlin" and
# "workflow/2t-spotify-workflow" onto.
ARTIFACT_URI = f'{BUCKET}/merlin-processed'

# Load both artifacts once, then push the sample request through them.
model, workflow = load(ARTIFACT_URI)
# preprocess() returns model(batch), so the value displayed below is the
# model's output for TEST_INSTANCE rather than a preprocessed frame.
preprocess(TEST_INSTANCE, model, workflow)





2022-10-20 20:11:31.773060: I tensorflow/stream_executor/cuda/cuda_blas.cc:1804] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


<tf.Tensor: shape=(1, 128), dtype=float32, numpy=
array([[1.76715367e-02, 3.14773098e-02, 0.00000000e+00, 1.25602563e-03,
        8.14599246e-02, 0.00000000e+00, 2.13340390e-02, 0.00000000e+00,
        2.67350562e-02, 0.00000000e+00, 2.60866545e-02, 7.85914585e-02,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.08987670e-02,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 7.32983183e-03,
        5.98119311e-02, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        5.98613583e-02, 6.45721406e-02, 0.00000000e+00, 9.54043586e-03,
        5.09024113e-02, 0.00000000e+00, 2.21211519e-02, 9.38368728e-04,
        2.03566179e-02, 0.00000000e+00, 5.71851293e-03, 6.91074878e-04,
        6.85477280e-05, 9.55524296e-03, 2.24500149e-02, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 6.75486550e-02, 0.00000000e+00,
        2.35534720e-02, 5.03459610e-02, 2.95396671e-02, 7.63033004e-03,
        0.0000