## Beam conversion from BigQuery to TFRecords

In this notebook we use Apache Beam (run on Dataflow) to convert BigQuery tables to TFRecords.
The pipeline code can be found in `beam_candidates` (candidate generation) and `beam_training` (training and validation data).

### IMPORTANT - run this upgrade and restart the kernel before proceeding

In [1]:
# Uncomment and run once to upgrade Apache Beam with the GCP extras, then restart the kernel.
# !pip install --upgrade 'apache-beam[gcp]' --user

In [59]:
import os 

# Set before `import tensorflow` so the value is already in the environment when TF loads.
# NOTE(review): level '2' is documented by TensorFlow to hide INFO and WARNING log lines — confirm for the installed version.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 

import tensorflow as tf

### Set variables

In [None]:
# IPython shell assignment: the command's output lines are captured as a list-like object.
GCP_PROJECTS = !gcloud config get-value project
# The first captured line is used as the project id.
# NOTE(review): if gcloud prints anything ahead of the project id, that line is picked up instead — confirm.
PROJECT_ID = GCP_PROJECTS[0]

In [35]:
import time
from google.cloud import storage

# ---------------------------------------------------------------------------
# GCP environment
# ---------------------------------------------------------------------------
# TODO - parameterize
REGION = "us-central1"
NETWORK = "ucaip-haystack-vpc-network"
BUCKET_NAME = "matching-engine-content"

# ---------------------------------------------------------------------------
# BigQuery sources
# ---------------------------------------------------------------------------
BQ_DATASET = "spotify_e2e_test"
BQ_TABLE_TRAIN = "train_flatten_last_5"  # TODO
BQ_TABLE_VALID = "train_flatten_valid_last_5"
BQ_TABLE_CANDIDATES = "candidates"

MAX_PLAYLIST_LENGTH = 5

# ---------------------------------------------------------------------------
# GCS destinations
# ---------------------------------------------------------------------------
# Version tag for the Dataflow pipeline; processed data is written under
# f'gs://{BUCKET_NAME}/{VERSION}/{___PREFIX}' using the subfolder names below.
VERSION = "v1-0-0"
CANDIDATE_PREFIX = "candidates"
TRAIN_DIR_PREFIX = "train"
VALID_DIR_PREFIX = "valid"

# Cloud Storage client for the project resolved by the gcloud cell.
storage_client = storage.Client(project=PROJECT_ID)

In [36]:
# List what is currently in the destination bucket (IPython expands $BUCKET_NAME from the notebook variable).
! gsutil ls -al gs://$BUCKET_NAME

                                 gs://matching-engine-content/10/
                                 gs://matching-engine-content/11/
                                 gs://matching-engine-content/13/
                                 gs://matching-engine-content/5/
                                 gs://matching-engine-content/6/
                                 gs://matching-engine-content/7/
                                 gs://matching-engine-content/8/
                                 gs://matching-engine-content/v1-0-0/


# Run Dataflow to convert BQ to TFrecords

Candidate generation can be found in `beam_candidates`
Training and Validation generation can be found in `beam_training`

Usage:
* Candidate generation

> from inside `beam_candidates/`: `python3 main.py $PROJECT_ID $NETWORK $REGION $VERSION $BUCKET_NAME $CANDIDATE_PREFIX $BQ_DATASET $BQ_TABLE_CANDIDATES`
   
* Training generation
  
> from inside `beam_training/`: `python3 main-train.py <project id> <network> <region> <version tag> <bucket name> <gcs data subfolder> <BQ dataset size MB> <desired partition size MB> <BQ dataset> <BQ table>`

In [6]:
# Show the file layout of the beam_training directory.
!tree beam_training

[01;34mbeam_training[00m
├── README.MD
├── __init__.py
├── main-train.py
├── setup.py
└── [01;34mtrain_pipeline[00m
    ├── __init__.py
    ├── [01;34m__pycache__[00m
    │   ├── __init__.cpython-37.pyc
    │   └── train_pipe.cpython-37.pyc
    └── train_pipe_shape.py

2 directories, 8 files


In [42]:
# `os` is already imported in the first code cell; the import is repeated so this cell runs on its own.
import os
# Show the kernel's working directory — the relative %cd cells and `python3 main*.py` calls depend on it.
os.getcwd()

'/home/jupyter/spotify_mpd_two_tower/beam_candidates'

In [74]:
# Relative change of directory: only resolves when the kernel's cwd is the parent of beam_candidates.
%cd beam_candidates

/home/jupyter/spotify_mpd_two_tower/beam_candidates


### Candidates

In [75]:
# Wall-clock timing around the candidate pipeline launch.
start_time = time.time()

# Runs main.py from the current directory (expected to be beam_candidates).
# IPython expands each $NAME from the notebook namespace; the values are passed positionally in this order.
! python3 main.py $PROJECT_ID $NETWORK $REGION $VERSION $BUCKET_NAME $CANDIDATE_PREFIX $BQ_DATASET $BQ_TABLE_CANDIDATES

end_time = time.time()
# Elapsed time in minutes, truncated to a whole number.
runtime_mins = int((end_time - start_time) / 60)
print(f"total runtime_mins: {runtime_mins}")

GoogleCloudOptions(create_from_snapshot=None, dataflow_endpoint=https://dataflow.googleapis.com, dataflow_kms_key=None, dataflow_service_options=None, enable_artifact_caching=False, enable_hot_key_logging=False, enable_streaming_engine=False, flexrs_goal=None, gcp_oauth_scopes=['https://www.googleapis.com/auth/bigquery', 'https://www.googleapis.com/auth/cloud-platform', 'https://www.googleapis.com/auth/devstorage.full_control', 'https://www.googleapis.com/auth/userinfo.email', 'https://www.googleapis.com/auth/datastore', 'https://www.googleapis.com/auth/spanner.admin', 'https://www.googleapis.com/auth/spanner.data'], impersonate_service_account=None, job_name=spotify-bq-tfrecords-v1-0-0-230308-151304, labels=None, no_auth=False, project=hybrid-vertex, region=us-central1, service_account_email=None, staging_location=gs://matching-engine-content/v1-0-0/job/staging/, temp_location=gs://matching-engine-content/v1-0-0/job/temp/, template_location=None, transform_name_mapping=None, update=Fa

In [73]:
# Step back to the parent directory so the next relative %cd resolves.
%cd ..

/home/jupyter/spotify_mpd_two_tower


### Validation set

In [45]:
# Relative change of directory: only resolves when the kernel's cwd is the parent of beam_training.
%cd beam_training

/home/jupyter/spotify_mpd_two_tower/beam_training


In [46]:
# Validation split sizing, in MB: total data volume and the target size of one shard.
TOTAL_MB_VALID = 500
TARGET_SHARD_SIZE_MB_VALID = 250

# Number of shards implied by the two sizes (floor division).
NUM_TF_RECORDS = TOTAL_MB_VALID // TARGET_SHARD_SIZE_MB_VALID
NUM_TF_RECORDS

2

In [64]:
# Wall-clock timing around the validation pipeline launch.
start_time = time.time()

# Runs main-train.py from the current directory (expected to be beam_training).
# Arguments are positional, passed in this order: project id, network, region, version tag,
# bucket, GCS subfolder, total MB, target shard MB, BigQuery dataset, BigQuery table.

! python3 main-train.py $PROJECT_ID $NETWORK $REGION $VERSION $BUCKET_NAME $VALID_DIR_PREFIX $TOTAL_MB_VALID $TARGET_SHARD_SIZE_MB_VALID $BQ_DATASET $BQ_TABLE_VALID

end_time = time.time()
# Elapsed time in minutes, truncated to a whole number.
runtime_mins = int((end_time - start_time) / 60)
print(f"total runtime_mins: {runtime_mins}")

Number of Expected TFRecords: 2
GoogleCloudOptions(create_from_snapshot=None, dataflow_endpoint=https://dataflow.googleapis.com, dataflow_kms_key=None, dataflow_service_options=None, enable_artifact_caching=False, enable_hot_key_logging=False, enable_streaming_engine=False, flexrs_goal=None, gcp_oauth_scopes=['https://www.googleapis.com/auth/bigquery', 'https://www.googleapis.com/auth/cloud-platform', 'https://www.googleapis.com/auth/devstorage.full_control', 'https://www.googleapis.com/auth/userinfo.email', 'https://www.googleapis.com/auth/datastore', 'https://www.googleapis.com/auth/spanner.admin', 'https://www.googleapis.com/auth/spanner.data'], impersonate_service_account=None, job_name=spotify-bq-tfrecords-v1-0-0-230308-013003, labels=None, no_auth=False, project=hybrid-vertex, region=us-central1, service_account_email=None, staging_location=gs://matching-engine-content/v1-0-0/job/staging/, temp_location=gs://matching-engine-content/v1-0-0/job/temp/, template_location=None, transf

### Train set

In [65]:
# Training split sizing, in MB: total data volume and the target size of one shard.
TOTAL_MB_TRAIN = 44_000
TARGET_SHARD_SIZE_MB_TRAIN = 2000

# Number of shards implied by the two sizes (floor division).
NUM_TF_RECORDS = TOTAL_MB_TRAIN // TARGET_SHARD_SIZE_MB_TRAIN
NUM_TF_RECORDS

22

In [66]:
# Wall-clock timing around the training pipeline launch.
start_time = time.time()

# Runs main-train.py from the current directory (expected to be beam_training).
# Arguments are positional, passed in this order: project id, network, region, version tag,
# bucket, GCS subfolder, total MB, target shard MB, BigQuery dataset, BigQuery table.
! python3 main-train.py $PROJECT_ID $NETWORK $REGION $VERSION $BUCKET_NAME $TRAIN_DIR_PREFIX $TOTAL_MB_TRAIN $TARGET_SHARD_SIZE_MB_TRAIN $BQ_DATASET $BQ_TABLE_TRAIN

end_time = time.time()
# Elapsed time in minutes, truncated to a whole number.
runtime_mins = int((end_time - start_time) / 60)
print(f"total runtime_mins: {runtime_mins}")

Number of Expected TFRecords: 22
GoogleCloudOptions(create_from_snapshot=None, dataflow_endpoint=https://dataflow.googleapis.com, dataflow_kms_key=None, dataflow_service_options=None, enable_artifact_caching=False, enable_hot_key_logging=False, enable_streaming_engine=False, flexrs_goal=None, gcp_oauth_scopes=['https://www.googleapis.com/auth/bigquery', 'https://www.googleapis.com/auth/cloud-platform', 'https://www.googleapis.com/auth/devstorage.full_control', 'https://www.googleapis.com/auth/userinfo.email', 'https://www.googleapis.com/auth/datastore', 'https://www.googleapis.com/auth/spanner.admin', 'https://www.googleapis.com/auth/spanner.data'], impersonate_service_account=None, job_name=spotify-bq-tfrecords-v1-0-0-230308-014748, labels=None, no_auth=False, project=hybrid-vertex, region=us-central1, service_account_email=None, staging_location=gs://matching-engine-content/v1-0-0/job/staging/, temp_location=gs://matching-engine-content/v1-0-0/job/temp/, template_location=None, trans

# Test output

## Candidates

### Candidate tower features

In [79]:
# Parsing spec for the candidate-tower records: every feature is a scalar
# (shape=()) FixedLenFeature, so only the name and dtype vary per entry.
candidate_features = {
    feature_name: tf.io.FixedLenFeature(dtype=feature_dtype, shape=())
    for feature_name, feature_dtype in (
        ("track_uri_can", tf.string),
        ("track_name_can", tf.string),
        ("artist_uri_can", tf.string),
        ("artist_name_can", tf.string),
        ("album_uri_can", tf.string),
        ("album_name_can", tf.string),
        ("duration_ms_can", tf.float32),
        ("track_pop_can", tf.float32),
        ("artist_pop_can", tf.float32),
        ("artist_genres_can", tf.string),
        ("artist_followers_can", tf.float32),
        # new
        # ("track_pl_titles_can", tf.string),  # currently disabled
        ("track_danceability_can", tf.float32),
        ("track_energy_can", tf.float32),
        ("track_key_can", tf.string),
        ("track_loudness_can", tf.float32),
        ("track_mode_can", tf.string),
        ("track_speechiness_can", tf.float32),
        ("track_acousticness_can", tf.float32),
        ("track_instrumentalness_can", tf.float32),
        ("track_liveness_can", tf.float32),
        ("track_valence_can", tf.float32),
        ("track_tempo_can", tf.float32),
        ("track_time_signature_can", tf.string),
    )
}

### Candidate files

In [80]:
# Collect the gs:// URI of every candidate TFRecord object whose name starts with
# "{VERSION}/{CANDIDATE_PREFIX}". The prefix has no trailing slash, so it matches shards
# stored inside a "candidates/" folder as well as "candidates-*" objects that sit
# directly under the version folder.
candidate_files = [
    # Build the URI from the raw object name: blob.public_url percent-encodes the name,
    # so a gs:// path derived from it is wrong for names containing special characters.
    f"gs://{BUCKET_NAME}/{blob.name}"
    for blob in storage_client.list_blobs(BUCKET_NAME, prefix=f"{VERSION}/{CANDIDATE_PREFIX}")
    # Keep TFRecord shards only (same filter as the train/valid cells); anything else
    # under the prefix would make TFRecordDataset fail while reading.
    if ".tfrecords" in blob.name
]

# Fail loudly here rather than silently producing an empty dataset downstream.
assert candidate_files, (
    f"no .tfrecords objects found under gs://{BUCKET_NAME}/{VERSION}/{CANDIDATE_PREFIX}"
)

candidate_dataset = tf.data.TFRecordDataset(candidate_files)


def parse_candidate_tfrecord_fn(example):
    """Parse one serialized record into a dict of candidate-track tensors.

    Args:
        example: one serialized record, as yielded by `candidate_dataset`.

    Returns:
        A dict with one parsed tensor per key of `candidate_features`.
    """
    return tf.io.parse_single_example(example, features=candidate_features)


parsed_candidate_dataset = candidate_dataset.map(parse_candidate_tfrecord_fn)

In [81]:
from pprint import pprint

# Sanity check: pull the first parsed record (as a batch of one) and pretty-print it.
for x in parsed_candidate_dataset.batch(1).take(1):
    pprint(x)

{'album_name_can': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'Festival Party Riddim'], dtype=object)>,
 'album_uri_can': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'spotify:album:6HRMv5gpkJDvfBhpBr1OVK'], dtype=object)>,
 'artist_followers_can': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.], dtype=float32)>,
 'artist_genres_can': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'NONE'], dtype=object)>,
 'artist_name_can': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'The Winners Table Band'], dtype=object)>,
 'artist_pop_can': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.], dtype=float32)>,
 'artist_uri_can': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'spotify:artist:2oy6bRhmrdW8M5IVCNpu1A'], dtype=object)>,
 'duration_ms_can': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([113554.], dtype=float32)>,
 'track_acousticness_can': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.0542], dtype=float32)>,
 'track_danceability_

## Train & Valid datasets

In [6]:
# Length of every fixed-size "*_pl" playlist feature in `feats` below.
# NOTE(review): duplicates the value set in the configuration cell — keep the two in sync.
MAX_PLAYLIST_LENGTH = 5

### train & valid features

In [67]:
# Parsing spec for the train/valid records. Three groups are merged in order:
# scalar candidate-track features, scalar playlist summary features, and
# per-track playlist features of fixed length MAX_PLAYLIST_LENGTH.
feats = {
    # ===================================================
    # candidate track features (scalar)
    # ===================================================
    **{
        feature_name: tf.io.FixedLenFeature(dtype=feature_dtype, shape=())
        for feature_name, feature_dtype in (
            ("track_uri_can", tf.string),
            ("track_name_can", tf.string),
            ("artist_uri_can", tf.string),
            ("artist_name_can", tf.string),
            ("album_uri_can", tf.string),
            ("album_name_can", tf.string),
            ("duration_ms_can", tf.float32),
            ("track_pop_can", tf.float32),
            ("artist_pop_can", tf.float32),
            ("artist_genres_can", tf.string),
            ("artist_followers_can", tf.float32),
            # ("track_pl_titles_can", tf.string),  # currently disabled
            ("track_danceability_can", tf.float32),
            ("track_energy_can", tf.float32),
            ("track_key_can", tf.string),
            ("track_loudness_can", tf.float32),
            ("track_mode_can", tf.string),
            ("track_speechiness_can", tf.float32),
            ("track_acousticness_can", tf.float32),
            ("track_instrumentalness_can", tf.float32),
            ("track_liveness_can", tf.float32),
            ("track_valence_can", tf.float32),
            ("track_tempo_can", tf.float32),
            ("track_time_signature_can", tf.string),
        )
    },
    # ===================================================
    # summary playlist features (scalar)
    # ===================================================
    **{
        feature_name: tf.io.FixedLenFeature(dtype=feature_dtype, shape=())
        for feature_name, feature_dtype in (
            ("pl_name_src", tf.string),
            ("pl_collaborative_src", tf.string),
            # ("num_pl_followers_src", tf.float32),  # currently disabled
            ("pl_duration_ms_new", tf.float32),
            ("num_pl_songs_new", tf.float32),  # num_pl_songs_new | n_songs_pl_new
            ("num_pl_artists_new", tf.float32),
            ("num_pl_albums_new", tf.float32),
            # ("avg_track_pop_pl_new", tf.float32),  # currently disabled
            # ("avg_artist_pop_pl_new", tf.float32),  # currently disabled
            # ("avg_art_followers_pl_new", tf.float32),  # currently disabled
        )
    },
    # ===================================================
    # per-track playlist features, shape (MAX_PLAYLIST_LENGTH,)
    # ===================================================
    **{
        feature_name: tf.io.FixedLenFeature(dtype=feature_dtype, shape=(MAX_PLAYLIST_LENGTH,))
        for feature_name, feature_dtype in (
            # bytes / string
            ("track_uri_pl", tf.string),
            ("track_name_pl", tf.string),
            ("artist_uri_pl", tf.string),
            ("artist_name_pl", tf.string),
            ("album_uri_pl", tf.string),
            ("album_name_pl", tf.string),
            ("artist_genres_pl", tf.string),
            # ("tracks_playlist_titles_pl", tf.string),  # currently disabled
            # audio / popularity attributes (float32, except key, mode and time signature, which are strings)
            ("duration_ms_songs_pl", tf.float32),
            ("track_pop_pl", tf.float32),
            ("artist_pop_pl", tf.float32),
            ("artists_followers_pl", tf.float32),
            ("track_danceability_pl", tf.float32),
            ("track_energy_pl", tf.float32),
            ("track_key_pl", tf.string),
            ("track_loudness_pl", tf.float32),
            ("track_mode_pl", tf.string),
            ("track_speechiness_pl", tf.float32),
            ("track_acousticness_pl", tf.float32),
            ("track_instrumentalness_pl", tf.float32),
            ("track_liveness_pl", tf.float32),
            ("track_valence_pl", tf.float32),
            ("track_tempo_pl", tf.float32),
            ("track_time_signature_pl", tf.string),
        )
    },
}


### Valid files

In [68]:
# Collect the gs:// URI of every validation TFRecord shard under
# gs://{BUCKET_NAME}/{VERSION}/{VALID_DIR_PREFIX}/.
valid_files = [
    # Build the URI from the raw object name: blob.public_url percent-encodes the name,
    # so a gs:// path derived from it is wrong for names containing special characters.
    f"gs://{BUCKET_NAME}/{blob.name}"
    for blob in storage_client.list_blobs(BUCKET_NAME, prefix=f"{VERSION}/{VALID_DIR_PREFIX}/")
    if ".tfrecords" in blob.name
]

# Fail loudly here rather than silently producing an empty dataset downstream.
assert valid_files, (
    f"no .tfrecords objects found under gs://{BUCKET_NAME}/{VERSION}/{VALID_DIR_PREFIX}/"
)

valid = tf.data.TFRecordDataset(valid_files)


def parse_tfrecord(example):
    """Parse one serialized record into a dict of tensors described by `feats`.

    Args:
        example: one serialized record, as yielded by a TFRecordDataset.

    Returns:
        A dict with one parsed tensor per key of `feats`.
    """
    return tf.io.parse_single_example(example, features=feats)


valid_parsed = valid.map(parse_tfrecord)

In [42]:
# valid_parsed

In [69]:
# Sanity check: pull the first parsed validation record (as a batch of one) and print it.
for x in valid_parsed.batch(1).take(1):
    print(x)

{'album_name_can': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'I Decided.'], dtype=object)>, 'album_name_pl': <tf.Tensor: shape=(1, 5), dtype=string, numpy=
array([[b'Big Baby DRAM', b'Coloring Book', b'To Pimp A Butterfly',
        b'Views', b'Camp']], dtype=object)>, 'album_uri_can': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'spotify:album:0XAIjjN5qxViVS0Y5fYkar'], dtype=object)>, 'album_uri_pl': <tf.Tensor: shape=(1, 5), dtype=string, numpy=
array([[b'spotify:album:0NrZHZ0y5kTO8EHliuUUca',
        b'spotify:album:71QyofYesSsRMwFOTafnhB',
        b'spotify:album:7ycBtnsMtyVbbwTfJwRjSP',
        b'spotify:album:3hARKC8cinq3mZLLAEaBh9',
        b'spotify:album:4q5E2s5u5X5HT4UMJpbMKE']], dtype=object)>, 'artist_followers_can': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([10521744.], dtype=float32)>, 'artist_genres_can': <tf.Tensor: shape=(1,), dtype=string, numpy=
array([b"'detroit hip hop', 'hip hop', 'pop', 'pop rap', 'rap', 'southern hip hop', 'trap'"],
    

### Train files

In [70]:
# Collect the gs:// URI of every training TFRecord shard directly under
# gs://{BUCKET_NAME}/{VERSION}/{TRAIN_DIR_PREFIX}/ (delimiter="/" excludes nested folders).
train_files = [
    # Build the URI from the raw object name: blob.public_url percent-encodes the name,
    # so a gs:// path derived from it is wrong for names containing special characters.
    f"gs://{BUCKET_NAME}/{blob.name}"
    for blob in storage_client.list_blobs(
        BUCKET_NAME, prefix=f"{VERSION}/{TRAIN_DIR_PREFIX}/", delimiter="/"
    )
    if ".tfrecords" in blob.name
]

# Fail loudly here rather than silently producing an empty dataset downstream.
assert train_files, (
    f"no .tfrecords objects found under gs://{BUCKET_NAME}/{VERSION}/{TRAIN_DIR_PREFIX}/"
)

train = tf.data.TFRecordDataset(train_files)


# Same definition as in the validation cell; repeated so this cell also runs on its own.
def parse_tfrecord(example):
    """Parse one serialized record into a dict of tensors described by `feats`.

    Args:
        example: one serialized record, as yielded by a TFRecordDataset.

    Returns:
        A dict with one parsed tensor per key of `feats`.
    """
    return tf.io.parse_single_example(example, features=feats)


train_parsed = train.map(parse_tfrecord)

In [71]:
# Bare expression: displays the dataset repr, including its element_spec.
train_parsed

<MapDataset element_spec={'album_name_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'album_name_pl': TensorSpec(shape=(5,), dtype=tf.string, name=None), 'album_uri_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'album_uri_pl': TensorSpec(shape=(5,), dtype=tf.string, name=None), 'artist_followers_can': TensorSpec(shape=(), dtype=tf.float32, name=None), 'artist_genres_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'artist_genres_pl': TensorSpec(shape=(5,), dtype=tf.string, name=None), 'artist_name_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'artist_name_pl': TensorSpec(shape=(5,), dtype=tf.string, name=None), 'artist_pop_can': TensorSpec(shape=(), dtype=tf.float32, name=None), 'artist_pop_pl': TensorSpec(shape=(5,), dtype=tf.float32, name=None), 'artist_uri_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'artist_uri_pl': TensorSpec(shape=(5,), dtype=tf.string, name=None), 'artists_followers_pl': TensorSpec(shape=(5,), dtype=tf.float32, name

In [72]:
# Sanity check: pull the first parsed training record (as a batch of one) and print it.
for x in train_parsed.batch(1).take(1):
    print(x)

{'album_name_can': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'H3'], dtype=object)>, 'album_name_pl': <tf.Tensor: shape=(1, 5), dtype=string, numpy=
array([[b'Pluto', b'Tha Carter IV', b'Drop That #NaeNae', b'Stoner',
        b'H3']], dtype=object)>, 'album_uri_can': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'spotify:album:2SgtoSJoJ9gKozK8PCBLpv'], dtype=object)>, 'album_uri_pl': <tf.Tensor: shape=(1, 5), dtype=string, numpy=
array([[b'spotify:album:1yNIBzlvPVBALSPkUMq1ma',
        b'spotify:album:1uuSC0RCJB3dSp8Mb6GflZ',
        b'spotify:album:5L3nn0pw1AsAGi6QYSHXjT',
        b'spotify:album:3mEM2ULIptDPwU07OLk1qy',
        b'spotify:album:2SgtoSJoJ9gKozK8PCBLpv']], dtype=object)>, 'artist_followers_can': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.], dtype=float32)>, 'artist_genres_can': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b"'chicago rap', 'vapor trap'"], dtype=object)>, 'artist_genres_pl': <tf.Tensor: shape=(1, 5), dtype=string, numpy=
ar