In [16]:
# importing necessary libraries
import tensorflow as tf
import transformers
from transformers import AutoTokenizer
import numpy as np
import pandas as pd
import json
import matplotlib.pyplot as plt
import csv
from transformers import BertTokenizer, TFBertModel

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Model
from keras.layers import LSTM, Input, TimeDistributed, Dense, Activation, RepeatVector, Embedding
from keras.optimizers import Adam
from keras.losses import sparse_categorical_crossentropy

In [18]:
f = open('spotify_million_playlist_dataset_challenge/challenge_set.json')

js = json.load(f)
playlists = js['playlists']
titles = []
tracks = []

for playlist in playlists:
    if not playlist['tracks'] or 'name' not in playlist:
        continue
    titles.append(playlist['name'].lower())
    tracks.append(' '.join(track['track_uri'] for track in playlist['tracks']))
        

In [19]:
def tokenize(sentences):
    text_tokenizer = Tokenizer()
    text_tokenizer.fit_on_texts(sentences)
    return text_tokenizer.texts_to_sequences(sentences), text_tokenizer

In [20]:
titles_tokens, title_tokenizer = tokenize(titles)
tracks_tokens, track_tokenizer = tokenize(tracks)

title_vocab = len(title_tokenizer.word_index) + 1
track_vocab = len(track_tokenizer.word_index) + 1

max_title_length = int(len(max(titles_tokens, key=len)))
max_track_length = int(len(max(tracks_tokens, key=len)))

pad_titles = pad_sequences(titles_tokens, max_title_length, padding = "post")
pad_tracks = pad_sequences(tracks_tokens, max_track_length, padding = "post")

pad_titles = pad_titles.reshape(*pad_titles.shape, 1)
pad_tracks = pad_tracks.reshape(*pad_tracks.shape, 1)

In [21]:
input_sequence = Input(shape=(max_title_length,))
embedding = Embedding(input_dim=title_vocab, output_dim=128,)(input_sequence)
encoder = LSTM(64, return_sequences=False)(embedding)
r_vec = RepeatVector(max_track_length)(encoder)
decoder = LSTM(64, return_sequences=True, dropout=0.2)(r_vec)
logits = TimeDistributed(Dense(track_vocab))(decoder)

In [22]:
enc_dec_model = Model(input_sequence, Activation('softmax')(logits))
enc_dec_model.compile(loss=sparse_categorical_crossentropy,
              optimizer=Adam(1e-3),
              metrics=['accuracy'])
enc_dec_model.summary()



Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 9)]               0         
                                                                 
 embedding (Embedding)       (None, 9, 128)            285440    
                                                                 
 lstm (LSTM)                 (None, 64)                49408     
                                                                 
 repeat_vector (RepeatVecto  (None, 300, 64)           0         
 r)                                                              
                                                                 
 lstm_1 (LSTM)               (None, 300, 64)           33024     
                                                                 
 time_distributed (TimeDist  (None, 300, 63999)        4159935   
 ributed)                                                    

In [24]:
model_results = enc_dec_model.fit(pad_titles, pad_tracks, batch_size=30, epochs=1)

def logits_to_sentence(logits, tokenizer):

    index_to_words = {idx: word for word, idx in tokenizer.word_index.items()}
    index_to_words[0] = '<empty>' 

    return ' '.join([index_to_words[prediction] for prediction in np.argmax(logits, 1)])

print(logits_to_sentence(enc_dec_model.predict(pad_titles[20:21])[0], track_tokenizer))

<empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> <empty> 