Introduction to Deep Learning - BGU - SISE
Assignment 3 - From Melodies to Lyrics

Submitted by Elior Nehemya & Eden Levy

# 0. Imports & Defs

In [1]:
# Misc
from time import time
import itertools
import datetime
import re
import os

# Data
import pandas as pd
import numpy as np

# Visualization
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# NLP
import nltk
nltk.download('punkt')
## from nltk.corpus import stopwords

# SKLearn
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score, confusion_matrix, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.metrics.pairwise import cosine_similarity

# Keras
from keras.layers import *
from keras.models import *
from keras.preprocessing.sequence import pad_sequences
import keras.backend as K
from keras.optimizers import Adadelta, Adam, SGD
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.utils import to_categorical
from keras.callbacks import TensorBoard

# Word2Vec
from gensim.models import Word2Vec, KeyedVectors

# Google
from google.colab import drive
drive.mount('/content/drive')
ORIG_PATH = 'drive/My Drive/Colab Notebooks/Deep Learning - gilad/Assignment 3'
if os.getcwd().find(ORIG_PATH) == -1: os.chdir(ORIG_PATH)
PATH = './'

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Using TensorFlow backend.


Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
def text_to_word_list(text):
    ''' Pre process and convert texts to a list of words '''
    text = str(text)
    text = text.lower()

    # Clean the text
    text = re.sub(r"[^A-Za-z0-9^&,!.\/'+-=]", " ", text)
    text = re.sub(r"what's", "what is ", text)
    text = re.sub(r"\'s", " ", text)
    text = re.sub(r"\'ve", " have ", text)
    text = re.sub(r"can't", "cannot ", text)
    text = re.sub(r"n't", " not ", text)
    text = re.sub(r"i'm", "i am ", text)
    text = re.sub(r"\'re", " are ", text)
    text = re.sub(r"\'d", " would ", text)
    text = re.sub(r"\'ll", " will ", text)
    text = re.sub(r",", " ", text)
    text = re.sub(r"\.", " ", text)
    text = re.sub(r"!", " ! ", text)
    text = re.sub(r"\/", " ", text)
    text = re.sub(r"\^", " ^ ", text)
    text = re.sub(r"\+", " + ", text)
    text = re.sub(r"\-", " - ", text)
    text = re.sub(r"\=", " = ", text)
    text = re.sub(r"'", " ", text)
    text = re.sub(r"(\d+)(k)", r"\g<1>000", text)
    text = re.sub(r":", " : ", text)
    text = re.sub(r" e g ", " eg ", text)
    text = re.sub(r" b g ", " bg ", text)
    text = re.sub(r" u s ", " american ", text)
    text = re.sub(r"\0s", "0", text)
    text = re.sub(r" 9 11 ", "911", text)
    text = re.sub(r"e - mail", "email", text)
    text = re.sub(r"j k", "jk", text)
    text = re.sub(r"\s{2,}", " ", text)

    text = nltk.word_tokenize(text)

    return text

def concat_lyrics(df):
    for idx, row in df.iterrows():
        for i in range(3,7):
            if len(row[i]) > 0:
                row[2] = row[2] + ' & ' + row[i]
        row[2] = text_to_word_list(row[2])
        
    df = df.drop(columns=range(3,7))
    return df

# 1. Data Loading

In [0]:
df_lyrics = pd.read_csv('./lyrics_train_set.csv', header=-1)

In [0]:
df_lyrics = df_lyrics.fillna('')
df_lyrics = concat_lyrics(df_lyrics)
df_lyrics.columns = ['artist', 'title', 'lyrics']

In [0]:
df_feats = pd.read_csv('./features.csv')
df_feats['artist'] = ''
df_feats['title'] = ''

# Transform the path into (artist, title)
for idx, row in df_feats.iterrows():
    info = row['Unnamed: 0'].split('\\')[-1].replace('.mid', '').replace('_', ' ').split(' - ')[:2]
    df_feats.at[idx, 'artist'] = info[0].lower()
    df_feats.at[idx, 'title'] = info[1].lower()
    
# "depeche mode - enjoy the silence" shows with -2
# Code here

# Drop filepath feature
df_feats = df_feats.drop(columns=['Unnamed: 0'])

In [0]:
# Inner join between midi features & lyrics
df_merged = pd.merge(df_feats, df_lyrics, on=['artist', 'title'], how='inner')
# Drop duplicate songs (only 1)
df_merged = df_merged.drop_duplicates(subset=['artist', 'title'])

# 2. Word2Vec

## Local W2V

In [0]:
# Prepare lyric data for w2v training
w2v_train = []
words_set = set()

for song in df_merged['lyrics'].values:
    w2v_train.append(song)
    words_set |= {word for word in song}

In [8]:
word2vec_model = Word2Vec(w2v_train, size=300, window=5, min_count=1, workers=10)
vocab_len = len(word2vec_model.wv.vocab)
print('Word2Vec vocabulary size: {}'.format(vocab_len))

Word2Vec vocabulary size: 7227


In [0]:
words_list = list(words_set)
words2idx = {words_list[i] : i for i in range(len(words_list))}
idx2word = {i : words_list[i] for i in range(len(words_list))}

def get_one_hot(word_list):
    integer_encoded = [words2idx[word_list[i]] for i in range(len(word_list))]
    return to_categorical(integer_encoded, num_classes=vocab_len)


def invers_one_hot(one_hot_list):
    integer_encoded = argmax(one_hot_list, axis = -1)
    return [idx2word[integer_encoded[i]] for i in range(len(one_hot_list))]


def sample_probs(probs):
    idx = np.random.choice(list(range(len(probs))), p=probs.ravel())
    return idx2word[idx]

## External W2V

In [0]:
##############################################################################################################

In [0]:
# from gensim.models.word2vec import Word2Vec
# from gensim.models import KeyedVectors
# model = KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz', binary=True)

In [0]:
# lyrics_word_set = set()
# for idx, row in df_merged.iterrows():
#     lyrics_word_set |= {word for word in row['lyrics']}

In [0]:
# i=0
# unk_words = []
# for word in lyrics_word_set:
#     if word not in model.vocab:
#         i = i+1
#         unk_words.append(word)

# ctr = 0
# for idx, row in df_merged.iterrows():
#     for word in row['lyrics']:
#         if word in unk_words:
#             ctr = ctr + 1

In [0]:
###############################################################################################################

# 3. Midi Features Data

In [0]:
np_midifeats = df_merged.drop(columns=['artist', 'lyrics', 'title']).values

# Weird stringified NaN values
np_midifeats[np.where(np_midifeats == ' NaN')] = .0

# Normalize Data - can be any scaler
scaler = MinMaxScaler()
midifeats_scaled = scaler.fit_transform(np_midifeats)

num_midifeats = midifeats_scaled.shape[1]

# 4. Prepare Data

In [0]:
# Configs
window_size = 5
embedding_size = word2vec_model.vector_size

In [0]:
X_train = []
y_train = []
X_feat_train = []

for idx, song in enumerate(w2v_train):
  
    # pad beginning of songs
    song = list(('<UNK>',)*(window_size-1)) + song

    for i in range(0,len(song)-window_size,1):
        X_train.append([np.zeros((word2vec_model.vector_size,)) if song[i+j]=='<UNK>' else word2vec_model.wv[song[i+j]] for j in range(window_size)])
        X_feat_train.append(midifeats_scaled[idx])
        y_train.append(song[i+window_size])
        
X_train = np.array(X_train).reshape((len(X_train),window_size,300))
X_feat_train = np.array(X_feat_train)
y_train = np.array(y_train)
y_train_one_hot = get_one_hot(y_train)
y_train_one_hot.shape

assert y_train_one_hot.shape == (X_train.shape[0], vocab_len)

# 5. Model

In [13]:
def get_model_1():
    inp_lyrics = Input((window_size, embedding_size), name='lyric_embeddings')

    inp_midifeats = Input((num_midifeats,), name='midi_features')
    duplicated_midifeats = concatenate(list((inp_midifeats,)*window_size), axis=1)
    reshaped = Reshape((window_size, num_midifeats))(duplicated_midifeats)

    full_input = concatenate([inp_lyrics, reshaped], axis=2)

    l = CuDNNLSTM(256, return_sequences=True)(full_input)
    l = CuDNNLSTM(128, return_sequences=False)(l)
    fc = Dense(1024)(l)

    out = Dense(vocab_len, activation='softmax')(fc)

    model = Model([inp_lyrics, inp_midifeats], out)
    optimizer = Adam()
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    
    return model


model = get_model_1()
model.summary()

W0805 14:21:12.091784 139742920800128 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0805 14:21:12.146513 139742920800128 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0805 14:21:13.862171 139742920800128 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0805 14:21:14.270575 139742920800128 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0805 14:21:14.278253 139742920800128 deprecation_wrappe

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
midi_features (InputLayer)      (None, 1495)         0                                            
__________________________________________________________________________________________________
concatenate_1 (Concatenate)     (None, 7475)         0           midi_features[0][0]              
                                                                 midi_features[0][0]              
                                                                 midi_features[0][0]              
                                                                 midi_features[0][0]              
                                                                 midi_features[0][0]              
__________________________________________________________________________________________________
lyric_embe

## Training

In [0]:
tbCallBack = TensorBoard(log_dir='./log', histogram_freq=0,
                         write_graph=True,
                         batch_size=32,
                         write_images=True)

In [15]:
history = model.fit(x=[X_train, X_feat_train], y=y_train_one_hot, epochs=10, verbose=1, callbacks=[tbCallBack], validation_split=0.1, batch_size=32)

W0805 14:21:32.264303 139742920800128 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
W0805 14:21:32.526704 139742920800128 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.



Train on 172123 samples, validate on 19125 samples


W0805 14:21:36.501655 139742920800128 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/callbacks.py:850: The name tf.summary.merge_all is deprecated. Please use tf.compat.v1.summary.merge_all instead.

W0805 14:21:36.503293 139742920800128 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/callbacks.py:853: The name tf.summary.FileWriter is deprecated. Please use tf.compat.v1.summary.FileWriter instead.



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# 6. Advanced Model

In [0]:
def get_model_2():

    ########## Autoencoder Part ############

    nb_epoch = 50
    batch_size = 64

    # Input dimension size (first and last autoencoder layer size)
    input_dim = midifeats_scaled.shape[1]

    # Setting the auto encoder layers
    input_layer = Input(shape=(input_dim, ))

    encoder_l1 = Dense(256, activation="relu")(input_layer) 
    encoder_d1 = Dropout(0.2)(encoder_l1)
    encoder_l2 = Dense(64, activation="relu")(encoder_d1) 
    encoder = Dropout(0.2)(encoder_l2)
    decoder = Dense(256, activation="relu")(encoder)

    decoder = Dense(input_dim, activation="sigmoid")(decoder) 

    ae = Model(inputs=input_layer, outputs=decoder)
    
    ae_history = train_ae(ae)

    ################ NN ####################

    inp_midifeats = Input((num_midifeats,))

    # Encode midi features input using pre-trained layers
    enc_l1 = Dense(256, activation="relu", name="encoder_l1")(inp_midifeats) 
    encoded = Dense(64, activation="relu", name="encoder_l2")(enc_l1)

    # Copy features for each timestamp in LSTM
    duplicated_midifeats = concatenate(list((encoded,)*window_size), axis=1)
    reshaped = Reshape((window_size, 64))(duplicated_midifeats)

    inp_lyrics = Input((window_size, embedding_size))

    full_input = concatenate([inp_lyrics, reshaped], axis=2)

    l = CuDNNLSTM(256, return_sequences=True)(full_input)
    l = CuDNNLSTM(128, return_sequences=False)(l)
    fc = Dense(1024)(l)

    out = Dense(vocab_len, activation='softmax')(fc)

    model = Model([inp_lyrics, inp_midifeats], out)
    optimizer = Adam()
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

    # Set pre-trained weights for the encoder part
    l1_w, l1_b, l2_w, l2_b = ae.get_weights()[:4]
    model.get_layer('encoder_l1').set_weights([l1_w, l1_b])
    model.get_layer('encoder_l2').set_weights([l2_w, l2_b])

    model.get_layer('encoder_l1').trainable = False
    model.get_layer('encoder_l2').trainable = False

    return model


def train_ae(ae, nb_epoch=64, batch_size=64, val_split=0.1):
    nb_epoch = 200
    batch_size = 64

    # Configures the learning process of the network
    ae.compile(optimizer='adam', loss='mean_squared_error', metrics=['mse'])

    # Train the autoencoder based on the best epoch, returns history object
    history = ae.fit(midifeats_scaled, midifeats_scaled, epochs=nb_epoch, batch_size=batch_size, verbose=0, validation_split=0.1)

    return history

In [19]:
model_2 = get_model_2()

W0805 13:15:01.131497 140526145329024 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0805 13:15:01.139565 140526145329024 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0805 13:15:01.161312 140526145329024 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
W0805 13:15:01.223661 140526145329024 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:790: The name tf.tra

## 7. Training 2

In [22]:
tbCallBack = TensorBoard(log_dir='./log', histogram_freq=0,
                         write_graph=True,
                         write_grads=True,
                         batch_size=32,
                         write_images=True)
history_2 = model_2.fit(x=[X_train, X_feat_train], y=y_train_one_hot, epochs=10, verbose=1, validation_split=0.1
                        ,callbacks=[tbCallBack]
                        )

  'Discrepancy between trainable weights and collected trainable'


Train on 172123 samples, validate on 19125 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# 7. Test Evaluation

In [0]:
avg_song_len = sum(df_merged['lyrics'].apply(len)) / df_merged.shape[0]

In [0]:
def query_model(model, word, midi_vector):

    # Prepare lyrics input
    inp_lyrics = np.zeros((1, window_size, embedding_size))
    inp_lyrics[0, window_size-1,:] = word2vec_model.wv[word]

    # Prepare midi input
    midi_vector = midi_vector

    predicted_song = [word]
    i = 0
    while (i < avg_song_len):
        preds = model.predict([inp_lyrics, midi_vector])
        pred_word = sample_probs(preds[0])
        
        predicted_song.append(pred_word)

        inp_lyrics = np.roll(inp_lyrics, -1, axis=1)
        inp_lyrics[0, window_size-1, :] = word2vec_model.wv[pred_word]

        i = i + 1

    forprint = ' '.join(predicted_song).replace(' & ', '\n')

    return forprint

In [0]:
# read test data

df_lyrics = pd.read_csv('./lyrics_test_set.csv', header=-1 )


for idx, row in df_lyrics.iterrows():
    row[2] = text_to_word_list(row[2])
    row[1] = str(row[1])[1:]

df_lyrics.columns = ['artist', 'title', 'lyrics']

df_feats = pd.read_csv('./features.csv')
df_feats['artist'] = ''
df_feats['title'] = ''

# Transform the path into (artist, title)
for idx, row in df_feats.iterrows():
    info = row['Unnamed: 0'].split('\\')[-1].replace('.mid', '').replace('_', ' ').split(' - ')[:2]
    df_feats.at[idx, 'artist'] = info[0].lower()
    df_feats.at[idx, 'title'] = info[1].lower()
    

# Drop filepath feature
df_feats = df_feats.drop(columns=['Unnamed: 0'])

# Inner join between midi features & lyrics
df_merged = pd.merge(df_feats, df_lyrics, on=['artist', 'title'], how='inner')
# Drop duplicate songs (only 1)
# df_merged = df_merged.drop_duplicates(subset=['artist', 'title'])

np_midifeats = df_merged.drop(columns=['artist', 'lyrics', 'title']).values

# Weird stringified NaN values
np_midifeats[np.where(np_midifeats == ' NaN')] = .0

# Normalize Data - can be any scaler
scaler = MinMaxScaler()
midifeats_scaled = scaler.fit_transform(np_midifeats)

num_midifeats = midifeats_scaled.shape[1]


# generate test songs:

melodies = [midifeats_scaled[i:i+1] for i in range(5)]
words = ['love', 'believe', 'rage' ]
songs = []
for melody in melodies:
    for word in words:
        songs.append(query_model(model, word, melody))

In [9]:
# TEST SONGS - LOAD PREMADE SONGS

songs1 = np.load('model1_songs.npy')
songs2 = np.load('model2_songs.npy')

songs1 = [song.replace(' \n ', '\n') for song in songs1]
songs2 = [song.replace(' \n ', '\n') for song in songs2]

w = ['love', 'believe', 'rage']

print('############ Songs generated by Basic Model #############')
for i, song in enumerate(songs1):
  print('*'*60)
  print('Artist: ' + df_merged['artist'][int(i/3)])
  print('Title: ' + df_merged['title'][int(i/3)])
  print('Starting word: {}\n'.format(w[i%3]))
  print(song)

  
  
print('############ Songs generated by Advanced Model #############')
for i, song in enumerate(songs2):
  print('*'*60)
  print('Artist: ' + df_merged['artist'][int(i/3)])
  print('Title: ' + df_merged['title'][int(i/3)])
  print('Starting word: {}\n'.format(w[i%3]))
  print(song)

############ Songs generated by Basic Model #############
************************************************************
Artist: aqua
Title: barbie girl
Starting word: love

love start start bus suit sad shadow that find deep find sad sad sad surely that shadow must find find sad bus bus wo die find find find sad sad sad sad find start sad earl find walls start glad find find start die find sad sad die find deep shadow bus bus that find find die name die quick deep sad recognize earl find start find die find sad sad die deep find walls start sad die find find find find find find quick gentle sad find stop die die
************************************************************
Artist: aqua
Title: barbie girl
Starting word: believe

believe bye fun earl earl shadow that find find die sad sad find sad sad find deep sad find walls quick bus that find find die die die quick special that find start sad sad sad sad find die shadow bus bus that find find die sad die sad sad sad my
shadow different 

# 8. TensorBoard

In [0]:
%load_ext tensorboard
%tensorboard --logdir name1:./log/