# Testing - Model 2

In this notebook we run the pretrained model on the test file.
<br>This is an example run (it requires a pretrained model).

In [1]:
import re
import warnings

import numpy as np
import pandas as pd
from keras.preprocessing.text import Tokenizer
from nltk.tokenize import RegexpTokenizer

import Model2Base as mb

warnings.filterwarnings('ignore')

Using TensorFlow backend.


**First** - Build the vocabulary from the training data

In [2]:
# Read the raw training CSV (no header row) and replace missing cells with ''.
df = pd.read_csv("data/lyrics_train_set.csv", header=None).fillna('')

# Columns 3..6 are appended, in order, onto column 2 and then dropped, so
# column 2 ends up holding the concatenation of columns 2..6.
for extra_col in (3, 4, 5, 6):
    df[2] = df[2] + df[extra_col]
df = df.drop(columns=[3, 4, 5, 6])

df.columns = ['singer', 'song', 'lyrics']

# Remember each row's original position; later cells filter rows by this id.
df['song_num'] = df.index

In [3]:
# Clean every lyric with the project helper (one call per row).
df['clean_lyrics'] = df['lyrics'].map(mb.clean_text)

# Build the per-row singer/song value from the two name columns.
df['singer_song'] = [
    mb.clean_singer_song(singer, song)
    for singer, song in zip(df['singer'], df['song'])
]

# Tokens are runs of word characters or runs of '&'.
tokenizer = RegexpTokenizer(r'\w+|&+')
df["tokens"] = df["clean_lyrics"].map(tokenizer.tokenize)

Build the MIDI matrices of the training songs and keep only the songs that have one

In [4]:
# Collect the MIDI entries for the training songs. Each entry is indexed below
# as entry[0] -> key compared against df.song_num, entry[1] -> the MIDI file
# handed to create_midi_matrix.
midis_vec = mb.create_midis_vector(df)
number_of_sequences = mb.get_median_length(midis_vec)

# One matrix per song, keyed by entry[0]; a repeated key keeps the last matrix.
songs_dict = {
    midi[0]: mb.create_midi_matrix(midi[1], number_of_sequences)
    for midi in midis_vec
}

# Keep only the songs for which a MIDI matrix was built. The explicit .copy()
# makes the filtered frame independent of the original, so the column
# assignments below are not chained assignments on a slice (the pattern that
# triggers pandas' SettingWithCopyWarning).
df = df[df.song_num.isin(songs_dict.keys())].copy()

# Recompute the derived text columns on the filtered rows.
df['clean_lyrics'] = df.apply(lambda row: mb.clean_text(row.lyrics), axis=1)
df['singer_song'] = df.apply(lambda row: mb.clean_singer_song(row['singer'], row['song']), axis=1)

Tokenizing the different words in our model

In [5]:
# Re-tokenize the cleaned lyrics, this time keeping only runs of word characters.
tokenizer = RegexpTokenizer(r'\w+')
df["tokens"] = df["clean_lyrics"].map(tokenizer.tokenize)

# Number of tokens in each song.
df['ln'] = df["tokens"].map(len)

In [6]:
# Walk the token lists once, gathering every word and each song's length.
all_words = []
sentence_lengths = []
for song_tokens in df["tokens"]:
    all_words.extend(song_tokens)
    sentence_lengths.append(len(song_tokens))

# Sorted list of the distinct words seen in the training lyrics.
VOCAB = sorted(set(all_words))

print("%s words total, with a vocabulary size of %s" % (len(all_words), len(VOCAB)))
print("Max sentence length is %s" % max(sentence_lengths))

180248 words total, with a vocabulary size of 7525
Max sentence length is 1481


Creating tokenizer from the train data so we can reverse the sequences to text

In [7]:
# Fit a Keras tokenizer on the cleaned training lyrics; it is used further down
# both to encode the test lyrics and to turn generated sequences back into text.
# NOTE(review): Keras documents that only the (num_words - 1) most frequent
# words are kept. Presumably this mirrors the training setup - confirm before
# changing the value.
VOCAB_SIZE = len(VOCAB)
tokenizer = Tokenizer(num_words=VOCAB_SIZE)
tokenizer.fit_on_texts(list(df["clean_lyrics"]))

## Arranging the test set for testing

In [8]:
# Read the test CSV (no header row), blank out missing cells and name the columns.
test_df = pd.read_csv("data/lyrics_test_set.csv", header=None).fillna('')
test_df.columns = ['singer', 'song', 'lyrics']

# Clean the test lyrics with the same helper used on the training lyrics.
test_df['clean_lyrics'] = test_df['lyrics'].map(mb.clean_text)

# Encode each cleaned lyric as a list of word ids using the fitted tokenizer.
test_sequences = tokenizer.texts_to_sequences(list(test_df["clean_lyrics"]))

Create midis matrices for all test songs

In [9]:
# Collect the MIDI entries for the test songs (this rebinds the names that
# earlier held the training-set values).
midis_vec = mb.test_create_midis_vector(test_df)

# NOTE(review): hard-coded sequence count; presumably the value the pretrained
# model was built with - confirm against the training run.
number_of_sequences = 221

# entry[0] is the dictionary key, entry[1] the MIDI file turned into a matrix.
songs_dict = {
    entry[0]: mb.create_midi_matrix(entry[1], number_of_sequences)
    for entry in midis_vec
}

## Loading the model

In [10]:
# Identifier of the pretrained model to load; it must already exist for this
# notebook to run.
model_name = 'model2_2019_07_26__1512'
# Loading is delegated to the project helper, which receives only the name.
model = mb.load_model(model_name)

W0727 19:16:20.399608 13552 deprecation_wrapper.py:119] From C:\Users\TomerMeirman\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0727 19:16:20.416469 13552 deprecation_wrapper.py:119] From C:\Users\TomerMeirman\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0727 19:16:20.417469 13552 deprecation_wrapper.py:119] From C:\Users\TomerMeirman\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0727 19:16:20.747279 13552 deprecation_wrapper.py:119] From C:\Users\TomerMeirman\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W0727 19:16:20.753

In [11]:
# The first word id of every test song is the seed word for generation.
input_words = [sequence[0] for sequence in test_sequences]

# Add a leading axis of size 1 to each melody matrix. The matrices are fetched
# by the integer keys 0 .. len(songs_dict) - 1, in that order.
melodies = []
for song_idx in range(len(songs_dict)):
    melodies.append(np.expand_dims(songs_dict[song_idx], 0))

## Creating songs using our model

In [12]:
# How many songs to generate per seed word.
num_of_songs = 2

# No global numpy seed is set; a fixed random_state is passed to every call.
songs = []
for idx, seed_word in enumerate(input_words):
    # The seed word and the melody at the same position belong together.
    generated = mb.create_song(
        model,
        seed_word,
        melodies[idx],
        lyrics_length=50,
        num_of_songs=num_of_songs,
        random_state=5,
    )
    songs.append(generated)

Reverse the sequences to text - to view our newly created songs

In [13]:
# Turn every generated id sequence back into text: one list of texts per test song.
reverse_word_map = [tokenizer.sequences_to_texts(song) for song in songs]

# Break each generated text into lines at the 'eos' marker.
# A plain str.split('eos') also cuts inside any word that merely contains those
# letters (e.g. 'videos' -> 'vid', ''), so 'eos' is matched only as a whole
# whitespace-delimited token. For standalone markers the pieces are identical
# to what str.split produced, surrounding spaces included.
eos_token = re.compile(r'(?<!\S)eos(?!\S)')
reversed_sentences = [
    [eos_token.split(text) for text in song_texts]
    for song_texts in reverse_word_map
]
reversed_sentences

[[['close to you ',
   ' you know you want to know ',
   ' its not to me you ',
   ' you know the things that you want ',
   ' ',
   ' ooh ooh ooh oh i know ',
   ' baby you still im in my life ',
   ' youre so many i say ',
   ' you still im'],
  ['close to me ',
   ' to walk away ',
   ' when youre gone at me ',
   ' its been to me so ',
   ' and it if you feel like so ',
   ' its been so you wont you just to me to you ',
   ' im in my arms ',
   ' and you want me to']],
 [['if it doesnt really matter ',
   ' im not more ',
   ' i cant give you ',
   ' now i want to be weird ',
   ' and so many i have to go ',
   ' im not more more ',
   ' do it right ',
   ' i cant believe the things that i want you ',
   ' oh'],
  ['if you were my name ',
   ' my loneliness is moving back ',
   ' oh you got to be ',
   ' cant you ',
   ' im a man who have before ',
   ' i didnt know why it was me ',
   ' when you see the same ',
   ' you got to be weird ',
   ' im in']],
 [['dear ',
   ' i am i can