# Testing - Model 1
In this notebook we run the trained model on the test file.
<br>This is an example run (it requires a pretrained model).

In [1]:
import Model1Base as mb
import pandas as pd
from nltk.tokenize import RegexpTokenizer
from keras.preprocessing.text import Tokenizer

Using TensorFlow backend.


In [2]:
import warnings
warnings.filterwarnings('ignore')

**Train File** - first, create a vocabulary that matches the first training file

In [3]:
# Read the header-less training CSV; empty cells become '' so the string
# concatenation below never meets NaN.
train_raw = pd.read_csv("lyrics_train_set.csv", header=None).fillna('')

# Columns 3-6 are appended to column 2, in order.
# NOTE(review): presumably these hold lyric text that spilled into extra
# columns when the CSV was written - confirm against the data file.
extra_lyric_cols = [3, 4, 5, 6]
merged_lyrics = train_raw[2]
for col in extra_lyric_cols:
    merged_lyrics = merged_lyrics + train_raw[col]

# Keep only the three meaningful columns and give them readable names.
df = train_raw.drop(extra_lyric_cols, axis=1)
df[2] = merged_lyrics
df.columns = ['singer', 'song', 'lyrics']

In [4]:
# Normalise the raw lyrics with the project helper, one song at a time.
df['clean_lyrics'] = df['lyrics'].apply(mb.clean_text)

# Build the per-song key that is later matched against the melody table.
df['singer_song'] = [
    mb.clean_singer_song(singer, song)
    for singer, song in zip(df['singer'], df['song'])
]

# Split the cleaned lyrics into tokens: runs of word characters, or runs of '&'.
tokenizer = RegexpTokenizer(r'\w+|&+')
df["tokens"] = df["clean_lyrics"].map(tokenizer.tokenize)

In [5]:
# Load the pre-computed melody table from disk; it is joined to the lyrics
# frame on its 'filename' column in the next cell.
# NOTE(review): unpickling executes arbitrary code - only load this file if it
# comes from a trusted source.
midi_df = pd.read_pickle("data/melody_df.pkl")

In [6]:
# Inner join: keep only the songs that appear in both the lyrics frame
# (key: 'singer_song') and the melody table (key: 'filename').
df_concat = df.merge(
    midi_df,
    how='inner',
    left_on='singer_song',
    right_on='filename',
)

In [7]:
# Walk the token lists once, collecting every token and each song's length.
all_words = []
sentence_lengths = []
for song_tokens in df_concat["tokens"]:
    all_words.extend(song_tokens)
    sentence_lengths.append(len(song_tokens))

# Sorted list of the distinct tokens seen in the merged training data.
VOCAB = sorted(set(all_words))

print("%s words total, with a vocabulary size of %s" % (len(all_words), len(VOCAB)))
print("Max sentence length is %s" % max(sentence_lengths))

176682 words total, with a vocabulary size of 7474
Max sentence length is 1481


In [9]:
# Size the Keras tokenizer from the regex-token vocabulary built above.
VOCAB_SIZE = len(VOCAB)

# NOTE: this rebinds `tokenizer`, which until now held the NLTK RegexpTokenizer.
# NOTE(review): per the Keras documentation, num_words limits sequences to the
# (num_words - 1) most frequent words - confirm this matches how the
# pretrained model was trained before changing it.
tokenizer = Tokenizer(num_words=VOCAB_SIZE)
# Fit the word index on the cleaned training lyrics only.
tokenizer.fit_on_texts(df_concat["clean_lyrics"].tolist())

## Arranging the test set for testing

In [10]:

# Read the header-less test CSV, blank out missing cells and name the columns.
test_df = pd.read_csv("lyrics_test_set.csv", header=None).fillna('')
test_df.columns = ['singer', 'song', 'lyrics']

# Clean the lyrics with the same helper used for the training set, then encode
# them with the tokenizer fitted on the training lyrics.
test_df['clean_lyrics'] = test_df['lyrics'].apply(mb.clean_text)
cleaned_test_texts = test_df["clean_lyrics"].tolist()
test_sequences = tokenizer.texts_to_sequences(cleaned_test_texts)

**Pretty Midi** - Create vector using the midi files (pretty_midi objects)

In [11]:
import pretty_midi

# Load one PrettyMIDI object per test song. A slot is left as None when the
# file is missing or cannot be parsed, so positions keep matching test_df rows.
midis = []
unreadable_midis = []  # (file name, error) for every song that failed to load
for row in test_df.itertuples():
    # row[0] is the index; row[1] and row[2] are the first two columns
    # (singer and song). The file name is "<singer>_-<song>.mid" with spaces
    # replaced by underscores.
    name = (str(row[1])+" -"+str(row[2])).replace(' ','_') + '.mid'
    try:
        midis.append(pretty_midi.PrettyMIDI("midi_files/"+name))
    except Exception as err:
        # Best effort: keep going, but do not swallow KeyboardInterrupt or
        # SystemExit, and remember what failed so it can be inspected.
        midis.append(None)
        unreadable_midis.append((name, repr(err)))

if unreadable_midis:
    print("Could not load %d MIDI file(s): %s"
          % (len(unreadable_midis), [name for name, _ in unreadable_midis]))

# Instead of using this function, use pickle: midis_vecotr_pickle.pkl
# Turn every loaded MIDI into its song vector; failed loads stay None.
midis_vector_list = [
    mb.get_song_vector(midi) if midi is not None else None
    for midi in midis
]

In [12]:
# Seed word for each test song: the first token id of its encoded lyrics.
input_words = [sequence[0] for sequence in test_sequences]
# Shallow copy of the per-song melody vectors (entries may be None).
melodies = list(midis_vector_list)

## Loading the model

In [13]:
# Load the pretrained model by name through the Model1Base helper.
# NOTE(review): how the name maps to files on disk is defined inside
# mb.load_model and is not visible here - confirm the model files are present.
model = mb.load_model('model_2019_07_20__2343')

W0727 17:34:08.908857   440 deprecation_wrapper.py:119] From C:\Users\TomerMeirman\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0727 17:34:08.932829   440 deprecation_wrapper.py:119] From C:\Users\TomerMeirman\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0727 17:34:08.934789   440 deprecation_wrapper.py:119] From C:\Users\TomerMeirman\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0727 17:34:09.362643   440 deprecation_wrapper.py:119] From C:\Users\TomerMeirman\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W0727 17:34:09.369

## Creating songs using our model

In [14]:
# How many songs to generate for every (seed word, melody) pair.
num_of_songs = 2

# Generate lyrics for each test song from its first word and its melody.
# NOTE(review): random_state is forwarded to create_song, presumably to make
# the sampling repeatable - confirm in Model1Base.
songs = []
for idx, seed_word in enumerate(input_words):
    generated = mb.create_song(
        model,
        seed_word,
        melodies[idx],
        lyrics_length=50,
        num_of_songs=num_of_songs,
        random_state=5,
    )
    songs.append(generated)

Reverse the sequences to text - to view our newly created songs

In [20]:
# Decode every generated id sequence back into text, one list per test song.
reverse_word_map = list(map(tokenizer.sequences_to_texts, songs))

# Split each decoded song on the 'eos' marker to get its individual lines.
reversed_sentences = [
    [generated_text.split('eos') for generated_text in reverse_word_map[idx]]
    for idx in range(len(input_words))
]
reversed_sentences

[[['close to you ',
   ' i know that i cant cause i cant believe its time ',
   ' i dont remember all i time ',
   ' i dont know where to start ',
   ' i want to spend this time of my life ',
   ' what ive been and i have never been sent to'],
  ['close to you ',
   ' and i could be wrong for all i can do i love you ',
   ' and i will be waiting ',
   ' like a dream that i could do ',
   ' and i know that i could not know ',
   ' i dont want to be your love ',
   ' baby']],
 [['if you want to know if it was you and i ',
   ' but you didnt hear me ',
   ' who they are ',
   ' but you dont know where they say im gonna make it ',
   ' i know that i love you ',
   ' i never want to do you know ',
   ' how'],
  ['if its all right ',
   ' you want it to be the one to love you ',
   ' but i dont need to live in your eyes ',
   ' i know a little chance to come true ',
   ' at the copa co copacabana ',
   ' music and passion were always in fashion ',
   ' at']],
 [['dear ',
   ' but when im in 