In [21]:
%matplotlib inline
import matplotlib as mpl
from matplotlib import pyplot as plt

import os
import glob
import pandas as pd
import numpy as np
import re
import pickle
from sklearn.model_selection import train_test_split
import keras
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Activation, Embedding, Flatten, Dropout, Conv1D, MaxPooling1D, GlobalMaxPooling1D, Bidirectional
from keras import backend
from keras.backend.tensorflow_backend import set_session
from keras import optimizers
from keras.layers import TimeDistributed
from keras.preprocessing import sequence
from keras.models import Sequential
import tensorflow as tf
import os
from sklearn.model_selection import KFold
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

Using TensorFlow backend.


In [4]:
#additional phonetics: numbers and acronyms etc. rename columns for concat
x = pd.read_csv('C:/Users/ian.ashmore/Desktop/new_phon_train2.csv')
x['Phoneme'] = x['Phoneme'].str[1:-1]
dfx = pd.read_csv('C:/Users/ian.ashmore/Downloads/nums.csv', usecols=['numeric', 'uk'])
dfx['uk'] = dfx['uk'].str[1:-1]
dfx['uk'] = dfx['uk'].str.replace('ˈ','').str.replace('ˌ','').str.strip()
dfx = dfx.rename(columns={'numeric':'Word', 'uk':'Phoneme'})
dfx = dfx[~dfx.Word.isin(df.Word)]

#FULL phoneme dataset (scraped from dictonary.com using beautiful soup for python)
df = pd.read_csv('C:/Users/ian.ashmore/Downloads/charlie_EXTENDED_TS.csv')
df = df.drop(columns=['Unnamed: 0'])
df = df[~df.Phoneme.str.contains('x')]

df.head()

Unnamed: 0,Word,Phoneme
0,manicure-set,mæn.ɪ.kjʊə.set
1,zairean,zaɪɪə.ri.ən
2,embark-on-upon-something,ɪmbɑrk
3,glycol,ɡlaɪ.kɒl
4,sublimate,sʌb.lɪ.meɪt


In [7]:
#concatenate dataframes into one, drop duplicates and shuffle:
df2 = pd.concat([df, x, dfx], axis=0, ignore_index=True)
df2 = df2.drop_duplicates(subset=['Word']).reset_index(drop=True)
df2 = df2.sample(frac=1)

#clean phoneme set to remove ambiguities between UK & US pronunciations and drop phonemes that are too 'subtle' for rap music
df2['Phoneme'] = df2.Phoneme.str.replace('ɝ','ɜ')
df2['Phoneme'] = df2.Phoneme.str.replace('aː','æ')
df2['Phoneme'] = df2.Phoneme.str.replace('lː','lɪ')
df2['Phoneme'] = df2.Phoneme.str.replace('tː','t')
df2['Phoneme'] = df2.Phoneme.str.replace('ʃː','ʃ')
df2['Phoneme'] = df2.Phoneme.str.replace('zː','z')
df2['Phoneme'] = df2.Phoneme.str.replace('ɒː','ɔː')
df2['Phoneme'] = df2.Phoneme.str.replace('əː','ə.')
df2['Phoneme'] = df2.Phoneme.str.replace('ɪː','iː')

#fix unique problem from scrape with below char
for index, row in df2.iterrows():
    row.Phoneme = row.Phoneme.replace('ɪɪ', 'ɪ.ɪ')  

df2.head()

Unnamed: 0,Word,Phoneme
47270,coming-of-age,kʌm.ɪŋ.əv.eɪdʒ
29652,father-confessor,fɑː.ðə.kənfes.ər
38162,child-free,tʃaɪld.friː
6819,held,held
10688,cloistered,klɔɪ.stəd


In [10]:
#identify phonemes with double character represetnation (to allow them to produce a single token)
doubles = ['iː', 'uː', 'ɔː', 'ɜː', 'ɑː', 'æː', 'ɔɪ', 'ʊː']

#create empty lists for unique phonics and input sequences
combined_phons = []
unique_phons = []

#iterate through dataframe and extract unique phonics and split training set into sequences
for index, row in df2.iterrows():
    x = row.Phoneme
    phon_vals = [i for i in x]
    phon_sets = [''.join(x[i:i+2]) for i in range(len(x))]
    new_phon = []
    for y, combination in enumerate(phon_sets):
        if combination in doubles:
            new_phon.append(str(combination))
            if combination not in unique_phons:
                unique_phons.append(combination)
        else:
            new_phon.append(str(combination[0])) 
            if combination[0] not in unique_phons:
                unique_phons.append(combination[0])
    final_phon = []
    nxt=False
    for phon in new_phon:
        if len(phon)>1:
            final_phon.append(phon)
            nxt=True
        elif phon == 'ɔ':
            phon = 'ɔː'
            final_phon.append(phon)
            nxt=False
        else:
            if nxt==False:
                final_phon.append(phon)
            else:
                nxt=False
    combined_phons.append(final_phon)
    
df2['combined_phons'] = combined_phons

df2.head()

Unnamed: 0,Word,Phoneme,combined_phons
47270,coming-of-age,kʌm.ɪŋ.əv.eɪdʒ,"[k, ʌ, m, ., ɪ, ŋ, ., ə, v, ., e, ɪ, d, ʒ]"
29652,father-confessor,fɑː.ðə.kənfes.ər,"[f, ɑː, ., ð, ə, ., k, ə, n, f, e, s, ., ə, r]"
38162,child-free,tʃaɪld.friː,"[t, ʃ, a, ɪ, l, d, ., f, r, iː]"
6819,held,held,"[h, e, l, d]"
10688,cloistered,klɔɪ.stəd,"[k, l, ɔɪ, ., s, t, ə, d]"


In [12]:
df3 = df2.sample(frac=1)
#create input and target lists.
input_texts = list(df3['Word'])
target_texts = df3['combined_phons']

#create 
input_chars = []
for word in input_texts:
    for char in word:
        if char not in input_chars:
            input_chars.append(char)
            
input_chars.append(' ')
input_chars = list(set(input_chars))
print("input chars:", input_chars)

#create dictionary to translate input characters to integers
input_token_index = dict([(char, i) for i, char in enumerate(input_chars)])
reverse_input_token_index = dict((i, char) for char, i in input_token_index.items())
print("input token index length", len(input_token_index))

input chars: ['t', ' ', 'z', 'e', '2', '6', 'x', 'q', 'r', 'p', '4', 'm', 'c', 'd', 'v', 'i', 'b', 'y', '8', '-', 'n', '7', 's', '9', 'k', 'u', 'f', 'h', 'g', '0', 'a', 'j', 'o', '3', '1', '5', 'l', 'w']
input token index length 38


In [14]:
#create dictionary to translate output characters to integers
target_token_index = dict([(char, i) for i, char in enumerate(unique_phons)])
reverse_target_token_index = dict((i, char) for char, i in target_token_index.items())
print(unique_phons)
print("target token index length", len(target_token_index))

['k', 'ʌ', 'm', '.', 'ɪ', 'ŋ', 'ə', 'v', 'e', 'd', 'ʒ', 'f', 'ɑː', 'ː', 'ð', 'n', 's', 'r', 't', 'ʃ', 'a', 'l', 'iː', 'h', 'ɔɪ', 'θ', 'ɜː', 'i', 'uː', 'p', 'æ', 'j', 'z', 'ʊ', 'ɒ', 'ɡ', 'ɔː', 'w', 'b', 'u', 'ɑ', 'ɚ', 'o', 'ɔ', 'ɜ', 'æː', 'ʊː']
target token index length 47


In [17]:
#details on training set:

input_characters = sorted(list(input_chars))
target_characters = sorted(list(unique_phons))
num_encoder_tokens = len(input_token_index)
num_decoder_tokens = len(target_token_index)
max_encoder_seq_length = max([len(txt) for txt in input_texts])
max_decoder_seq_length = max([len(txt) for txt in target_texts])
max_len = max(max_encoder_seq_length, max_decoder_seq_length)

print('Number of samples:', len(input_texts)) #number of training examples
print('input token index length:', len(input_token_index)) #number of input tokens
print('target token index length:', len(target_token_index)) #number of output tokens
print('Max sequence length for inputs:', max_encoder_seq_length) #maximum length of input word
print('Max sequence length for outputs:', max_decoder_seq_length) #maximum length of output word
print('maximum input length:', max_len)


Number of samples: 54611
input token index length: 38
target token index length: 47
Max sequence length for inputs: 46
Max sequence length for outputs: 64
maximum input length: 64


In [20]:
#split data set into training and validation sets.
txtin_train, txtin_val, txtout_train, txtout_val = train_test_split(input_texts, target_texts, test_size=0.075)

#create empty arrays for training and validation data
encoder_input_data_train = np.zeros((len(txtin_train), max_len),dtype='float32')
decoder_input_data_train = np.zeros((len(txtin_train), max_len),dtype='float32')
encoder_input_data_val = np.zeros((len(txtin_val), max_len),dtype='float32')
decoder_input_data_val = np.zeros((len(txtin_val), max_len),dtype='float32')
target_data_train = np.zeros((len(txtout_train), max_len, len(target_token_index)), dtype='float32')
target_data_val = np.zeros((len(txtout_val), max_len, len(target_token_index)), dtype='float32')


for i, (txt_in, txt_out) in enumerate(zip(txtin_train, txtout_train)):
    for t, char in enumerate(txt_in):
        encoder_input_data_train[i,t] = input_token_index[char]
    encoder_input_data_train[i,t+1:] = input_token_index[' ']
    
for i, (txt_in, txt_out) in enumerate(zip(txtin_val, txtout_val)):
    for t, char in enumerate(txt_in):
        encoder_input_data_val[i,t] = input_token_index[char]
    encoder_input_data_val[i,t+1:] = input_token_index[' ']

for i, (decoder_data) in enumerate(decoder_input_data_train):
    for t, char in enumerate(decoder_data):
        new_in = int(char)
    target_data_train[i,t,new_in] = 1.
    
for i, (decoder_data) in enumerate(decoder_input_data_val):
    for t, char in enumerate(decoder_data):
        new_in = int(char)
    target_data_val[i,t,new_in] = 1.
    
print("encoder input shape:", encoder_input_data_train.shape, "target shape:", target_data_train.shape)

encoder input shape: (50515, 64) target shape: (50515, 64, 47)


In [61]:
#assign deep learning parameters
vocab = len(input_token_index)+1
w_dim = 600         #dimensionality of word vector embeddings
latent_dim = 800    #hidden states inside lstm gates (equivalent to hidden nodes in densely connected layers)
                    #NOTE THIS NUMBER IS DOUBLED BECAUSE THE WE ARE USING BI-DIRECTIONAL LSTM: 800 IN EACH DIR
hidden = 400        #nodes in densely conected layer
epochs = 1000       # maximum epocs
batch_size = 64     #batch size of input: samples sent through before each back propagation pass
num_steps = max_len
num_epochs = epochs

#callback to halt training if validation loss stops decreasing and restore the best score
Callbacks = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=5e-8, patience=3, verbose=0, mode='auto', restore_best_weights=True)

#create deep learning model: uses keras api: sequential model, model with tensorflow backend
model8 = Sequential()
#embedding layer converts input tokens to 300 dimensional vectors, initialised with random floats (trained with NN)
model8.add(Embedding(vocab, w_dim, input_shape=(encoder_input_data_train[0].shape), trainable=True))
#triple stacked bi-directional lstm layers, set to RETURN SEQ, (for sequential output)
model8.add(Bidirectional(LSTM(latent_dim, return_sequences=True, dropout=0.15)))
model8.add(Bidirectional(LSTM(latent_dim, return_sequences=True, dropout=0.15)))
model8.add(Bidirectional(LSTM(latent_dim, return_sequences=True, dropout=0.15)))
#time dependent hidden dense layer assists classification while retaining sequential nature of input
model8.add(TimeDistributed(Dense(hidden)))
model8.add(Dropout(0.25))
model8.add(Activation('tanh'))
#time dependent output layer predicts a sequence max_len long
model8.add(TimeDistributed(Dense(len(target_token_index)), input_shape=(max_len,len(target_token_index))))
model8.add(Activation('softmax'))
print(model8.summary())

#compile model with optimiser and loss function
model8.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_13 (Embedding)     (None, 64, 600)           23400     
_________________________________________________________________
bidirectional_27 (Bidirectio (None, 64, 1600)          8966400   
_________________________________________________________________
bidirectional_28 (Bidirectio (None, 64, 1600)          15366400  
_________________________________________________________________
bidirectional_29 (Bidirectio (None, 64, 1600)          15366400  
_________________________________________________________________
time_distributed_15 (TimeDis (None, 64, 400)           640400    
_________________________________________________________________
dropout_1 (Dropout)          (None, 64, 400)           0         
_________________________________________________________________
activation_12 (Activation)   (None, 64, 400)           0         
__________

In [None]:
#This would train the model, but needs GPU so instead load trained model below

##history = model8.fit(encoder_input_data_train, target_data_train,
#                     batch_size=batch_size,
#                     shuffle=True,
#                     epochs=epochs,
#                     validation_data=[encoder_input_data_val,target_data_val], 
#                     callbacks=[Callbacks])
##evaluate the model
#score, acc = model8.evaluate(encoder_input_data_val, target_data_val, batch_size=64, verbose=1)

#print('loss:', score, "accuracy:", acc)

In [56]:
#load pre-trained model and encoders

model = keras.models.load_model('C:/Users/ian.ashmore/Desktop/MLD_1001/phoneme_translation_model_POINT1001.h5')
print(model.summary())

with open('C:/Users/ian.ashmore/Desktop/MLD_1001/input_token_index.pickle', 'rb') as handle:
    input_token_index = pickle.load(handle)

with open('C:/Users/ian.ashmore/Desktop/MLD_1001/reverse_input_token_index.pickle', 'rb') as handle:
    reverse_input_token_index = pickle.load(handle)

with open('C:/Users/ian.ashmore/Desktop/MLD_1001/target_token_index.pickle', 'rb') as handle:
    target_token_index = pickle.load(handle)

with open('C:/Users/ian.ashmore/Desktop/MLD_1001/reverse_target_token_index.pickle', 'rb') as handle:
    reverse_target_token_index = pickle.load(handle)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 64, 600)           23400     
_________________________________________________________________
bidirectional_7 (Bidirection (None, 64, 1600)          8966400   
_________________________________________________________________
bidirectional_8 (Bidirection (None, 64, 1600)          15366400  
_________________________________________________________________
bidirectional_9 (Bidirection (None, 64, 1600)          15366400  
_________________________________________________________________
time_distributed_5 (TimeDist (None, 64, 400)           640400    
_________________________________________________________________
activation_5 (Activation)    (None, 64, 400)           0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 64, 400)           0         
__________

In [39]:
#create a dictionary of words:phonemes, initially with the training set, but to be appended.
phon_dict = dict(zip(df3.Word,df3.Phoneme))
print(len(phon_dict))

54611


In [40]:
#load songs - scraped from lyrics.com: extensively preprocessed elsewhere
df_songs = pd.read_pickle("C:/Users/ian.ashmore/Desktop/81k_songs.pkl")
df_songs.head()

Unnamed: 0,song,lyrics
0,/lyric/5564459/Nellyville,"[welcome to nellyville, where all newborns get..."
1,/lyric/24660518/Gettin%27+It+Started,"[hey, boo, wassup?, you lookin' good, thank yo..."
2,/lyric/5564461/Hot+in+Herre,"[hot in, so hot in here! so hot in, hot, oh!, ..."
3,/lyric/5564462/Dem+Boyz,"[like, oh, better get them back, watch them ni..."
4,/lyric/5564463/Oh+Nelly,"[here we go again, real smooth, yeah, here we ..."


In [41]:
#final cleaning step to remove empty list entries
new_lyrics = []

for index, row in df_songs.iterrows():
    lyrics = row.lyrics
    lyrics2 = [i.strip() for i in lyrics if i!=' ']
    new_lyrics.append(lyrics2)
    
df_songs['lyrics_cleaned'] = new_lyrics
df_songs.head()

Unnamed: 0,song,lyrics,lyrics_cleaned
0,/lyric/5564459/Nellyville,"[welcome to nellyville, where all newborns get...","[welcome to nellyville, where all newborns get..."
1,/lyric/24660518/Gettin%27+It+Started,"[hey, boo, wassup?, you lookin' good, thank yo...","[hey, boo, wassup?, you lookin' good, thank yo..."
2,/lyric/5564461/Hot+in+Herre,"[hot in, so hot in here! so hot in, hot, oh!, ...","[hot in, so hot in here! so hot in, hot, oh!, ..."
3,/lyric/5564462/Dem+Boyz,"[like, oh, better get them back, watch them ni...","[like, oh, better get them back, watch them ni..."
4,/lyric/5564463/Oh+Nelly,"[here we go again, real smooth, yeah, here we ...","[here we go again, real smooth, yeah, here we ..."


In [42]:
#occasionaly the algorithm predicts the same phoneme twice. this can never happen, so we remove the second occurance
def remove_adjacent(phoneme):
    Phoneme = phoneme.split('.')
    p_start = [Phoneme[0]]
    for item in Phoneme:
        if item!=p_start[-1]:
            p_start.append(item)
    final_phon = '.'.join(p_start)
    return final_phon

In [63]:
#define function to translate any word input into a phoneme: converts to input tokens, feeds input tokens to model, obtains
#prediction array and converts to output characters, removes back-to-back duplicates, then returns

def phoneme_translator(unknowns):
    encoder_input = np.zeros((len(unknowns), max_len),dtype='float32')
    for i, (word) in enumerate(unknowns):
        if len(word.split(r'.'))>1:
            word_list = word.split(r'.')
            big_list = []
            for w in word_list:
                for t, char in enumerate(w):
                    if char in input_token_index.keys():
                        encoder_input[i, t] = input_token_index[char]
                    else:
                        encoder_input[i, t] = input_token_index[' ']
                    encoder_input[i, t+1:] = input_token_index[' ']
                prediction = model.predict_classes(encoder_input)
                phoneme = ' '
                last = ''
                for j in range(max_len):
                    predict_token = prediction[:, j]
                    pred_char = reverse_target_token_index[predict_token[0]]
                    if pred_char!=last:
                        phoneme+=pred_char.strip()
                        last = pred_char.strip()
                    phoneme = phoneme.strip()
                big_list.append(phoneme)
            phon_fin = '.'.join(big_list)
        
        elif len(word.split('-'))==1:
            for t, char in enumerate(word):
                if char in input_token_index.keys():
                    encoder_input[i, t] = input_token_index[char]
                else:
                    encoder_input[i, t] = input_token_index[' ']
                encoder_input[i, t+1:] = input_token_index[' ']
            prediction = model.predict_classes(encoder_input)
            phoneme = ' '
            last = ''
            for j in range(max_len):
                predict_token = prediction[:, j]
                pred_char = reverse_target_token_index[predict_token[0]]
                if pred_char!=last:
                    phoneme+=pred_char.strip()
                    last = pred_char.strip()
                phon_fin = phoneme.strip()
            
        else:
            word_list = word.split('-')
            big_list = []
            for w in word_list:
                if w=='uh':
                    phoneme='ʌ'
                else:
                    for t, char in enumerate(w):
                        if char in input_token_index.keys():
                            encoder_input[i, t] = input_token_index[char]
                        else:
                            encoder_input[i, t] = input_token_index[' ']
                        encoder_input[i, t+1:] = input_token_index[' ']
                    prediction = model.predict_classes(encoder_input)
                    phoneme = ' '
                    last = ''
                    for j in range(max_len):
                        predict_token = prediction[:, j]
                        pred_char = reverse_target_token_index[predict_token[0]]
                        #print(pred_char)
                        if pred_char!=last:
                            phoneme+=pred_char.strip()
                            last = pred_char.strip()
                        phoneme = phoneme.strip()
                big_list.append(phoneme)
            phon_fin = ' '.join(big_list)
            
    phon_fin4 = remove_adjacent(phon_fin)
   
    return phon_fin4

In [49]:
#take a random sample of two songs
df_songs_short = df_songs.sample(n=2, replace=False)
df_songs_short.head()

Unnamed: 0,song,lyrics,lyrics_cleaned
27917,/lyric/21899545/Big+Heavy,"[hey big daddy what you doing tonight, i got s...","[hey big daddy what you doing tonight, i got s..."
29976,/lyric/28836490/The+Children%27s+Song,"[children hold on, to your dreams, believe in ...","[children hold on, to your dreams, believe in ..."


In [None]:
#THIS WOULD PROCESS THE SONGS, BUT AGAIN, IT REQUIRES GPU

#run through songs selected, replace identified 'problem' words, identify if the word is already in a dictionary.
#if it is, replace it with dict version.
#if not, predict from model and update dictionary.

import re
trans_dict = {}

reg = re.compile("[^A-Za-z0-9]+")
new_songs = []
i=0
for index, row in df_songs_short.iterrows():
    full_lyrics = row.lyrics
    full_lyrics = [x for x in full_lyrics if x!=' '] #drop empty list entries
    new_lyrics = []
    for lyric in full_lyrics:
        lyric = lyric.strip()
        new_lyric = []
        words = lyric.split(' ')
        for word in words:
            word = word.replace('f*ck', 'fuck')
            word = word.replace(r'..',' ').replace('\'','')
            word2 = re.sub(reg, '', word) 
            if word2 in phon_dict.keys():
                new_word = phon_dict[word2]
                if new_word[-1]=='.':
                    new_word=new_word[:-1]
                new_lyric.append(new_word)
                #print('dictionary', word2, new_word)     
            elif word2 in trans_dict.keys():
                new_word = trans_dict[word2]
                new_lyric.append(new_word)
                #print('previously learned', word2, new_word)
            elif word2=='187':
                new_word = 'wʌn.eɪt.sev.ən'
                new_lyric.append(new_word)
            elif word2=='uh':
                new_word = 'ʌ'
                new_lyric.append(new_word)
            elif word2=='okay':
                new_word = 'əʊ.keɪ'
                new_lyric.append(new_word)
            elif 'gucci' in word2:
                new_word = 'ɡuːtʃi'
                new_lyric.append(new_word)
            else:
                new_word = phoneme_translator([word])
                new_word = new_word.strip().lower() 
                trans_dict.update({str(word2): new_word})
                if len(new_word)>1 and new_word[-1]=='.':
                    new_word = new_word[:-1]
                new_lyric.append(new_word)
                print('new word', word, new_word)
        joined_lyric = ' '.join(new_lyric)
        new_lyrics.append(joined_lyric)
        print("*OLD:--->", lyric,"*NEW:--->", joined_lyric)
    new_songs.append(new_lyrics)
    print("DONE: ",i, "song:", new_songs)
    i+=1

In [66]:
#INSTEAD LOAD THE SONGS PROCESSED ON THE GPUs

path = 'C:/Users/ian.ashmore/Desktop/data/'
df = pd.DataFrame()

for filename in glob.glob(os.path.join(path, '*.pickle')):
    x = pd.read_pickle(filename)
    df = pd.concat([df, x], axis=0, ignore_index=True)
    
df.head()  

Unnamed: 0,song,lyrics,lyrics_cleaned,phoneme_lyrics
0,/lyric/5564459/Nellyville,"[welcome to nellyville, where all newborns get...","[welcome to nellyville, where all newborns get...","[wel.kəm tuː nel.i.vl, weər ɔːl njuː.bənz ɡet ..."
1,/lyric/24660518/Gettin%27+It+Started,"[hey, boo, wassup?, you lookin' good, thank yo...","[hey, boo, wassup?, you lookin' good, thank yo...","[heɪ buː wɒsʌp, juː lʊk.ɪn ɡʊd, θæŋk juː wɒts ..."
2,/lyric/5564461/Hot+in+Herre,"[hot in, so hot in here! so hot in, hot, oh!, ...","[hot in, so hot in here! so hot in, hot, oh!, ...","[hɒt ɪn səʊ hɒt ɪn hɪər səʊ hɒt ɪn hɒt əʊ, dʒʌ..."
3,/lyric/5564462/Dem+Boyz,"[like, oh, better get them back, watch them ni...","[like, oh, better get them back, watch them ni...","[laɪk əʊ, bet.ər ɡet ðem bæk, wɒtʃ ðem nɪɡ.ə b..."
4,/lyric/5564463/Oh+Nelly,"[here we go again, real smooth, yeah, here we ...","[here we go again, real smooth, yeah, here we ...","[hɪər wiː ɡəʊ əɡen, rɪəl smuːð jeə, hɪər wiː ɡ..."


In [69]:
df.phoneme_lyrics[100]

['ɪt wɒz ɔːl ə driːm',
 'lɑːst naɪt aɪ hæd ə driːm',
 'θɔːt.s wɒz reɪ.sɪŋ θruː maɪ hed',
 'ɪt wɒz ɔːl ə driːm',
 'felt səʊ rɪəl tuː miː',
 'ðɪs ɪz wɒt wɒz sed',
 'hæd ə driːm aɪ sed baʊt huː hiː sed',
 'baʊt bɪɡ aɪ sed ðæts bɪɡ hiː sed',
 'dɪɡ wɒt hiː sed',
 'prəʊ.siːd hiː sed',
 'ɪndiːd aɪ sed səʊ briːð aɪ dɪd',
 'aɪ siː aɪ sed dʒel.ə.si aɪ sed',
 'ɡɒt ðiː həʊl ɪn.də.stri mæd æt miː aɪ sed',
 'ðen baɪ sed eɪtʃ.əʊviː rɪmaɪnd jɔːself',
 'nəʊ.bə.di bɪlt laɪk juː juː dɪzaɪnd jɔːself',
 'aɪ əɡriː aɪ sed maɪ wʌn əv ə kaɪnd self',
 'ɡet stəʊnd ev.ri deɪ laɪk dʒiː.əs dɪd',
 'wɒt hiː sed aɪ sed hæz biːn sed bɪfɔːr',
 'dʒʌst kiːp duː.ɪŋ jɔːr θɪŋ hiː sed seɪ nəʊ mɔːr',
 'wɒz ɪt ɔːl ə driːm',
 'wɒz ɪt ɔːl ə driːm wɒz ɪt ɔːl ə driːm',
 'wɒz ɪt ɔːl ə driːm',
 'wɒz ɪt ɔːl ə driːm wɒz ɪt ɔːl ə driːm',
 'sɒlt.p.pə ænd hev.i di ʌp ɪn ðiː lɪm.əziːn',
 'hæŋ.ɪŋ pɪk.tʃər ɒn maɪ wɔːl',
 'aɪ let maɪ teɪp rɒk tɪl maɪ teɪp pɒpt',
 'wɪð ðiː hæt tuː mætʃ',
 'rɪmem.bər ræp.ɪn dʒuːk dʌ hɑː dʌ hɑː',
 'naʊ aɪem ɪn ð