# Analysis of Model Weights

In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
import requests
import pickle
import numpy as np
import pandas as pd
from pathlib import Path
from datetime import datetime
from src.nlp.generate_lyrics import DeepLyric

## 4.2-LM-108k-lines-genre-song_title

### Load Model

In [358]:
MODEL = '4.2-LM-108k-lines-genre-song_title'

In [5]:
model_url = f'https://storage.googleapis.com/w210-capstone/models/{MODEL}_architecture.pkl'
model = requests.get(model_url)
model = model.content
model = pickle.loads(model)

In [6]:
itos_url = f'https://storage.googleapis.com/w210-capstone/models/{MODEL}_itos.pkl'
itos = requests.get(itos_url)
itos = itos.content
itos = pickle.loads(itos)

#### Stop Words

In [13]:
from nltk.corpus import stopwords
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /home/syang/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [315]:
stop_words = set(stopwords.words('english')) 
stop_words.add("'t")
stop_words.add("'ll")
stop_words.add("’t")
stop_words.add("'ve")
stop_words.add(",")
stop_words.add("'s")
stop_words.add("'re")
stop_words.add("'m")
stop_words.add("don")
stop_words.add("won")
stop_words.add("xbol")
stop_words.add("xbos")
stop_words.add("xeol")
stop_words.add("xeos")
stop_words.add("xgenre")
stop_words.add("xtitle")
for i in range(1,100):
    stop_words.add(f"xbol-{i}")
stop_words.add("[verse-1]")
stop_words.add("[intro]")
stop_words.add("(album")

In [316]:
deep_lyric = DeepLyric(model, itos=itos, model_type='language')

### Next Word Distribution

Genre #1

In [334]:
next_word_probs = deep_lyric.get_predicted_probs(seed_text='xbos xgenre oldies xtitle', 
                         verbose=0, context_length=50, beam_width=1000,
                         max_len=5, top_k=100, temperature=1., multinomial=False,
                         GPU=True)

next_word_probs = [s for s in next_word_probs if s[1] not in stop_words]

df = pd.DataFrame(next_word_probs, columns=['context', 'next_word', 'probability'])
df.shape

(360300, 3)

In [335]:
top_words_1 = df.groupby('next_word').sum().sort_values(by='probability', ascending=False)
top_words_1.head(10)

Unnamed: 0_level_0,probability
next_word,Unnamed: 1_level_1
love,8.063945
one,5.30319
baby,4.166161
know,3.346304
darling,2.76238
go,2.725564
little,2.284754
gone,2.283052
girl,2.057671
want,2.032918


Genre #2

In [355]:
next_word_probs = deep_lyric.get_predicted_probs(seed_text='xbos xgenre death metal xtitle', 
                         verbose=0, context_length=50, beam_width=1000,
                         max_len=5, top_k=100, temperature=1., multinomial=False,
                         GPU=True)

next_word_probs = [s for s in next_word_probs if s[1] not in stop_words]

df = pd.DataFrame(next_word_probs, columns=['context', 'next_word', 'probability'])
df.shape

(369033, 3)

In [356]:
top_words_2 = df.groupby('next_word').sum().sort_values(by='probability', ascending=False)
top_words_2.head(10)

Unnamed: 0_level_0,probability
next_word,Unnamed: 1_level_1
death,4.420793
world,4.418909
black,3.259346
dead,2.960679
night,1.971365
war,1.816822
evil,1.739159
hell,1.69834
life,1.67441
end,1.638543


In [361]:
df_combined = pd.merge(top_words_1, top_words_2, how='outer', left_index=True, right_index=True)
# df_combined['rel_ratio_abs'] = (df_combined['probability_x'] / df_combined['probability_y'])
df_combined['diff'] = df_combined['probability_x'] - df_combined['probability_y']
df_combined['diff_abs'] = np.abs(df_combined['diff'])
df_combined['rel_diff_abs'] = df_combined['diff_abs'] / df_combined['probability_x']
df_combined.sort_values(by='rel_diff_abs', ascending=False)\
    .loc[df_combined['diff'] > 0, :]

Unnamed: 0_level_0,probability_x,probability_y,diff,diff_abs,rel_diff_abs
next_word,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
marie,0.026983,2.334066e-06,2.698024e-02,2.698024e-02,0.999913
downtown,0.042128,1.420305e-05,4.211331e-02,4.211331e-02,0.999663
babe,0.130177,6.084398e-05,1.301164e-01,1.301164e-01,0.999533
tennessee,0.014239,7.910537e-06,1.423126e-02,1.423126e-02,0.999444
georgia,0.013158,8.511978e-06,1.314957e-02,1.314957e-02,0.999353
gal,0.053483,3.694148e-05,5.344582e-02,5.344582e-02,0.999309
darling,2.762380,2.028313e-03,2.760352e+00,2.760352e+00,0.999266
ooo,0.004086,3.117769e-06,4.082936e-03,4.082936e-03,0.999237
doo,0.004206,3.255128e-06,4.202258e-03,4.202258e-03,0.999226
dancin,0.046226,3.604081e-05,4.618946e-02,4.618946e-02,0.999220


## 4.2-MM-108k-post

### Load Model

In [369]:
MODEL = '4.2-MM-108k-post'

In [370]:
model_url = f'https://storage.googleapis.com/w210-capstone/models/{MODEL}_architecture.pkl'
model = requests.get(model_url)
model = model.content
model = pickle.loads(model)

In [371]:
itos_url = f'https://storage.googleapis.com/w210-capstone/models/{MODEL}_itos.pkl'
itos = requests.get(itos_url)
itos = itos.content
itos = pickle.loads(itos)

In [372]:
deep_lyric = DeepLyric(model, itos=itos, model_type='multimodal')

In [408]:
genre_ohe = np.zeros((1, 24))
genre_ohe[0][23] = 1

In [416]:
deep_lyric.generate_text(seed_text=['xbos', 'xbol-1'], 
                         verbose=0, context_length=100, beam_width=3,
                         max_len=100, top_k=5, temperature=1.45,
                         GPU=True, audio=genre_ohe)

In [417]:
deep_lyric.print_lyrics()

SONG START
 

xbol-1 i 'd hate to think it wouldn 't be like this 
 i wouldn 't lie to you like that 
 shouldn 't i love you like that ? 
 i don 't care , i don 't care 
 if that isn 't worth it 
 i don 't care 
 if this isn 't love 
 why wouldn 't she care ? 
 
 if that isn 't love 
 what would it be 
 if that isn 't love ? 
 if that isn 't 

### Next Word Distribution

Genre #1

In [423]:
genre_ohe = np.zeros((1, 24))
genre_ohe[0][23] = 1

next_word_probs = deep_lyric.get_predicted_probs(seed_text=['xbos', 'xbol-1'], 
                         verbose=0, context_length=50, beam_width=1000,
                         max_len=5, top_k=100, temperature=1., multinomial=False,
                         GPU=True, audio=genre_ohe)

next_word_probs = [s for s in next_word_probs if s[1] not in stop_words]

df = pd.DataFrame(next_word_probs, columns=['context', 'next_word', 'probability'])
df.shape

(359120, 3)

In [424]:
top_words_1 = df.groupby('next_word').sum().sort_values(by='probability', ascending=False)
top_words_1.head(10)

Unnamed: 0_level_0,probability
next_word,Unnamed: 1_level_1
know,3.16233
see,2.68137
time,2.413892
morning,2.248662
love,2.153535
got,1.964642
go,1.894376
tell,1.893202
long,1.893187
little,1.81001


Genre #2

In [425]:
genre_ohe = np.zeros((1, 24))
genre_ohe[0][16] = 1

next_word_probs = deep_lyric.get_predicted_probs(seed_text=['xbos', 'xbol-1'], 
                         verbose=0, context_length=50, beam_width=1000,
                         max_len=5, top_k=100, temperature=1., multinomial=False,
                         GPU=True, audio=genre_ohe)

next_word_probs = [s for s in next_word_probs if s[1] not in stop_words]

df = pd.DataFrame(next_word_probs, columns=['context', 'next_word', 'probability'])
df.shape

(360021, 3)

In [426]:
top_words_2 = df.groupby('next_word').sum().sort_values(by='probability', ascending=False)
top_words_2.head(10)

Unnamed: 0_level_0,probability
next_word,Unnamed: 1_level_1
know,3.286165
got,3.216332
yeah,2.946051
love,2.507336
like,2.263979
time,2.184132
de,2.082353
one,1.711553
hey,1.646097
see,1.624662


In [428]:
df_combined = pd.merge(top_words_1, top_words_2, how='outer', left_index=True, right_index=True)
# df_combined['rel_ratio_abs'] = (df_combined['probability_x'] / df_combined['probability_y'])
df_combined['diff'] = df_combined['probability_x'] - df_combined['probability_y']
df_combined['diff_abs'] = np.abs(df_combined['diff'])
df_combined['rel_diff_abs'] = df_combined['diff_abs'] / df_combined['probability_x']
df_combined.sort_values(by='rel_diff_abs', ascending=False)\
    .loc[df_combined['diff'] < 0, :]

Unnamed: 0_level_0,probability_x,probability_y,diff,diff_abs,rel_diff_abs
next_word,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
xbol-125,4.939588e-08,0.000285,-2.851689e-04,2.851689e-04,5773.131404
chanson,6.655138e-04,1.005610,-1.004944e+00,1.004944e+00,1510.027747
bist,8.444942e-05,0.100078,-9.999354e-02,9.999354e-02,1184.064318
drummer,8.642363e-06,0.009497,-9.488718e-03,9.488718e-03,1097.931069
urge,1.205424e-06,0.000988,-9.867229e-04,9.867229e-04,818.569072
[estribillo],1.965110e-06,0.001437,-1.434811e-03,1.434811e-03,730.142836
swimmin,4.473763e-06,0.002959,-2.954125e-03,2.954125e-03,660.322205
appear,8.972119e-06,0.005726,-5.716728e-03,5.716728e-03,637.165945
desires,4.643415e-06,0.002710,-2.705295e-03,2.705295e-03,582.608857
ou,7.432987e-06,0.003110,-3.102198e-03,3.102198e-03,417.355476
