In [1]:
# This notebook was adapted from kgero's work on the fastai IMDb example:
# https://github.com/kgero/style-gen
from fastai_old.text import *
import html
import spacy 

# Load spaCy's English pipeline. The returned object is not bound to a name,
# so it is not referenced again in this notebook.
# NOTE(review): presumably this is here to check that the 'en' model is
# installed before anything else runs — confirm it is still needed.
spacy.load('en')

<spacy.lang.en.English at 0x7fdb453a0160>

# Initialization
These sections initialize the learners and dictionary mappings necessary to generate lyrics from the trained models. This is similar to the initialization for training, but skips many steps which are either unnecessary for using the model or whose results have been saved and can simply be loaded back.

In [2]:
# Which of the three trained models the notebook uses to generate lyrics.
# Valid options: 'large_rap', 'small_rap', or 'small_country'.
model = 'large_rap'

# Containers filled in by the generation cell further down:
# one slot per verse, plus a single chorus string.
num_verses = 3
verses = [0 for _ in range(num_verses)]
chorus = ''

In [3]:
# Root data directory and the language-model sub-directory; the saved
# vocabularies are read from LM_PATH/'tmp' in the next cell.
PATH = Path('data/')
LM_PATH = PATH/'model_lm'
# parents=True: without it mkdir raises FileNotFoundError when data/ itself
# does not exist yet (e.g. on a fresh checkout).
LM_PATH.mkdir(parents=True, exist_ok=True)

In [4]:
# Saved index->token vocabularies (verse file, chorus file) for each model.
itos_files = {
    'large_rap': ('itos-verse-rap-verylarge.pkl', 'itos-chorus-rap-verylarge.pkl'),
    'small_rap': ('itos-large-verse.pkl', 'itos-large-chorus.pkl'),
    'small_country': ('itos-verse-country.pkl', 'itos-chorus-country.pkl'),
}
if model not in itos_files:
    # Fail here with a clear message instead of a NameError on itosv below.
    raise ValueError(
        "Unknown model %r; expected one of %s" % (model, sorted(itos_files)))
verse_file, chorus_file = itos_files[model]

# NOTE: pickle.load can execute arbitrary code from the file; only load
# vocabulary pickles that you produced yourself.
with open(LM_PATH/'tmp'/verse_file, 'rb') as f:
    itosv = pickle.load(f)
with open(LM_PATH/'tmp'/chorus_file, 'rb') as f:
    itosc = pickle.load(f)

# Reverse mappings token -> index; any token missing from the vocabulary
# maps to index 0.
stoiv = collections.defaultdict(lambda:0, {v:k for k,v in enumerate(itosv)})
stoic = collections.defaultdict(lambda:0, {v:k for k,v in enumerate(itosc)})

# Vocabulary sizes, passed to LanguageModelData when the models are built.
vsv = len(itosv)
vsc = len(itosc)
vsv, vsc

(27366, 6762)

In [5]:
# Placeholder training/validation arrays: the models are only used for
# generation here, so no real data is needed. np.zeros (rather than np.empty)
# keeps the placeholder contents deterministic instead of uninitialised memory.
trn_lm = np.zeros([10,2])
val_lm = np.zeros([10,2])

In [6]:
# This block creates the data loaders and the model-data objects.

# Network dimensions, passed positionally to get_model in the next cell.
# NOTE(review): presumably embedding size / hidden size / number of layers —
# confirm against fastai_old.
em_sz = 400
nh = 1150
nl = 3

# Dropout values (scaled by 0.7), unpacked by keyword in the next cell.
drops = 0.7 * np.array([0.25, 0.1, 0.2, 0.02, 0.15])

wd = 1e-7
bptt = 70
bs = 52
opt_fn = partial(optim.Adam, betas=(0.8, 0.99))

# Both loaders wrap the placeholder arrays and are shared by the verse and
# chorus model data; only the vocabulary size differs between the two.
trn_dl = LanguageModelLoader(np.concatenate(trn_lm), bs, bptt)
val_dl = LanguageModelLoader(np.concatenate(val_lm), bs, bptt)
loader_kwargs = dict(bs=bs, bptt=bptt)
mdv = LanguageModelData(PATH, 1, vsv, trn_dl, val_dl, **loader_kwargs)
mdc = LanguageModelData(PATH, 1, vsc, trn_dl, val_dl, **loader_kwargs)

DATA: 0 70
DATA: 0 70


In [7]:
# Finally, create one learner per model (verse and chorus). Both use the same
# optimizer, network dimensions and dropout settings.
dropout_args = dict(
    dropouti=drops[0],
    dropout=drops[1],
    wdrop=drops[2],
    dropoute=drops[3],
    dropouth=drops[4],
)

learnerv = mdv.get_model(opt_fn, em_sz, nh, nl, **dropout_args)

learnerc = mdc.get_model(opt_fn, em_sz, nh, nl, **dropout_args)


INIT <fastai_old.text.LanguageModelData object at 0x7fdac8876470>
INIT <fastai_old.text.LanguageModelData object at 0x7fdac88764e0>


# Generation Functions
These are the two functions for generating lyrics from the models. They are necessary to take the predictions from the model and turn them into a usable block of text. There is a separate function for verses and choruses, as they have different styles which we are trying to replicate. The difference between these functions is better discussed in the report.

In [8]:
def generate_text_verse(m, s, itos, stoi, l=20):
    """Generate verse text from model `m`, starting from the seed string `s`.

    m    -- the language model; m[0] must expose a `bs` attribute and `m`
            must support eval(), reset() and being called on a tensor.
    s    -- seed text; it is lower-cased and split on whitespace, so it should
            already be written the way the vocabulary tokenizes words.
    itos -- index -> token sequence.
    stoi -- token -> index mapping used to encode the seed.
    l    -- number of sampling steps that must have passed before a sampled
            newline token ends generation (a minimum, not a maximum).

    Returns the seed followed by the generated tokens, space-separated and
    terminated by a newline. Requires a CUDA device and the notebook-level
    batch size `bs`.
    """
    encoder = m[0]
    encoder.bs = 1   # batch size 1 while the hidden state is reset
    m.eval()         # turn off dropout
    m.reset()        # reset hidden state
    encoder.bs = bs  # put the batch size back to what it was

    seed_ids = [stoi[token] for token in s.lower().split()]
    seed = torch.autograd.Variable(torch.cuda.LongTensor(np.array([seed_ids])))

    res = m(seed)[0]

    hidden_markers = ('xbos', 'xfld')
    pieces = [s + ' ']
    steps = 0
    while True:
        # Draw 10 candidate indices from the distribution for the last position.
        draws = torch.multinomial(res[-1].exp(), 10)
        # Never emit index 0: fall back to the second draw if it came first.
        nxt = draws[0] if draws.data[0] != 0 else draws[1]
        word = itos[int(nxt)]
        if word == '\n' and steps > l:
            pieces.append("\n")
            break
        # Tokens containing these markers are fed to the model but not printed.
        if all(marker not in word for marker in hidden_markers):
            pieces.append(word + ' ')
        res = m(nxt.unsqueeze(0).unsqueeze(0))[0]  # sometimes need an extra .unsqueeze(0)
        steps += 1
    return ''.join(pieces)

def generate_text_chorus(m, s, itos, stoi, l=20):
    """Generate chorus text from model `m`, starting from the seed string `s`.

    Works like generate_text_verse, except that at each step the next token
    is sampled from the model's distribution with probability 0.9 and taken
    from the five highest-scoring tokens with probability 0.1.

    m    -- the language model; m[0] must expose a `bs` attribute and `m`
            must support eval(), reset() and being called on a tensor.
    s    -- seed text; it is lower-cased and split on whitespace.
    itos -- index -> token sequence.
    stoi -- token -> index mapping used to encode the seed.
    l    -- number of sampling steps that must have passed before a sampled
            newline token ends generation (a minimum, not a maximum).

    Returns the seed followed by the generated tokens, space-separated and
    terminated by a newline. Requires a CUDA device and the notebook-level
    batch size `bs`.
    """
    m[0].bs=1  # Set batch size to 1
    m.eval()  # Turn off dropout
    m.reset()  # Reset hidden state
    m[0].bs=bs  # Put the batch size back to what it was

    ss = s.lower().split()
    si = [stoi[w] for w in ss]
    t = torch.autograd.Variable(torch.cuda.LongTensor(np.array([si])))

    res,*_ = m(t)

    output = s + ' '
    count = 0
    while True:
        # Fixed: the original called np.random.choose, which does not exist
        # in NumPy and raised AttributeError on the first iteration.
        p = np.random.choice([0,1], p=[0.1, 0.9])
        if p == 1:
            n = torch.multinomial(res[-1].exp(), 10)  # drawing from probability distribution
        else:
            n = res[-1].topk(5)[1]  # indices of the five top-scoring words
        # Never emit index 0: fall back to the second candidate if it came first.
        n = n[1] if n.data[0]==0 else n[0]
        if itos[int(n)] == '\n' and count > l:
            output += "\n"
            break
        # Tokens containing these markers are fed to the model but not printed.
        if not any(x in itos[int(n)] for x in ['xbos', 'xfld']):
            output += itos[int(n)] + ' '
        res,*_ = m(n.unsqueeze(0).unsqueeze(0))  # sometimes need an extra .unsqueeze(0)
        count += 1
    return output

# Generation
Here is where the lyrics are actually generated. To make better use of the available time, only verse and chorus models were trained, so a final implementation would be more robust and complete. This cell is set up to run any of the three trained models, selected by the `model` variable set near the top of the notebook.

In [9]:
# Saved weights (verse, chorus) for each trained model.
weight_files = {
    'large_rap': ('lm_30epochs-verse-rap-verylarge', 'lm_30epochs-chorus-rap-verylarge'),
    'small_rap': ('lm_30epochs-large-verse-set', 'lm_30epochs-large-chorus'),
    'small_country': ('lm_30epochs-verse-country', 'lm_30epochs-chorus-country'),
}
if model not in weight_files:
    # Previously an unrecognised name skipped loading silently and the lyrics
    # were generated from untrained weights.
    raise ValueError(
        "Unknown model %r; expected one of %s" % (model, sorted(weight_files)))
verse_weights, chorus_weights = weight_files[model]
learnerv.load(verse_weights)
learnerc.load(chorus_weights)

# These strings are the seeds for the verses (one per verse).
verse_strings = ["this is the way I", "somedays the paint", "which way does the road"]

# This is the seed string for the chorus. It is written pre-tokenized
# ("ca n't") because the generator splits the seed on whitespace.
chorus_string = "these days they ca n't"

mv = learnerv.model
for i in range(num_verses):
    verses[i] = generate_text_verse(mv, verse_strings[i], itosv, stoiv, l=150)

mc = learnerc.model
# NOTE(review): the chorus is generated with generate_text_verse, not
# generate_text_chorus — confirm whether that is intentional.
chorus = generate_text_verse(mc, chorus_string, itosc, stoic, l=100)

In [14]:
# This function un-does the tokenizer's spacing in generated text.
def clean_output(x):
    """Return `x` with tokenizer spacing around contractions and commas removed.

    The replacements run in the order listed. " ' til" and " ' cause" are
    parked behind an "@" placeholder so that the generic " '" -> "'" rule
    does not glue them to the preceding word; they are restored afterwards
    as " 'til" and " 'cause". Finally every "1 " is removed, wherever it
    occurs in the text.
    """
    substitutions = (
        (" n't", "n't"),
        (" ' til", "@'til"),
        (" ' cause", "@'cause"),
        (" '", "'"),
        ("@'til", " 'til"),
        ("@'cause", " 'cause"),
        (" ,", ","),
        ("1 ", ""),
    )
    cleaned = x
    for old, new in substitutions:
        cleaned = cleaned.replace(old, new)
    return cleaned

In [15]:
# Print the song: an intro header, then each verse followed by the chorus
# (the same chorus text is repeated after every verse).
print('[Intro]')
for idx in range(num_verses):
    verse_header = '\n[Verse {}]'.format(idx + 1)
    print(verse_header)
    print(clean_output(verses[idx]))
    print('\n[Chorus]')
    print(clean_output(chorus))

[Intro]

[Verse 1]
this is the way I ’m makaveli, met a bad one patiently 
 hit the bar with one mack, then send my nigga next to me, and go head 
 so i abide, to remember the guys who was fly 
 take a phone or two, to the crib i saw you for a moment 
 put the balls to the lip, one time for the kilo 
 pop a pill, hit the crib, now you on my snack elijah 
 what's the fuck the soul of the killer ? 
 since they don't hear me, i'm seekin' another midnight 
 what's the problem girl ? don't plan 'til you get it 
 rip your head off with the thinking of the sun 
 plottin' on them friday night nights, we'll sleep late 
 t.o.p, nobody knows if we were 


[Chorus]
these days they can't change bringing somethin' hot to you 
 the na - th - th - this is the rugged shit 
 t_up aids is gold, t_up waiters is platinum 
 sit down, ask them where you get your facts from 
 five hundred thousand niggas in the hood with it 
 and a million more niggas is gettin ready to get it 
  t_up krs is to live it up in 