In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import requests
import pickle
import numpy as np
from pathlib import Path
from datetime import datetime

from src.nlp.generate_lyrics import DeepLyric
from src.nlp.evaluate_lyrics import Evaluator

## Model Loading

The `DeepLyric` object can now be loaded by two different methods.
1. Load the model object and itos object directly
2. Input model `str` and download from cloud storage

### Load Method 1
It's important to input `model_name` explicitly if this method is used

In [3]:
MODEL = '4.2-LM-108k-lines-genre-song_title'

In [4]:
model_url = f'https://storage.googleapis.com/w210-capstone/models/{MODEL}_architecture.pkl'
model = requests.get(model_url)
model = model.content
model = pickle.loads(model)

In [5]:
itos_url = f'https://storage.googleapis.com/w210-capstone/models/{MODEL}_itos.pkl'
itos = requests.get(itos_url)
itos = itos.content
itos = pickle.loads(itos)

In [6]:
deep_lyric = DeepLyric(model, itos=itos, model_type='language', model_name=MODEL)

In [7]:
deep_lyric.config

{'seed_text': 'xbos',
 'max_len': 40,
 'GPU': True,
 'context_length': 30,
 'beam_width': 3,
 'verbose': 0,
 'temperature': 1.5,
 'top_k': 3,
 'audio': None,
 'multinomial': True,
 'genre': None,
 'title': None,
 'model_name': '4.2-LM-108k-lines-genre-song_title',
 'model_type': 'language'}

### Load Method 2

This method is more convenient, but takes a few extra seconds at the beginning to download the models

In [83]:
deep_lyric = DeepLyric(MODEL, model_type='language')

In [84]:
deep_lyric.config

{'seed_text': 'xbos',
 'max_len': 40,
 'GPU': True,
 'context_length': 30,
 'beam_width': 3,
 'verbose': 0,
 'temperature': 1.5,
 'top_k': 3,
 'audio': None,
 'multinomial': True,
 'genre': None,
 'title': None,
 'model_name': '4.2-LM-108k-lines-genre-song_title',
 'model_type': 'language'}

## Changing Parameters, Generating, and Saving
Once the model is loaded, the rest of the functions work identically. There are also two ways to set configs.
1. Pass in a key/value pair
2. Pass in an entire dictionary (this will overwrite all current config)

In [85]:
deep_lyric.set_config('seed_text', 'xbos xgenre')
deep_lyric.set_config('max_len', 80)
deep_lyric.set_config('top_k', 5)
deep_lyric.set_config('context_length', 40)
deep_lyric.set_config('verbose', 0)

In [23]:
deep_lyric.generate_text()

Here is the printed lyrics from the above instance

In [24]:
deep_lyric.print_lyrics()

SONG START
 genre: death metal 
 title: the rotting applause 

xbol-1 pass this task 
 bring your confusion 
 whether you like this 
 drink your meal today 
 
 drag your breasts to your knees 
 or walk all over 

Here's a preview of the json payload. Simply change the parameter `dir` to a directory to save to disk.
File names are UTC timestamps to attempt to be unique

In [11]:
deep_lyric.save_json(out=True)

{'meta': {'seed_text': 'xbos xgenre death metal xtitle',
  'max_len': 40,
  'GPU': True,
  'context_length': 40,
  'beam_width': 3,
  'verbose': 0,
  'temperature': 1.5,
  'top_k': 5,
  'audio': None,
  'multinomial': True,
  'genre': None,
  'title': None,
  'model_name': '4.2-LM-108k-lines-genre-song_title',
  'model_type': 'language'},
 'lyric': ['xbos',
  'xgenre',
  'death',
  'metal',
  'xtitle',
  'and',
  'i',
  'don',
  "'t",
  'think',
  'that',
  'xbol-1',
  'after',
  'all',
  'this',
  'xeol',
  'xbol-2',
  'what',
  'can',
  'i',
  'see',
  'now',
  '?',
  'xeol',
  'xbol-3',
  'it',
  "'s",
  'blind',
  'because',
  'i',
  'xeol',
  'xbol-4',
  'can',
  "'t",
  'find',
  'my',
  'way',
  'back',
  '!',
  'xeol',
  'xeos',
  'xbos',
  'xgenre',
  'nan',
  'xtitle']}

And if we wanted to run a bunch of simulations with the specified parameters, we just do this:

In [None]:
# for i in range(10):
#     deep_lyric.generate_text()
#     deep_lyric.save_json(dir='.')

## Evaluation

The `Evaluator` is an extension of `DeepLyric` that piggy-backs off the config states. We can pass the configured deep lyric object above to do further evals

None of the actual eval functions are hooked up yet, but here is how we'd interface with the framework

In [121]:
# evaluator = Evaluator(deep_lyric, set_lyric_state=False)
deep_lyric.set_config('seed_text', 'xbos xgenre rap')
deep_lyric.set_config('max_len', 60)
evaluator = Evaluator(deep_lyric, set_lyric_state=False)

In [108]:
evaluator.deep_lyric.config

{'seed_text': 'xbos xgenre metal',
 'max_len': 40,
 'GPU': True,
 'context_length': 40,
 'beam_width': 3,
 'verbose': 0,
 'temperature': 1.5,
 'top_k': 5,
 'audio': None,
 'multinomial': True,
 'genre': None,
 'title': None,
 'model_name': '4.2-LM-108k-lines-genre-song_title',
 'model_type': 'language'}

In [109]:
# evaluator.get_lyric()
# evaluator.generated_song

In [122]:
# not yet implemented
evaluator.get_lyric()
evaluator.evaluate(out=True)

The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


{'rhymeDensityAP': 0.5365853658536586,
 'rhymeDensityAV': 0.6585365853658537,
 'rhymeDensityAS': 0.5609756097560976,
 'rhymeDensityEP': 0.5,
 'rhymeDensityEV': 0.5,
 'rhymeDensityES': 0.5,
 'BLEU_1_excl_Unsmoothed': 0.9310627797040228,
 'BLEU_2_excl_Unsmoothed': 0.36334157256742355,
 'BLEU_3_excl_Unsmoothed': 0.023276569492600572,
 'BLEU_4_excl_Unsmoothed': 2.0716834517485276e-308,
 'BLEU_3_cumul_Smoothed': 0.4390031332100231,
 'BLEU_4_cumul_Smoothed': 0.2621987224388713,
 'closestMeters': ['Iambic dimeter', 'Trochaic dimeter'],
 'editsPerLine': 169.0,
 'POS_conformity': 0.8004035714285714}

In [111]:
evaluator.deep_lyric.print_lyrics()

SONG START
 genre: metal 
 title: false fate 

xbol-1 at this point of romance , babe 
 yeah you are where i 've been 
 know i 've got you on my side 
 yeah , look at us now , oh 

In [112]:
evaluator.save_json(out=True)

{'meta': {'seed_text': 'xbos xgenre metal',
  'max_len': 40,
  'GPU': True,
  'context_length': 40,
  'beam_width': 3,
  'verbose': 0,
  'temperature': 1.5,
  'top_k': 5,
  'audio': None,
  'multinomial': True,
  'genre': None,
  'title': None,
  'model_name': '4.2-LM-108k-lines-genre-song_title',
  'model_type': 'language'},
 'lyric': ['xbos',
  'xgenre',
  'metal',
  'xtitle',
  'false',
  'fate',
  'xbol-1',
  'at',
  'this',
  'point',
  'of',
  'romance',
  ',',
  'babe',
  'xeol',
  'xbol-2',
  'yeah',
  'you',
  'are',
  'where',
  'i',
  "'ve",
  'been',
  'xeol',
  'xbol-3',
  'know',
  'i',
  "'ve",
  'got',
  'you',
  'on',
  'my',
  'side',
  'xeol',
  'xbol-4',
  'yeah',
  ',',
  'look',
  'at',
  'us',
  'now',
  ',',
  'oh'],
 'metrics': {'rhymeDensityAP': 0.20689655172413793,
  'rhymeDensityAV': 0.4482758620689655,
  'rhymeDensityAS': 0.4482758620689655,
  'rhymeDensityEP': 0.0,
  'rhymeDensityEV': 0.0,
  'rhymeDensityES': 0.0,
  'BLEU_1_excl_Unsmoothed': 0.966666666666

In [118]:
import collections
def flatten(d, parent_key='', sep='_'):
    items = []
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, collections.MutableMapping):
            items.extend(flatten(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)

### Batch Evaluation

In [12]:
deep_lyric.config

{'seed_text': 'xbos',
 'max_len': 40,
 'GPU': True,
 'context_length': 30,
 'beam_width': 3,
 'verbose': 0,
 'temperature': 1.5,
 'top_k': 3,
 'audio': None,
 'multinomial': True,
 'genre': None,
 'title': None,
 'model_name': '4.2-LM-108k-lines-genre-song_title',
 'model_type': 'language',
 None: None}

In [82]:
import warnings
warnings.filterwarnings("ignore")

import time
now = time.time()

# load model
deep_lyric = DeepLyric(model, itos=itos, model_type='language', model_name=MODEL)

N = 1
DIR = '../data/lyrics/batch-01'
# Set Grid
genre = ['pop', 'indie', 'jazz', 'metal', 'spanish', 'christian', 'rap', 'hip hop', 'rock']
temperature = [1.2, 1.3, 1.4, 1.5, 1.6]
top_k = [3, 6, 9, 12]
beam_width = [3, 6, 9]

# grid generate
for combo in itertools.product(genre, temperature, top_k, beam_width):
    _genre, _temperature, _top_k, _beam_width = combo
    deep_lyric.set_config('genre', _genre)
    deep_lyric.set_config('temperature', _temperature)
    deep_lyric.set_config('top_k', _top_k)
    deep_lyric.set_config('beam_width', _beam_width)

    for _ in range(N):
        evaluator = Evaluator(deep_lyric, set_lyric_state=False)
        evaluator.get_lyric()
        evaluator.evaluate()
        evaluator.save_json(dir=DIR)

print(f'Run Time: {time.time() - now}')

Run Time: 2908.7841925621033


In [89]:
import os, json
import collections
import pandas as pd

def open_json(file):
    if not file.split('/')[-1].startswith('.'):
        with open(file) as f:
            xx = json.load(f)
        return xx
    
def flatten(d, parent_key='', sep='_'):
    items = []
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, collections.MutableMapping):
            items.extend(flatten(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)

batch_list = [open_json(os.path.join(DIR, file)) for file in os.listdir(DIR)]
flattened = [flatten(b) for b in batch_list if b]
df = pd.DataFrame(flattened)
df.metrics_closestMeters.unique

<bound method Series.unique of 0                     [Iambic dimeter, Trochaic dimeter]
1                                                   None
2                     [Iambic dimeter, Trochaic dimeter]
3                     [Iambic dimeter, Trochaic dimeter]
4                     [Iambic dimeter, Trochaic dimeter]
5                     [Iambic dimeter, Trochaic dimeter]
6                     [Iambic dimeter, Trochaic dimeter]
7                     [Iambic dimeter, Trochaic dimeter]
8                     [Iambic dimeter, Trochaic dimeter]
9                     [Iambic dimeter, Trochaic dimeter]
10                    [Iambic dimeter, Trochaic dimeter]
11                    [Iambic dimeter, Trochaic dimeter]
12                    [Iambic dimeter, Trochaic dimeter]
13                    [Iambic dimeter, Trochaic dimeter]
14                    [Iambic dimeter, Trochaic dimeter]
15                    [Iambic dimeter, Trochaic dimeter]
16                    [Iambic dimeter, Trochaic dimeter]


### Store to Cloud...

In [94]:
OUT_STORE = f'gs://w210-capstone/lyrics/{model_name}/'

In [95]:
!gsutil -m -o cp $OUT_PATH/* $OUT_STORE

Copying file://../data/lyrics/4.2-LM-108k-lines-genre-song_title/1543790822-1.0-1.txt [Content-Type=text/plain]...
Copying file://../data/lyrics/4.2-LM-108k-lines-genre-song_title/1543790825-1.0-2.txt [Content-Type=text/plain]...
Copying file://../data/lyrics/4.2-LM-108k-lines-genre-song_title/1543790834-1.0-3.txt [Content-Type=text/plain]...
Copying file://../data/lyrics/4.2-LM-108k-lines-genre-song_title/1543790842-1.0-4.txt [Content-Type=text/plain]...
- [4 files][  2.7 KiB/  2.7 KiB]                                                
==> NOTE: You are performing a sequence of gsutil operations that may
run significantly faster if you instead use gsutil -m -o ... Please
see the -m section under "gsutil help options" for further information
about when gsutil -m can be advantageous.

Copying file://../data/lyrics/4.2-LM-108k-lines-genre-song_title/1543790850-1.0-5.txt [Content-Type=text/plain]...
Copying file://../data/lyrics/4.2-LM-108k-lines-genre-song_title/1543790859-1.2-1.txt [Conte

In [68]:
deep_lyric.tokenize("But, soft! what light through yonder window breaks? It is the east, and Juliet is the sun")

['but',
 ',',
 'soft',
 '!',
 'what',
 'light',
 'through',
 'yonder',
 'window',
 'breaks',
 '?',
 'it',
 'is',
 'the',
 'east',
 ',',
 'and',
 'juliet',
 'is',
 'the',
 'sun']