In this notebook there are some examples of how to perform poetry generation with our corpus.

First of all, let us define a helper that executes another notebook inside the current session, so that the variables and functions it defines become available here.

In [None]:
import io
from IPython.nbformat import current

def execute_notebook(nbfile):
    """Run every code cell of another notebook in the current IPython session.

    The cells are executed through the running interactive shell, so any
    names they define end up in this notebook's namespace.

    Parameters
    ----------
    nbfile : str
        Path of the notebook file to execute (read as JSON).

    Notes
    -----
    Must be called from inside IPython/Jupyter, where ``get_ipython`` is
    available. Non-code cells are skipped.
    """
    # Notebooks are JSON documents, conventionally UTF-8 encoded; be explicit
    # so reading does not depend on the platform's default encoding.
    with io.open(nbfile, encoding='utf-8') as f:
        nb = current.read(f, 'json')

    ip = get_ipython()

    # Walk all worksheets instead of assuming exactly one exists: an empty
    # list no longer raises IndexError and extra worksheets are not ignored.
    for worksheet in nb.worksheets:
        for cell in worksheet.cells:
            if cell.cell_type != 'code':
                continue
            ip.run_cell(cell.input)

Let us load the configuration code that defines variables and functions specific to our application.

In [None]:
# Run the code cells of Get_started.ipynb in this session so that the names
# it defines can be used by the cells below.
execute_notebook("Get_started.ipynb")

Remove the sentences that cannot rhyme.

In [None]:
# Pv holds one extract_verses() result per document in Dv
# (Dv and extract_verses are not defined in this notebook section).
Pv = [extract_verses(document) for document in Dv]
# Verses reported by Poetry.noRhymingSentences for Pv.
noRhymingVerses = Poetry.noRhymingSentences(Pv)
# Presumably Pv with the non-rhyming verses removed — confirm against General.substractList.
possibleRhymes = General.substractList(Pv, noRhymingVerses)

Compute the number of rhyming partitions of the set of verses with a minimum of elements.

In [None]:
# Disabled alternative: run the analysis on possibleRhymes instead of the full Pv.
# noRhymeSentences, noRhymeLastWords, cleanPartitionIndices, cleanPartitionSentences = Poetry.analyzeProspectiveRhymes(possibleRhymes)
noRhymeSentences, noRhymeLastWords, cleanPartitionIndices, cleanPartitionSentences = Poetry.analyzeProspectiveRhymes(Pv)
# RP is not defined in this section (presumably set by Get_started.ipynb — confirm).
validPartitionSentences = Poetry.possiblePartitions(cleanPartitionSentences, RP)
# Debug output, disabled:
# print validPartitionSentences

Create a semantic model.

In [None]:
# Tunable inputs for the semantic model.
number_topics = 100
# Words handed to the extractor as `filtered_words`
# (they look like high-frequency function words — confirm with the corpus owners).
filtered_words = [
    'dut', 'ni', 'zu', 'da', 'du', 'dute', 'zen', 'ere',
    'gu', 'dugu', 'ez', 'bat', 'hori', 'hor', 'dira', 'baina',
    'bi', 'zi', 'zut', 'zituzten', 'atzo', 'beste', 'dela',
]
# Presumably document-frequency bounds for the dictionary — confirm in NLP.semanticsExtractor.
no_below = 5
no_above = 0.2

# Build the dictionary, corpus, TF-IDF model and LSI model from the lemmatized documents.
dictionary, corpus, tfidfModel, lsiModel = NLP.semanticsExtractor(
    lemmatizedDs,
    number_topics,
    filtered_words,
    no_below,
    no_above
)

## Poetry generation

Find the *n* verses most similar to a theme *t*, and call the most similar one *v*.

In [None]:
fileSim = NLP.getSimilarityMatrix(lemmatizedDv, dictionary, tfidfModel, lsiModel)
t = 'guraso' # the semantic similarity of the verses will be computed against this theme
simsWithModel = NLP.simsFromSentence(NLP.lemmatizeString(t), dictionary, lsiModel, fileSim)
n = -1 # number of the best sentences returned
bestIndexes, bestValues, bestSentencesLemmatized, bestSentencesOriginal = NLP.getIndexesAndSentencesFromSimsValues(simsWithModel, lemmatizedDv_filename, Dv_filename, n)
similarToTheme = sorted(zip(bestSentencesOriginal, bestValues), key=lambda pair: pair[1], reverse = True)
print similarToTheme
v = similarToTheme[0][0]
print "More similar verse: " + v

Find the verses most similar to *t* that also rhyme with *v*.

In [None]:
# Look up the group of verses associated with v in validPartitionSentences
# (presumably its rhyme class, going by the helper's name — confirm in Poetry).
rhymingSentences = Poetry.getEquivalenceClassForVerse(v, validPartitionSentences)
print rhymingSentences

In [None]:
# Lemmatize the theme and join the pieces into a single string.
aux = NLP.lemmatizeListOfSentences([t])
lemmatized_t = ' '.join(aux)

# Similarity between the theme and each verse in rhymingSentences.
similaritiesWithT = NLP.similarityFromSentenceToSetSentences(
    lemmatized_t,
    rhymingSentences,
    NLP.cosineSimilarityBetweenTwoSentences,
    dictionary,
    tfidfModel,
    lsiModel)

# Rank the rhyming verses by similarity, best first, and show the top four.
results = list(zip(rhymingSentences, similaritiesWithT))
results.sort(key=lambda pair: pair[1], reverse=True)
results[:4]

Build the stanza

In [None]:
# Build a stanza for seed verse v from the ranked rhyming candidates.
# The third return value is not used in the visible cells.
stanza, stanza_fitness, other = Poetry.getBestStanza(v, results)
print stanza, stanza_fitness

Build a stanza for each of the *numVerses* verses most similar to the theme, and keep the stanza with the highest fitness.

In [None]:
numVerses = 10
best_stanza_fitness = -1000
best_stanza_list = []
best_stanza = []
for j in range(numVerses):
    v = similarToTheme[j][0]
    rhymingSentences = Poetry.getEquivalenceClassForVerse(v, validPartitionSentences)
    aux = NLP.lemmatizeListOfSentences([t])
    lemmatized_t = ' '.join(aux)
    similaritiesWithT = NLP.similarityFromSentenceToSetSentences(lemmatized_t, rhymingSentences, NLP.cosineSimilarityBetweenTwoSentences, dictionary, tfidfModel, lsiModel)
    results = sorted(zip(rhymingSentences, similaritiesWithT), key=lambda pair: pair[1], reverse = True)
    stanza, stanza_fitness, other = Poetry.getBestStanza(v, results)
    if stanza_fitness > best_stanza_fitness:
        best_stanza_fitness = stanza_fitness
        best_stanza = stanza
    best_stanza_list.append((stanza, stanza_fitness))
print best_stanza
print best_stanza_fitness
first = [elem[0] for elem in best_stanza_list]
second = [elem[1] for elem in best_stanza_list]
print sorted(zip(first, second), key=lambda pair: pair[1], reverse = True)