# Divina Commedia Generation

In [2]:
import os
import sys
# Set before TensorFlow is imported below; presumably this quiets TensorFlow's
# native (C++) log output — TODO confirm the level meaning.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import numpy as np
import tensorflow as tf
# Restrict TensorFlow's Python logger to ERROR-level messages.
tf.get_logger().setLevel('ERROR')

from dante_by_tonedrev_syl.data_preparation import text_in_rev_syls, text_in_syls_rhyme
from dante_by_tonedrev_syl.text_processing import clean_comedy, prettify_text, special_tokens
from dante_by_tonedrev_syl.generate_dante import generate_text
from utils import save_vocab, load_vocab, save_syls_list, load_syls_list

# Package directory, resolved against the current working directory; the logs,
# models and cached syllable lists used below are all looked up under it.
working_dir = os.path.join(os.path.abspath('.'), 'dante_by_tonedrev_syl')


Loading the rhyme and verse vocabularies 

In [20]:
# Vocabulary files are looked up in <working_dir>/logs; the directory is
# created when it does not exist yet.
logs_dir = os.path.join(working_dir, 'logs')
os.makedirs(logs_dir, exist_ok=True)

# Rhyme vocabulary: file path, the three values returned by load_vocab, and
# the vocabulary size.
vocab_file_rhyme = os.path.join(logs_dir, 'vocab_rhyme.json')
vocab_rhyme, idx2syl_rhyme, syl2idx_rhyme = load_vocab(vocab_file_rhyme)
vocab_size_rhyme = len(vocab_rhyme)

# Verse vocabulary: same layout as the rhyme one.
vocab_file_verse = os.path.join(logs_dir, 'vocab_verse.json')
vocab_verse, idx2syl_verse, syl2idx_verse = load_vocab(vocab_file_verse)
vocab_size_verse = len(vocab_verse)


Loading the already-trained rhyme and verse models

In [21]:
# Trained models are looked up in <working_dir>/models; the directory is
# created when it does not exist yet.
models_dir = os.path.join(working_dir, 'models')
os.makedirs(models_dir, exist_ok=True)

# Verse network.
model_file_verse = os.path.join(models_dir, "dante_by_tonedrev_syl_verse_model.h5")
model_verse = tf.keras.models.load_model(model_file_verse)

# Rhyme network.
model_file_rhyme = os.path.join(models_dir, "dante_by_tonedrev_syl_rhyme_model.h5")
model_rhyme = tf.keras.models.load_model(model_file_rhyme)


Models summary

In [22]:
# Print the layer-by-layer summary of each loaded network, rhyme network first;
# the print in between leaves blank space separating the two tables.
model_rhyme.summary()
print('\n')
model_verse.summary()

Model: "RhymeNetwork"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 580, 256)          379392    
_________________________________________________________________
last_lstm (LSTM)             (None, 580, 512)          1574912   
_________________________________________________________________
output (Dense)               (None, 580, 1482)         760266    
Total params: 2,714,570
Trainable params: 2,714,570
Non-trainable params: 0
_________________________________________________________________


Model: "VerseNetwork"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 100, 256)          1499904   
_________________________________________________________________
last_lstm (LSTM)             (None, 100, 1024)         5246976   
____________________

In [23]:
# Diagram of the rhyme network with layer names and tensor shapes.
# In the recorded run nothing was drawn: the call reported that pydot could not
# be imported, and that pydot plus graphviz must be installed for it to work.
tf.keras.utils.plot_model(model_rhyme, show_shapes=True, show_layer_names=True)

('Failed to import pydot. You must `pip install pydot` and install graphviz (https://graphviz.gitlab.io/download/), ', 'for `pydotprint` to work.')


In [24]:
# Diagram of the verse network with layer names and tensor shapes.
# In the recorded run nothing was drawn: the call reported that pydot could not
# be imported, and that pydot plus graphviz must be installed for it to work.
tf.keras.utils.plot_model(model_verse, show_shapes=True, show_layer_names=True)

('Failed to import pydot. You must `pip install pydot` and install graphviz (https://graphviz.gitlab.io/download/), ', 'for `pydotprint` to work.')


Loading the rhyme syllable sequence and building the start sequence for the rhyme model

In [25]:
# Sequence length of the rhyme model, read from dimension 1 of the output shape
# of its 'embedding' layer.
SEQ_LENGTH_RHYME = model_rhyme.get_layer('embedding').output.shape[1]


# Cache file for the rhyme syllable list; it is reused when it already exists.
text_in_syls_rhyme_file = os.path.join(working_dir, 'text_in_syls_rhyme.json')

if os.path.isfile(text_in_syls_rhyme_file):
    syls_rhyme_list = load_syls_list(text_in_syls_rhyme_file)
else:
    # NOTE(review): `divine_comedy` is not assigned in any cell above this one
    # (its first visible assignment is in the evaluation section), so on a fresh
    # kernel without the cache file this branch raises NameError. The corpus
    # must be loaded before this cell — TODO confirm the expected preprocessing.
    syls_rhyme_list = text_in_syls_rhyme(divine_comedy)
    save_syls_list(syls_rhyme_list, text_in_syls_rhyme_file)

# syls_rhyme_list = text_in_syls_rhyme(divine_comedy_r)

#index_eoc = syls_rhyme_list.index(special_tokens['END_OF_CANTO']) + 1
# Candidate positions: every END_OF_CANTO token found more than SEQ_LENGTH_RHYME
# items into the list, so a full-length window fits before it.
indexes = [i for i, x in enumerate(syls_rhyme_list) if x == special_tokens['END_OF_CANTO'] and i > SEQ_LENGTH_RHYME]
# Random pick (no seed is set in the visible cells, so the start sequence can
# differ between runs); the +1 makes the slice below include the chosen token.
index_eoc = np.random.choice(indexes) + 1
start_idx = max(0, index_eoc - SEQ_LENGTH_RHYME)
# Window of at most SEQ_LENGTH_RHYME items ending on the chosen END_OF_CANTO token.
start_seq_rhyme = syls_rhyme_list[start_idx:index_eoc]

Loading the verse syllable sequence and building the start sequence for the verse model

In [26]:
# Sequence length of the verse model, read from dimension 1 of the output shape
# of its 'embedding' layer.
SEQ_LENGTH_VERSE = model_verse.get_layer('embedding').output.shape[1]


# Cache file for the verse syllable list; it is reused when it already exists.
text_in_syls_verse_file = os.path.join(working_dir, 'text_in_syls_verse.json')

if os.path.isfile(text_in_syls_verse_file):
    syls_verse_list = load_syls_list(text_in_syls_verse_file)
else:
    # NOTE(review): `divine_comedy` is not assigned in any cell above this one
    # (its first visible assignment is in the evaluation section), so on a fresh
    # kernel without the cache file this branch raises NameError. The corpus
    # must be loaded before this cell — TODO confirm the expected preprocessing.
    syls_verse_list = text_in_rev_syls(divine_comedy)
    save_syls_list(syls_verse_list, text_in_syls_verse_file)

# syls_verse_list = text_in_rev_syls(divine_comedy_v)
# Candidate positions: every END_OF_VERSO token found more than SEQ_LENGTH_VERSE
# items into the list, so a full-length window fits before it.
indexes = [i for i, x in enumerate(syls_verse_list) if x == special_tokens['END_OF_VERSO'] and i > SEQ_LENGTH_VERSE]
# Random pick (no seed is set in the visible cells, so the start sequence can
# differ between runs).
# NOTE(review): unlike the rhyme cell there is no +1 here, so the slice below
# stops just before the chosen END_OF_VERSO token — confirm this is intended.
index_eov = np.random.choice(indexes)
start_idx = max(0, index_eov - SEQ_LENGTH_VERSE)
start_seq_verse = syls_verse_list[start_idx:index_eov]

Generating the text

In [29]:
# Generate a canto with both networks. generate_text returns two values, kept
# here as the generated text and a second variant named "no_tone".
generated_text, generated_text_no_tone = generate_text(
    model_rhyme,
    model_verse,
    special_tokens,
    vocab_size_rhyme,
    vocab_size_verse,
    syl2idx_rhyme,
    idx2syl_rhyme,
    syl2idx_verse,
    idx2syl_verse,
    SEQ_LENGTH_RHYME,
    SEQ_LENGTH_VERSE,
    start_seq_rhyme,
    start_seq_verse,
    temperature=1.0,
)

CANTO
e ròtti più e quèsta compagnìa
tal pàrte tùtta 'l presènte dòpo
che tu vedrài dinànzi a la sùa vìa

da quèlla o quèlla nùvola d'ìsopo
potrète su per la presènte rìssa
ma l'àltro che per quèsto vòlto tòpo

che più non si parèggia 'mo' e 'ìssa'
ma prìa che dicèa assài ben s'accòppia
ne l'àltra trìsta giustamènte fìssa

ed èi non hài tu vòi siète e ciò ch'el scòppia
in te magnificènza in te pòi
che purgatòrio torstàto non ti dòppia

or ti facëàn nòi più vèrso nòi
sòno schernìti con dànno e con bèffa
stàva a udìr turbàrsi e làscia lor nòi

la pòrta sòvra 'l mal volèr s'agguèffa
u' la natùra del mòndo fedèli
ciascùn da l'àltra lièvre ch'èlli àcceffa'

tu vèli m'hànno del cièlo àncora pèli
ciò che l'aspètto in sé àvea contènto
quèsti òrdini non èbber li facèli

e salìr se non può 'l càlor convènto
pòco còlpo per lo mòndo il diètro
per lo nòvo avarìzia fòsse sènto

dòve nòta da bercercàr per vètro
turbàto un pòco d'òra e tra ' rèi
ma tu perché da quèlla dèntro 'mpètro

quàndo cominciò a

KeyboardInterrupt: 

Model Evaluation

In [37]:
# Text file with a generated canto, resolved against the current directory; it
# is read back by the evaluation cells below.
# NOTE(review): no visible cell writes the output of generate_text to this
# path — confirm how this file is produced.
generated_canto_file = os.path.join('.', "generated_cantos", "generated_canto_tonedrev_model.txt")

In [40]:
# Imports for the evaluation section.
# NOTE(review): `os` and `sys` are already imported in the first cell, and
# these imports would normally live there too so a fresh run shows every
# dependency up front.
import os
import sys
# Put ./other_metrics on the module search path; presumably the modules in that
# package import each other by bare name — TODO confirm.
sys.path.append(os.path.join('.', 'other_metrics') )
import other_metrics.metrics as om
from other_metrics.ngrams_plagiarism import ngrams_plagiarism

import our_metrics.metrics as m
from dante_by_tonedrev_syl.text_processing import clean_comedy, prettify_text, special_tokens, remove_punctuation

In [41]:
# Reference corpus: the accented UTF-8 edition of the Divine Comedy.
divine_comedy_file = os.path.join('.', "divina_commedia", "divina_commedia_accent_UTF-8.txt")

# Canto to evaluate, read verbatim from disk.
with open(generated_canto_file, "r", encoding='utf-8') as f:
    generated_canto = f.read()

# Raw corpus text.
with open(divine_comedy_file, "r", encoding='utf-8') as f:
    divine_comedy = f.read()

# Same three steps as before, applied in the same order:
# clean_comedy -> prettify_text -> remove_punctuation.
divine_comedy = remove_punctuation(
    prettify_text(
        clean_comedy(divine_comedy, special_tokens),
        special_tokens,
    )
)

# Starts empty; the evaluation cells assign their results to this name.
evaluation_results = {}

In [42]:
def _drop_canto_headers(text):
    """Return ``text`` with every line stripped and 'CANTO' header lines removed.

    Each line is stripped *before* it is compared with 'CANTO', so a header
    line that carries leading or trailing whitespace is removed too instead of
    leaking into the evaluated text as a verse.
    """
    stripped_lines = (line.strip() for line in text.split("\n"))
    return "\n".join(line for line in stripped_lines if line != 'CANTO')


def evaluate_other_metrics(generated_canto, divine_comedy):
    """Score a generated canto with the ``other_metrics`` evaluators.

    Args:
        generated_canto: text of the generated canto; 'CANTO' header lines are
            removed and every line is stripped before evaluation.
        divine_comedy: reference text used for the plagiarism score; it gets
            the same header removal and stripping.

    Returns:
        The mapping returned by ``om.eval`` with one extra key, 'Plagiarism',
        holding the result of ``ngrams_plagiarism`` with ``n=4``.
    """
    generated_canto = _drop_canto_headers(generated_canto)
    divine_comedy = _drop_canto_headers(divine_comedy)

    evaluation_results = om.eval(
        generated_canto,
        verbose=False,
        synalepha=True,
        permissive=False,
        rhyme_threshold=1.0,
    )
    evaluation_results['Plagiarism'] = ngrams_plagiarism(generated_canto, divine_comedy, n=4)

    return evaluation_results


def evaluate_our_metrics(generated_canto, divine_comedy):
    """Score a generated canto with the ``our_metrics`` evaluator.

    Args:
        generated_canto: text of the generated canto; 'CANTO' header lines are
            removed and every line is stripped before evaluation.
        divine_comedy: not used by this evaluation; the parameter is kept so
            both evaluators can be called with the same arguments.

    Returns:
        Whatever ``m.eval`` returns for the cleaned canto (``synalepha=True``).
    """
    return m.eval(_drop_canto_headers(generated_canto), synalepha=True)

In [44]:
# Show the canto that was read from generated_canto_file; print() is used so
# the text is shown with its line breaks rather than as a quoted string.
print(generated_canto)

CANTO
sempre l'amor che son fravati santo'
e forse vedi il lume di paradiso
allora il cor secondo predicanto

e l'altra non pur lo suo proprio riso
che fece me e più e per quelbrezza
sì che veggiate il duca mio avviso

colui ch'a lei tanta allegrezza
ma costui puose il mondo in tanta pace
pur o gente sicura ricchezza

per lo dolce che lì riguardia face
ma nel mondo del martiro che divenne
agulando prima e poi con più vivace

e simigliante poi a dir li venne
che dicëa alcuna cosa e marte
sì che vo far morrà del monte penne

a la poeta che 'n osa onde si parte
con una voce detto vato coro
veduto hai figlio e se' venuto in parte

però de vero in tristi di coloro
però a la rimasa dietro ond' io
e che ne la speranza di costoro

sì volsi li occhi a li occhi segnor mio
sì che come fa l'uom che tanto vaca
e dolcezza di dì l'andare a dio

dallato m'era solo il mio cloaca
ch'emisperio de l'universo
fatto m'hai voto poi non agnel si placa

e vinse in camlo seder de l'universo
da tre e a noiga va 

In [43]:
# Run both evaluators on the generated canto and print each result mapping as
# "key: value" lines. One entry per report: progress message, results heading,
# evaluator function. The "other" metrics run first, then "our" metrics.
evaluation_reports = (
    (
        '\nEVALUATING OTHER METRICS ON GENERATED CANTO...',
        '\nOTHER METRICS ON GENERATED CANTO:',
        evaluate_other_metrics,
    ),
    (
        '\nEVALUATING OUR METRICS ON GENERATED CANTO...',
        '\nOUR METRICS ON GENERATED CANTO:',
        evaluate_our_metrics,
    ),
)

for progress_message, results_heading, evaluate in evaluation_reports:
    print(progress_message)
    # After the loop this name holds the results of the last evaluator run.
    evaluation_results = evaluate(generated_canto, divine_comedy)

    print(results_heading)
    for k, v in evaluation_results.items():
        print('{}: {}'.format(k, v))


EVALUATING OTHER METRICS ON GENERATED CANTO...

OTHER METRICS ON GENERATED CANTO:
Number of putative terzine: 89
Number of well formed terzine: 89
Average structuredness: 0.9972144846796658
Average hendecasyllabicness: 0.9311797752808986
Average rhymeness: 0.9469696969696971
Plagiarism: 0.858359957401491

EVALUATING OUR METRICS ON GENERATED CANTO...

OUR METRICS ON GENERATED CANTO:
Number of verses: 268
Number of strophes: 90
Number of well formed terzine: 89
Last single verse: True
Average syllables per verse: 11.10 ± 0.63
Hendecasyllables by tone: 0.7164
Number of rhymes: 0
