# Divina Commedia Demo 

To showcase our project results, here is a demo that runs our main script "generete_dante.py" and prints the generated Canto, which satisfies all the required properties:
- terzine structure 
- hendecasyllable verse
- rhyme scheme

### DATA PROCESSING
We first want to show the data processing we performed to obtain our final canto. For this reason, in the following section we present the Divina Commedia text in several different forms, in order to illustrate all the phases it went through before we could generate our CANTO.

#### Data cleaning 

In [3]:
import os
from dante_by_tonedrev_syl.text_processing import clean_comedy, special_tokens, prettify_text
from dante_by_tonedrev_syl.tone import ToneTagger
from dante_by_tonedrev_syl.syllabification import syllabify_verse, syllabify_verse_prettify
from dante_by_rev_syl.data_preparation import text_in_rev_syls
from dante_by_rev_syl.data_preparation import text_in_syls_rhyme


# Package directory, resolved against the directory the notebook is run from.
working_dir = os.path.abspath('dante_by_tonedrev_syl')
# The corpus file sits in a "divina_commedia" folder next to the package directory.
divine_comedy_file = os.path.join(
    os.path.dirname(working_dir),
    "divina_commedia",
    "divina_commedia_accent_UTF-8.txt",
)
# Bare expression: the notebook displays it as the cell's result.
working_dir

'/Users/allegraadinolfi/Desktop/UNIBO/DEEP LEARNING/deep_comedy/dante_by_tonedrev_syl'

In [4]:
from dante_by_tonedrev_syl.text_processing import clean_comedy, special_tokens, prettify_text
# Read the original Divine Comedy. The file name indicates UTF-8 and the text
# contains accented characters, so the encoding is passed explicitly instead
# of relying on the platform default (which is not UTF-8 everywhere).
with open(divine_comedy_file, "r", encoding="utf-8") as f:
    divine_comedy = f.read()
# Clean the raw text with the project's clean_comedy helper; the sample output
# shows the result annotated with the special tokens.
divine_comedy = clean_comedy(divine_comedy, special_tokens)
# Preview the first 600 characters and list the special tokens in use.
print(divine_comedy[:600])
print("Special tokens used: ", special_tokens.values())

<start_of_canto>
<start_of_terzina>
nel <word_sep> mezzo <word_sep> del <word_sep> cammin <word_sep> di <word_sep> nostra <word_sep> vita <end_of_verso>
mi <word_sep> ritrovai <word_sep> per <word_sep> una <word_sep> selva <word_sep> oscura <end_of_verso>
ché <word_sep> la <word_sep> diritta <word_sep> via <word_sep> era <word_sep> smarrita <end_of_verso>
<end_of_terzina>
<start_of_terzina>
ahi <word_sep> quanto <word_sep> a <word_sep> dir <word_sep> qual <word_sep> era <word_sep> è <word_sep> cosa <word_sep> dura <end_of_verso>
esta <word_sep> selva <word_sep> selvaggia <word_sep> e <word_sep
Special tokens used:  dict_values(['<start_of_canto>', '<end_of_canto>', '<start_of_terzina>', '<end_of_terzina>', '<end_of_verso>', '<word_sep>'])


#### Toned Data

In [5]:
from dante_by_tonedrev_syl.tone import ToneTagger
# Render the cleaned corpus through the project's prettify_text helper.
divine_comedy_prettified = prettify_text(divine_comedy, special_tokens)
# Disabled step, kept for reference:
# divine_comedy = remove_all_punctuation(divine_comedy)

tone_tagger = ToneTagger()
# First 200 whitespace-separated words, followed by a single empty string.
divine_comedy_words = [*divine_comedy_prettified.split()[:200], '']

# Print the tagger's result for every word on one line, space-separated,
# flushing after each word so the output appears progressively.
for w in divine_comedy_words:
    toned_word = tone_tagger.tone(w)
    print(toned_word, end=' ', flush=True)


Model: "ToneNetwork"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 30, 32)            1088      
_________________________________________________________________
bidirectional (Bidirectional (None, 1024)              2232320   
_________________________________________________________________
output (Dense)               (None, 30)                30750     
Total params: 2,264,158
Trainable params: 2,264,158
Non-trainable params: 0
_________________________________________________________________
cànto nel mèzzo del cammìn di nòstra vìta mi ritrovài per ùna sèlva oscùra ché la dirìtta vìa èra smàrrita àhi quànto a dir quàl èra è còsa dùra èsta sèlva selvàggia e àspra e fòrte che nel pènsier rinòva la paùra tant' è amàra che pòco è più mòrte ma per trattàr del ben ch'i' vi trovài dirò de l'àltre còse ch'i' v'ho scòrte io non so ben ridìr com' i' v'intrài tant' èra p

#### Syllabified Data

In [6]:
from dante_by_tonedrev_syl.syllabification import syllabify_verse, syllabify_verse_prettify
# Split the cleaned corpus into lines and keep only the verses: any line that,
# once stripped, is exactly one of the special tokens is dropped.
divine_comedy_list = [
    line
    for line in divine_comedy.split("\n")
    if line.strip() not in special_tokens.values()
]
# Syllabify and print the first 15 verses.
for line in divine_comedy_list[:15]:
    syllables = syllabify_verse(line, special_tokens, tone_tagger)
    size = len(syllables)
    print(syllables, flush=True)

['nel', '<word_sep>', 'mèz', 'zo', '<word_sep>', 'del', '<word_sep>', 'cam', 'mìn', '<word_sep>', 'di', '<word_sep>', 'nò', 'stra', '<word_sep>', 'vì', 'ta', '<end_of_verso>']
['nel', '<word_sep>', 'mez', 'zo', '<word_sep>', 'del', '<word_sep>', 'cam', 'min', '<word_sep>', 'di', '<word_sep>', 'no', 'stra', '<word_sep>', 'vi', 'ta', '<end_of_verso>']
['mi', '<word_sep>', 'ri', 'tro', 'vài', '<word_sep>', 'per', '<word_sep>', 'ù', 'na', '<word_sep>', 'sèl', 'va', '<word_sep>', 'o', 'scù', 'ra', '<end_of_verso>']
['mi', '<word_sep>', 'ri', 'tro', 'vai', '<word_sep>', 'per', '<word_sep>', 'u', 'na', '<word_sep>', 'sel', 'va<word_sep>o', 'scu', 'ra', '<end_of_verso>']
['ché', '<word_sep>', 'la', '<word_sep>', 'di', 'rìt', 'ta', '<word_sep>', 'vì', 'a', '<word_sep>', 'è', 'ra', '<word_sep>', 'smàr', 'ri', 'ta', '<end_of_verso>']
['ché', '<word_sep>', 'la', '<word_sep>', 'di', 'rit', 'ta', '<word_sep>', 'vi', 'a<word_sep>e', 'ra', '<word_sep>', 'smar', 'ri', 'ta', '<end_of_verso>']
['àhi', '<

In [7]:
# Print the syllabification of the first 15 verses using the "prettify"
# variant of the syllabifier. Judging by the sample output below, each verse
# yields a token-annotated list followed by a list without special tokens.
for line in divine_comedy_list[:15]:
    syllables = syllabify_verse_prettify(line, special_tokens, tone_tagger)
    # NOTE(review): `size` is computed but not used within this cell.
    size = len(syllables)
    print(syllables, flush=True)

['nel', '<word_sep>', 'mèz', 'zo', '<word_sep>', 'del', '<word_sep>', 'cam', 'mìn', '<word_sep>', 'di', '<word_sep>', 'nò', 'stra', '<word_sep>', 'vì', 'ta', '<end_of_verso>']
['nel', 'mez', 'zo', 'del', 'cam', 'min', 'di', 'no', 'stra', 'vi', 'ta']
['mi', '<word_sep>', 'ri', 'tro', 'vài', '<word_sep>', 'per', '<word_sep>', 'ù', 'na', '<word_sep>', 'sèl', 'va', '<word_sep>', 'o', 'scù', 'ra', '<end_of_verso>']
['mi', 'ri', 'tro', 'vai', 'per', 'u', 'na', 'sel', 'va o', 'scu', 'ra']
['ché', '<word_sep>', 'la', '<word_sep>', 'di', 'rìt', 'ta', '<word_sep>', 'vì', 'a', '<word_sep>', 'è', 'ra', '<word_sep>', 'smàr', 'ri', 'ta', '<end_of_verso>']
['ché', 'la', 'di', 'rit', 'ta', 'vi', 'a e', 'ra', 'smar', 'ri', 'ta']
['àhi', '<word_sep>', 'quàn', 'to', '<word_sep>', 'a', '<word_sep>', 'dir', '<word_sep>', 'quàl', '<word_sep>', 'è', 'ra', '<word_sep>', 'è', '<word_sep>', 'cò', 'sa', '<word_sep>', 'dù', 'ra', '<end_of_verso>']
['ahi', 'quan', 'to a', 'dir', 'qual', 'e', 'ra è', 'co', 'sa', 'd

#### Reversed data for corpus 

In [9]:
from dante_by_rev_syl.data_preparation import text_in_rev_syls
# Build the reversed-syllable token list from the cleaned corpus. Judging by
# the output below, each verse comes out as syllable tokens in reverse order,
# starting from its <end_of_verso> marker.
divine_comedy_verse = text_in_rev_syls(divine_comedy)
# Preview the first 100 tokens (displayed as the cell's result).
divine_comedy_verse[:100]

['<end_of_verso>',
 'ta',
 'vi',
 '<word_sep>',
 'stra',
 'no',
 '<word_sep>',
 'di',
 '<word_sep>',
 'min',
 'cam',
 '<word_sep>',
 'del',
 '<word_sep>',
 'zo',
 'mez',
 '<word_sep>',
 'nel',
 '<end_of_verso>',
 'ra',
 'scu',
 'va<word_sep>o',
 'sel',
 '<word_sep>',
 'na',
 'u',
 '<word_sep>',
 'per',
 '<word_sep>',
 'vai',
 'tro',
 'ri',
 '<word_sep>',
 'mi',
 '<end_of_verso>',
 'ta',
 'ri',
 'smar',
 '<word_sep>',
 'ra',
 'via<word_sep>e',
 '<word_sep>',
 'ta',
 'rit',
 'di',
 '<word_sep>',
 'la',
 '<word_sep>',
 'ché',
 '<end_of_verso>',
 'ra',
 'du',
 '<word_sep>',
 'sa',
 'co',
 '<word_sep>',
 'ra<word_sep>è',
 'e',
 '<word_sep>',
 'qual',
 '<word_sep>',
 'dir',
 '<word_sep>',
 'to<word_sep>a',
 'quan',
 '<word_sep>',
 'hi',
 'a',
 '<end_of_verso>',
 'te',
 'for',
 '<word_sep>',
 'spra<word_sep>e',
 'gia<word_sep>e<word_sep>a',
 'vag',
 'sel',
 '<word_sep>',
 'va',
 'sel',
 '<word_sep>',
 'sta',
 'e',
 '<end_of_verso>',
 'ra',
 'pau',
 '<word_sep>',
 'la',
 '<word_sep>',
 'va',
 

#### Reversed data for rhyme 

In [16]:
from dante_by_rev_syl.data_preparation import text_in_syls_rhyme
# Build the rhyme token list from the cleaned corpus. Judging by the output
# below, it keeps the structural tokens plus only the final syllables of each
# verse (in reading order) -- TODO confirm against text_in_syls_rhyme.
divine_comedy_rhyme = text_in_syls_rhyme(divine_comedy)
# Preview the first 100 tokens (displayed as the cell's result).
divine_comedy_rhyme[:100]

['<start_of_canto>',
 '<start_of_terzina>',
 'vi',
 'ta',
 '<end_of_verso>',
 'scu',
 'ra',
 '<end_of_verso>',
 'ri',
 'ta',
 '<end_of_verso>',
 '<end_of_terzina>',
 '<start_of_terzina>',
 'du',
 'ra',
 '<end_of_verso>',
 'for',
 'te',
 '<end_of_verso>',
 'pau',
 'ra',
 '<end_of_verso>',
 '<end_of_terzina>',
 '<start_of_terzina>',
 'mor',
 'te',
 '<end_of_verso>',
 'tro',
 'vai',
 '<end_of_verso>',
 'scor',
 'te',
 '<end_of_verso>',
 '<end_of_terzina>',
 '<start_of_terzina>',
 "v'in",
 'trai',
 '<end_of_verso>',
 'pun',
 'to',
 '<end_of_verso>',
 'do',
 'nai',
 '<end_of_verso>',
 '<end_of_terzina>',
 '<start_of_terzina>',
 'giun',
 'to',
 '<end_of_verso>',
 'val',
 'le',
 '<end_of_verso>',
 'pun',
 'to',
 '<end_of_verso>',
 '<end_of_terzina>',
 '<start_of_terzina>',
 'spal',
 'le',
 '<end_of_verso>',
 'ne',
 'ta',
 '<end_of_verso>',
 'cal',
 'le',
 '<end_of_verso>',
 '<end_of_terzina>',
 '<start_of_terzina>',
 'que',
 'ta',
 '<end_of_verso>',
 'ra',
 'ta',
 '<end_of_verso>',
 'pie',
 '

### MODEL STRUCTURE

In [1]:
import tensorflow as tf

def build_model(name, vocab_size, seq_length, embedding_dim=64, rnn_type='lstm', rnn_units=512, learning_rate=0.01):
    """Build, compile and summarize a sequence model over a token vocabulary.

    The network is an embedding layer, followed by one or two recurrent
    layers (all with ``return_sequences=True`` and ``dropout=0.3``), followed
    by a softmax ``Dense`` layer of size ``vocab_size``.

    Args:
        name: Name given to the ``tf.keras.Sequential`` model.
        vocab_size: Size of the embedding input and of the softmax output.
        seq_length: Length of the input sequences (input shape is
            ``(seq_length,)``).
        embedding_dim: Output dimension of the embedding layer.
        rnn_type: Recurrent stack to use: ``'lstm'``, ``'gru'``, ``'2lstm'``
            (two stacked LSTM layers) or ``'2gru'`` (two stacked GRU layers).
        rnn_units: Number of units of every recurrent layer.
        learning_rate: Learning rate of the Adam optimizer.

    Returns:
        The compiled ``tf.keras.Sequential`` model. Its summary is printed as
        a side effect.

    Raises:
        ValueError: If ``rnn_type`` is not one of the supported values.
            Previously such a value silently produced a model without any
            recurrent layer.
    """
    # rnn_type -> (recurrent layer class name, names of the stacked layers).
    # The layer names match the ones used by the original per-branch code.
    rnn_stacks = {
        'lstm': ('LSTM', ('last_lstm',)),
        'gru': ('GRU', ('last_gru',)),
        '2lstm': ('LSTM', ('first_lstm', 'last_lstm')),
        '2gru': ('GRU', ('first_gru', 'last_gru')),
    }
    # Validate before building anything, so a typo fails fast instead of
    # yielding an embedding -> dense model with no recurrence.
    if rnn_type not in rnn_stacks:
        raise ValueError(
            f"Unknown rnn_type {rnn_type!r}; expected one of {sorted(rnn_stacks)}"
        )
    layer_class_name, layer_names = rnn_stacks[rnn_type]
    rnn_layer = getattr(tf.keras.layers, layer_class_name)

    model = tf.keras.Sequential(name=name)

    model.add(tf.keras.layers.Input((seq_length,), name='input'))
    model.add(tf.keras.layers.Embedding(vocab_size, embedding_dim, name='embedding'))
    for layer_name in layer_names:
        model.add(rnn_layer(rnn_units,
                            return_sequences=True,
                            dropout=0.3,
                            recurrent_initializer='glorot_uniform',
                            name=layer_name)
        )

    # The output layer emits probabilities (softmax), which is what the
    # string loss below expects with its default settings.
    model.add(tf.keras.layers.Dense(vocab_size, activation='softmax', name='output'))

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    # `metrics` is passed as a list: newer Keras versions reject a bare string.
    model.compile(loss="sparse_categorical_crossentropy", metrics=["accuracy"], optimizer=optimizer)
    model.summary()

    return model
#build_model(....)

Once the model has been trained (in this case we are using a pretrained model), we can generate our text.

In [3]:
import os
import numpy as np
from dante_by_tonedrev_syl.data_preparation import text_in_rev_syls
from dante_by_tonedrev_syl.text_processing import prettify_text, special_tokens

#### Evaluation

#### Embedding visualization 