In [1]:
from tensorflow import keras
from tensorflow.keras import layers
from keras.callbacks import TensorBoard

import numpy as np
import random
import io
import json


In [2]:
# Location of the training corpus: a local JSON file.
# (An earlier revision fetched "nietzsche.txt" with keras.utils.get_file from
# https://s3.amazonaws.com/text-datasets/nietzsche.txt instead.)
path = "./manele.json"


In [3]:
# Load the JSON corpus and concatenate every 'lyrics' element of every entry
# into one long training string (no separator is inserted between pieces).
with io.open(path, encoding="utf-8") as f:
    songs = json.load(f)
text = ''.join(lyric for song in songs for lyric in song['lyrics'])


In [4]:
# --- 1. Normalise the corpus ------------------------------------------------
# Lower-case everything, then delete a set of punctuation/symbol characters
# and a few literal tokens so they never reach the vocabulary.
text = text.lower()
to_replace = list('!"$&()*+/:;<=>@[]^_~{}')
to_replace.extend(["refren", "x2", "florin salam"])
for word in to_replace:
    text = text.replace(word, '')
print("Corpus length:", len(text))

# --- 2. Build the character vocabulary --------------------------------------
# Sorted so that the char <-> index mapping is deterministic between runs.
chars = sorted(set(text))
print(chars)
print("Total chars:", len(chars))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}

# --- 3. Cut the text into semi-redundant sequences of maxlen characters -----
# Each window of `maxlen` characters is an input; the character immediately
# after the window is its prediction target. Windows start every `step` chars.
maxlen = 60
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i : i + maxlen])
    next_chars.append(text[i + maxlen])
print("Number of sequences:", len(sentences))

# --- 4. One-hot encode inputs and targets ------------------------------------
# x: (num_sequences, maxlen, vocab_size), y: (num_sequences, vocab_size).
# NOTE: the builtin `bool` is used as dtype; the `np.bool` alias was deprecated
# in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1


Corpus length: 401854
['\n', ' ', "'", ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '9', '?', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
Total chars: 42
Number of sequences: 133932


In [5]:
# Character-level language model: one 128-unit LSTM reads a window of
# `maxlen` one-hot characters and a softmax layer scores every vocabulary
# character as the next one.
model = keras.Sequential()
model.add(keras.Input(shape=(maxlen, len(chars))))
model.add(layers.LSTM(128))
model.add(layers.Dense(len(chars), activation="softmax"))

# RMSprop with learning rate 0.01 ('adam' was the alternative tried earlier).
optimizer = keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(optimizer=optimizer, loss="categorical_crossentropy")

# TensorBoard callback object targeting ./logs/character_generation.
tensor_board = TensorBoard('./logs/character_generation')


In [6]:
def sample(preds, temperature=1.0):
    """Sample an index from a probability array, reweighted by temperature.

    The probabilities are rescaled as ``p ** (1 / temperature)`` and
    renormalised; one index is then drawn with ``np.random.multinomial``.
    Temperatures below 1 sharpen the distribution, temperatures above 1
    flatten it.

    Args:
        preds: 1-D array-like of probabilities (e.g. a softmax output).
        temperature: Reweighting factor. ``0`` selects the most likely index
            deterministically (greedy decoding) instead of dividing by zero.

    Returns:
        The sampled index as a NumPy integer.
    """
    preds = np.asarray(preds).astype("float64")
    if temperature == 0:
        # Limit of the reweighting as temperature -> 0: all mass on the argmax.
        return np.argmax(preds)
    # log(0) = -inf is fine here (it maps back to probability 0 after exp),
    # so silence the divide-by-zero warning it would otherwise emit.
    with np.errstate(divide="ignore"):
        logits = np.log(preds) / temperature
    # Shift so the largest logit is 0. This leaves the normalised result
    # unchanged but prevents every exp() from underflowing to 0 (which would
    # give 0/0 = NaN probabilities) when the temperature is very small.
    logits -= np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [7]:
# Train one epoch per fit() call so progress is printed between epochs.
epochs = 10
batch_size = 128

for epoch in range(epochs):
    print("Epoch: %d\n" % epoch)
    model.fit(
        x,
        y,
        batch_size=batch_size,
        # Run exactly one epoch, but tell Keras which one it is so that the
        # epoch index seen by callbacks keeps increasing instead of
        # restarting at 0 on every call.
        initial_epoch=epoch,
        epochs=epoch + 1,
        # The TensorBoard callback was created next to the model but was never
        # attached, so no logs were written; attach it here.
        callbacks=[tensor_board],
    )

Epoch: 0

Epoch: 1

Epoch: 2

Epoch: 3

Epoch: 4

Epoch: 5

Epoch: 6

Epoch: 7

Epoch: 8

Epoch: 9



In [9]:
# Generate `generated_length` characters from one random seed window, once
# per diversity (sampling temperature) value.
generated_length = 200
start_index = random.randint(0, len(text) - maxlen - 1)
# The same seed is reused for every diversity so the outputs are comparable.
seed_text = text[start_index : start_index + maxlen]

for diversity in (0.2, 0.5):
    print("...Diversity:", diversity)
    print('...Generating with seed:\n "' + seed_text + '"')

    window = seed_text  # sliding context fed to the model
    produced = []  # characters generated so far
    for _ in range(generated_length):
        # One-hot encode the current window as a batch of size 1.
        x_pred = np.zeros((1, maxlen, len(chars)))
        for position, symbol in enumerate(window):
            x_pred[0, position, char_indices[symbol]] = 1.0
        probabilities = model.predict(x_pred, verbose=0)[0]
        symbol = indices_char[sample(probabilities, diversity)]
        # Slide the window: drop the oldest character, append the new one.
        window = window[1:] + symbol
        produced.append(symbol)

    print("...Generated:\n", seed_text + "".join(produced))
    print()

...Diversity: 0.2
...Generating with seed:
 "nu vrei ce nu-ti convine
stiu ca ma iubesti pe mine. 

stiu "
...Generated:
 nu vrei ce nu-ti convine
stiu ca ma iubesti pe mine. 

stiu ca sa te mai pot azi inima mea. 

florin salam
e doamne sunt si sa mai placesc sa nu te las
as vrea sa te iubesc
cum sa te iubesc
dar nu ma las sa fii numai pt mine

ce as vrea sa ma dau
si nu mai pot

...Diversity: 0.5
...Generating with seed:
 "nu vrei ce nu-ti convine
stiu ca ma iubesti pe mine. 

stiu "
...Generated:
 nu vrei ce nu-ti convine
stiu ca ma iubesti pe mine. 

stiu ca nu ma las la dusmanii mei nu te las
si sa ma dau
doamne si numai sunosit de ceasa mele doamne este privat
si imi plac nimenea sa nu mai las la locul tau si am plans din tine nu ma las cu tine as vr

