## *Poetry Generation*

### Load Libraries

In [1]:
import tensorflow as tf
from tensorflow import keras
import requests
import numpy as np
import pandas as pd
from keras.optimizers import Adam

### Load Dataset

In [2]:
Data = requests.get('https://raw.githubusercontent.com/laxmimerit/poetry-data/master/adele.txt').text

In [3]:
Data = Data.split(sep="\n")

In [4]:
len(Data)

2400

In [5]:
max_line_size = max(len(x.split(sep=" ")) for x in Data)
max_line_size

30

In [6]:
for x in Data:
    if len(x.split(sep=" "))==30:
        print(x)

It's in my blood and I stain every heart that I use to heal the pain So I blame it on the River Lea, the River Lea, the River Lea


In [7]:
Input_Data = ["starttoken " + i for i in Data]
#Input_Data = [i.split(" ") for i in Input_Data_]

In [8]:
Output_Data = [i + " endtoken" for i in Data]
#Output_Data = [i.split(" ") for i in Output_Data_]

In [9]:
Input_Data[5],Output_Data[9]

("starttoken Let's just say that maybe", "It's more than enough endtoken")

In [10]:
Dataset = Input_Data + Output_Data

In [11]:
Dataset[9]

"starttoken It's more than enough"

In [12]:
VOC_SIZE = 20000
OUTPUT_SEQ = 64
EMBEDDING_DIM =50
LATENT_DIM = 25

In [13]:
vect = keras.layers.TextVectorization(max_tokens=VOC_SIZE,standardize='lower_and_strip_punctuation',
                                      output_mode='int',output_sequence_length=OUTPUT_SEQ)

In [14]:
vect.adapt(Dataset)

In [15]:
encoded_input = vect(Input_Data)
encoded_output = vect(Output_Data)

In [16]:
vect.get_vocabulary()[235]

'maybe'

In [17]:
Num_Words = len(vect.get_vocabulary())

In [18]:
type(vect.get_vocabulary())

list

In [19]:
Input_Data[5]

"starttoken Let's just say that maybe"

In [20]:
encoded_input[5]

<tf.Tensor: shape=(64,), dtype=int64, numpy=
array([  2, 296,  38,  49,  13, 235,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
      dtype=int64)>

In [21]:
encoded_output.shape, len(encoded_output)

(TensorShape([2400, 64]), 2400)

### Hot-Encode Target

In [22]:
target_data = np.zeros((len(encoded_output),OUTPUT_SEQ,Num_Words))
for i,target in enumerate(encoded_output):
    for j,word in enumerate(target):
        if word>0:
            target_data[i,j,word] = 1

In [23]:
encoded_output[1]

<tf.Tensor: shape=(64,), dtype=int64, numpy=
array([120,  11,  82, 159,   6, 140,   3,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
      dtype=int64)>

In [24]:
target_data[1,0,120]

1.0

### Model

In [25]:
input_ = keras.layers.Input(shape=(OUTPUT_SEQ,))
initial_h = keras.layers.Input(shape=(LATENT_DIM,))
initial_c = keras.layers.Input(shape=(LATENT_DIM,))
emb = keras.layers.Embedding(VOC_SIZE,EMBEDDING_DIM)
x = emb(input_)
lstm = keras.layers.LSTM(LATENT_DIM,return_sequences=True,return_state=True)
x,_,_ = lstm(x,initial_state=[initial_h,initial_c])
dense_1 = keras.layers.Dense(256,activation='relu')
dense = dense_1(x)
dense_2 = keras.layers.Dense(Num_Words,activation="softmax")
output = dense_2(dense)

In [26]:
model = keras.models.Model([input_,initial_h,initial_c],output)

In [27]:
model.compile(loss='CategoricalCrossentropy',metrics=['acc'],optimizer=Adam())

In [28]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 64)]         0           []                               
                                                                                                  
 embedding (Embedding)          (None, 64, 50)       1000000     ['input_1[0][0]']                
                                                                                                  
 input_2 (InputLayer)           [(None, 25)]         0           []                               
                                                                                                  
 input_3 (InputLayer)           [(None, 25)]         0           []                               
                                                                                              

In [29]:
z = np.zeros((len(encoded_input),LATENT_DIM))
z.shape

(2400, 25)

In [30]:
history = model.fit([encoded_input,z,z],target_data,batch_size=128,epochs=30,validation_split=0.2)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


### Sampling Model

In [31]:
input_test = keras.layers.Input(shape=(1,))
x = emb(input_test)
x,h,c = lstm(x,initial_state=[initial_h,initial_c])
dense_1 = keras.layers.Dense(256,activation='relu')
dense = dense_1(x)
dense_2 = keras.layers.Dense(Num_Words,activation="softmax")
output = dense_2(dense)
#output = dense(x)

In [32]:
sample_model = keras.models.Model([input_test,initial_h,initial_c],[output,h,c])

### Generate Sample Lines

In [33]:
word2idx = {}
for i,w in enumerate(vect.get_vocabulary()):
    word2idx[w]=i

In [34]:
word2idx['starttoken']

2

In [35]:
max_poem_line_size = 10

In [36]:
def sample_line():
    np_input = np.array([[word2idx['starttoken']]])
    h = np.zeros(shape=(1,LATENT_DIM))
    c = np.zeros(shape=(1,LATENT_DIM))
    output_line = []
    for _ in range(max_poem_line_size):
        o,h,c = sample_model.predict([np_input,h,c],verbose=0)
        probs = o[0,0]
        if np.argmax(probs) == 0:
            print('oooooo')
        probs[0] = 0
        probs /= probs.sum()
        idx = np.random.choice(len(probs),p=probs)
        if idx == word2idx['endtoken']:
            break
        output_line.append(vect.get_vocabulary()[idx])
        np_input[0,0] = idx
    return ' '.join(output_line)

In [37]:
while True:
    for _ in range(4):
        print(sample_line())
    gen = input("---Generate another one [Y/N]")
    if gen and gen[0].lower().startswith('n'):
        break

drown falling god hardest unkind choose scratch walk fast situation
brought looking collided by wings watching instead insane distance flashing
watching chase earn wake day woah fact pushing hurt lies
which in fun crashing be but despair see remind honey
---Generate another one [Y/N]y
pretend owe verse bullshit secret standing seem soaking friend reflection
clue closer dirty talk shouldnt almost see adele turning fallen
things lets by it heavyhearted to seethrough weve none distance
help mm word closly rest disappeared desperately what whispered older
---Generate another one [Y/N]n
