<a href="https://colab.research.google.com/github/mozhgans/ML-Poetry/blob/main/PoemWriterAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# An example of what we are going to create. Some lines generated by the program.



**I  every be that pull in i every i life go would be time the soul a how empty has i spread but be day of and it me will i life go would be has of and it me will i life go would be has of and it me will i life go would be has of and it me will i life go would be has of and it me will i life go would be has of and it me will i life go would be has of and it me will i life go would be.**



# Importing the libraries
We import the libraries needed to tokenize the text and pad the resulting sequences.

In [None]:

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow import keras
import numpy as np



# Preprocessing the Data
Here, we tokenize the lines of the poem and convert them to sequences of equal length.



## Extracting the raw text from the file
We read the text available in the file.


In [None]:

# Location of the plain-text file that is read as the training corpus.
text_file_path = "text"

def get_raw_data_from_file( path, encoding="utf-8" ):
    """Return the entire contents of a text file as a single string.

    Args:
        path: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            that the result does not depend on the platform's default locale.

    Returns:
        The file contents as one string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid in the given encoding.
    """
    with open(path, "r", encoding=encoding) as fd:
        return fd.read()

# Read the whole corpus into memory, then echo it so it can be inspected.
raw_text = get_raw_data_from_file(text_file_path)
print(raw_text)


## Tokenizing the lines of the poem
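We tokenize the poem using `tensorflow.keras.preprocessing.text.Tokenizer`. The raw text is split on blank lines, so each sample is one blank-line-separated block of the poem; every prefix of a block (two or more tokens long) becomes one training sequence.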

In [None]:

# Split the raw text on blank lines and fit the vocabulary on those blocks.
corpus = raw_text.split("\n\n")
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)

# One more than the vocabulary size: the extra slot covers index 0, which is
# not assigned to any word in `word_index`.
total_words = len(tokenizer.word_index) + 1

# For every block, collect each prefix that is at least two tokens long.
# The last token of a prefix later serves as the label for the tokens before it.
input_sequences = []
for token_list in tokenizer.texts_to_sequences(corpus):
    input_sequences.extend(
        token_list[:end] for end in range(2, len(token_list) + 1)
    )


## Padding the sequences of tokenized lines
We pad the sequences so as to give them equal lengths.


In [None]:

# Length of the longest n-gram sequence produced by the tokenizing cell.
sequence_lengths = [len(sequence) for sequence in input_sequences]
max_sequence_len = max(sequence_lengths)

# Left-pad every sequence to a common length. The result is stored under a new
# name instead of overwriting `input_sequences`: rebinding the input would make
# this cell non-idempotent (each re-run would see the already padded array and
# grow `max_sequence_len` by one, breaking the model's expected input length).
padded_sequences = np.array(
    pad_sequences(input_sequences, maxlen=max_sequence_len + 1, padding='pre')
)

# The last token of each padded row is the label; everything before it is the
# model input, so `x` has exactly `max_sequence_len` columns.
x, y = padded_sequences[:, :-1], padded_sequences[:, -1]
# One-hot encode the labels over the whole vocabulary.
y = keras.utils.to_categorical(y, num_classes=total_words)

print(x[0], x.shape)
print(y[0], y.shape)


# Initializing and Training the model

## Defining the model schema
We define the hyperparameters and the layer stack for our LSTM model.

In [None]:

from tensorflow import keras
from tensorflow.keras.layers import *
import tensorflow as tf

# Hyperparameters shared by the layers below.
activation_func = keras.activations.relu
dropout_rate = 0.3

# Layer stack, in order: token embedding -> LSTM -> dropout -> hidden dense
# layer -> dropout -> softmax over the whole vocabulary.
SCHEMA = [
    Embedding(input_dim=total_words, output_dim=10, input_length=max_sequence_len),
    LSTM(units=32),
    Dropout(rate=dropout_rate),
    Dense(units=32, activation=activation_func),
    Dropout(rate=dropout_rate),
    Dense(units=total_words, activation=tf.nn.softmax),
]


## Compiling the model
Compiling the Keras model.


In [None]:

# Assemble the layers into a sequential model.
model = keras.Sequential(layers=SCHEMA)

# The labels are one-hot encoded, hence categorical cross-entropy.
model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adam(),
    metrics=['accuracy'],
)

# Print the layer-by-layer overview (output shapes and parameter counts).
model.summary()


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 202, 10)           3920      
_________________________________________________________________
lstm (LSTM)                  (None, 32)                5504      
_________________________________________________________________
dropout (Dropout)            (None, 32)                0         
_________________________________________________________________
dense (Dense)                (None, 32)                1056      
_________________________________________________________________
dropout_1 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)      

## Training the model
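Training the model on the training data. The cell below runs a single epoch (`epochs=1`) so that it finishes quickly; raise `epochs` (for example to 150) to get more coherent generated text.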

In [None]:

# Fit on the padded inputs `x` and one-hot labels `y`, in mini-batches of 50.
model.fit(x=x, y=y, epochs=1, batch_size=50)




<tensorflow.python.keras.callbacks.History at 0x7f45ced709e8>

## Saving the model
Saving the model to a .h5 file.

In [None]:

# Persist the trained model to `model.h5` in the working directory.
model.save(filepath='model.h5')


In [None]:

# Convert the saved Keras model to a TensorFlow Lite flat buffer.
# `from_keras_model_file` only exists on the TF 1.x converter; when it is not
# available, load the saved model and use `from_keras_model` instead.
# NOTE(review): the recorded output of this cell shows a ConverterError --
# confirm the conversion succeeds with the TensorFlow version in use.
if hasattr(tf.lite.TFLiteConverter, 'from_keras_model_file'):
    converter = tf.lite.TFLiteConverter.from_keras_model_file('model.h5')
else:
    converter = tf.lite.TFLiteConverter.from_keras_model(
        keras.models.load_model('model.h5')
    )
tflite_model = converter.convert()

# Use a context manager so the file handle is flushed and closed
# deterministically instead of being left to the garbage collector.
with open("model.tflite", "wb") as tflite_file:
    tflite_file.write(tflite_model)


Instructions for updating:
Use tf.compat.v1.graph_util.convert_variables_to_constants
Instructions for updating:
Use tf.compat.v1.graph_util.extract_sub_graph
INFO:tensorflow:Froze 8 variables.
INFO:tensorflow:Converted 8 variables to const ops.


ConverterError: ignored

# Making Predictions
We define the predict method which takes two arguments :


1.   `seed_text`: The starter text required for the model to build sentences.
2.   `seed`: Number of words to generate and append to the starter text (default 10).




In [None]:

def predict(seed_text , seed=10 ):
    """Extend ``seed_text`` by repeatedly appending the most probable next word.

    Uses the notebook-level ``tokenizer``, ``model`` and ``max_sequence_len``.

    Args:
        seed_text: Starter text the generated words are appended to.
        seed: Number of words to generate and append.

    Returns:
        ``seed_text`` followed by ``seed`` generated words, each preceded by a
        single space. If the predicted index has no entry in the vocabulary, an
        empty word is appended, so only the space is added.
    """
    # Invert the vocabulary once per call instead of scanning it linearly for
    # every generated word.
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}

    for _ in range( seed ):
        # Encode the text generated so far and left-pad it to the input length
        # the model was built with.
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len, padding='pre')

        # `predict_classes` is not available on newer TensorFlow releases;
        # taking the argmax of the softmax output gives the same class index
        # as a plain int rather than a one-element array.
        probabilities = model.predict(token_list, verbose=0)
        predicted_index = int(np.argmax(probabilities, axis=-1)[0])

        seed_text += " " + index_to_word.get(predicted_index, "")

    return seed_text

# Ask for the starter text first and the number of words second, then print
# the generated sentence.
starter_text = input( 'Enter some starter text ( I want ... ) : ')
requested_words = int( input( 'Enter the desired length of the generated sentence : '))
print( predict( starter_text , requested_words ) )
