<a href="https://colab.research.google.com/github/malakkkk4/char-rnn-text-generator/blob/main/literature_corpus.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.utils import to_categorical


In [2]:
with open('literature_corpus.txt','r')as file:
  text=file.read()
text=text.lower().replace('\n',' ')
chars=sorted(list(set(text)))
print("Number of unique characters: ",len(chars))

char_to_int= {char:i for i,char in enumerate(chars)}
int_to_char= {i: char for i ,char in enumerate(chars)}
encoded_text= [char_to_int[char] for char in text]

Number of unique characters:  76


In [3]:
print("\nFirst 100 characters of text: ")
print(text[:100])
print("\nNumber of characters in text: ",len(text))


First 100 characters of text: 
﻿the project gutenberg ebook of moby dick; or, the whale      this ebook is for the use of anyone an

Number of characters in text:  1238226


In [4]:
model=Sequential()
model.add(SimpleRNN(units=128,input_shape=(100,1),return_sequences=False))
model.add(Dense(len(chars),activation='softmax'))

model.compile(optimizer='adam',loss='categorical_crossentropy')

  super().__init__(**kwargs)


In [5]:
model.summary()

In [6]:
seq_length=100
X=[]
y=[]

for i in range(0,len(encoded_text)-seq_length,1):
  seq_in=encoded_text[i:i + seq_length]
  seq_out=encoded_text[i + seq_length]
  X.append(seq_in)
  y.append(seq_out)

X=np.reshape(X,(len(X),seq_length,1))
X =X/float(len(chars))

y=to_categorical(y,num_classes=len(chars))

model.fit(X,y,epochs=50,batch_size=128,verbose=1)

Epoch 1/50
[1m9673/9673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 8ms/step - loss: 2.8408
Epoch 2/50
[1m9673/9673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 8ms/step - loss: 2.6554
Epoch 3/50
[1m9673/9673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 8ms/step - loss: 2.5688
Epoch 4/50
[1m9673/9673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 8ms/step - loss: 2.5028
Epoch 5/50
[1m9673/9673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 8ms/step - loss: 2.4654
Epoch 6/50
[1m9673/9673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 8ms/step - loss: 2.4387
Epoch 7/50
[1m9673/9673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 8ms/step - loss: 2.4158
Epoch 8/50
[1m9673/9673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 8ms/step - loss: 2.4028
Epoch 9/50
[1m9673/9673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 8ms/step - loss: 2.3883
Epoch 10/50
[1m9673/9673[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

<keras.src.callbacks.history.History at 0x7ce955b4cc80>

In [10]:
# Generate new text
seed_text = "call me ishmael"
generated_text = seed_text
encoded_seq = [char_to_int[char] for char in seed_text]

for _ in range(500):
    # Prepare the input shape
    encoded_seq_reshaped = np.reshape(encoded_seq, (1, len(encoded_seq), 1))
    encoded_seq_reshaped = encoded_seq_reshaped / float(len(chars))

    # Predict the next character
    prediction = model.predict(encoded_seq_reshaped, verbose=0)
    predicted_index = np.argmax(prediction)
    next_char = int_to_char[predicted_index]

    generated_text += next_char
    encoded_seq.append(predicted_index)
    encoded_seq = encoded_seq[1:]  # Move the window

print(generated_text)

call me ishmaelu and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the shate and the 
