In [None]:
from google.colab import drive

# Mount the user's Google Drive inside the Colab VM so files stored there can
# be read through the local filesystem.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import GRU, Dense
from tensorflow.keras.models import Sequential
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils
import numpy as np
import os


Load and preprocess a text dataset of multiple files

In [None]:
# Load the data
data_dir = 'path_to_your_data'  # replace with the path to your data

# Read every regular file in data_dir and concatenate the contents into one
# training corpus.
#  - os.listdir() returns names in arbitrary order, so they are sorted to make
#    the corpus (and everything derived from it) identical on every run.
#  - Sub-directories are skipped because open() cannot read them.
#  - Pieces are collected in a list and joined once instead of growing a
#    string with += inside the loop.
file_contents = []
for filename in sorted(os.listdir(data_dir)):
    file_path = os.path.join(data_dir, filename)
    if not os.path.isfile(file_path):
        continue
    # NOTE(review): assumes the corpus files are UTF-8 encoded; change the
    # encoding here if they are not.
    with open(file_path, 'r', encoding='utf-8') as f:
        file_contents.append(f.read())
text = ''.join(file_contents)

In [None]:
# Preprocess the data
# Vocabulary: every distinct character in the corpus, in sorted order so each
# character's position in the list is its integer id.
chars = sorted(set(text))

# Lookup tables in both directions: character -> id and id -> character.
char_to_index = {c: i for i, c in enumerate(chars)}
index_to_char = {i: c for i, c in enumerate(chars)}

Encode the input sequences as integer character indices and the target characters as one-hot vectors

In [None]:
# Prepare the dataset of input to output pairs encoded as integers
seq_length = 100

# Encode the whole corpus once. Every character takes part in up to
# seq_length overlapping windows, so encoding inside the window loop would
# repeat the same dictionary lookup that many times.
encoded_text = [char_to_index[char] for char in text]

# Input i is the window of seq_length character ids starting at position i;
# its target is the id of the character immediately after that window.
# A corpus no longer than seq_length yields an empty range and no patterns.
dataX = [encoded_text[i:i + seq_length]
         for i in range(len(encoded_text) - seq_length)]
dataY = encoded_text[seq_length:]
n_patterns = len(dataX)

In [None]:
# reshape X to be [samples, time steps, features]
X = np.reshape(dataX, (n_patterns, seq_length, 1))

# normalize: scale the integer character ids into [0, 1)
X = X / float(len(chars))

# one hot encode the output variable
# num_classes is pinned to the vocabulary size: without it the width is
# inferred from max(dataY) + 1, which is too small whenever the highest-id
# character never occurs as a target (for example, if it only appears within
# the first seq_length characters of the corpus).
# tf.keras.utils is used so this cell does not depend on the separate
# keras.utils.np_utils module.
y = tf.keras.utils.to_categorical(dataY, num_classes=len(chars))

Define and compile a GRU model with a softmax output layer

In [None]:
# Define the model
# Input dimensions come from the training array and the output width from the
# one-hot targets, so the network always matches the prepared data.
n_timesteps, n_features = X.shape[1], X.shape[2]
n_classes = y.shape[1]

model = Sequential()
# Single GRU layer with 256 units that reads the whole character window.
model.add(GRU(256, input_shape=(n_timesteps, n_features)))
# Softmax layer producing a probability for every target class.
model.add(Dense(n_classes, activation='softmax'))

Train the model on the input-output pairs of character sequences

In [None]:

# Compile and fit the model
# Training hyperparameters gathered in one place so they are easy to tune.
training_options = {'epochs': 20, 'batch_size': 128}

# Categorical cross-entropy pairs with the softmax output and one-hot targets.
model.compile(optimizer='adam', loss='categorical_crossentropy')
model.fit(X, y, **training_options)


Define a function to sample a character from a probability distribution

In [None]:
def sample(preds, temperature=1.0):
    """Draw one class index from a probability distribution.

    The distribution is re-weighted by ``temperature`` before sampling:
    values below 1 sharpen it (favouring the most likely classes), values
    above 1 flatten it (more diverse output).

    Args:
        preds: 1-D array-like of non-negative probabilities, one per class.
        temperature: positive scaling factor applied to the log-probabilities.

    Returns:
        The sampled class index as an ``int``.

    Raises:
        ValueError: if ``temperature`` is not strictly positive.
    """
    if temperature <= 0:
        raise ValueError('temperature must be positive')

    preds = np.asarray(preds).astype('float64')

    # Zero probabilities legitimately map to -inf here (and back to exactly 0
    # after exp), so the divide-by-zero warning from log(0) is silenced.
    with np.errstate(divide='ignore'):
        scaled = np.log(preds) / temperature

    # Subtract the maximum before exponentiating. The result is unchanged
    # after normalisation, but at low temperatures it prevents every term
    # from underflowing to 0, which would otherwise yield 0 / 0 = NaN.
    scaled = scaled - np.max(scaled)
    exp_preds = np.exp(scaled)
    preds = exp_preds / np.sum(exp_preds)

    # One multinomial draw gives a one-hot row; argmax recovers its index.
    probas = np.random.multinomial(1, preds, 1)
    return int(np.argmax(probas))

Generate new text


In [None]:
# Pick a random seed
# Choose a random starting offset that leaves room for a full window of
# seq_length characters, then cut that window out of the corpus to prime the
# generator.
last_start = len(text) - seq_length - 1
start_index = np.random.randint(0, last_start)

stop_index = start_index + seq_length
seed = text[start_index:stop_index]

Generate characters

In [None]:

# Generate 200 characters one at a time, feeding each sampled character back
# into the seed window. The whole loop lives in a single cell: the
# append/shift step is part of the loop body and must run on every iteration.
generated = ''

for i in range(200):  # generate 200 characters
    # Encode the seed the same way the training inputs were encoded: integer
    # character ids scaled by the vocabulary size, shaped
    # [samples, time steps, features] = (1, seq_length, 1). The model was
    # built with one feature per time step, so a one-hot input of width
    # len(chars) would not match its input shape.
    x = np.reshape([char_to_index[char] for char in seed], (1, seq_length, 1))
    x = x / float(len(chars))

    # Predict the next character
    preds = model.predict(x, verbose=0)[0]
    next_index = sample(preds, temperature=0.5)  # adjust the temperature for more or less diversity
    next_char = index_to_char[next_index]

    # Append the next character to the generated text and the seed, then drop
    # the oldest seed character so the window stays seq_length long.
    generated += next_char
    seed += next_char
    seed = seed[1:]

# Print the generated text
print(generated)