**Natural Language Generation (NLG)** involves creating meaningful phrases and sentences in human-readable form from structured data. It's like teaching a computer to write or speak information that usually a human would explain, such as turning a weather forecast's data into a readable report that says, "It will be sunny tomorrow with a high of 75 degrees."

In [26]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

# Sample Data: Temperature (Celsius) and Condition
data = [
    (22, 'sunny'), (10, 'rainy'), (15, 'cloudy'), 
    (30, 'sunny'), (20, 'cloudy'), (5, 'rainy')
]
descriptions = [
    "It is a warm and sunny day.", "Expect chilly temperatures and rain.", 
    "It is quite cloudy and cool.", "It's hot and sunny outside.", 
    "Cool with clouds covering the sky.", "Cold and rainy weather expected."
]

# Tokenize and prepare sequences
tokenizer = Tokenizer()
tokenizer.fit_on_texts(descriptions)
sequences = tokenizer.texts_to_sequences(descriptions)
max_len = max(len(seq) for seq in sequences)
sequences = pad_sequences(sequences, maxlen=max_len, padding='post')
vocab_size = len(tokenizer.word_index) + 1  # vocabulary size

# Prepare input features
temps = np.array([item[0] for item in data])  # Temperatures
conditions = [item[1] for item in data]  # Conditions
tokenizer_conditions = Tokenizer()
tokenizer_conditions.fit_on_texts(conditions)
encoded_conditions = tokenizer_conditions.texts_to_sequences(conditions)
encoded_conditions = pad_sequences(encoded_conditions, maxlen=1, padding='post').flatten()
conditions_one_hot = to_categorical(encoded_conditions, num_classes=len(tokenizer_conditions.word_index) + 1)

# Normalize temperatures and combine with conditions
inputs = np.hstack([temps.reshape(-1, 1) / max(temps), conditions_one_hot])
inputs = np.repeat(inputs, max_len, axis=0).reshape(-1, max_len, inputs.shape[1])

# Prepare targets as categorical
targets = to_categorical(sequences, num_classes=vocab_size)

# Build the model
model = Sequential([
    LSTM(50, input_shape=(max_len, inputs.shape[2]), return_sequences=True),
    Dense(vocab_size, activation='softmax')
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Train the model
model.fit(inputs, targets, epochs=100, verbose=1)

def generate_text(input_data, model, tokenizer, max_len):
    result = []
    for _ in range(max_len):
        prediction = model.predict(input_data)[0, -1, :]  # Get prediction for the last timestep
        predicted_word_index = np.argmax(prediction)  # Get the index with the highest probability
        predicted_word = tokenizer.index_word.get(predicted_word_index, '')  # Map index to word
        result.append(predicted_word)
        # Normally you would update the input data here if the model is designed to handle such updates

    return ' '.join(result)

# Generate text using the model
generated_text = generate_text(test_input, model, tokenizer, max_len)
print("Generated Text:", generated_text)



Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.0238 - loss: 3.3051
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.0238 - loss: 3.2972
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.0238 - loss: 3.2894
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - accuracy: 0.0238 - loss: 3.2817
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.0238 - loss: 3.2740
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - accuracy: 0.0238 - loss: 3.2663
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - accuracy: 0.0952 - loss: 3.2586
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.1429 - loss: 3.2509
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [16]:
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(max_len, inputs.shape[1])),
    Dense(vocab_size, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()


  super().__init__(**kwargs)


In [17]:
model.fit(inputs, target, epochs=10, batch_size=1)


Epoch 1/10


ValueError: Exception encountered when calling Sequential.call().

[1mInvalid input shape for input Tensor("data:0", shape=(1, 35), dtype=float32). Expected shape (None, 7, 35), but input has incompatible shape (1, 35)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(1, 35), dtype=float32)
  • training=True
  • mask=None