# Building an NLP language model for text generation involves training a neural network to predict the next word in a sequence of words.

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Assume 'text_data' is your training dataset, where each element is a sentence.

# Tokenize the text data: learn the vocabulary from the corpus.
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(text_data)
# One extra slot on top of the learned vocabulary size.
total_words = 1 + len(tokenizer.word_index)

# Create input sequences and labels: every prefix of length >= 2 of each
# tokenized line becomes one training example.
input_sequences = [
    tokens[:end]
    for tokens in (tokenizer.texts_to_sequences([sentence])[0] for sentence in text_data)
    for end in range(2, len(tokens) + 1)
]

# Left-pad every example to the length of the longest one.
max_sequence_length = max(map(len, input_sequences))
input_sequences = tf.keras.preprocessing.sequence.pad_sequences(
    input_sequences,
    maxlen=max_sequence_length,
    padding='pre',
)

# The last column is the word to predict; everything before it is the input.
X = input_sequences[:, :-1]
y = tf.keras.utils.to_categorical(input_sequences[:, -1], num_classes=total_words)

# Build the model: embedding -> LSTM -> softmax over the vocabulary.
model = Sequential([
    Embedding(total_words, 100, input_length=max_sequence_length-1),
    LSTM(100),
    Dense(total_words, activation='softmax'),
])

# Labels are one-hot encoded, hence categorical cross-entropy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train the model
model.fit(X, y, epochs=50, verbose=1)

# Generate text: repeatedly predict the next word and append it to the seed.
seed_text = "The quick brown fox"
next_words = 10

for _ in range(next_words):
    # Encode the current text the same way the training examples were encoded.
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = tf.keras.preprocessing.sequence.pad_sequences([token_list], maxlen=max_sequence_length-1, padding='pre')

    # `Sequential.predict_classes` was removed in TensorFlow 2.6; take the
    # argmax of the softmax output instead and convert it to a plain int.
    probabilities = model.predict(token_list, verbose=0)
    predicted = int(probabilities.argmax(axis=-1)[0])

    # `index_word` is the tokenizer's reverse mapping; an index with no word
    # yields an empty string, as before.
    output_word = tokenizer.index_word.get(predicted, "")
    seed_text += " " + output_word

print(seed_text)


# Build a Speech to Text model.

In [None]:
import speech_recognition as sr

def speech_to_text(audio_file_path):
    """Transcribe an audio file with the Google Web Speech API.

    The outcome is printed, and the recognized text is also returned so
    callers can use it programmatically.

    Args:
        audio_file_path: Path to the audio file, opened via ``sr.AudioFile``.

    Returns:
        The recognized text, or ``None`` if the audio could not be understood
        or the API request failed.
    """
    # Initialize the recognizer
    recognizer = sr.Recognizer()

    # Load the audio file
    with sr.AudioFile(audio_file_path) as source:
        audio_data = recognizer.record(source)

    # Keep the try body limited to the call that can raise.
    try:
        # Use Google Web Speech API for speech recognition
        text = recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        print("Speech Recognition could not understand audio.")
        return None
    except sr.RequestError as e:
        print("Could not request results from Google Web Speech API; {0}".format(e))
        return None

    print("Text from Speech: {}".format(text))
    return text

# Replace 'your_audio_file.wav' with the path to your audio file.
# The function prints either the transcription or an error message.
speech_to_text('your_audio_file.wav')

In [None]:
# Build a Text to Speech model.

In [None]:
from gtts import gTTS
import os

def text_to_speech(text, language='en', output_file='output.mp3'):
    """Synthesize *text* to an audio file with gTTS and try to play it.

    Args:
        text: The text to convert to speech.
        language: Language code passed to gTTS as ``lang``.
        output_file: Path the generated audio is saved to.
    """
    import subprocess
    import sys

    # Create a gTTS object
    tts = gTTS(text=text, lang=language, slow=False)

    # Save the audio file
    tts.save(output_file)

    # Play the generated audio (optional, best effort). The path is never
    # spliced into a shell command line, so spaces or shell metacharacters
    # in `output_file` are handled safely.
    try:
        if sys.platform.startswith("win"):
            os.startfile(output_file)
        else:
            opener = "open" if sys.platform == "darwin" else "xdg-open"
            subprocess.run([opener, output_file], check=False)
    except OSError as exc:
        # Playback is a convenience; the file has already been saved.
        print("Could not play {}: {}".format(output_file, exc))

# Replace 'Hello, how are you?' with your desired text.
# Uses the defaults: language 'en', audio saved to 'output.mp3'.
text_to_speech('Hello, how are you?')


# Build an NLP language model to detect sentence/word errors in a text corpus.

In [None]:
from spellchecker import SpellChecker

def detect_errors(text):
    """Return the words in *text* that the spell checker does not recognize.

    Args:
        text: The text to check.

    Returns:
        The result of ``SpellChecker.unknown`` for the words found in *text*
        (presumably a set of lowercased words; confirm against the
        pyspellchecker documentation).
    """
    import re

    # Initialize the spell checker
    spell = SpellChecker()

    # Tokenize the text into words. Plain whitespace splitting leaves
    # punctuation attached ("mistkes.", "Hello,"), which makes correct words
    # look misspelled; extract runs of letters (with inner apostrophes) instead.
    words = re.findall(r"[^\W\d_]+(?:'[^\W\d_]+)*", text)

    # Find misspelled words
    misspelled = spell.unknown(words)

    return misspelled

# Example usage: check a sample sentence and report the outcome.
text = "This is an exmaple sentence with some mistkes."
errors = detect_errors(text)

if not errors:
    print("No errors detected.")
else:
    print("Errors found:", errors)


# Build a language model to correct the errors in the text.