# Generating speech with gTTS

In [None]:
%pip install gtts

In [None]:
from gtts import gTTS
import os

def text_to_speech(text, language='en', save_to_file=False, file_name='output.mp3'):
    """Convert *text* to speech with gTTS and either save it or play it.

    Args:
        text: The text to synthesize.
        language: Language code passed to gTTS as ``lang``.
        save_to_file: When true, write the audio to ``file_name`` and return
            without playing it. When false, write the audio to ``temp.mp3``
            in the current directory and open it with the platform's default
            handler for the file.
        file_name: Destination path used only when ``save_to_file`` is true.
    """
    # Local imports keep this block self-contained; only `os` is imported
    # at file level.
    import subprocess
    import sys

    tts = gTTS(text=text, lang=language, slow=False)

    if save_to_file:
        tts.save(file_name)
        print(f'Audio saved to "{file_name}"')
        return

    temp_name = 'temp.mp3'
    tts.save(temp_name)

    # The original `os.system('start ...')` only works on Windows; choose the
    # launcher per platform and avoid going through a shell.
    try:
        if sys.platform.startswith('win'):
            os.startfile(temp_name)
        elif sys.platform == 'darwin':
            subprocess.run(['open', temp_name], check=False)
        else:
            subprocess.run(['xdg-open', temp_name], check=False)
    except OSError as exc:
        # Launcher missing or no handler registered: the audio file is still
        # on disk, so tell the user where it is instead of crashing.
        print(f'Could not play "{temp_name}": {exc}')

if __name__ == "__main__":
    # Ask for the text, then synthesize it in French and write it to the
    # function's default output file.
    input_text = input("Enter the text to convert to speech: ")
    text_to_speech(
        input_text,
        language='fr',
        save_to_file=True,
    )

# Training Tacotron 2

In [None]:
import tensorflow as tf
from tacotron2 import Tacotron2
from utils import get_dummy_dataset  # Mock dataset for demonstration purposes

# Placeholder data for demonstration - swap in your own preprocessed dataset.
train_data, train_text = get_dummy_dataset()

# Hyperparameters
batch_size = 4
num_epochs = 10
learning_rate = 0.001

# Input pipeline: slice the paired arrays, shuffle with a buffer as large as
# the dataset, then group into batches.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_data, train_text))
    .shuffle(len(train_data))
    .batch(batch_size)
)

# Model - adjust the constructor arguments to match your dataset.
tacotron = Tacotron2(num_mel_channels=80, num_symbols=39)

# Optimizer and loss function
optimizer = tf.keras.optimizers.Adam(learning_rate)
mse_loss = tf.keras.losses.MeanSquaredError()

# Training loop: one pass over the batched dataset per epoch.
for epoch in range(num_epochs):
    total_loss = 0.0

    for audio_batch, text_batch in train_dataset:
        # Record the forward pass so gradients can be taken afterwards.
        with tf.GradientTape() as tape:
            mel_output, mel_output_postnet, _ = tacotron([text_batch, audio_batch])

            # Sum of the MSE on both model outputs against the audio batch.
            pre_postnet_loss = mse_loss(audio_batch, mel_output)
            post_postnet_loss = mse_loss(audio_batch, mel_output_postnet)
            loss = pre_postnet_loss + post_postnet_loss

        # Backpropagate and apply one optimizer step.
        trainable = tacotron.trainable_variables
        gradients = tape.gradient(loss, trainable)
        optimizer.apply_gradients(zip(gradients, trainable))

        total_loss += loss

    # Report the mean per-batch loss for this epoch.
    mean_loss = total_loss / len(train_dataset)
    print(f"Epoch {epoch+1}, Loss: {mean_loss}")

# Training completed! Now you can use the trained Tacotron model for synthesis.


# Recording Audio

In [None]:
%pip install pyaudio

In [None]:
%pip install soundfile

In [None]:
import pyaudio
import wave

def record_audio(output_file, duration, sample_rate=44100, channels=1):
    """Record audio through PyAudio and write it to a WAV file.

    Audio is read in 1024-frame chunks of 16-bit samples. The number of
    chunks is ``int(sample_rate / 1024 * duration)``, so the recording is
    rounded down to a whole number of chunks and may be slightly shorter
    than ``duration``.

    Args:
        output_file: Path of the WAV file to write.
        duration: Recording length in seconds.
        sample_rate: Sampling rate in Hz.
        channels: Number of input channels.
    """
    CHUNK = 1024
    FORMAT = pyaudio.paInt16

    audio = pyaudio.PyAudio()
    frames = []

    try:
        # Query the sample width while the PyAudio instance is still live,
        # rather than after terminate().
        sample_width = audio.get_sample_size(FORMAT)

        stream = audio.open(
            format=FORMAT,
            channels=channels,
            rate=sample_rate,
            input=True,
            frames_per_buffer=CHUNK
        )
        try:
            print("Recording...")

            for _ in range(int(sample_rate / CHUNK * duration)):
                frames.append(stream.read(CHUNK))

            print("Recording finished.")
        finally:
            # Release the stream even if a read fails midway.
            stream.stop_stream()
            stream.close()
    finally:
        # Always release PyAudio, including when opening the stream fails.
        audio.terminate()

    # Save audio as a WAV file
    with wave.open(output_file, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(sample_rate)
        wf.writeframes(b''.join(frames))

if __name__ == "__main__":
    # Demo settings: adjust the length (in seconds) and the destination
    # filename as needed.
    duration = 5
    output_file = "recorded_audio.wav"
    record_audio(output_file, duration)


# Voice Recognition

In [None]:
import speech_recognition as sr
from langdetect import detect

def transcribe_and_detect_language(wav_file):
    """Transcribe a WAV file and detect the language of the transcript.

    The audio is sent to ``recognize_google`` with ``language="fr-FR"``, and
    the resulting text is passed to ``langdetect.detect``.

    Args:
        wav_file: Path to the audio file to transcribe.

    Returns:
        A ``(text, detected_language)`` tuple. If recognition fails (the
        audio could not be understood or the request failed), a message is
        printed and ``(None, None)`` is returned so callers can always
        unpack the result.
    """
    recognizer = sr.Recognizer()

    # Load the whole audio file into memory.
    with sr.AudioFile(wav_file) as source:
        audio = recognizer.record(source)

    # Recognition is requested with French (fr-FR) as the language.
    try:
        text = recognizer.recognize_google(audio, language="fr-FR")
    except sr.UnknownValueError:
        print("Google Web Speech API could not understand the audio.")
        return None, None
    except sr.RequestError as e:
        print(f"Could not request results from Google Web Speech API; {e}")
        return None, None

    detected_language = detect(text)
    return text, detected_language

if __name__ == "__main__":
    # Point this at the .wav file you want to transcribe.
    wav_file = "recorded_audio.wav"
    recognized_text, language = transcribe_and_detect_language(wav_file)

    # Each label is followed by its value on the next line.
    print("Recognized Text:", recognized_text, sep="\n")
    print("Detected Language:", language, sep="\n")


# Using ChatGPT to generate text

In [None]:
import requests

def chat_with_gpt(text, timeout=30):
    """Send *text* to the OpenAI chat completions endpoint and return the reply.

    Args:
        text: The user message to send.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The content of the first choice's message, or ``None`` if the
        request could not be completed or returned a non-200 status (a
        message is printed in both cases).
    """
    import os  # local import keeps this block self-contained

    # Prefer a key from the environment so it need not be committed with the
    # code; otherwise fall back to the placeholder, which must be replaced.
    api_key = os.environ.get("OPENAI_API_KEY", "****")
    endpoint = "https://api.openai.com/v1/chat/completions"

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    payload = {
        "model": "gpt-3.5-turbo",  # The ChatGPT model to use
        "messages": [{"role": "system", "content": "You are a helpful assistant."},
                     {"role": "user", "content": text}],
        "max_tokens": 100
    }

    # Without a timeout the call can block indefinitely on a stalled
    # connection; network errors are reported the same way as bad statuses.
    try:
        response = requests.post(endpoint, json=payload, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Request failed: {exc}")
        return None

    if response.status_code != 200:
        print(f"Request failed with status code {response.status_code}")
        return None

    return response.json()["choices"][0]["message"]["content"]

if __name__ == "__main__":
    # Interactive loop: forward each prompt to ChatGPT until the user types
    # "quit" or "exit" (case-insensitive).
    while True:
        user_input = input("You: ")
        if user_input.lower() in ('quit', 'exit'):
            break
        response = chat_with_gpt(user_input)
        print("ChatGPT:", response)
