In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        full_path = os.path.join(root, file_name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from transformers import pipeline
from datasets import load_dataset
import soundfile as sf
import torch
import re
from pydub import AudioSegment  # For audio concatenation

def preprocess_text(text):
    """
    Normalise whitespace in ``text``.

    Every run of whitespace is collapsed to a single space, leading and
    trailing whitespace is trimmed, and a single whitespace character that
    directly precedes one of ``? . ! , ; :`` is removed.

    Args:
        text (str): The raw input text.

    Returns:
        str: The normalised text.
    """
    whitespace_run = re.compile(r'\s+')
    space_before_punct = re.compile(r'\s([?.!,;:])')

    # Collapse whitespace runs first, then trim the ends.
    collapsed = whitespace_run.sub(' ', text).strip()
    # Pull punctuation back against the preceding word.
    return space_before_punct.sub(r'\1', collapsed)

def split_text_into_chunks(text, max_length=200):
    """
    Splits the text into chunks of at most ``max_length`` characters.

    The text is split on whitespace and words are greedily packed into
    chunks joined by single spaces, so chunks always break between words.
    A single word longer than ``max_length`` is never cut; it is emitted as
    a chunk of its own (the only case where a chunk exceeds ``max_length``).

    Args:
        text (str): The text to split.
        max_length (int): Maximum number of characters per chunk (default: 200).

    Returns:
        list[str]: The non-empty chunks, in order. Empty or whitespace-only
        input yields an empty list.
    """
    chunks = []
    current_chunk = []
    current_length = 0  # always equals len(" ".join(current_chunk))

    for word in text.split():
        # A separating space is only needed when the chunk already has a word;
        # charging it for the first word too would reject chunks of exactly
        # max_length characters.
        added_length = len(word) + (1 if current_chunk else 0)
        if current_chunk and current_length + added_length > max_length:
            # Only flush a chunk that actually holds words, so an oversized
            # first word never produces an empty leading chunk.
            chunks.append(" ".join(current_chunk))
            current_chunk = [word]
            current_length = len(word)
        else:
            current_chunk.append(word)
            current_length += added_length

    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks

def text_to_speech(text, output_file="speech.wav", max_length=200):
    """
    Converts text to speech, generating audio in chunks if necessary.

    The text is split into chunks with ``split_text_into_chunks``, each chunk
    is synthesised with the "microsoft/speecht5_tts" pipeline, and the
    resulting audio segments are concatenated and exported as one WAV file.
    Per-chunk WAV files are written to a temporary directory that is removed
    before the function returns, so the working directory is not littered.

    Args:
        text (str): The text to be converted into speech.
        output_file (str): The name of the output audio file (default: "speech.wav").
        max_length (int): Maximum length of text in each chunk (default: 200 characters).

    Returns:
        str: The path to the saved audio file.

    Raises:
        ValueError: If ``text`` is empty or contains only whitespace.
    """
    # Local imports: only needed for the scratch-directory handling below.
    import os
    import tempfile

    # Fail fast, before the slow model and dataset loads, instead of crashing
    # later when there are no audio segments to concatenate.
    if not text or not text.strip():
        raise ValueError("text must contain at least one non-whitespace character")

    # Split text into chunks; drop empty ones so the model never receives "".
    chunks = [chunk for chunk in split_text_into_chunks(text, max_length) if chunk]

    # Initialize the TTS pipeline
    synthesiser = pipeline("text-to-speech", "microsoft/speecht5_tts")

    # Load the speaker embeddings dataset; entry 7306 supplies the x-vector
    # used as the speaker embedding for every chunk.
    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)

    # Generate audio for each chunk. The per-chunk WAV files are only a bridge
    # between soundfile and pydub, so they live in a temporary directory.
    audio_segments = []
    with tempfile.TemporaryDirectory() as scratch_dir:
        for i, chunk in enumerate(chunks):
            print(f"Processing chunk {i+1}/{len(chunks)}: {chunk}")
            speech = synthesiser(chunk, forward_params={"speaker_embeddings": speaker_embedding})
            temp_file = os.path.join(scratch_dir, f"chunk_{i}.wav")
            sf.write(temp_file, speech["audio"], samplerate=speech["sampling_rate"])
            audio_segments.append(AudioSegment.from_file(temp_file))

    # Concatenate all audio segments, starting from the first segment rather
    # than from the integer 0 that a bare sum() would use.
    final_audio = sum(audio_segments[1:], audio_segments[0])
    final_audio.export(output_file, format="wav")
    print(f"Final audio saved to {output_file}")

    return output_file

: 

In [2]:
# Sample input for the text-to-speech demo: a multi-paragraph description of
# the biscuit manufacturing process, kept as one triple-quoted string.
text = """The production of biscuits involves several key stages to transform raw ingredients into the final packaged product. Here's an overview of the typical biscuit manufacturing process: 
Pre-Mixing: All ingredients are measured and prepared for mixing. This includes sifting dry ingredients like flour to remove impurities and ensure uniformity.

Mixing: The prepared ingredients are combined to form a consistent dough. The mixing time and speed are carefully controlled to achieve the desired dough texture, which influences the biscuit's final quality.

Moulding/Forming: The dough is shaped into the desired biscuit forms using equipment such as rollers, cutters, or moulders. This step determines the biscuit's size and shape.

Baking: The shaped dough pieces are baked in ovens at temperatures typically ranging from 160°C to 180°C. Baking time and temperature are adjusted based on the biscuit type to ensure proper texture and color development.

Cooling: After baking, biscuits are cooled to room temperature on cooling conveyors. Proper cooling is essential to prevent moisture condensation, which can affect texture and shelf life.

Packaging: Cooled biscuits are packed into appropriate packaging materials to maintain freshness and protect them during transportation. Packaging types vary, including individual packs, family packs, or bulk packaging.

Each of these stages is crucial in ensuring the production of high-quality biscuits that meet consumer expectations."""

In [3]:
import IPython.display as ipd

# Synthesise the sample text; max_length=600 overrides the 200-character
# default chunk size of text_to_speech.
audio_file = text_to_speech(text, max_length=600)

# Last expression of the cell: renders an inline audio player in the notebook.
ipd.Audio(audio_file)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Device set to use cpu
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Generating validation split: 100%|██████████| 7931/7931 [00:00<00:00, 46443.83 examples/s]


Processing chunk 1/3: The production of biscuits involves several key stages to transform raw ingredients into the final packaged product. Here's an overview of the typical biscuit manufacturing process: Pre-Mixing: All ingredients are measured and prepared for mixing. This includes sifting dry ingredients like flour to remove impurities and ensure uniformity. Mixing: The prepared ingredients are combined to form a consistent dough. The mixing time and speed are carefully controlled to achieve the desired dough texture, which influences the biscuit's final quality. Moulding/Forming: The dough is shaped into the
Processing chunk 2/3: desired biscuit forms using equipment such as rollers, cutters, or moulders. This step determines the biscuit's size and shape. Baking: The shaped dough pieces are baked in ovens at temperatures typically ranging from 160°C to 180°C. Baking time and temperature are adjusted based on the biscuit type to ensure proper texture and color development. Cooling: A