**Important!** Some of this notebook's dependencies are no longer being updated, so it requires Python 3.11 and will not work with newer Python versions. Please make sure your environment meets this requirement.

In [None]:
!pip install "transformers==4.42.4" "accelerate>=0.26.0"

In [None]:
!pip install TTS

You can skip the cell below if you don't want to save the results to Google Drive.

In [None]:
# Optional step: mount Google Drive when running inside Colab.
# Outside Colab the google.colab package cannot be imported, so the mount is
# skipped instead of aborting a "Run All".
try:
    from google.colab import drive
except ImportError:
    print("google.colab is not available: skipping Google Drive mount.")
else:
    drive.mount('/content/drive')

In [None]:
import torch
from TTS.tts.layers.xtts.gpt import GPT2InferenceModel
from transformers.generation import GenerationMixin
import glob
import numpy as np
import random
import soundfile as sf
import torchaudio
import shutil
from torchaudio.datasets import LIBRISPEECH
from pathlib import Path
import os

In [None]:
# Keep a handle on the real torch.load so it can be restored afterwards.
original_load = torch.load


def unsafe_load(*args, **kwargs):
    """Forward to the original ``torch.load`` with ``weights_only`` defaulting to False.

    An explicit ``weights_only`` passed by the caller is left untouched.

    SECURITY: ``weights_only=False`` allows arbitrary pickled objects to be
    executed while loading. Only use this for checkpoints from a trusted source.
    """
    kwargs.setdefault('weights_only', False)
    return original_load(*args, **kwargs)


device = "cuda" if torch.cuda.is_available() else "cpu"

# Patch torch.load only for the duration of the import and model load.
# NOTE(review): presumably needed because recent PyTorch releases default to
# weights_only=True, which rejects the XTTS checkpoint - confirm.
torch.load = unsafe_load
try:
    from TTS.api import TTS

    tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", progress_bar=True).to(device)
finally:
    # Always undo the patch, even when the import or the model load fails.
    # Otherwise a re-run of this cell would capture the wrapper as
    # `original_load` and make unsafe_load call itself recursively.
    torch.load = original_load

In [None]:
# Wake phrase synthesized by the wake-word generation loop.
command = "Wake up, Typist!"

# Emotion labels; the generation loops pick one of them for each sample.
emotions = [
    "Happy",
    "Sad",
    "Angry",
    "Dull",
]

In [None]:
# Transform that converts 24 kHz audio to 16 kHz; it is kept on the CPU.
resampler = torchaudio.transforms.Resample(
    orig_freq=24000,
    new_freq=16000,
).to("cpu")

In [None]:
# Number of wake-phrase samples to synthesize.
num_of_samples = 200

# Single definition of the search pattern, used before and after the download.
librispeech_pattern = "data/raw/LIBRISPEECH/LibriSpeech/dev-clean/**/*.flac"

# glob returns matches in arbitrary order; sort for a stable file list.
librispeech_files = sorted(glob.glob(librispeech_pattern, recursive=True))

if not librispeech_files:
    print("No LibriSpeech files found: downloading dataset...")
    root = Path("data/raw/LIBRISPEECH")
    root.mkdir(parents=True, exist_ok=True)

    # Constructing the dataset with download=True fetches it into `root`.
    ds = LIBRISPEECH(root=root, url="dev-clean", download=True)

    librispeech_files = sorted(glob.glob(librispeech_pattern, recursive=True))

# Fail here with a clear message rather than later with an IndexError from
# random.choice() on an empty list.
if not librispeech_files:
    raise FileNotFoundError(
        f"No LibriSpeech .flac files found under pattern {librispeech_pattern!r}"
    )

In [None]:
# Synthesize `num_of_samples` recordings of the wake phrase, each with a random
# reference recording, emotion label and temperature, and store them as
# 16 kHz FLAC files.
wakeup_dir = "data/raw/custom/wakeup_dataset"

# Create the output directory once instead of on every iteration.
os.makedirs(wakeup_dir, exist_ok=True)

failed_samples = 0

for num in range(num_of_samples):
    reference_wav = [random.choice(librispeech_files)]

    emotion = random.choice(emotions)

    try:
        wav = tts.tts(
            text=command,
            language="en",
            emotion=emotion,
            temperature=random.uniform(0.7, 0.8),
            speaker_wav=reference_wav
        )

        # Add a leading dimension of size 1 before resampling and saving.
        wav_tensor = torch.tensor(wav).unsqueeze(0).float()
        wav_16k = resampler(wav_tensor)

        save_path = f"{wakeup_dir}/sample_{num}.flac"
        torchaudio.save(save_path, wav_16k, 16000, format="flac")

    except Exception as e:
        # Best effort: a failed sample is reported and skipped, not fatal.
        failed_samples += 1
        print(f"Error while generating sample {num}: {e}")

# Make swallowed failures visible at the end of the run.
print(f"Saved {num_of_samples - failed_samples}/{num_of_samples} wake-phrase samples to {wakeup_dir}")

In [None]:
# Zip the wake-phrase sample directory; the cell displays the archive path.
shutil.make_archive(
    base_name="wakeup_dataset",
    format="zip",
    root_dir="data/raw/custom/wakeup_dataset",
)

In [None]:
# Fixed voice commands; each phrase is synthesized verbatim.
static_commands = [
    "Clear all", "Send all", "Enter all",
    "Delete last word", "Delete last sentence",
    "Place dot", "Place period",
    "New paragraph",
    "Insert phone number", "Insert mail",
    "Stop listening",
]

# Words used to fill the two slots of the dynamic "replace ... with ..." command.
random_nouns = [
    "apple", "table", "code", "file",
    "text", "data", "screen", "line",
    "word", "cat", "system", "banana",
]

In [None]:
def generate_dynamic_command(nouns=None):
    """Build a random ``replace <word> with <other word>`` command.

    Args:
        nouns: Optional sequence of candidate words. When omitted, the
            notebook-level ``random_nouns`` list is used.

    Returns:
        A string of the form ``"replace {word1} with {word2}"``. The two words
        are drawn from different positions of the sequence, so they differ as
        long as the sequence contains no duplicates.

    Raises:
        ValueError: If fewer than two candidate words are available.
    """
    if nouns is None:
        nouns = random_nouns
    # sample() draws without replacement, which rules out the degenerate
    # "replace X with X" command that two independent choice() calls allow.
    word1, word2 = random.sample(nouns, 2)
    return f"replace {word1} with {word2}"

In [None]:
# Synthesize every static command several times, each time with a random
# reference recording, emotion label and temperature, and store the results
# as 16 kHz FLAC files.
samples_per_command = 20
commands_dir = "data/raw/custom/commands_dataset"

# Create the output directory once instead of on every iteration.
os.makedirs(commands_dir, exist_ok=True)

# Running count of successfully written command files.
total_files = 0

# The loop variable is deliberately NOT called `command`: that name holds the
# wake phrase at notebook level, and overwriting it would make a re-run of the
# wake-phrase cell synthesize the last static command instead.
for static_command in static_commands:
    print(f"Generating: '{static_command}'")

    # The file name stem depends only on the command, so compute it once.
    safe_filename = static_command.replace(" ", "_").lower()

    for i in range(samples_per_command):
        try:
            ref_wav = [random.choice(librispeech_files)]

            wav = tts.tts(
                text=static_command,
                language="en",
                emotion=random.choice(emotions),
                speaker_wav=ref_wav,
                temperature=random.uniform(0.7, 0.8)
            )

            # Add a leading dimension of size 1 before resampling and saving.
            wav_tensor = torch.tensor(wav).unsqueeze(0).float()
            wav_16k = resampler(wav_tensor)

            save_path = f"{commands_dir}/{safe_filename}_{i}.flac"

            torchaudio.save(save_path, wav_16k, 16000, format="flac")
            total_files += 1

        except Exception as e:
            # Best effort: a failed sample is reported and skipped, not fatal.
            print(f"Error: {e}")

In [None]:
print("Generating dynamic 'replace' command")

# Ensure the output directory exists even when the static-command cell was
# skipped; otherwise every save fails and is only reported as a printed error.
dynamic_dir = "data/raw/custom/commands_dataset"
os.makedirs(dynamic_dir, exist_ok=True)

# Continue the running count from the static-command cell when it exists,
# otherwise start from zero. Without this, a missing counter raises inside the
# try block after the file is saved and is misreported as a generation error.
total_files = globals().get("total_files", 0)

for i in range(50):
    try:
        cmd_text = generate_dynamic_command()

        ref_wav = [random.choice(librispeech_files)]
        wav = tts.tts(text=cmd_text, language="en", speaker_wav=ref_wav, emotion="Dull")

        # Add a leading dimension of size 1 before resampling and saving.
        wav_tensor = torch.tensor(wav).unsqueeze(0).float()
        wav_16k = resampler(wav_tensor)

        safe_filename = cmd_text.replace(" ", "_").lower()
        save_path = f"{dynamic_dir}/dynamic_{safe_filename}_{i}.flac"

        torchaudio.save(save_path, wav_16k, 16000, format="flac")
        total_files += 1
    except Exception as e:
        # Best effort: a failed sample is reported and skipped, not fatal.
        print(f"Error: {e}")

In [None]:
# Zip the command sample directory; the cell displays the archive path.
shutil.make_archive(
    base_name="commands_dataset",
    format="zip",
    root_dir="data/raw/custom/commands_dataset",
)