In [1]:
!python --version
!pwd

Python 3.12.4
/d/projects/dataset-comandos/notebooks


In [1]:
import torch
from TTS.api import TTS
import os
import ipynbname
from pedalboard import *
from pedalboard.io import AudioFile
import numpy as np

from os import listdir
from os.path import isfile, join
import soundfile
import librosa
import wave

from audiomentations import *

nb_fname = ipynbname.name()
nb_path = ipynbname.path()

os.chdir('{0}/..'.format(os.path.split(nb_path)[0]))

print("Current directory: {0}".format(os.getcwd()))

if not os.path.exists("./speakers"): 
    os.makedirs("./speakers") 

if not os.path.exists("./data"): 
    os.makedirs("./data") 

path = "./speakers"

speakers = [f for f in listdir(path) if isfile(join(path, f))]

commands = [
    "Sim", "Não", "Cima", "Baixo", "Esquerda",
    "Direita", "Ligado", "Desligado", "Parar", "Vai",
    "Zero", "Um", "Dois", "Três", "Quatro", "Cinco", "Seis",
    "Sete", "Oito", "Nove", "Cama", "Pássaro", "Gato", "Cachorro", 
    "Feliz", "Casa", "Marvin", "Sheila", "Árvore", "Uau", "Trás", 
    "Avançar", "Seguir", "Aprender", "Visual"
]

device = "cuda" if torch.cuda.is_available() else "cpu"

print(TTS().list_models())

tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)

board_reverb = Pedalboard([
    Reverb(room_size=1),
])

noise = Compose([
    AddGaussianNoise(min_amplitude=0.01, max_amplitude=0.015, p=0.5),
])


mp3 = Compose([
    Mp3Compression(
        max_bitrate = 8,
    ),
])

board_phaser = Pedalboard([
    Phaser(),
])

piano = AddBackgroundNoise(
    sounds_path="./noise/piano.wav",
    min_snr_in_db=0,
    max_snr_in_db=0.0,
    noise_transform=PolarityInversion(),
    p=1.0
)

rain = AddBackgroundNoise(
    sounds_path="./noise/rain.wav",
    min_snr_in_db=0,
    max_snr_in_db=0.0,
    noise_transform=PolarityInversion(),
    p=1.0
)

for speaker in speakers:
    for command in commands:
        file = "./data/{0}_{1}".format(command, speaker)

        ### aplica ruído, reverberação, etc. para criar novos arquivos no dataset
        noise_file = "./data/{0}_noise_{1}".format(command, speaker)
        reverb_file = "./data/{0}_reverb_{1}".format(command, speaker)
        mp3_file = "./data/{0}_mp3_{1}".format(command, speaker)
        phaser_file = "./data/{0}_phaser_{1}".format(command, speaker)
        piano_file = "./data/{0}_piano_{1}".format(command, speaker)
        rain_file = "./data/{0}_rain_{1}".format(command, speaker)
        
        if not isfile(file):
            print("Processin file: {0}\n\n".format(file))
            tts.tts_to_file(
                text=command, speaker_wav="./speakers/{0}".format(speaker), 
                language="pt", file_path=file
            )
        else:
            print("File {0} already exist...\n\n".format(file))


        if not isfile(noise_file):
            print("Processin file: {0}\n\n".format(noise_file))

            with wave.open(file, "rb") as wave_file:
                frame_rate = wave_file.getframerate()

            signal, sr = librosa.load(file, sr = frame_rate)

            signal_noise = noise(samples=signal, sample_rate=frame_rate)

            soundfile.write(noise_file, signal_noise, frame_rate)
        else:
            print("File {0} already exist...\n\n".format(noise_file))

        if not isfile(mp3_file):
            print("Processin file: {0}\n\n".format(mp3_file))

            with wave.open(file, "rb") as wave_file:
                frame_rate = wave_file.getframerate()

            signal, sr = librosa.load(file, sr = frame_rate)

            signal_mp3 = mp3(samples=signal, sample_rate=frame_rate)

            soundfile.write(mp3_file, signal_mp3, frame_rate)
        else:
            print("File {0} already exist...\n\n".format(noise_file))

        if not isfile(piano_file):
            print("Processin file: {0}\n\n".format(piano_file))

            with wave.open(file, "rb") as wave_file:
                frame_rate = wave_file.getframerate()

            signal, sr = librosa.load(file, sr = frame_rate)

            signal_mp3 = piano(samples=signal, sample_rate=frame_rate)

            soundfile.write(piano_file, signal_mp3, frame_rate)
        else:
            print("File {0} already exist...\n\n".format(piano_file))

        if not isfile(rain_file):
            print("Processin file: {0}\n\n".format(rain_file))

            with wave.open(file, "rb") as wave_file:
                frame_rate = wave_file.getframerate()

            signal, sr = librosa.load(file, sr = frame_rate)

            signal_mp3 = rain(samples=signal, sample_rate=frame_rate)

            soundfile.write(rain_file, signal_mp3, frame_rate)
        else:
            print("File {0} already exist...\n\n".format(rain_file))

        ### phaser
        if not isfile(phaser_file):
            print("Processin file: {0}\n\n".format(phaser_file))

            with AudioFile(file) as f:
  
              # Open an audio file to write to:
              with AudioFile(phaser_file, 'w', f.samplerate, f.num_channels) as o:
              
                # Read one second of audio at a time, until the file is empty:
                while f.tell() < f.frames:
                  chunk = f.read(f.samplerate)
                  
                  # Run the audio through our pedalboard:
                  effected = board_phaser(chunk, f.samplerate, reset=False)
                  
                  # Write the output to our output file:
                  o.write(effected)
            
        else:
            print("File {0} already exist...\n\n".format(phaser_file))

        ### reverb
        if not isfile(reverb_file):
            print("Processin file: {0}\n\n".format(reverb_file))

            with AudioFile(file) as f:
  
              # Open an audio file to write to:
              with AudioFile(reverb_file, 'w', f.samplerate, f.num_channels) as o:
              
                # Read one second of audio at a time, until the file is empty:
                while f.tell() < f.frames:
                  chunk = f.read(f.samplerate)
                  
                  # Run the audio through our pedalboard:
                  effected = board_reverb(chunk, f.samplerate, reset=False)
                  
                  # Write the output to our output file:
                  o.write(effected)
            
        else:
            print("File {0} already exist...\n\n".format(reverb_file))

print("Finish...")

Current directory: D:\projects\dataset-comandos
<TTS.utils.manage.ModelManager object at 0x0000013EB5C249E0>
 > tts_models/multilingual/multi-dataset/xtts_v2 is already downloaded.


  from .autonotebook import tqdm as notebook_tqdm


 > Using model: xtts
File ./data/Sim_aaraoplay.wav already exist...


File ./data/Sim_noise_aaraoplay.wav already exist...


File ./data/Sim_noise_aaraoplay.wav already exist...


File ./data/Sim_piano_aaraoplay.wav already exist...


File ./data/Sim_rain_aaraoplay.wav already exist...


File ./data/Sim_phaser_aaraoplay.wav already exist...


File ./data/Sim_reverb_aaraoplay.wav already exist...


File ./data/Não_aaraoplay.wav already exist...


File ./data/Não_noise_aaraoplay.wav already exist...


File ./data/Não_noise_aaraoplay.wav already exist...


File ./data/Não_piano_aaraoplay.wav already exist...


File ./data/Não_rain_aaraoplay.wav already exist...


File ./data/Não_phaser_aaraoplay.wav already exist...


File ./data/Não_reverb_aaraoplay.wav already exist...


File ./data/Cima_aaraoplay.wav already exist...


File ./data/Cima_noise_aaraoplay.wav already exist...


File ./data/Cima_noise_aaraoplay.wav already exist...


File ./data/Cima_piano_aaraoplay.wav already exist...


F



Processin file: ./data/Sim_rain_itsConnorCreates.wav






Processin file: ./data/Sim_phaser_itsConnorCreates.wav


Processin file: ./data/Sim_reverb_itsConnorCreates.wav


Processin file: ./data/Não_itsConnorCreates.wav


 > Text splitted to sentences.
['Não']
 > Processing time: 0.5796587467193604
 > Real-time factor: 0.462025569880057
Processin file: ./data/Não_noise_itsConnorCreates.wav


Processin file: ./data/Não_mp3_itsConnorCreates.wav


Processin file: ./data/Não_piano_itsConnorCreates.wav


Processin file: ./data/Não_rain_itsConnorCreates.wav


Processin file: ./data/Não_phaser_itsConnorCreates.wav


Processin file: ./data/Não_reverb_itsConnorCreates.wav


Processin file: ./data/Cima_itsConnorCreates.wav


 > Text splitted to sentences.
['Cima']
 > Processing time: 0.6499390602111816
 > Real-time factor: 0.462414696620307
Processin file: ./data/Cima_noise_itsConnorCreates.wav


Processin file: ./data/Cima_mp3_itsConnorCreates.wav


Processin file: ./data/Cima_piano_itsConnorCreates.wav


Processin file: ./data/Cima_rain_itsConnorCrea