In [1]:
# check GPU
!nvidia-smi -L

GPU 0: Tesla T4 (UUID: GPU-aaeee1db-9652-6d1c-8dec-b64fe663c6d2)


In [2]:
# mount gdrive/drive/google drive
from google.colab import drive
drive.mount('drive', force_remount=True)

Mounted at drive


In [4]:
# clone
%cd /content
!git clone https://github.com/as-ideas/DeepPitchExtractor
%cd DeepPitchExtractor
!pip install pyworld

/content
fatal: destination path 'DeepPitchExtractor' already exists and is not an empty directory.
/content/DeepPitchExtractor
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyworld
  Downloading pyworld-0.3.2.tar.gz (214 kB)
[K     |████████████████████████████████| 214 kB 19.0 MB/s 
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Building wheels for collected packages: pyworld
  Building wheel for pyworld (PEP 517) ... [?25l[?25hdone
  Created wheel for pyworld: filename=pyworld-0.3.2-cp38-cp38-linux_x86_64.whl size=673637 sha256=60cea7f8b9f4b1a18b6329b9035e0b35bb2542dba1f8901039abef657dcdfa41
  Stored in directory: /root/.cache/pip/wheels/b7/b1/d2/8c78d691f7d5b0bb4ba9993926db209429c92686476837627f
Successfully built pyworld
Installing collected packages: pyworld
Successfully installed pyworld-0.3.2


In [40]:
%%writefile /content/DeepPitchExtractor/preprocess.py
from random import Random

import pyworld as pw
import torch
import librosa
import numpy as np
from typing import Dict, Any, Tuple, Union

import tqdm
import argparse
from multiprocessing import Pool, cpu_count
from pathlib import Path

from dpe.audio import AudioProcessor
from dpe.utils import read_config, pickle_binary


def valid_n_workers(num: int) -> int:
    n = int(num)
    if n < 1:
        raise argparse.ArgumentTypeError('%r must be an integer greater than 0' % num)
    return n


class Preprocessor:

    def __init__(self,
                 data_dir: Path,
                 audio_processor: AudioProcessor) -> None:
        self._data_dir = data_dir
        self._spec_dir = self._data_dir / 'specs'
        self._pitch_dir = self._data_dir / 'pitches'
        self._spec_dir.mkdir(parents=True, exist_ok=True)
        self._pitch_dir.mkdir(parents=True, exist_ok=True)
        self._audio = audio_processor
        pass

    def __call__(self, path: Path) -> Union[Tuple[str, int], None]:
        try:
            item_id = path.stem
            wav, _ = librosa.load(str(path))
            spec = librosa.stft(
                y=wav,
                n_fft=self._audio.n_fft,
                hop_length=self._audio.hop_length,
                win_length=self._audio.win_length)
            spec = np.abs(spec)
            spec = torch.tensor(spec).float()
            pitch, _ = pw.dio(wav.astype(np.float64), self._audio.sample_rate,
                              frame_period=self._audio.hop_length / self._audio.sample_rate * 1000)
            pitch = torch.tensor(pitch).float()
            torch.save(spec, self._spec_dir / f'{item_id}.pt')
            torch.save(pitch, self._pitch_dir / f'{item_id}.pt')
            spec_len = spec.shape[-1]
            return item_id, spec_len
        except BaseException as e:
            print(e)
            return None


parser = argparse.ArgumentParser(description='Preprocessing for WaveRNN and Tacotron')
parser.add_argument('--path', '-p', help='directly point to dataset path')
parser.add_argument('--num_workers', '-w', metavar='N', type=valid_n_workers, default=cpu_count()-1, help='The number of worker threads to use for preprocessing')
parser.add_argument('--config', metavar='FILE', default='config.yaml', help='The config containing all hyperparams.')
args = parser.parse_args()


if __name__ == '__main__':
    config = read_config(args.config)
    print(config)
#    print(Path(config['paths/log_dir']))
    data_dir = Path(config[['data_dir']])
    wav_files = list(Path(args.path).glob('**/*.wav'))
    n_workers = max(1, args.num_workers)
    pool = Pool(processes=n_workers)
    audio = AudioProcessor(**config['audio'])
    preprocessor = Preprocessor(data_dir=data_dir, audio_processor=audio)
    dataset = []
    for data_point in tqdm.tqdm(pool.imap_unordered(preprocessor, wav_files), total=len(wav_files)):
        if data_point is not None:
            dataset.append(data_point)
    Random(42).shuffle(dataset)
    num_val = config['training']['n_val']
    val_dataset = dataset[:num_val]
    train_dataset = dataset[num_val:]
    pickle_binary(train_dataset, data_dir / 'train_dataset.pkl')
    pickle_binary(val_dataset, data_dir / 'val_dataset.pkl')

Overwriting /content/DeepPitchExtractor/preprocess.py


In [26]:
%%writefile /content/DeepPitchExtractor/config.yaml
paths:
  data_dir: 'data'
  checkpoint_dir: 'checkpoints'
  log_dir: 'pitch_log'

audio:
  sample_rate: 22050
  n_fft: 1024
  hop_length: 256
  win_length: 1024
  pitch_min: 50
  pitch_max: 500

training:
  n_val: 100
  batch_size: 32
  n_epochs: 1000

model:
  conv_channels: 256
  out_channels: 512
  dropout: 0.5

Overwriting /content/DeepPitchExtractor/config.yaml


In [41]:
# preprocessing:
%cd /content/DeepPitchExtractor
!mkdir audiodataset
wavs_path = "/content/drive/MyDrive/Fakeyou/oriol/wavs2.zip" #@param {type:"string"}
#!unzip -j "$wavs_path" -d /content/DeepPitchExtractor/audiodataset
!mv /content/DeepPitchExtractor/dpe/train.py /content/DeepPitchExtractor
!mv /content/DeepPitchExtractor/dpe/preprocess.py /content/DeepPitchExtractor
!mv /content/DeepPitchExtractor/dpe/predict.py /content/DeepPitchExtractor
!mv /content/DeepPitchExtractor/dpe/dataset.py /content/DeepPitchExtractor
!mv /content/DeepPitchExtractor/dpe/config.yaml /content/DeepPitchExtractor
#!mkdir data
!mkdir data/specs
!mkdir data/pitches
#!mkdir dpe/data
!python preprocess.py --path /content/DeepPitchExtractor/audiodataset
print("Listo.")

/content/DeepPitchExtractor
mkdir: cannot create directory ‘audiodataset’: File exists
mv: cannot stat '/content/DeepPitchExtractor/dpe/train.py': No such file or directory
mv: cannot stat '/content/DeepPitchExtractor/dpe/preprocess.py': No such file or directory
mv: cannot stat '/content/DeepPitchExtractor/dpe/predict.py': No such file or directory
mv: cannot stat '/content/DeepPitchExtractor/dpe/dataset.py': No such file or directory
mv: cannot stat '/content/DeepPitchExtractor/dpe/config.yaml': No such file or directory
mkdir: cannot create directory ‘data/specs’: File exists
mkdir: cannot create directory ‘data/pitches’: File exists
{'paths': {'data_dir': 'data', 'checkpoint_dir': 'checkpoints', 'log_dir': 'pitch_log'}, 'audio': {'sample_rate': 22050, 'n_fft': 1024, 'hop_length': 256, 'win_length': 1024, 'pitch_min': 50, 'pitch_max': 500}, 'training': {'n_val': 100, 'batch_size': 32, 'n_epochs': 1000}, 'model': {'conv_channels': 256, 'out_channels': 512, 'dropout': 0.5}}
Traceback 