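# 概要

このノートブックでは、pyannote/embedding を用いて音声データセットから話者embeddingを抽出し、メタ情報（YAML）とともに voice-changer-vector-search リポジトリへ登録します。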

# 設定

## ライブラリのimport

In [1]:
import json
import os

import numpy as np
import pandas as pd

from pyannote.audio import Model, Inference
from IPython.display import Audio
from tqdm.notebook import tqdm

## ディレクトリの配置

In [2]:
import os
import shutil

# Make sure the working directories one level above the current working
# directory exist; exist_ok=True keeps this cell safe to re-run.
for directory in ("../input", "../output"):
    os.makedirs(directory, exist_ok=True)

## HuggingFaceよりembeddingモデルのロード

In [3]:
# Read the Hugging Face access token from the HF_TOKEN environment variable so
# a real credential never has to be written into (and committed with) the
# notebook. When the variable is unset, the original placeholder is used and
# must be replaced by hand before running this cell.
token = os.environ.get("HF_TOKEN", "your-huggingface-token")
model = Model.from_pretrained("pyannote/embedding",
                              use_auth_token=token)
# window="whole" requests whole-file inference (per the pyannote.audio docs,
# one embedding per audio file). device="cuda" requires a CUDA-capable GPU.
inference = Inference(model, window="whole", device="cuda")

## (optional) dataの解凍

In [4]:
# Optional: uncomment the line below to unpack an archive into ../input
# (replace "your_data_path" with the path to the archive).
# shutil.unpack_archive("your_data_path", extract_dir="../input")

## embeddingの抽出

In [5]:
search_dir = "../dataset/ITAcorpus_amitaro/"
target_extension = ".wav"

# Recursively collect every file under search_dir whose name ends with
# target_extension. os.path.join avoids the doubled separator that manual "/"
# concatenation produces when the directory already ends with one, and sorting
# makes the processing order independent of the file system's listing order.
paths = sorted(
    os.path.join(dirpath, filename)
    for dirpath, _dirnames, filenames in os.walk(search_dir)
    for filename in filenames
    if filename.endswith(target_extension)
)

# Fail early with an actionable message instead of letting np.stack raise its
# generic "need at least one array to stack" error on an empty list.
if not paths:
    raise FileNotFoundError(
        f"No '{target_extension}' files found under '{search_dir}'"
    )

# One embedding per audio file, stacked along a new leading axis.
embeddings = np.stack([inference(path) for path in tqdm(paths)], axis=0)

# L2-normalise every per-file embedding along the last axis (the 1e-9 floor
# guards against division by zero), then average over all files.
unit_embeddings = embeddings / np.maximum(
    1e-9, np.linalg.norm(embeddings, ord=2, axis=-1, keepdims=True)
)
mean_embedding = unit_embeddings.mean(axis=0)

# Re-normalise the averaged vector so the saved embedding has unit L2 norm.
output_embedding = mean_embedding / np.maximum(
    1e-9, np.linalg.norm(mean_embedding, ord=2, axis=-1, keepdims=True)
)
np.save("../output/output_embedding.npy", output_embedding)

  0%|          | 0/424 [00:00<?, ?it/s]

## meta情報の記入

### 音声データセットの例

In [6]:
%%writefile ../output/output_meta.yaml
# Metadata for one entry (voice dataset example). A later cell of this
# notebook loads this file with yaml.safe_load and merges the keys of every
# top-level section into a single row that is appended to meta.csv.
management:
  voice_id: 0
# The keys of the display section are also the columns exported to meta.tsv.
display:
  display_name: あみたろのITAコーパス読み上げ音声
  distribution_type: voice
  url: https://amitaro.net/voice/corpus-list/ita/
  voice_sample: https://www.youtube.com/embed/ksKu6JyLP5I
  thumbnail: https://amitaro.net/wp-content/uploads/cfc421fa043881e8a70485cbf9c04b39-150x150.png
voice_info:
  speaker: あみたろ
  actor: あみたろ
  corpus: ITA
  language: ja
  voice_gender: female
license:
  provider: あみたろの声素材工房
  register: nadare
  credit: あみたろの声素材工房
  license_type: original
  license_url: https://amitaro.net/voice/corpus-list/ita/
  terms_link: https://amitaro.net/voice/voice_rule/
  commercial_use: true
  price: 0
# The model_info section is left commented out in this voice dataset example.
# model_info:
#  model_type:
#  base_model:
#  training_client:
#  input_voice: 
link:
  twitter: https://twitter.com/amitaro_utau
  ex_url_1: https://amitaro.net/
#  ex_url_2:
#  ex_url_3:


Overwriting ../output/output_meta.yaml


### RVCの例

In [None]:
%%writefile ../output/output_meta.yaml
# Metadata for one entry (RVC model example).
# NOTE: this cell writes to the same path as the voice dataset example, so
# run only the one that matches the entry being registered.
management:
  voice_id: 1
# The keys of the display section are also the columns exported to meta.tsv.
display:
  display_name: 刻鳴時雨 RVC用学習済みモデル
  distribution_type: model
  url: https://huggingface.co/yasyune/Shigure_Tokina_RVC
#  voice_sample: 
  thumbnail: https://s2.booth.pm/4bbcead3-a3a3-40b7-8364-82e59ad26919/i/3640133/2b6374d2-6fea-441d-8fbb-57080de1ff06_base_resized.jpg
voice_info:
  speaker: 刻鳴時雨
  actor: 丸ころ
  corpus: ITA
  language: ja
  voice_gender: male
license:
  provider: 瓶詰め
  register: nadare
  credit: 瓶詰め
  license_type: original
  license_url: https://bindume-chan.booth.pm/items/3640133
  terms_link: https://bindume-chan.booth.pm/items/3640133
  commercial_use: true
  price: 0
model_info:
  model_type: RVC
  # Fixed typo: was "hubert_basr".
  base_model: hubert_base
#  training_client:
  input_voice: https://bindume-chan.booth.pm/items/3640133
# link:
#  twitter:
#  ex_url_1:
#  ex_url_2:
#  ex_url_3:


# (optional) datasetの更新

In [7]:
!git clone https://github.com/nadare881/voice-changer-vector-search.git
os.chdir("./voice-changer-vector-search")
!git pull origin develop
!git checkout develop
!git reset --hard origin/develop
!git checkout register
!git reset --hard origin/register
os.chdir("../")

fatal: destination path 'voice-changer-vector-search' already exists and is not an empty directory.


Already up to date.


From https://github.com/nadare881/voice-changer-vector-search
 * branch            develop    -> FETCH_HEAD


Your branch is ahead of 'origin/develop' by 1 commit.
  (use "git push" to publish your local commits)


Already on 'develop'


HEAD is now at abba4c7 developの追加


Switched to branch 'register'


HEAD is now at 7d28dbb register request


In [9]:
import yaml

# Load the metadata table from the repository checkout and the YAML
# description of the new entry written by the %%writefile cell.
meta_df = pd.read_csv("./voice-changer-vector-search/data/meta.csv")
with open("../output/output_meta.yaml", encoding="utf-8") as f:
    meta = yaml.safe_load(f)

# Flatten the YAML sections (management, display, ...) into one flat record.
# A section whose entries are all commented out is parsed as None, so such
# sections are skipped instead of letting dict.update(None) raise TypeError.
meta_dict = {}
for section in meta.values():
    if section:
        meta_dict.update(section)
meta_df = pd.concat([meta_df, pd.DataFrame([meta_dict])], axis=0).reset_index(drop=True)

# Append the new embedding as one extra row of the stored embedding matrix.
embeddings = np.concatenate(
    [
        np.load("./voice-changer-vector-search/data/embeddings.npy"),
        np.expand_dims(output_embedding, axis=0),
    ],
    axis=0,
)

# Metadata and embeddings are exported side by side (meta.tsv /
# embeddings.tsv), so they must have one row per entry each.
assert len(meta_df) == len(embeddings), (
    f"meta.csv has {len(meta_df)} rows but embeddings has {len(embeddings)} rows"
)
meta_df.tail(5)

Unnamed: 0,voice_id,display_name,distribution_type,url,voice_sample,thumbnail,speaker,actor,corpus,language,...,provider,register,credit,license_type,license_url,terms_link,commercial_use,price,twitter,ex_url_1
0,0,あみたろのITAコーパス読み上げ音声,voice,https://amitaro.net/voice/corpus-list/ita/,https://www.youtube.com/embed/ksKu6JyLP5I,https://amitaro.net/wp-content/uploads/cfc421f...,あみたろ,あみたろ,ITA,ja,...,あみたろの声素材工房,nadare,あみたろの声素材工房,original,https://amitaro.net/voice/corpus-list/ita/,https://amitaro.net/voice/voice_rule/,True,0,https://twitter.com/amitaro_utau,https://amitaro.net/


In [10]:
# Columns listed under the YAML "display" section; only these are exported
# to meta.tsv.
display_columns = list(meta["display"].keys())

data_dir = "./voice-changer-vector-search/data"

# Full metadata table (all columns) and the display-only TSV subset.
meta_df.to_csv(f"{data_dir}/meta.csv", index=False, encoding="utf-8")
meta_df[display_columns].to_csv(f"{data_dir}/meta.tsv", index=False, encoding="utf-8", sep="\t")

# The embeddings are saved as-is in .npy form; the TSV copy is cast to
# float16 first and written without header or index.
np.save(f"{data_dir}/embeddings.npy", embeddings)
pd.DataFrame(embeddings).astype(np.float16).to_csv(
    f"{data_dir}/embeddings.tsv", index=False, header=False, encoding="utf-8", sep="\t"
)

In [11]:
os.chdir("./voice-changer-vector-search")
!git add ./data/*
!git commit -m "register request"
!git push -u origin register
!git checkout main
!git branch -D register
os.chdir("../")

[register d9299da] register request
 4 files changed, 3 insertions(+), 2 deletions(-)
 rewrite data/meta.csv (100%)
 copy data/{meta.csv => meta.tsv} (100%)
branch 'register' set up to track 'origin/register'.


remote: Resolving deltas:   0% (0/4)        
remote: Resolving deltas:  25% (1/4)        
remote: Resolving deltas:  50% (2/4)        
remote: Resolving deltas:  75% (3/4)        
remote: Resolving deltas: 100% (4/4)        
remote: Resolving deltas: 100% (4/4), completed with 1 local object.        
To https://github.com/nadare881/voice-changer-vector-search
   abba4c7..d9299da  register -> register


Your branch is ahead of 'origin/main' by 1 commit.

Switched to branch 'main'



  (use "git push" to publish your local commits)
Deleted branch register (was d9299da).
