# 日本語版VoxCelebの動画データを作成する

ここでは、[syncnet_python](https://github.com/joonson/syncnet_python)を使って[VoxCeleb](http://www.robots.ox.ac.uk/~vgg/data/voxceleb/)の日本語版のもととなる動画データを作成します。


### Driveの接続

In [0]:
# Mount Google Drive at /content/drive; the later cells read their inputs
# from, and write their outputs to, paths under '/content/drive/My Drive'.
from google.colab import drive
drive.mount('/content/drive')

### git clone とその他の操作

In [0]:
# Clone the project; --recursive also checks out any git submodules.
!git clone --recursive https://github.com/leichtrhino/youtuberer.git
# Work inside the syncnet_python directory while installing its requirements.
%cd youtuberer/syncnet_python
!pip install -r requirements.txt
!pip install facenet-pytorch
!pip install pafy youtube_dl
# NOTE(review): presumably downloads the pretrained SyncNet model files - confirm.
!./download_model.sh
# Return to the repository root, where the numbered pipeline scripts are invoked.
%cd ..

### 実行

In [0]:
# Run the SyncNet step: first path is the input CSV, second is the output
# directory on Drive.
# NOTE(review): --max-video-length 1200 presumably limits the video length
# (unit not visible here, likely seconds) - confirm against 04-run-syncnet.py.
!python 04-run-syncnet.py --max-video-length 1200 '/content/drive/My Drive/YouTuberer/videos-for-syncnet.csv' '/content/drive/My Drive/YouTuberer/syncnet_out'

### Confidenceスコア取得

In [0]:
# Post-process the SyncNet output directory produced by the previous step.
# NOTE(review): the flags presumably enable verbose logging, skip missing
# entries, and omit embeddings - confirm against 05-postprocess.py.
!python 05-postprocess.py --verbose --ignore-noexist --without-embedding '/content/drive/My Drive/YouTuberer/syncnet_out'

In [0]:
import re
import os
import pickle

# Output directory on the mounted Drive; the same path is passed to
# 04-run-syncnet.py and 05-postprocess.py in the cells above.
syncnet_out_dir = '/content/drive/My Drive/YouTuberer/syncnet_out'
# 'pywork' sub-directory; the code below expects one directory per video in
# it, each optionally holding a 'confidence.pckl' file.
pywork_dir = os.path.join(syncnet_out_dir, 'pywork')

def get_confidence_scores(vid, base_dir=None):
  """Collect the frame-wise confidence values stored for one video.

  Reads ``<base_dir>/<vid>/confidence.pckl``. The pickled object must be a
  sequence of mappings, each providing an iterable under the key
  ``'framewise-conf'``.

  Args:
    vid: Name of the per-video directory.
    base_dir: Directory containing the per-video directories. When omitted,
      the module-level ``pywork_dir`` is used, which keeps existing
      single-argument calls working unchanged.

  Returns:
    A list of ``(scene, score)`` string pairs with one pair per confidence
    value. ``scene`` is ``'<vid>.<index>'`` where ``index`` is the zero-padded
    (6 digits) position of the entry in the pickled sequence; ``score`` is
    ``str(value)``. An empty list is returned when the pickle file does not
    exist.
  """
  if base_dir is None:
    base_dir = pywork_dir
  confidence_path = os.path.join(base_dir, vid, 'confidence.pckl')
  if not os.path.isfile(confidence_path):
    return []
  # NOTE: unpickling can execute arbitrary code; only point this at files
  # produced by the trusted pipeline steps.
  with open(confidence_path, 'rb') as fp:
    tracks = pickle.load(fp)
  return [
    (f'{vid}.{index:06}', str(value))
    for index, track in enumerate(tracks)
    for value in track['framewise-conf']
  ]

# Select the per-video directories by name. NOTE: re.match only anchors at the
# start of the string, so any entry whose first 8+ characters are word
# characters or '-' is accepted, including longer names.
vid_pattern = re.compile(r'[\w\-]{8,12}')
vids = [name for name in os.listdir(pywork_dir) if vid_pattern.match(name)]
header = 'scene,score'
# Flatten the per-video results in one linear pass; sum(lists, []) would copy
# the growing accumulator for every video.
rows = [row for vid in vids for row in get_confidence_scores(vid)]
body = '\n'.join(','.join(row) for row in rows)

# Written to the current working directory of the notebook.
with open('framewise-confidence-score.csv', 'w') as fp:
  print(header, file=fp)
  # Skip the body when there are no rows so the CSV does not end with a
  # stray blank line.
  if rows:
    print(body, file=fp)
