Skip to content

Commit

Permalink
fix bugs in downloading voxpopuli corpus (#1165)
Browse files: browse the repository at this point in the history
* fix bugs in downloading voxpopuli corpus

* apply black
  • Loading branch information
DongjiGao committed Sep 26, 2023
1 parent 3875788 commit f7644ba
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 3 deletions.
2 changes: 1 addition & 1 deletion lhotse/bin/modes/recipes/voxpopuli.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,6 @@ def voxpopuli(
type=click.Choice(["asr", "10k", "100k", "400k"] + LANGUAGES + LANGUAGES_V2),
default="asr",
)
def voxpopuli(target_dir: Pathlike):
def voxpopuli(target_dir: Pathlike, subset: str):
"""voxpopuli download."""
download_voxpopuli(target_dir)
6 changes: 4 additions & 2 deletions lhotse/recipes/voxpopuli.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import gzip
import logging
import shutil
import tarfile
import tempfile
from ast import literal_eval
from collections import defaultdict
Expand Down Expand Up @@ -116,8 +117,9 @@ def download_voxpopuli(
logging.info(f"{len(url_list)} files to download...")
for url in tqdm(url_list):
tar_path = out_root / Path(url).name
download_url_to_file(url, out_root.as_posix(), Path(url).name)
safe_extract(tar_path, out_root)
download_url_to_file(url, tar_path)
with tarfile.open(tar_path, "r") as tar_file:
safe_extract(tar_file, out_root)
tar_path.unlink()

return target_dir
Expand Down

0 comments on commit f7644ba

Please sign in to comment.