Skip to content

Commit

Permalink
Removed dependency from pafy
Browse files Browse the repository at this point in the history
  • Loading branch information
Alberto Pettarin committed Feb 22, 2017
1 parent 29016f1 commit aecb26a
Show file tree
Hide file tree
Showing 10 changed files with 151 additions and 125 deletions.
2 changes: 1 addition & 1 deletion aeneas/diagnostics.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def check_tools(cls):
"""
try:
from aeneas.tools.convert_syncmap import ConvertSyncMapCLI
# disabling this check, as it requires the optional dependency pafy
# disabling this check, as it requires the optional dependency youtube-dl
# COMMENTED from aeneas.tools.download import DownloadCLI
from aeneas.tools.execute_job import ExecuteJobCLI
from aeneas.tools.execute_task import ExecuteTaskCLI
Expand Down
167 changes: 107 additions & 60 deletions aeneas/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
* :class:`~aeneas.downloader.Downloader`, which downloads files from various Web sources.
.. note:: This module requires Python modules ``youtube-dl`` and ``pafy`` (``pip install youtube-dl pafy``).
.. note:: This module requires Python module ``youtube-dl`` (``pip install youtube-dl``).
"""

from __future__ import absolute_import
Expand All @@ -37,6 +37,10 @@
import aeneas.globalfunctions as gf


class DownloadError(Exception):
    """
    Error reported when the audiostreams of a source URL
    cannot be listed or downloaded, for example because
    the URL is not a valid YouTube URL or the machine is offline.
    """


class Downloader(Loggable):
"""
Download files from various Web sources.
Expand All @@ -54,9 +58,8 @@ def audio_from_youtube(
source_url,
download=True,
output_file_path=None,
preferred_index=None,
download_format=None,
largest_audio=True,
preferred_format=None
):
"""
Download an audio stream from a YouTube video,
Expand All @@ -80,76 +83,120 @@ def audio_from_youtube(
:param string source_url: the URL of the YouTube video
:param bool download: if ``True``, download the audio stream
best matching ``preferred_index`` or ``preferred_format``
best matching ``download_format``
and ``largest_audio``;
if ``False``, return the list of available audio streams
:param string output_file_path: the path where the downloaded audio should be saved;
if ``None``, create a temporary file
:param int preferred_index: preferably download this audio stream
:param int download_format: download the audio stream with given format index
:param bool largest_audio: if ``True``, download the largest audio stream available;
if ``False``, download the smallest one.
:param string preferred_format: preferably download this audio format
:rtype: string or list of pafy audio streams
:raises: ImportError: if ``pafy`` is not installed
:rtype: string or list of dict
:raises: ImportError: if ``youtube-dl`` is not installed
:raises: OSError: if ``output_file_path`` cannot be written
:raises: DownloadError: if ``source_url`` is not a valid YouTube URL or you are offline
"""
def select_audiostream(audiostreams):
""" Select the audiostream best matching the given parameters. """
if preferred_index is not None:
if preferred_index in range(len(audiostreams)):
self.log([u"Selecting audiostream with index %d", preferred_index])
return audiostreams[preferred_index]
else:
self.log_warn([u"Audio stream index '%d' not allowed", preferred_index])
self.log_warn(u"Ignoring the requested audio stream index")
# selecting by preferred format
streams = audiostreams
if preferred_format is not None:
self.log([u"Selecting audiostreams by preferred format %s", preferred_format])
streams = [audiostream for audiostream in streams if audiostream.extension == preferred_format]
if len(streams) < 1:
self.log([u"No audiostream with preferred format %s", preferred_format])
streams = audiostreams
# sort by size
streams = sorted([(audio.get_filesize(), audio) for audio in streams])
if largest_audio:
self.log(u"Selecting largest audiostream")
selected = streams[-1][1]
else:
self.log(u"Selecting smallest audiostream")
selected = streams[0][1]

def _list_audiostreams(self, source_url):
    """
    Return a list of dicts, each describing
    an available audiostream for the given ``source_url``.

    Each returned dict has the keys ``"format"`` (the first
    space-separated token of the youtube-dl format string),
    ``"filesize"``, ``"ext"`` and ``"abr"``.
    ``"filesize"`` and ``"abr"`` are ``None`` when youtube-dl
    does not report them.

    :param string source_url: the URL to be inspected
    :rtype: list of dict
    :raises: DownloadError: if youtube-dl cannot extract the info
                            (reported through ``self.log_exc``)
    """
    self.log(u"Getting audiostreams...")
    audiostreams = []
    options = {
        "download": False,
        "quiet": True,
        "skip_download": True,
    }
    try:
        with youtube_dl.YoutubeDL(options) as ydl:
            info = ydl.extract_info(source_url, download=False)
            # "formats" and most per-format fields are optional in the
            # youtube-dl info dict: read them with .get() so that a missing
            # field does not surface as an unrelated KeyError
            formats = (info or {}).get("formats") or []
            # audio-only formats: no video codec, and an audio codec
            # that is not explicitly declared absent
            audio_formats = [
                f for f in formats
                if f.get("vcodec") == "none" and f.get("acodec") != "none"
            ]
            for a in audio_formats:
                audiostreams.append({
                    "format": a["format"].split(" ")[0],
                    "filesize": a.get("filesize"),
                    "ext": a["ext"],
                    "abr": a.get("abr")
                })
    except (IOError, OSError, youtube_dl.DownloadError) as exc:
        self.log_exc(u"The specified source URL '%s' is not a valid YouTube URL or you are offline" % (source_url), None, True, DownloadError)
    self.log(u"Getting audiostreams... done")
    return audiostreams

def _select_audiostream(self, audiostreams, download_format=None, largest_audio=False):
    """
    Select the best-matching audiostream:
    if a ``download_format`` is given, use it,
    otherwise act according to ``largest_audio``.
    If ``download_format`` is not matching any
    of the available audiostreams, then just act
    according to ``largest_audio``.

    Audiostreams whose ``"filesize"`` is unknown (``None`` or missing)
    are only considered when no audiostream has a known size;
    in that case the first available audiostream is selected.

    :param list audiostreams: the available audiostreams, as dicts
                              with at least the key ``"format"``
    :param string download_format: the format to be selected, if available
    :param bool largest_audio: if ``True``, select the largest audiostream;
                               if ``False``, select the smallest one
    :rtype: dict, or ``None`` if ``audiostreams`` is empty
    """
    self.log(u"Selecting best-matching audiostream...")
    selected = None
    if download_format is not None:
        matching = [a for a in audiostreams if a["format"] == download_format]
        if matching:
            selected = matching[0]
    if selected is None:
        # comparing None with an int raises TypeError on Python 3,
        # so rank only the audiostreams whose size is actually known
        known = [a for a in audiostreams if a.get("filesize") is not None]
        if known:
            ranked = sorted(known, key=lambda x: x["filesize"])
            selected = ranked[-1] if largest_audio else ranked[0]
        elif audiostreams:
            # no size information at all: fall back to the first one
            selected = audiostreams[0]
    self.log(u"Selecting best-matching audiostream... done")
    return selected

try:
import pafy
except ImportError as exc:
self.log_exc(u"Python module pafy is not installed", exc, True, ImportError)
def _compose_output_file_path(self, extension, output_file_path=None):
    """
    Determine the path where the downloaded audio will be saved.

    When ``output_file_path`` is given, it is checked for writability
    and returned unchanged.
    When it is ``None``, a temporary file with suffix ``.<extension>``
    is created under the configured temporary directory
    and its path is returned.

    :param string extension: the file extension, without the leading dot
    :param string output_file_path: the requested output path, or ``None``
    :rtype: string
    :raises: OSError: if ``output_file_path`` cannot be written
                      (reported through ``self.log_exc``)
    """
    self.log(u"Determining output file path...")
    if output_file_path is not None:
        self.log(u"output_file_path is not None: cheking that file can be written")
        writable = gf.file_can_be_written(output_file_path)
        if not writable:
            self.log_exc(u"Path '%s' cannot be written. Wrong permissions?" % (output_file_path), None, True, OSError)
    else:
        self.log(u"output_file_path is None: creating temp file")
        tmp_root = self.rconf[RuntimeConfiguration.TMP_PATH]
        tmp_suffix = ".%s" % extension
        handler, output_file_path = gf.tmp_file(root=tmp_root, suffix=tmp_suffix)
        # only the path is kept: the temporary file itself is removed right away
        # NOTE(review): presumably so that the downloader can create it -- confirm
        gf.delete_file(handler, output_file_path)
    self.log(u"Determining output file path... done")
    self.log([u"Output file path is '%s'", output_file_path])
    return output_file_path

def _download_audiostream(self, source_url, fmt, output_path):
    """
    Download the audiostream with format ``fmt``
    from ``source_url`` and save it to ``output_path``,
    using youtube-dl in quiet mode.

    :param string source_url: the URL to download from
    :param string fmt: the youtube-dl format to be downloaded
    :param string output_path: the youtube-dl output template,
                               that is, the path of the output file
    :raises: DownloadError: if youtube-dl fails to download
                            (reported through ``self.log_exc``)
    """
    self.log(u"Downloading audiostream...")
    ydl_options = {
        "download": True,
        "format": fmt,
        "outtmpl": output_path,
        "quiet": True,
        "skip_download": False,
    }
    try:
        with youtube_dl.YoutubeDL(ydl_options) as downloader:
            downloader.download([source_url])
    except (IOError, OSError, youtube_dl.DownloadError):
        failure = u"The specified source URL '%s' is not a valid YouTube URL or you are offline" % (source_url)
        self.log_exc(failure, None, True, DownloadError)
    self.log(u"Downloading audiostream... done")

try:
video = pafy.new(source_url)
except (IOError, OSError, ValueError) as exc:
self.log_exc(u"The specified source URL '%s' is not a valid YouTube URL or you are offline" % (source_url), exc, True, ValueError)
import youtube_dl
except ImportError as exc:
self.log_exc(u"Python module youtube-dl is not installed", exc, True, ImportError)

audiostreams = _list_audiostreams(self, source_url)
if not download:
self.log(u"Returning the list of audio streams")
return video.audiostreams

output_path = output_file_path
if output_file_path is None:
self.log(u"output_path is None: creating temp file")
handler, output_path = gf.tmp_file(root=self.rconf[RuntimeConfiguration.TMP_PATH])
else:
if not gf.file_can_be_written(output_path):
self.log_exc(u"Path '%s' cannot be written. Wrong permissions?" % (output_path), None, True, OSError)

audiostream = select_audiostream(video.audiostreams)
if output_file_path is None:
gf.delete_file(handler, output_path)
output_path += "." + audiostream.extension

self.log([u"output_path is '%s'", output_path])
self.log(u"Downloading...")
audiostream.download(filepath=output_path, quiet=True)
self.log(u"Downloading... done")
self.log(u"Returning list of audiostreams")
return audiostreams
if len(audiostreams) == 0:
self.log_exc(u"No audiostreams available for the provided URL", None, True, OSError)

audiostream = _select_audiostream(self, audiostreams, download_format, largest_audio)
output_path = _compose_output_file_path(self, audiostream["ext"], output_file_path)
_download_audiostream(self, source_url, audiostream["format"], output_path)
return output_path
57 changes: 24 additions & 33 deletions aeneas/tests/net_test_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,93 +24,84 @@
import unittest

from aeneas.downloader import Downloader
from aeneas.downloader import DownloadError
import aeneas.globalfunctions as gf


class TestDownloader(unittest.TestCase):

URL_MALFORMED = "foo"
URL_INVALID = "aaaaaaaaaaa"
URL_VALID = "https://www.youtube.com/watch?v=rU4a7AA8wM0"
URL_MALFORMED = u"foo"
URL_INVALID = u"aaaaaaaaaaa"
URL_VALID = u"https://www.youtube.com/watch?v=rU4a7AA8wM0"
OUTPUT_PATH_INVALID = u"/foo/bar/baz"

def audio_from_youtube(
self,
source_url,
download=True,
output_file_path=None,
preferred_index=None,
largest_audio=True,
preferred_format=None
download_format=None,
largest_audio=True
):
return Downloader().audio_from_youtube(
source_url,
download=download,
output_file_path=output_file_path,
preferred_index=preferred_index,
largest_audio=largest_audio,
preferred_format=preferred_format
download_format=download_format,
largest_audio=largest_audio
)

def download(
self,
expected_size,
preferred_index=None,
download_format=None,
largest_audio=True,
preferred_format=None
):
path = self.audio_from_youtube(
self.URL_VALID,
download=True,
output_file_path=None,
preferred_index=preferred_index,
largest_audio=largest_audio,
preferred_format=preferred_format
download_format=download_format,
largest_audio=largest_audio
)
self.assertTrue(gf.file_can_be_read(path))
self.assertEqual(gf.file_size(path), expected_size)
gf.delete_file(None, path)

def test_malformed_url(self):
with self.assertRaises(ValueError):
with self.assertRaises(DownloadError):
self.audio_from_youtube(self.URL_MALFORMED, download=False)

def test_invalid_url(self):
with self.assertRaises(ValueError):
with self.assertRaises(DownloadError):
self.audio_from_youtube(self.URL_INVALID, download=False)

def test_invalid_output_file(self):
with self.assertRaises(OSError):
self.audio_from_youtube(self.URL_VALID, download=True, output_file_path="/foo/bar/baz")
self.audio_from_youtube(
self.URL_VALID,
download=True,
output_file_path=self.OUTPUT_PATH_INVALID
)

def test_download_list(self):
audiostreams = self.audio_from_youtube(self.URL_VALID, download=False)
self.assertEqual(len(audiostreams), 5)

def test_download_simple(self):
self.download(1147614)
self.download(1146884)

def test_download_smallest(self):
self.download(353237, largest_audio=False)

def test_download_format(self):
# NOTE on Python 2 pafy uses "ogg", while on Python 3 pafy uses "opus"
if gf.PY2:
fmt = "ogg"
else:
fmt = "opus"
self.download(1147614, preferred_format=fmt)
self.download(1146884, download_format=u"140")

def test_download_format_smallest(self):
self.download(1147614, preferred_format="m4a", largest_audio=False)

def test_download_index(self):
self.download(880809, preferred_index=4)

def test_download_index_out_of_range(self):
self.download(1147614, preferred_index=1000)
self.download(1146884, download_format=u"140", largest_audio=False)

def test_download_index_and_bad_format(self):
self.download(880809, preferred_index=4, preferred_format="m4a", largest_audio=True)
def test_download_bad_format(self):
self.download(1146884, download_format=u"-1")


if __name__ == "__main__":
Expand Down
8 changes: 4 additions & 4 deletions aeneas/tools/abstract_cli_program.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,11 +535,11 @@ def get_text_file(self, text_format, text, parameters):
self.print_error(u"Cannot read file '%s'" % (text))
return None

def print_no_pafy_error(self):
self.print_error(u"You need to install Python modules youtube-dl and pafy to download audio from YouTube. Run:")
self.print_error(u"$ pip install youtube-dl pafy")
def print_no_dependency_error(self):
self.print_error(u"You need to install Python module youtube-dl to download audio from YouTube. Run:")
self.print_error(u"$ pip install youtube-dl")
self.print_error(u"or, to install for all users:")
self.print_error(u"$ sudo pip install youtube-dl pafy")
self.print_error(u"$ sudo pip install youtube-dl")


def main():
Expand Down
Loading

0 comments on commit aecb26a

Please sign in to comment.