Skip to content

Commit

Permalink
quick fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexandre Ovtchinnikov committed May 1, 2020
1 parent d1a2d6b commit acb5b61
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 1 deletion.
5 changes: 5 additions & 0 deletions src/mfs/cli.py
Expand Up @@ -39,6 +39,11 @@ def main(argv=sys.argv):
raise ValueError("{} is not a directory".format(dest))
url = args['URL']
follow = not args['--no-follow']

# Browsers return https URLs while the sites are still insecure, so to ease copy/paste
# the scheme is downgraded here. HACK: proper URL handling is still required.
url = url.replace('https://', 'http://')

print('Processing {}'.format(url))

scraper = None
Expand Down
3 changes: 2 additions & 1 deletion src/mfs/image_download.py
Expand Up @@ -11,7 +11,8 @@
def download_images(dl):
# Avoid too many requests (coroutines) at the same time:
# limit them with a semaphore (maximum simultaneous requests).
_sema = asyncio.Semaphore(10)
# Set to 5, as a higher number yields 503 responses from vfl.ru.
_sema = asyncio.Semaphore(5)

async def wait_with_progressbar(coros):
for f in tqdm.tqdm(asyncio.as_completed(coros), total=len(coros)):
Expand Down
6 changes: 6 additions & 0 deletions tests/test_image_download.py
Expand Up @@ -64,6 +64,12 @@ def test_live_vfl(tmpdir, testdata):
shallow=False), 'Downloaded image does not match the reference one'


def test_live_vfl_2020(tmpdir, testdata):
    """Check that downloading a vfl.ru page link leaves exactly one file in tmpdir.

    The download targets a real ``http://vfl.ru`` URL, so this test
    presumably needs network access to pass. The ``testdata`` fixture is
    requested but not used in the body.
    """
    # Build the destination with os.path.join rather than a hard-coded '/'.
    target = os.path.join(tmpdir.strpath, 'aaa.jpg')
    base.download_images([('http://vfl.ru/fotos/9b930dc417050203.html', target)])
    downloaded = os.listdir(tmpdir.strpath)
    assert len(downloaded) == 1, 'One image shall be downloaded'


def test_reference_radikal(testdata):
url = base._resolve_radikal(testdata.textdata('radikal.ru.html'))
assert url == 'http://s39.radikal.ru/i084/1106/a9/e1fca250702b.jpg', \
Expand Down

0 comments on commit acb5b61

Please sign in to comment.