Skip to content
This repository has been archived by the owner on Feb 11, 2020. It is now read-only.

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
the-robot committed Oct 21, 2018
1 parent 7c9a2c5 commit b170c38
Show file tree
Hide file tree
Showing 10 changed files with 293 additions and 99 deletions.
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -3,3 +3,4 @@ bin
include
lib
pip-selfcheck.json
__pycache__
85 changes: 85 additions & 0 deletions app.py
@@ -0,0 +1,85 @@
import argparse
import time


from src.downloader import Downloader
from src.exceptions import RequestBlocked
from src.logger import Logger
from src.scraper import Scraper
from src import config, helper


# CONFIGS
# Logger shared by this script; built with default arguments, so every message
# is printed to the terminal as well as appended to the log file.
LOGGER = Logger()
# Fallback download directory; replaced by the -d/--download option when one is given.
DOWNLOAD_PATH = config.DEFAULT_DOWNLOAD_PATH

class App:
    """Download episodes of one anime by pairing a Scraper with a Downloader."""

    __FILE_FORMAT = '.mp4'
    __TIMEOUT = config.BLOCKED_TIMEOUT

    def __init__(self, anime_url:str, download_path:str):
        """
        anime_url: page url handed to the Scraper
        download_path: directory handed to the Downloader
        """
        self.__scraper = Scraper(anime_url)
        self.__downloader = Downloader(download_path)

    def download(self, episode:str) -> bool:
        """Download one episode.

        Returns True once the file has been downloaded, False when the
        scraper came back with no video url for the episode.
        """
        videos = self.__fetch_videos(episode)

        if videos:
            # NOTE: use first download url only
            self.__downloader.download(self.__get_filename(episode), videos[0])
            LOGGER.info(f'downloaded episode {episode}')
            return True

        LOGGER.error(f'url not found for episode {episode}')
        return False

    def get_downloads(self) -> dict:
        """Return the downloader's record of finished downloads."""
        return self.__downloader.get_downloads()

    def __fetch_videos(self, episode:str):
        """Ask the scraper for the episode's video urls, retrying for as long
        as the request keeps getting blocked."""
        while True:
            try:
                LOGGER.info(f'downloading episode {episode}')
                return self.__scraper.get(episode)
            except RequestBlocked:
                LOGGER.error(f'request blocked by anime heaven for episode {episode}, going to try again in {self.__TIMEOUT} seconds')
                # back off before the next attempt
                time.sleep(self.__TIMEOUT)

    def __get_filename(self, episode:str) -> str:
        """Build the file name an episode is saved under."""
        return 'Episode-' + f'{episode}' + self.__FILE_FORMAT


if __name__ == '__main__':
    # Command-line entry point: validate the arguments, then download every
    # requested episode in order.

    # sys.exit is used instead of the bare exit() helper, which is injected by
    # the site module and is missing under `python -S` or in frozen builds.
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument('-a', '--anime', help='add anime url', type=str)
    parser.add_argument('-e', '--episode', help='enter episode range (1-10) or just 1 to download single episode', type=str)
    parser.add_argument('-d', '--download', help='download path, by defult download to current directory', type=str)

    args = parser.parse_args()
    # terminal can automatically add escape sequences, so remove it
    anime = args.anime.replace('\\', '') if args.anime else args.anime

    # check if given anime url is valid (a missing -a option is invalid too)
    if not anime or not helper.is_valid_anime(anime):
        print('Invalid anime url')
        print('I.e. http://animeheaven.eu/i.php?a=Bakuman.')
        sys.exit(1)

    # check if given episode or episode range is valid; a missing -e option is
    # rejected here rather than handed to the parser as None
    episodes = helper.get_episodes(args.episode) if args.episode else None
    if not episodes:
        print('Invalid episode(s)')
        sys.exit(1)

    # the -d option, when given, overrides the default download path
    DOWNLOAD_PATH = args.download if args.download else DOWNLOAD_PATH

    LOGGER.info(f'Download path: {DOWNLOAD_PATH}')

    app = App(anime, DOWNLOAD_PATH)
    for ep in episodes:
        app.download(ep)
Binary file added src/.DS_Store
Binary file not shown.
99 changes: 0 additions & 99 deletions src/app.py

This file was deleted.

4 changes: 4 additions & 0 deletions src/config.py
@@ -0,0 +1,4 @@
# Abuse-protection message text; presumably compared against Animeheaven
# responses to detect a blocked request -- TODO confirm against the scraper.
ANIMEHEAVEN_ABUSE_MSG = 'You have triggered abuse protection'
# Seconds to wait before retrying after a request was blocked.
BLOCKED_TIMEOUT = 120
# Directory downloads go to when no download path is given on the command line.
DEFAULT_DOWNLOAD_PATH = './'
# Directory the Logger writes its log files into.
LOG_PATH = 'logs'
76 changes: 76 additions & 0 deletions src/downloader.py
@@ -0,0 +1,76 @@
import time
import os
import urllib.request


class Downloader:
    """Download files into one directory and remember where each was saved."""

    def __init__(self, directory:str):
        """ directory: where to store downloads (created when missing) """

        self.__directory = self.__exists(directory)
        self.__downloads = {}   # file name -> absolute path of the finished download

        # download progress state shared with the report hook
        self.__start_time = None
        self.__current_download = None
        self.__line_open = False    # True while a '\r'-terminated progress line is on screen

    def download(self, name:str, url:str):
        """Fetch url into the download directory under the file name `name`.

        Raises IOError when the file is not on disk after the transfer;
        errors raised by urllib.request.urlretrieve propagate unchanged.
        """
        path = os.path.join(self.get_path(), name)
        self.__current_download = name
        self.__start_time = None

        try:
            urllib.request.urlretrieve(url, path, self.__progress_hook)
        finally:
            # terminate an unfinished progress line (unknown size or aborted
            # transfer) so later output does not overwrite it
            if self.__line_open:
                print()
                self.__line_open = False

        if not os.path.exists(path):
            raise IOError(f'File {name} missing in directory {self.get_path()} (path: {path})')
        self.__downloads[name] = path

    def get_path(self) -> str:
        """Return the absolute path of the download directory."""
        return self.__directory

    def get_downloads(self) -> dict:
        """Return the mapping of downloaded file names to their paths."""
        return self.__downloads

    def __exists(self, directory:str) -> str:
        """ make sure directory exists (create it if needed) and return its absolute path """

        os.makedirs(directory, exist_ok=True)
        # abspath also normalises a trailing slash; rebuilding the path from
        # dirname + last component resolved './' and 'dir/' to the parent
        return os.path.abspath(directory)

    def __progress_hook(self, count:int, block_size:int, total_size:int):
        """ download progress hook
        https://blog.shichao.io/2012/10/04/progress_speed_indicator_for_urlretrieve_in_python.html

        count: number of blocks transferred so far
        block_size: size of one block in bytes
        total_size: size of the file in bytes; zero or negative when unknown
        """

        # count = 0; download starts
        if count == 0 or self.__start_time is None:
            self.__start_time = time.time()
            return

        duration = time.time() - self.__start_time
        size_known = total_size > 0
        downloaded = count * block_size
        if size_known:
            # the last block usually overshoots the real file size
            downloaded = min(downloaded, total_size)

        # speed in KB/s; a zero duration can occur with a coarse clock
        speed = int(downloaded / (1024 * duration)) if duration > 0 else 0

        # convert speed to mb or gb
        if speed < 1000:
            speed_str = f'{speed:10} KB/s'
        elif speed < 1000000:
            speed_mb = f'{(speed / 1024):.2f} MB/s'
            speed_str = f'{speed_mb:10}'
        else:
            speed_gb = f'{(speed / 1024 / 1024):.2f} GB/s'
            speed_str = f'{speed_gb:10}'

        label = (self.__current_download or '').ljust(20)
        # convert file size to mb for print
        downloaded_mb = f'{downloaded / (1024 * 1024):.2f}'

        if not size_known:
            # no percentage can be computed without a total size
            print(f'{label} Downloaded: {downloaded_mb.rjust(12)} MB {speed_str}', end='\r')
            self.__line_open = True
            return

        percent = downloaded * 100 // total_size
        total_size_mb = f'{total_size / (1024 * 1024):.2f}'
        line = f'{label} Percent: {percent}% {downloaded_mb.rjust(12)}/{total_size_mb} MB {speed_str}'

        if percent < 100:
            print(line, end='\r')
            self.__line_open = True
        else:
            # go to newline
            print(line)
            self.__line_open = False
7 changes: 7 additions & 0 deletions src/exceptions.py
@@ -0,0 +1,7 @@
class DriverNotFound(Exception):
    """Signals that no Selenium driver could be located on the system."""

class RequestBlocked(Exception):
    """Signals that Animeheaven blocked the request for abuse."""
20 changes: 20 additions & 0 deletions src/helper.py
@@ -0,0 +1,20 @@
from urllib.parse import urlparse

def is_valid_anime(anime:str) -> bool:
    """Tell whether anime parses as a url that has a scheme, a host, a path
    and a query string."""
    parts = urlparse(anime)
    required = (parts.scheme, parts.netloc, parts.path, parts.query)
    return all(required)

def get_episodes(ep:str) -> list:
    """Turn an episode argument into the list of episode numbers to fetch.

    ep: a single number ('7') or an inclusive range ('1-10')

    Returns the episode numbers as a list of ints. Invalid input (not a
    string, non-numeric parts, more than two parts) yields False, which
    callers test with `not`; a descending range yields an empty list.
    """
    # a missing command-line option arrives as None; reject it like any other
    # invalid value instead of failing on .split
    if not isinstance(ep, str):
        return False

    # check if episode numbers are valid
    try:
        bounds = [int(part) for part in ep.split('-')]
    except ValueError:
        return False

    # check either range or not
    if len(bounds) > 2:
        return False
    if len(bounds) == 1:
        return bounds

    # if in range, generate episode numbers from x-y inclusive
    first, last = bounds
    return list(range(first, last + 1))
42 changes: 42 additions & 0 deletions src/logger.py
@@ -0,0 +1,42 @@
from datetime import datetime
import os

from . import config


class Logger:
    """Append timestamped messages to a per-day log file, optionally echoing
    them to the terminal."""

    __LOG_PATH = config.LOG_PATH

    def __init__(self, verbose=True):
        """ verbose: also print every message to the terminal """
        self.__verbose = verbose
        self.__logs = self.__exists( self.__LOG_PATH )

    def info(self, msg:str):
        """Log msg with the 'Info' tag."""
        self.__log(msg, 'Info')

    def error(self, msg:str):
        """Log msg with the 'Error' tag."""
        self.__log(msg, 'Error')

    def __log(self, msg:str, msg_type:str):
        """Write one formatted line to today's log file."""
        # take the time per message; a timestamp captured once at construction
        # would stamp every line identically and never roll over to a new day
        moment = datetime.now()
        log = self.__get_logfile(moment)
        now = moment.strftime('%d-%m-%Y %H:%M:%S %p')

        with open(log, 'a') as logger:
            msg = f'[{now} : {msg_type:5}] {msg}'
            # show in terminal if verbose
            if self.__verbose:
                print(msg)
            logger.write(msg + '\n')

    def __get_logfile(self, moment=None) -> str:
        """Return the path of the log file for the day of `moment` (now by default)."""
        # group logs by current day
        if moment is None:
            moment = datetime.now()
        log = moment.strftime('%d-%m-%Y.log')
        return os.path.join(self.__logs, log)

    def __exists(self, directory:str) -> str:
        """ make sure directory exists (create it if needed) and return its absolute path """
        os.makedirs(directory, exist_ok=True)
        # abspath also normalises a trailing slash, which the previous
        # dirname + last-component rebuild resolved to the parent directory
        return os.path.abspath(directory)

0 comments on commit b170c38

Please sign in to comment.