In [2]:
import os
import requests
import shutil


def create_path(path):
    if not os.path.exists(path):
        os.makedirs(path)
    return path

def create_book_file(base_path, bookname, patch):
    output_file = os.path.join(base_path, bookname + patch['ext'])
    if os.path.exists(output_file):
        return None
    return output_file


def _download_book(url, book_path):
    if not os.path.exists(book_path):
        with requests.get(url, stream=True) as req:
            path = create_path('./tmp')
            tmp_file = os.path.join(path, '_-_temp_file_-_.bak')
            with open(tmp_file, 'wb') as out_file:
                shutil.copyfileobj(req.raw, out_file)
                out_file.close()
            shutil.move(tmp_file, book_path)


def download_book(request, output_file, patch):
    new_url = request.url.replace('%2F','/').replace('/book/', patch['url']) + patch['ext']
    request = requests.get(new_url, stream=True)
    if request.status_code == 200:
        _download_book(new_url, output_file)


replacements = {'/':'-', '\\':'-', ':':'-', '*':'', '>':'', '<':'', '?':'', \
                '|':'', '"':''}

def compose_bookname(title, author, edition, isbn):
    bookname = title + ' - ' + author + ', ' + edition + ' - ' + isbn
    if(len(bookname) > 145):
        bookname = title + ' - ' + author.split(',')[0] + ' et al., ' + \
                    edition + ' - ' + isbn
    if(len(bookname) > 145):
        bookname = title + ' - ' + author.split(',')[0] + ' et al. - ' + isbn
    if(len(bookname) > 145):
        bookname = title + ' - ' + isbn
    if(len(bookname) > 145):
        bookname = title[:130] + ' - ' + isbn
    bookname = bookname.encode('ascii', 'ignore').decode('ascii')
    return "".join([replacements.get(c, c) for c in bookname])
import os
import requests
import time
import argparse
import pandas as pd
from tqdm import tqdm
from helper import *


parser = argparse.ArgumentParser()
parser.add_argument('-f', '--folder', help='folder to store downloads')
parser.add_argument('--pdf', action='store_true', help='download PDF books')
parser.add_argument('--epub', action='store_true', help='download EPUB books')
args = parser.parse_args()

patches = []
if not args.pdf and not args.epub:
    args.pdf = args.epub = True
if args.pdf:
    patches.append({'url':'/content/pdf/', 'ext':'.pdf'})
if args.epub:
    patches.append({'url':'/download/epub/', 'ext':'.epub'})

folder = args.folder
folder = create_path(folder) if folder else create_path('./downloads')

table_url = 'https://resource-cms.springernature.com/springer-cms/rest/v1/content/17858272/data/v4'
table = 'table_' + table_url.split('/')[-1] + '.xlsx'
table_path = os.path.join(folder, table)
if not os.path.exists(./books):
    books = pd.read_excel(table_url)
    # Save table
    books.to_excel(./books)
else:
    books = pd.read_excel(./books, index_col=0, header=0)


books = books[
    [
      'OpenURL',
      'Book Title',
      'Author',
      'Edition',
      'Electronic ISBN',
      'English Package Name'
    ]
]

for url, title, author, edition, isbn, category in tqdm(books.values):
    dest_folder = create_path(os.path.join(folder, category))
    bookname = compose_bookname(title, author, edition, isbn)
    request = None
    for patch in patches:
        try:
            output_file = create_book_file(dest_folder, bookname, patch)
            if output_file is not None:
                request = requests.get(url) if request is None else request
                download_book(request, output_file, patch)
        except (OSError, IOError) as e:
            print(e)
            title = title.encode('ascii', 'ignore').decode('ascii')
            print('* Problem downloading: {}, so skipping it.'.format(title))
            time.sleep(30)
            request = None                    # Enforce new get request
            # then continue to download the next book

print('\nFinish downloading.')

NotADirectoryError: ignored