In [4]:
import os
import subprocess
from libgen.scraper import Scraper

# Function to convert files to PDF using Calibre
def convert_to_pdf(input_file, output_pdf,
                   converter="/Applications/calibre.app/Contents/MacOS/ebook-convert"):
    """Convert an e-book to PDF with Calibre's ``ebook-convert`` command-line tool.

    Args:
        input_file: Path of the file to convert; its extension decides whether
            a conversion is attempted.
        output_pdf: Path the PDF should be written to.
        converter: Path of the ``ebook-convert`` executable. Defaults to the
            location inside the macOS Calibre application bundle.

    Returns:
        ``output_pdf`` unchanged, whether or not a conversion took place.
        Problems are reported on stdout instead of being raised.
    """
    print(f"Converting {input_file} to {output_pdf}...")
    extension = input_file.rsplit('.', 1)[-1].lower()
    if extension in ('mobi', 'epub', 'djvu', 'docx'):
        # Pass an argument list and no shell: book titles routinely contain
        # quotes, '#', '&' and similar characters that would break (or inject
        # into) a shell command string.
        command = [converter, input_file, output_pdf]
        try:
            result = subprocess.run(command, capture_output=True, text=True)
        except OSError as exc:
            # Without a shell, a missing or non-executable converter raises
            # here instead of yielding a non-zero exit status.
            print(f"Error in conversion: {exc}")
            return output_pdf
        if result.returncode == 0:
            print(f"Converted {input_file} to PDF as {output_pdf}")
        else:
            print(f"Error in conversion: {result.stderr.strip()}")
    elif extension == 'pdf':
        print(f"No conversion required for {input_file}, already in PDF format.")
    else:
        # Do not claim the file is a PDF when it is simply an unsupported type.
        print(f"Skipping conversion of {input_file}: unsupported format '{extension}'.")
    return output_pdf

# Function to download and convert the book if necessary
def download_book(book, output_dir, downloaded_titles):
    """Download one book into ``output_dir`` and convert it to PDF if it is not one.

    The transfer goes through the module-level ``scraper`` object and the
    conversion through ``convert_to_pdf``.

    Args:
        book: Mapping providing the 'name', 'format' and 'link' keys read here.
        output_dir: Target directory; created when missing.
        downloaded_titles: Set of titles fetched so far. A title already in the
            set is skipped; a title is added after a successful download.
    """
    os.makedirs(output_dir, exist_ok=True)

    title = book['name']
    file_format = book['format'].lower()

    # The title doubles as the file name. A path separator inside it would make
    # os.path.join point into a sub-directory that does not exist, so replace it.
    safe_name = title.replace('/', '_').replace('\\', '_')
    output_file = os.path.join(output_dir, f"{safe_name}.{file_format}")

    if title in downloaded_titles:
        print(f"Skipping already downloaded book: {title}")
        return

    print(f"Downloading: {title}")

    download_successful = scraper.download(book['link'], output_path=output_file)

    if download_successful:
        print(f"Downloaded: {output_file}")
        if file_format != "pdf":
            output_pdf = os.path.join(output_dir, f"{safe_name}.pdf")
            convert_to_pdf(output_file, output_pdf)
        # Record the original title so the duplicate check keeps matching.
        downloaded_titles.add(title)
    else:
        print(f"Download failed for: {title}")

# Function to filter and prioritize books
def filter_books(books):
    """Reduce search results to one entry per title, favouring PDF editions.

    The first result seen for a title is kept. A later result with the same
    title replaces it only if that later result is a PDF and either compares
    greater on 'year' or the kept entry is not a PDF.

    Args:
        books: Iterable of mappings with 'name', 'year' and 'format' keys.

    Returns:
        List of the retained entries, ordered by first appearance of each title.
    """
    chosen = {}

    for candidate in books:
        key = candidate['name']
        if key not in chosen:
            chosen[key] = candidate
            continue

        incumbent = chosen[key]
        is_newer = candidate['year'] > incumbent['year']
        is_pdf = candidate['format'].lower() == 'pdf'
        if is_pdf and (is_newer or incumbent['format'].lower() != 'pdf'):
            chosen[key] = candidate

    return [*chosen.values()]

# Initialize the scraper (download_book reads this module-level name)
scraper = Scraper()

# Search for books based on user input
query = input("Enter the book title or query: ")
books = scraper.get_data(query)

# Filter the books
filtered_books = filter_books(books)

# Display the filtered books with an index for user selection
for index, book in enumerate(filtered_books):
    print(f"{index + 1}. {book['name']} ({book['year']}) - {book['format']}")

# Ask user to select the books they want to download
if filtered_books:
    selection = input("Enter the numbers of the books to download (comma-separated, e.g., 1,3,5): ")
else:
    # Nothing to choose from, so do not prompt for a selection.
    print("No books matched the query.")
    selection = ""

# Parse the selection defensively: blank tokens (e.g. a trailing comma) are
# skipped, non-numeric tokens are reported instead of raising ValueError, and
# repeated numbers are collapsed so each book is requested once.
selected_indices = []
for token in selection.split(','):
    token = token.strip()
    if not token:
        continue
    try:
        position = int(token) - 1
    except ValueError:
        print(f"Ignoring invalid selection: {token!r}")
        continue
    if position not in selected_indices:
        selected_indices.append(position)

# Validate the selected indices
selected_books = [filtered_books[i] for i in selected_indices if 0 <= i < len(filtered_books)]

# Download the selected books
downloaded_titles = set()
for book in selected_books:
    download_book(book, output_dir="downloads", downloaded_titles=downloaded_titles)

print("All selected books have been processed.")


1. Hands-On Machine Learning with ML.NET: Getting started with Microsoft ML.NET to implement popular machine learning algorithms in C# 1789801788, 9781789801781 (2020) - pdf
2. Hands-On Machine Learning with TensorFlow.js: A guide to building ML applications integrated with web technology using the TensorFlow.js library 9781838821739, 1838821732 (2019) - pdf
3. Hands-On Artificial Intelligence on Amazon Web Services: Decrease the time to market for AI and ML applications with the power of AWS 9781789531473, 9781789534146 (2019) - epub
4. Hands-On ML Projects with OpenCV: Master Computer Vision and Machine Learning Using OpenCV and Python 9789388590877 (2023) - epub
5. GoLang for Machine Learning: A Hands-on-Guide to Building Efficient, Smart and Scalable ML Models with Go Programming (2024) - pdf
Downloading: Hands-On Artificial Intelligence on Amazon Web Services: Decrease the time to market for AI and ML applications with the power of AWS 9781789531473, 9781789534146


Downloading: downloads/Hands-On Artificial Intelligence on Amazon Web Services: Decrease the time to market for AI and ML applications with the power of AWS 9781789531473, 9781789534146.epub:   0%|          | 18.3k/18.3M [10:19<176:06:03, 30.2B/s]


Downloaded: downloads/Hands-On Artificial Intelligence on Amazon Web Services: Decrease the time to market for AI and ML applications with the power of AWS 9781789531473, 9781789534146.epub
Converting downloads/Hands-On Artificial Intelligence on Amazon Web Services: Decrease the time to market for AI and ML applications with the power of AWS 9781789531473, 9781789534146.epub to downloads/Hands-On Artificial Intelligence on Amazon Web Services: Decrease the time to market for AI and ML applications with the power of AWS 9781789531473, 9781789534146.pdf...
Converted downloads/Hands-On Artificial Intelligence on Amazon Web Services: Decrease the time to market for AI and ML applications with the power of AWS 9781789531473, 9781789534146.epub to PDF as downloads/Hands-On Artificial Intelligence on Amazon Web Services: Decrease the time to market for AI and ML applications with the power of AWS 9781789531473, 9781789534146.pdf
All selected books have been processed.


Multithreading (concurrent downloads with ThreadPoolExecutor)

In [2]:
import os
import subprocess
from libgen.scraper import Scraper
from concurrent.futures import ThreadPoolExecutor

# Function to convert files to PDF
def convert_to_pdf(input_file, output_pdf,
                   converter="/Applications/calibre.app/Contents/MacOS/ebook-convert"):
    """Convert an e-book to PDF by running Calibre's ``ebook-convert``.

    Args:
        input_file: Path of the source file; the text after its last dot
            decides whether a conversion is attempted.
        output_pdf: Destination path for the PDF.
        converter: Path of the ``ebook-convert`` executable. Defaults to the
            location inside the macOS Calibre application bundle.

    Returns:
        ``output_pdf`` unchanged, regardless of whether a conversion ran or
        succeeded. Problems are printed, not raised.
    """
    print(f"Converting {input_file} to {output_pdf}...")
    extension = input_file.rsplit('.', 1)[-1].lower()
    convertible = ('mobi', 'epub', 'djvu', 'docx')

    if extension in convertible:
        # Argument list, no shell: titles containing quotes or other shell
        # metacharacters must reach the converter verbatim.
        try:
            result = subprocess.run(
                [converter, input_file, output_pdf],
                capture_output=True,
                text=True,
            )
        except OSError as exc:
            # Raised when the converter executable cannot be started at all.
            print(f"Error in conversion: {exc}")
            return output_pdf
        if result.returncode == 0:
            print(f"Converted {input_file} to PDF as {output_pdf}")
        else:
            print(f"Error in conversion: {result.stderr.strip()}")
    elif extension == 'pdf':
        print(f"No conversion required for {input_file}, already in PDF format.")
    else:
        # An unsupported type is not the same thing as "already a PDF".
        print(f"Skipping conversion of {input_file}: unsupported format '{extension}'.")
    return output_pdf

import logging

# Route ERROR-and-above log records to a file in the working directory
logging.basicConfig(level=logging.ERROR, filename='download_errors.log')

def download_book(book, output_dir, downloaded_titles):
    """Download one book into ``output_dir``, converting it to PDF if needed.

    The transfer goes through the module-level ``scraper`` object and the
    conversion through ``convert_to_pdf``. Failures are printed and written to
    the ``logging`` error log; exceptions raised while downloading or
    converting are caught here rather than propagated.

    Args:
        book: Mapping providing the 'name', 'format' and 'link' keys read here.
        output_dir: Target directory; created when missing.
        downloaded_titles: Set of titles fetched so far. A title already in the
            set is skipped; a title is added after a successful download.
    """
    os.makedirs(output_dir, exist_ok=True)

    title = book['name']
    file_format = book['format'].lower()

    # The title doubles as the file name. A path separator inside it would make
    # os.path.join point into a sub-directory that does not exist, so replace it.
    safe_name = title.replace('/', '_').replace('\\', '_')
    output_file = os.path.join(output_dir, f"{safe_name}.{file_format}")

    if title in downloaded_titles:
        print(f"Skipping already downloaded book: {title}")
        return

    print(f"Downloading: {title}")

    try:
        download_successful = scraper.download(book['link'], output_path=output_file)
        if download_successful:
            print(f"Downloaded: {output_file}")
            if file_format != "pdf":
                output_pdf = os.path.join(output_dir, f"{safe_name}.pdf")
                convert_to_pdf(output_file, output_pdf)
            # Record the original title so the duplicate check keeps matching.
            downloaded_titles.add(title)
        else:
            print(f"Download failed for: {title}")
            logging.error(f"Download failed for {title}. Link: {book['link']}")
    except Exception as e:
        # Best effort: one failing book must not abort the remaining downloads.
        print(f"Error occurred while downloading {title}: {str(e)}")
        logging.error(f"Error occurred while downloading {title}: {str(e)}")


# Function to filter and prioritize books
def filter_books(books):
    """Keep a single entry per title, letting PDF editions win.

    Args:
        books: Iterable of mappings with 'name', 'year' and 'format' keys.

    Returns:
        List with one entry per distinct title, in order of first appearance.
    """

    def _supersedes(newcomer, kept):
        # A newcomer wins only as a PDF: either it compares greater on 'year',
        # or the entry currently kept is not a PDF.
        later = newcomer['year'] > kept['year']
        newcomer_is_pdf = newcomer['format'].lower() == 'pdf'
        if not newcomer_is_pdf:
            return False
        return later or kept['format'].lower() != 'pdf'

    by_title = {}
    for entry in books:
        name = entry['name']
        if name in by_title:
            if _supersedes(entry, by_title[name]):
                by_title[name] = entry
        else:
            by_title[name] = entry

    return list(by_title.values())

# Function to download books concurrently
def download_books_concurrently(books, output_dir, downloaded_titles=None, max_workers=5):
    """Run ``download_book`` for every book on a thread pool.

    Args:
        books: Iterable of book mappings; each is handed to ``download_book``
            and its 'name' key is read when reporting an error.
        output_dir: Directory passed through to ``download_book``.
        downloaded_titles: Set shared by all tasks to record finished titles.
            A fresh set is created when omitted, so the function no longer
            depends on a module-level variable being defined before the call.
        max_workers: Size of the thread pool.
    """
    if downloaded_titles is None:
        downloaded_titles = set()

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {
            executor.submit(download_book, book, output_dir, downloaded_titles): book
            for book in books
        }
        # Results are collected in submission order; each call blocks until
        # that particular task has finished.
        for future, book in futures.items():
            try:
                future.result()
            except Exception as e:
                print(f"Error downloading {book['name']}: {e}")

# Create the scraper used for both searching and downloading
scraper = Scraper()

# Run a fixed search
query = "Hands-On ML"
books = scraper.get_data(query)

# Collapse duplicate titles
filtered_books = filter_books(books)

# Print every remaining result: its index, then one "Label: value" line per
# field, with a blank line after the link
DETAIL_FIELDS = (
    ("Title", "name"),
    ("Author", "author"),
    ("Publisher", "publisher"),
    ("Year", "year"),
    ("Language", "lang"),
    ("Size", "size"),
    ("Format", "format"),
)
for index, book in enumerate(filtered_books):
    print(f"Index: {index}")
    for label, key in DETAIL_FIELDS:
        print(f"{label}: {book[key]}")
    print(f"Link: {book['link']}\n")

# Start with an empty record of finished titles, then fetch (and convert where
# needed) every filtered result in parallel
downloaded_titles = set()
download_books_concurrently(filtered_books, output_dir="downloads")


Index: 0
Title: Hands-On Machine Learning with ML.NET: Getting started with Microsoft ML.NET to implement popular machine learning algorithms in C# 1789801788, 9781789801781
Author: Jarred Capellman
Publisher: Packt Publishing
Year: 2020
Language: English
Size: 18 Mb
Format: pdf
Link: http://library.lol/main/C5921E76476A028795B6A69DA756AF86

Index: 1
Title: Hands-On Machine Learning with TensorFlow.js: A guide to building ML applications integrated with web technology using the TensorFlow.js library 9781838821739, 1838821732
Author: Kai Sasaki
Publisher: Packt Publishing
Year: 2019
Language: English
Size: 26 Mb
Format: pdf
Link: http://library.lol/main/63D7D3E32E891D744F6EEE7E53D38784

Index: 2
Title: Hands-On Artificial Intelligence on Amazon Web Services: Decrease the time to market for AI and ML applications with the power of AWS 9781789531473, 9781789534146
Author: Subhashini Tripuraneni; Charles Song
Publisher: Packt Publishing
Year: 2019
Language: English
Size: 18 Mb
Format: epub

Downloading: downloads/Hands-On ML Projects with OpenCV: Master Computer Vision and Machine Learning Using OpenCV and Python 9789388590877.epub:   0%|          | 720/5.71M [01:56<269:54:50, 6.16B/s]


Downloaded: downloads/Hands-On ML Projects with OpenCV: Master Computer Vision and Machine Learning Using OpenCV and Python 9789388590877.epub
Converting downloads/Hands-On ML Projects with OpenCV: Master Computer Vision and Machine Learning Using OpenCV and Python 9789388590877.epub to downloads/Hands-On ML Projects with OpenCV: Master Computer Vision and Machine Learning Using OpenCV and Python 9789388590877.pdf...




Error in conversion: qt.webenginecontext: 

GL Type: disabled
Surface Type: DefaultRenderableType
Surface Profile: CoreProfile
Surface Version: 4.1
QSG RHI Backend: OpenGL
Using Supported QSG Backend: yes
Using Software Dynamic GL: no
Using Multithreaded OpenGL: yes

Init Parameters:
  *  application-name calibre 
  *  browser-subprocess-path /Applications/calibre.app/Contents/Frameworks/QtWebEngineCore.framework/Helpers/QtWebEngineProcess.app/Contents/MacOS/QtWebEngineProcess 
  *  disable-features ConsolidatedMovementXY,InstalledApp,BackgroundFetch,WebOTP,WebPayments,WebUSB,PictureInPicture 
  *  disable-gpu  
  *  disable-speech-api  
  *  enable-features NetworkServiceInProcess,TracingServiceInProcess 
  *  enable-threaded-compositing  
  *  in-process-gpu  
  *  use-gl disabled 

Traceback (most recent call last):
  File "calibre/ebooks/conversion/plugins/epub_input.py", line 259, in convert
  File "calibre/utils/zipfile.py", line 774, in __init__
  File "calibre/utils/zipfile.py"

Downloading: downloads/Hands-On Artificial Intelligence on Amazon Web Services: Decrease the time to market for AI and ML applications with the power of AWS 9781789531473, 9781789534146.epub:   0%|          | 18.3k/18.3M [09:56<169:32:18, 31.4B/s]


Downloaded: downloads/Hands-On Artificial Intelligence on Amazon Web Services: Decrease the time to market for AI and ML applications with the power of AWS 9781789531473, 9781789534146.epub
Converting downloads/Hands-On Artificial Intelligence on Amazon Web Services: Decrease the time to market for AI and ML applications with the power of AWS 9781789531473, 9781789534146.epub to downloads/Hands-On Artificial Intelligence on Amazon Web Services: Decrease the time to market for AI and ML applications with the power of AWS 9781789531473, 9781789534146.pdf...
Converted downloads/Hands-On Artificial Intelligence on Amazon Web Services: Decrease the time to market for AI and ML applications with the power of AWS 9781789531473, 9781789534146.epub to PDF as downloads/Hands-On Artificial Intelligence on Amazon Web Services: Decrease the time to market for AI and ML applications with the power of AWS 9781789531473, 9781789534146.pdf


Multithreading (ThreadPoolExecutor with as_completed and interactive selection)

In [5]:
import os
import subprocess
from libgen.scraper import Scraper
from concurrent.futures import ThreadPoolExecutor, as_completed

# Function to convert files to PDF using Calibre
def convert_to_pdf(input_file, output_pdf,
                   converter="/Applications/calibre.app/Contents/MacOS/ebook-convert"):
    """Convert an e-book to PDF using Calibre's ``ebook-convert`` executable.

    Args:
        input_file: Path of the file to convert; the text after its last dot
            decides whether a conversion is attempted.
        output_pdf: Path the resulting PDF should be written to.
        converter: Path of the ``ebook-convert`` executable. Defaults to the
            location inside the macOS Calibre application bundle.

    Returns:
        ``output_pdf`` unchanged, whether or not a conversion happened.
        Errors are printed rather than raised.
    """
    print(f"Converting {input_file} to {output_pdf}...")
    extension = input_file.rsplit('.', 1)[-1].lower()

    if extension == 'pdf':
        print(f"No conversion required for {input_file}, already in PDF format.")
        return output_pdf

    if extension not in ('mobi', 'epub', 'djvu', 'docx'):
        # Previously reported as "already in PDF format", which was misleading.
        print(f"Skipping conversion of {input_file}: unsupported format '{extension}'.")
        return output_pdf

    # Run the converter directly with an argument list. Building a shell string
    # broke on titles containing quotes and allowed shell injection.
    command = [converter, input_file, output_pdf]
    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except OSError as exc:
        # The executable is missing or cannot be started.
        print(f"Error in conversion: {exc}")
        return output_pdf

    if result.returncode == 0:
        print(f"Converted {input_file} to PDF as {output_pdf}")
    else:
        print(f"Error in conversion: {result.stderr.strip()}")
    return output_pdf

# Function to download and convert the book if necessary
def download_book(book, output_dir, downloaded_titles):
    """Fetch one book into ``output_dir`` and convert it to PDF when required.

    The transfer goes through the module-level ``scraper`` object and the
    conversion through ``convert_to_pdf``.

    Args:
        book: Mapping providing the 'name', 'format' and 'link' keys read here.
        output_dir: Target directory; created when missing.
        downloaded_titles: Set of titles fetched so far. A title already in the
            set is skipped; a title is added after a successful download.
    """
    os.makedirs(output_dir, exist_ok=True)

    title = book['name']
    file_format = book['format'].lower()

    # Titles become file names. Replace path separators so a title such as
    # "TCP/IP" cannot redirect the output into a non-existent sub-directory.
    safe_name = title.replace('/', '_').replace('\\', '_')
    output_file = os.path.join(output_dir, f"{safe_name}.{file_format}")

    if title in downloaded_titles:
        print(f"Skipping already downloaded book: {title}")
        return

    print(f"Downloading: {title}")

    download_successful = scraper.download(book['link'], output_path=output_file)

    if not download_successful:
        print(f"Download failed for: {title}")
        return

    print(f"Downloaded: {output_file}")
    if file_format != "pdf":
        output_pdf = os.path.join(output_dir, f"{safe_name}.pdf")
        convert_to_pdf(output_file, output_pdf)
    # Record the original title so the duplicate check keeps matching.
    downloaded_titles.add(title)

# Function to filter and prioritize books
def filter_books(books):
    """De-duplicate search results by title with a preference for PDFs.

    For each title the first result is retained until a same-titled PDF shows
    up that either compares greater on 'year' or displaces a non-PDF entry.

    Args:
        books: Iterable of mappings with 'name', 'year' and 'format' keys.

    Returns:
        List of retained entries, ordered by the first appearance of each title.
    """
    picks = {}

    for book in books:
        title = book['name']
        try:
            held = picks[title]
        except KeyError:
            # First time this title is seen: keep it as the provisional pick.
            picks[title] = book
            continue

        more_recent = book['year'] > held['year']
        if book['format'].lower() != 'pdf':
            # Only a PDF may displace an existing pick.
            continue
        if more_recent or held['format'].lower() != 'pdf':
            picks[title] = book

    return list(picks.values())

# Function to download books using threading
def download_books_concurrently(selected_books, output_dir, max_workers=5):
    """Download the given books in parallel threads and report each outcome.

    Every book is handed to ``download_book`` together with one shared set of
    finished titles. ``download_book`` adds a title to that set only after a
    successful download, so membership is what decides whether a book is
    reported as successful.

    Args:
        selected_books: Iterable of book mappings; the 'name' key is read here.
        output_dir: Directory passed through to ``download_book``.
        max_workers: Size of the thread pool.
    """
    downloaded_titles = set()

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_book = {
            executor.submit(download_book, book, output_dir, downloaded_titles): book
            for book in selected_books
        }

        # Report on each task as soon as it finishes, in completion order.
        for future in as_completed(future_to_book):
            book = future_to_book[future]
            try:
                future.result()
            except Exception as exc:
                print(f"{book['name']} generated an exception: {exc}")
                continue

            # A task that returns without raising may still have failed to
            # download, so confirm against the shared set before claiming success.
            if book['name'] in downloaded_titles:
                print(f"Successfully downloaded: {book['name']}")
            else:
                print(f"Download did not complete for: {book['name']}")

# Initialize the scraper (download_book reads this module-level name)
scraper = Scraper()

# Search for books based on user input
query = input("Enter the book title or query: ")
books = scraper.get_data(query)

# Filter the books
filtered_books = filter_books(books)

# Display the filtered books with an index for user selection
for index, book in enumerate(filtered_books):
    print(f"{index + 1}. {book['name']} ({book['year']}) - {book['format']}")

# Ask user to select the books they want to download
if filtered_books:
    selection = input("Enter the numbers of the books to download (comma-separated, e.g., 1,3,5): ")
else:
    # Nothing to choose from, so do not prompt for a selection.
    print("No books matched the query.")
    selection = ""

# Parse the selection defensively: blank tokens (e.g. a trailing comma) are
# skipped and non-numeric tokens are reported instead of raising ValueError.
# Repeated numbers are collapsed so the same file is never downloaded by two
# threads at once.
selected_indices = []
for token in selection.split(','):
    token = token.strip()
    if not token:
        continue
    try:
        position = int(token) - 1
    except ValueError:
        print(f"Ignoring invalid selection: {token!r}")
        continue
    if position not in selected_indices:
        selected_indices.append(position)

# Validate the selected indices
selected_books = [filtered_books[i] for i in selected_indices if 0 <= i < len(filtered_books)]

# Download the selected books concurrently
download_books_concurrently(selected_books, output_dir="downloads")

print("All selected books have been processed.")


1. Hands-On Machine Learning with ML.NET: Getting started with Microsoft ML.NET to implement popular machine learning algorithms in C# 1789801788, 9781789801781 (2020) - pdf
2. Hands-On Machine Learning with TensorFlow.js: A guide to building ML applications integrated with web technology using the TensorFlow.js library 9781838821739, 1838821732 (2019) - pdf
3. Hands-On Artificial Intelligence on Amazon Web Services: Decrease the time to market for AI and ML applications with the power of AWS 9781789531473, 9781789534146 (2019) - epub
4. Hands-On ML Projects with OpenCV: Master Computer Vision and Machine Learning Using OpenCV and Python 9789388590877 (2023) - epub
5. GoLang for Machine Learning: A Hands-on-Guide to Building Efficient, Smart and Scalable ML Models with Go Programming (2024) - pdf
Downloading: Hands-On Machine Learning with ML.NET: Getting started with Microsoft ML.NET to implement popular machine learning algorithms in C# 1789801788, 9781789801781Downloading: Hands-On 

Downloading: downloads/Hands-On Machine Learning with ML.NET: Getting started with Microsoft ML.NET to implement popular machine learning algorithms in C# 1789801788, 9781789801781.pdf:   0%|          | 18.3k/18.3M [07:31<128:10:20, 41.6B/s]


Downloaded: downloads/Hands-On Machine Learning with ML.NET: Getting started with Microsoft ML.NET to implement popular machine learning algorithms in C# 1789801788, 9781789801781.pdf
Successfully downloaded: Hands-On Machine Learning with ML.NET: Getting started with Microsoft ML.NET to implement popular machine learning algorithms in C# 1789801788, 9781789801781


Downloading: downloads/Hands-On Artificial Intelligence on Amazon Web Services: Decrease the time to market for AI and ML applications with the power of AWS 9781789531473, 9781789534146.epub:   0%|          | 18.3k/18.3M [08:16<141:01:40, 37.7B/s]


Downloaded: downloads/Hands-On Artificial Intelligence on Amazon Web Services: Decrease the time to market for AI and ML applications with the power of AWS 9781789531473, 9781789534146.epub
Converting downloads/Hands-On Artificial Intelligence on Amazon Web Services: Decrease the time to market for AI and ML applications with the power of AWS 9781789531473, 9781789534146.epub to downloads/Hands-On Artificial Intelligence on Amazon Web Services: Decrease the time to market for AI and ML applications with the power of AWS 9781789531473, 9781789534146.pdf...
Converted downloads/Hands-On Artificial Intelligence on Amazon Web Services: Decrease the time to market for AI and ML applications with the power of AWS 9781789531473, 9781789534146.epub to PDF as downloads/Hands-On Artificial Intelligence on Amazon Web Services: Decrease the time to market for AI and ML applications with the power of AWS 9781789531473, 9781789534146.pdf
Successfully downloaded: Hands-On Artificial Intelligence on Am