In [1]:
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse

# Function to extract the manga name from the URL
def extract_manga_name(url):
    """Derive a folder-friendly series name from a chapter URL.

    The URL path is expected to look like ``/<series-slug>-chapter-<number>/``.
    Everything from the chapter marker onwards is dropped, and any ``/`` left
    in the remainder is replaced with ``_`` so the result is a single path
    component.

    Args:
        url: Chapter page URL.

    Returns:
        The series slug, e.g. ``"isekai-ryouridou"`` for
        ``"https://rawkuma.com/isekai-ryouridou-chapter-1/"``.
    """
    import re  # local import keeps this block self-contained

    path = urlparse(url).path.strip("/")

    # Cut at the LAST "-chapter-<digit>" marker so that a series whose own
    # title contains "-chapter" is not truncated at that first occurrence.
    chapter_markers = list(re.finditer(r"-chapter-\d", path))
    if chapter_markers:
        series_slug = path[: chapter_markers[-1].start()]
    else:
        # No numbered marker present: cut at the first "-chapter" instead.
        series_slug = path.split("-chapter")[0]

    return series_slug.replace("/", "_")  # Ensure valid folder name

# Function to sanitize filenames
def sanitize_filename(filename):
    """Return ``filename`` reduced to characters that are safe in a file name.

    Alphanumeric characters (as judged by ``str.isalnum``) plus space, dot and
    underscore are kept; every other character is dropped. Trailing whitespace
    is removed from the result.
    """
    allowed_punctuation = {" ", ".", "_"}
    kept_chars = [
        ch for ch in filename
        if ch.isalnum() or ch in allowed_punctuation
    ]
    cleaned = "".join(kept_chars)
    return cleaned.rstrip()

# Function to download images in order from the "readerarea" div
def download_manga_images(save_dir, url, timeout=30):
    """Download the page images of one chapter into ``save_dir/<manga name>/``.

    The chapter page is fetched and parsed, and every ``<img>`` inside the
    ``<div id="readerarea">`` element whose URL path ends in .png, .jpg, .jpeg
    or .webp is saved as ``NNN<ext>``. ``NNN`` is the 1-based position of the
    tag inside the reader area, so tags that are skipped leave gaps in the
    numbering but page order is preserved.

    Args:
        save_dir: Base directory; a sub-folder named after the manga (see
            ``extract_manga_name``) is created inside it if missing.
        url: Chapter page URL.
        timeout: Seconds to wait for each HTTP request before giving up.
            Pass ``None`` to wait indefinitely.

    Returns:
        None. Progress and per-image failures are printed.

    Raises:
        requests.RequestException: If the chapter page itself cannot be
            fetched (connection problem, timeout, or HTTP error status).
            Failures on individual images are printed and skipped instead.
    """
    allowed_extensions = (".png", ".jpg", ".jpeg", ".webp")

    manga_name = extract_manga_name(url)
    save_dir = os.path.join(save_dir, manga_name)  # Folder for the manga
    os.makedirs(save_dir, exist_ok=True)

    # One session for the page and all of its images so connections are reused.
    with requests.Session() as session:
        response = session.get(url, timeout=timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")

        # Find the div with id="readerarea"
        readerarea = soup.find("div", id="readerarea")
        if not readerarea:
            print("No 'readerarea' div found on the page.")
            return

        # Only images inside "readerarea", in document order.
        img_tags = readerarea.find_all("img")

        for index, img in enumerate(img_tags, start=1):
            img_src = (img.get("src") or "").strip()
            if not img_src:
                continue

            img_url = urljoin(url, img_src)

            # Take the extension from the URL *path* so that a query string
            # or fragment (e.g. "page.jpg?v=2") does not hide it.
            img_extension = os.path.splitext(urlparse(img_url).path)[1]
            if img_extension.lower() not in allowed_extensions:
                continue

            # Ensure ordered naming (e.g., 001.png, 002.jpg)
            img_name = f"{index:03d}{img_extension}"
            img_path = os.path.join(save_dir, img_name)

            try:
                img_response = session.get(img_url, timeout=timeout)
                img_response.raise_for_status()
                with open(img_path, "wb") as img_file:
                    img_file.write(img_response.content)
                print(f"Downloaded: {img_path}")
            except (requests.RequestException, OSError) as e:
                # Best effort: report network/disk failures and move on.
                print(f"Failed to download {img_url}: {e}")


In [2]:
# Example usage
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
save_dir = "Z:/github/Autotranslate_Manga/data/manga"
list_manga_urls = [
    "https://rawkuma.com/ore-no-haitoku-meshi-wo-onedari-sezu-ni-irarenai-otonari-no-top-idol-sama-chapter-1/",
    "https://rawkuma.com/kuchi-ni-dashitemo-yoroshii-desuka-chapter-1/",
    "https://rawkuma.com/madougu-no-shuuriya-hajimemashita-chapter-1/",
    "https://rawkuma.com/oji-kun-to-mei-chan-chapter-1-2/",
    "https://rawkuma.com/hensachi-30-gal-to-gariben-inkya-na-ore-gakunen-top-no-ore-ga-gal-wo-yuutousei-ni-kaete-mita-chapter-1/",
    "https://rawkuma.com/chiisai-boku-no-haru-chapter-1/",
    "https://rawkuma.com/manga-teki-tenkai-de-kare-wo-oto-shitai-chapter-1/",
    "https://rawkuma.com/gakuen-1-no-bishoujo-wa-xx-eshi-no-ore-ni-horete-iru-chapter-1/",
    "https://rawkuma.com/isekai-ryouridou-chapter-1/",
    "https://rawkuma.com/dororo-and-hyakkimaru-chapter-1/"
]

# Download each chapter independently: a chapter page that cannot be fetched
# (HTTP error status or connection problem) is reported and skipped instead of
# aborting the rest of the batch.
failed_manga_urls = []
for manga_url in list_manga_urls:
    try:
        download_manga_images(save_dir, manga_url)
    except requests.RequestException as e:
        failed_manga_urls.append(manga_url)
        print(f"Skipped {manga_url}: {e}")

if failed_manga_urls:
    print(f"{len(failed_manga_urls)} of {len(list_manga_urls)} chapter pages could not be fetched.")

Downloaded: Z:/github/Autotranslate_Manga/data/manga\ore-no-haitoku-meshi-wo-onedari-sezu-ni-irarenai-otonari-no-top-idol-sama\001.jpg
Downloaded: Z:/github/Autotranslate_Manga/data/manga\ore-no-haitoku-meshi-wo-onedari-sezu-ni-irarenai-otonari-no-top-idol-sama\002.jpg
Downloaded: Z:/github/Autotranslate_Manga/data/manga\ore-no-haitoku-meshi-wo-onedari-sezu-ni-irarenai-otonari-no-top-idol-sama\003.jpg
Downloaded: Z:/github/Autotranslate_Manga/data/manga\ore-no-haitoku-meshi-wo-onedari-sezu-ni-irarenai-otonari-no-top-idol-sama\004.jpg
Downloaded: Z:/github/Autotranslate_Manga/data/manga\ore-no-haitoku-meshi-wo-onedari-sezu-ni-irarenai-otonari-no-top-idol-sama\005.jpg
Downloaded: Z:/github/Autotranslate_Manga/data/manga\ore-no-haitoku-meshi-wo-onedari-sezu-ni-irarenai-otonari-no-top-idol-sama\006.jpg
Downloaded: Z:/github/Autotranslate_Manga/data/manga\ore-no-haitoku-meshi-wo-onedari-sezu-ni-irarenai-otonari-no-top-idol-sama\007.jpg
Downloaded: Z:/github/Autotranslate_Manga/data/manga\or

In [3]:
# Manhwa chapters go to their own base folder.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
save_dir = "Z:/github/Autotranslate_Manga/data/manhwa"

list_manhwa_urls = [
    "https://rawkuma.com/toshokan-no-daimajutsushi-chapter-48-3/",
    "https://rawkuma.com/the-strongest-florist-chapter-73/",
    "https://rawkuma.com/the-little-princess-and-her-monster-prince-chapter-9-1/",
    "https://rawkuma.com/monster-duke-daughter-chapter-156/",
    "https://rawkuma.com/villain-to-kill-chapter-155/",
    "https://rawkuma.com/becoming-the-sacheon-dangs-swordsmaster-rank-young-lord-chapter-25/",
    "https://rawkuma.com/my-high-school-bully-chapter-219/",
    "https://rawkuma.com/beloved-by-the-male-leads-nephew-chapter-75/",
    "https://rawkuma.com/the-apothecary-prince-chapter-35/",
    "https://rawkuma.com/my-very-own-tower-strategy-guide-chapter-21/"
]

# Download each chapter independently: a chapter page that cannot be fetched
# (HTTP error status or connection problem) is reported and skipped instead of
# aborting the rest of the batch.
failed_manhwa_urls = []
for manhwa_url in list_manhwa_urls:
    try:
        download_manga_images(save_dir, manhwa_url)
    except requests.RequestException as e:
        failed_manhwa_urls.append(manhwa_url)
        print(f"Skipped {manhwa_url}: {e}")

if failed_manhwa_urls:
    print(f"{len(failed_manhwa_urls)} of {len(list_manhwa_urls)} chapter pages could not be fetched.")

Downloaded: Z:/github/Autotranslate_Manga/data/manhwa\toshokan-no-daimajutsushi\001.jpg
Downloaded: Z:/github/Autotranslate_Manga/data/manhwa\toshokan-no-daimajutsushi\002.jpg
Downloaded: Z:/github/Autotranslate_Manga/data/manhwa\toshokan-no-daimajutsushi\003.jpg
Downloaded: Z:/github/Autotranslate_Manga/data/manhwa\toshokan-no-daimajutsushi\004.jpg
Downloaded: Z:/github/Autotranslate_Manga/data/manhwa\toshokan-no-daimajutsushi\005.jpg
Downloaded: Z:/github/Autotranslate_Manga/data/manhwa\toshokan-no-daimajutsushi\006.jpg
Downloaded: Z:/github/Autotranslate_Manga/data/manhwa\toshokan-no-daimajutsushi\007.jpg
Downloaded: Z:/github/Autotranslate_Manga/data/manhwa\toshokan-no-daimajutsushi\008.jpg
Downloaded: Z:/github/Autotranslate_Manga/data/manhwa\toshokan-no-daimajutsushi\009.jpg
Downloaded: Z:/github/Autotranslate_Manga/data/manhwa\toshokan-no-daimajutsushi\010.jpg
Downloaded: Z:/github/Autotranslate_Manga/data/manhwa\toshokan-no-daimajutsushi\011.jpg
Downloaded: Z:/github/Autotransl