In [None]:
import re
import os
import zipfile
import httpx
import pandas as pd
from bs4 import BeautifulSoup
from PIL import Image
from io import BytesIO
import pillow_avif
import tempfile
from tqdm.notebook import tqdm


# Series landing page and the lowest chapter number that should be downloaded.
START_URL = "https://thebeginningaftertheendmanga.com"
START_CHAPTER = 185

# Individual header values, kept as separate names so they can be tweaked
# one at a time.
header_user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
header_ua = '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"'
header_ua_platform = '"Linux"'
header_ua_mobile = "?0"

# Request headers sent with every HTTP request of this cell.
headers = {}
headers["sec-ch-ua"] = header_ua
headers["sec-ch-ua-mobile"] = header_ua_mobile
headers["sec-ch-ua-platform"] = header_ua_platform
headers["user-agent"] = header_user_agent

# Root directory for all downloads; created up front if it is missing.
dir_output = "downloads"
os.makedirs(dir_output, exist_ok=True)

def httpx_get_soup(url, headers) -> BeautifulSoup:
    """
    Fetch ``url`` and return the response body parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to request.
        headers: Mapping of HTTP headers sent with the GET request.

    Returns:
        The response text parsed with the ``html.parser`` backend.

    Raises:
        SystemExit: With exit code 1 when the response status is not 200.
            The failing URL and status code are printed first.
    """
    with httpx.Client() as client:
        response = client.get(url, headers=headers)
        if response.status_code != 200:
            print(f"Failed to retrieve the page: {url}\nStatus code: {response.status_code}")
            # `exit()` is a helper meant for the interactive shell; raising
            # SystemExit directly aborts reliably and keeps exit code 1.
            raise SystemExit(1)
        return BeautifulSoup(response.text, 'html.parser')

def get_episodes(soup) -> dict:
    """
    Collect chapter links from a parsed series page.

    Anchors are selected by their ``href`` (a ``...manga.com`` URL under
    ``/manga/`` or ``/uncategorized/`` containing ``-ch...-<digits>``). The
    chapter number is read from the part after the ``-ch...-`` marker; a
    second ``-<digits>`` group is treated as the decimal part, so
    ``chapter-185-5`` becomes ``185.5``.

    Args:
        soup: Parsed page offering ``find_all('a', href=<compiled regex>)``.

    Returns:
        Dict mapping chapter number (float) to chapter URL, ordered by
        ascending chapter number. Duplicate URLs are ignored and links whose
        chapter number cannot be parsed are skipped. An empty dict is
        returned when nothing matches.
    """
    link_pattern = re.compile(r'https://.+manga\.com/(manga|uncategorized)/.+-ch.*-\d+.*')
    # Digits (optionally "-digits") right after "-ch...-". The negative
    # lookahead stops the optional part from swallowing the start of a word
    # such as "-2nd".
    number_pattern = re.compile(r'-ch[a-zA-Z]*-(\d+(?:-\d+)?)(?!\w)')

    seen_urls = set()
    numbered_links = []
    for anchor in soup.find_all('a', href=link_pattern):
        chapter_url = anchor['href']
        if chapter_url in seen_urls:
            continue
        seen_urls.add(chapter_url)
        match = number_pattern.search(chapter_url)
        if match is None:
            # No usable chapter number: skip instead of producing a NaN key.
            continue
        chapter_no = float(match.group(1).replace('-', '.'))
        numbered_links.append((chapter_no, chapter_url))

    # Stable sort: for equal chapter numbers the link seen last wins in dict().
    numbered_links.sort(key=lambda pair: pair[0])
    return dict(numbered_links)

def download_cbz(url, episode_no, path_cbz, img_format="avif"):
    """
    Download all content images of one chapter page and pack them into a CBZ.

    The page is fetched with the module-level ``httpx_get_soup`` and
    ``headers``. Images are taken from every ``<img>`` inside a
    ``div.kt-inside-inner-col``, re-encoded, numbered in page order and
    zipped.

    Args:
        url: Chapter page URL; also sent as ``referer`` for image requests.
        episode_no: Chapter number, used only in progress and error messages.
        path_cbz: Destination path of the archive.
        img_format: ``"avif"`` stores AVIF; any other value stores JPEG and
            is used verbatim as the file extension.

    Raises:
        SystemExit: With exit code 1 when an image request does not return
            status 200. Nothing is written to ``path_cbz`` in that case.
    """
    # Step 1: Fetch the page content as soup
    soup = httpx_get_soup(url, headers)
    # Step 2: Find all image links of content images; when present,
    # `data-src` takes precedence over `src`.
    image_urls = [
        img['data-src'] if 'data-src' in img.attrs else img['src']
        for div in soup.find_all('div', class_='kt-inside-inner-col')
        for img in div.find_all('img')
    ]
    # Step 3: Download images
    # Per-call copy so the shared module-level `headers` is not mutated.
    request_headers = {**headers, 'referer': url}
    save_format = "AVIF" if img_format == "avif" else "JPEG"
    # At least three digits; wider when a chapter has 1000+ images so the
    # lexicographic sort below still matches page order.
    width = max(3, len(str(len(image_urls))))
    with tempfile.TemporaryDirectory() as temp_dir:
        with httpx.Client() as client:
            progress = tqdm(image_urls, desc=f"Downloading Episode: {episode_no}", unit="image", leave=False)
            for n, image_url in enumerate(progress, start=1):
                response = client.get(image_url, headers=request_headers)
                if response.status_code != 200:
                    print(f"Failed to download image of episode {episode_no}: {response.status_code}")
                    # `exit()` is an interactive-shell helper; raise directly.
                    raise SystemExit(1)
                image = Image.open(BytesIO(response.content))
                if save_format == "JPEG" and image.mode not in ("RGB", "L"):
                    # JPEG cannot store alpha or palette images.
                    image = image.convert("RGB")
                file_name = f"{str(n).zfill(width)}.{img_format}"
                image.save(os.path.join(temp_dir, file_name), format=save_format)
        # Step 4: Create new CBZ with downloaded images. Write to a side file
        # and rename, so an interrupted run never leaves a truncated archive
        # at `path_cbz`.
        path_partial = f"{path_cbz}.part"
        with zipfile.ZipFile(path_partial, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
            for filename in sorted(os.listdir(temp_dir)):
                zipf.write(os.path.join(temp_dir, filename), arcname=filename)
        os.replace(path_partial, path_cbz)


# Step 1: Fetch the series page content as soup and extract the series title
soup = httpx_get_soup(START_URL, headers)
# --> DEBUG
# with open("soup.txt", "w", encoding="utf-8") as f:
#     f.write(str(soup))
# --> DEBUG
site_title_elem = soup.find(class_="site-title")
series_title = site_title_elem.text.strip() if site_title_elem else None
if not series_title:
    print("Failed to find the series title.")
    # `exit()` is an interactive-shell helper; raising SystemExit stops the
    # run here with the same exit code.
    raise SystemExit(1)

# Step 2: Get the chapter URLs from the series page, ordered by chapter number
episodes = get_episodes(soup)
episodes = dict(sorted(episodes.items(), key=lambda item: item[0]))

# Step 3: Download each episode as a CBZ file
dir_output = os.path.join(dir_output, series_title)
os.makedirs(dir_output, exist_ok=True)
for key in tqdm(episodes.keys(), desc="Processing Episodes", unit="url"):
    episode_no = key
    url = episodes[key]
    if episode_no != episode_no:
        # NaN (the only value unequal to itself): no chapter number could be
        # parsed for this link, so there is nothing sensible to name it.
        continue
    if isinstance(episode_no, float) and episode_no.is_integer():
        # Whole-numbered chapters are named "185" rather than "185.0".
        episode_no = int(episode_no)
    if episode_no < START_CHAPTER:
        continue
    path_cbz = os.path.join(dir_output, f"{series_title}, Episode {episode_no}.cbz")
    if os.path.exists(path_cbz):
        # Already downloaded in an earlier run.
        continue
    download_cbz(url, episode_no, path_cbz, img_format="avif")


AttributeError: Can only use .str accessor with string values!

In [None]:
import re
import os
import zipfile
import httpx
from bs4 import BeautifulSoup
from PIL import Image
from io import BytesIO
import pillow_avif
import tempfile
from tqdm.notebook import tqdm

# Episode list page of the series to download.
url_series = "https://www.webtoons.com/en/action/omniscient-reader/list?title_no=2154"

# Root directory for all downloads; created if it does not exist yet.
dir_output = "downloads"
os.makedirs(dir_output, exist_ok=True)

# Header values, kept under their own names so each can be adjusted separately.
header_ua = '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"'
header_ua_mobile = "?0"
header_ua_platform = '"Linux"'
header_user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"

# Request headers sent with every HTTP request of this cell.
headers = dict(
    zip(
        ("sec-ch-ua", "sec-ch-ua-mobile", "sec-ch-ua-platform", "user-agent"),
        (header_ua, header_ua_mobile, header_ua_platform, header_user_agent),
    )
)

def httpx_get_soup(url, headers) -> BeautifulSoup:
    """
    Request ``url`` and parse the returned HTML.

    Args:
        url: Address of the page to request.
        headers: Mapping of HTTP headers sent with the GET request.

    Returns:
        A BeautifulSoup tree built from the response text with the
        ``html.parser`` backend.

    Raises:
        SystemExit: With exit code 1 when the status code is not 200; the
            URL and status code are printed beforehand.
    """
    with httpx.Client() as client:
        response = client.get(url, headers=headers)
        if response.status_code == 200:
            return BeautifulSoup(response.text, 'html.parser')
        print(f"Failed to retrieve the page: {url}\nStatus code: {response.status_code}")
        # Raise instead of calling `exit()`, which is intended for the
        # interactive shell only.
        raise SystemExit(1)

def get_episodes(soup, regex) -> dict:
    """
    Extract episode links from the BeautifulSoup object.

    Args:
        soup: Parsed page offering ``find_all('a', href=<compiled regex>)``.
        regex: Pattern an anchor's ``href`` must match to be considered.

    Returns:
        Dict mapping the integer taken from the link's ``episode_no=<digits>``
        part to the link itself, in document order. When several links carry
        the same number the last one wins. Links that match ``regex`` but
        have no ``episode_no`` are skipped.
    """
    episode_pattern = re.compile(r'episode_no=(\d+)')
    dat = {}
    for anchor in soup.find_all('a', href=re.compile(regex)):
        link = anchor['href']
        match = episode_pattern.search(link)
        if match is None:
            # `regex` does not have to require episode_no, so a matching link
            # may lack it; there is nothing to key such a link on.
            continue
        dat[int(match.group(1))] = link
    return dat

def download_cbz(url, path_cbz, img_format="avif"):
    """
    Download all content images of one episode page and pack them into a CBZ.

    The page is fetched with the module-level ``httpx_get_soup`` and
    ``headers``. Content images are the ``<img class="_images">`` elements
    whose ``data-url`` points at a jpg/jpeg/png on
    ``webtoon-phinf.pstatic.net``.

    Args:
        url: Episode viewer URL; also sent as ``referer`` for image requests.
        path_cbz: Destination path of the archive.
        img_format: ``"avif"`` stores AVIF; any other value stores JPEG and
            is used verbatim as the file extension.

    Raises:
        SystemExit: With exit code 1 when an image request does not return
            status 200. Nothing is written to ``path_cbz`` in that case.
    """
    # Step 1: Fetch the page content as soup
    soup = httpx_get_soup(url, headers)
    # The episode number only labels the progress bar, so a URL without it
    # must not abort the download.
    episode_match = re.search(r'episode_no=(\d+)', url)
    episode_no = episode_match.group(1) if episode_match else "?"
    # Step 2: Find all image links of content images
    regex = r'https://webtoon-phinf\.pstatic\.net.*\.(jpg|jpeg|png).*'
    links = soup.find_all('img', attrs={'data-url': re.compile(regex), 'class': '_images'})
    # Step 3: Download images
    # Per-call copy so the shared module-level `headers` is not mutated.
    request_headers = {**headers, 'referer': url}
    save_format = "AVIF" if img_format == "avif" else "JPEG"
    # At least three digits; wider when an episode has 1000+ images so the
    # lexicographic sort below still matches page order.
    width = max(3, len(str(len(links))))
    with tempfile.TemporaryDirectory() as temp_dir:
        with httpx.Client() as client:
            progress = tqdm(links, desc=f"Downloading Episode: {episode_no}", unit="image", leave=False)
            for n, img in enumerate(progress, start=1):
                response = client.get(img['data-url'], headers=request_headers)
                if response.status_code != 200:
                    print(f"Failed to download image: {response.status_code}")
                    # `exit()` is an interactive-shell helper; raise directly.
                    raise SystemExit(1)
                image = Image.open(BytesIO(response.content))
                if save_format == "JPEG" and image.mode not in ("RGB", "L"):
                    # JPEG cannot store alpha or palette images.
                    image = image.convert("RGB")
                file_name = f"{str(n).zfill(width)}.{img_format}"
                image.save(os.path.join(temp_dir, file_name), format=save_format)

        # Step 4: Create new CBZ with downloaded images. Write to a side file
        # and rename, so an interrupted run never leaves a truncated archive
        # at `path_cbz`.
        path_partial = f"{path_cbz}.part"
        with zipfile.ZipFile(path_partial, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
            for filename in sorted(os.listdir(temp_dir)):
                zipf.write(os.path.join(temp_dir, filename), arcname=filename)
        os.replace(path_partial, path_cbz)

# Step 1: Fetch the series page content as soup and extract the title and the
# episode links shown on it
soup = httpx_get_soup(url_series, headers)
title_elem = soup.find('title')
series_title = title_elem.text.strip() if title_elem else None
if not series_title:
    print("Failed to find the series title.")
    raise SystemExit(1)
episodes = get_episodes(soup, r'.+&episode_no=\d+')
if not episodes:
    # min() below would otherwise fail with an unhelpful ValueError.
    print("Failed to find any episode links on the series page.")
    raise SystemExit(1)
first_episode_no = min(episodes.keys())

# Step 2: Get the list of episode URLs from the first episode's viewer page
url = episodes[first_episode_no]
soup = httpx_get_soup(url, headers)
episodes = get_episodes(soup, r'https://www\.webtoons\.com.+viewer\?title_no=\d+')
episodes = dict(sorted(episodes.items(), key=lambda item: item[0]))

# Step 3: Download each episode as a CBZ file
dir_output = os.path.join(dir_output, series_title)
os.makedirs(dir_output, exist_ok=True)
for key in tqdm(episodes.keys(), desc="Processing Episodes", unit="url"):
    episode_no = key
    url = episodes[key]
    path_cbz = os.path.join(dir_output, f"{series_title}, Episode {episode_no}.cbz")
    if os.path.exists(path_cbz):
        # Already downloaded in an earlier run.
        continue
    download_cbz(url, path_cbz, img_format="avif")

Processing Episodes:   0%|          | 0/252 [00:00<?, ?url/s]

Downloading Episode: 39:   0%|          | 0/151 [00:00<?, ?image/s]

Downloading Episode: 40:   0%|          | 0/145 [00:00<?, ?image/s]

Downloading Episode: 41:   0%|          | 0/127 [00:00<?, ?image/s]

Downloading Episode: 42:   0%|          | 0/143 [00:00<?, ?image/s]

Downloading Episode: 43:   0%|          | 0/170 [00:00<?, ?image/s]

Downloading Episode: 44:   0%|          | 0/153 [00:00<?, ?image/s]

Downloading Episode: 45:   0%|          | 0/150 [00:00<?, ?image/s]

Downloading Episode: 46:   0%|          | 0/141 [00:00<?, ?image/s]

Downloading Episode: 47:   0%|          | 0/131 [00:00<?, ?image/s]

Downloading Episode: 48:   0%|          | 0/156 [00:00<?, ?image/s]

Downloading Episode: 49:   0%|          | 0/146 [00:00<?, ?image/s]

Downloading Episode: 50:   0%|          | 0/134 [00:00<?, ?image/s]

Downloading Episode: 51:   0%|          | 0/148 [00:00<?, ?image/s]

Downloading Episode: 52:   0%|          | 0/143 [00:00<?, ?image/s]

Downloading Episode: 53:   0%|          | 0/144 [00:00<?, ?image/s]

Downloading Episode: 54:   0%|          | 0/162 [00:00<?, ?image/s]

Downloading Episode: 55:   0%|          | 0/163 [00:00<?, ?image/s]

Downloading Episode: 56:   0%|          | 0/153 [00:00<?, ?image/s]

Downloading Episode: 57:   0%|          | 0/141 [00:00<?, ?image/s]

Downloading Episode: 58:   0%|          | 0/129 [00:00<?, ?image/s]

Downloading Episode: 59:   0%|          | 0/153 [00:00<?, ?image/s]

Downloading Episode: 60:   0%|          | 0/141 [00:00<?, ?image/s]

Downloading Episode: 61:   0%|          | 0/151 [00:00<?, ?image/s]

Downloading Episode: 62:   0%|          | 0/146 [00:00<?, ?image/s]

Downloading Episode: 63:   0%|          | 0/172 [00:00<?, ?image/s]

Downloading Episode: 64:   0%|          | 0/162 [00:00<?, ?image/s]

Downloading Episode: 65:   0%|          | 0/173 [00:00<?, ?image/s]

Downloading Episode: 66:   0%|          | 0/165 [00:00<?, ?image/s]

Downloading Episode: 67:   0%|          | 0/121 [00:00<?, ?image/s]

Downloading Episode: 68:   0%|          | 0/180 [00:00<?, ?image/s]

Downloading Episode: 69:   0%|          | 0/131 [00:00<?, ?image/s]

Downloading Episode: 70:   0%|          | 0/174 [00:00<?, ?image/s]

Downloading Episode: 71:   0%|          | 0/89 [00:00<?, ?image/s]

Downloading Episode: 72:   0%|          | 0/130 [00:00<?, ?image/s]

Downloading Episode: 73:   0%|          | 0/161 [00:00<?, ?image/s]

Downloading Episode: 74:   0%|          | 0/135 [00:00<?, ?image/s]

Downloading Episode: 75:   0%|          | 0/147 [00:00<?, ?image/s]

Downloading Episode: 76:   0%|          | 0/158 [00:00<?, ?image/s]

Downloading Episode: 77:   0%|          | 0/135 [00:00<?, ?image/s]

Downloading Episode: 78:   0%|          | 0/144 [00:00<?, ?image/s]

Downloading Episode: 79:   0%|          | 0/166 [00:00<?, ?image/s]

Downloading Episode: 80:   0%|          | 0/153 [00:00<?, ?image/s]

Downloading Episode: 81:   0%|          | 0/165 [00:00<?, ?image/s]

Downloading Episode: 82:   0%|          | 0/152 [00:00<?, ?image/s]

Downloading Episode: 83:   0%|          | 0/157 [00:00<?, ?image/s]

Downloading Episode: 84:   0%|          | 0/149 [00:00<?, ?image/s]

Downloading Episode: 85:   0%|          | 0/157 [00:00<?, ?image/s]

Downloading Episode: 86:   0%|          | 0/157 [00:00<?, ?image/s]

Downloading Episode: 87:   0%|          | 0/151 [00:00<?, ?image/s]

Downloading Episode: 88:   0%|          | 0/172 [00:00<?, ?image/s]

Downloading Episode: 89:   0%|          | 0/143 [00:00<?, ?image/s]

Downloading Episode: 90:   0%|          | 0/121 [00:00<?, ?image/s]

Downloading Episode: 91:   0%|          | 0/95 [00:00<?, ?image/s]

Downloading Episode: 92:   0%|          | 0/148 [00:00<?, ?image/s]

Downloading Episode: 93:   0%|          | 0/168 [00:00<?, ?image/s]

Downloading Episode: 94:   0%|          | 0/122 [00:00<?, ?image/s]

Downloading Episode: 95:   0%|          | 0/145 [00:00<?, ?image/s]

Downloading Episode: 96:   0%|          | 0/130 [00:00<?, ?image/s]

Downloading Episode: 97:   0%|          | 0/137 [00:00<?, ?image/s]

Downloading Episode: 98:   0%|          | 0/125 [00:00<?, ?image/s]

Downloading Episode: 99:   0%|          | 0/112 [00:00<?, ?image/s]

Downloading Episode: 100:   0%|          | 0/127 [00:00<?, ?image/s]

Downloading Episode: 101:   0%|          | 0/131 [00:00<?, ?image/s]

Downloading Episode: 102:   0%|          | 0/191 [00:00<?, ?image/s]

Downloading Episode: 103:   0%|          | 0/100 [00:00<?, ?image/s]

Downloading Episode: 104:   0%|          | 0/108 [00:00<?, ?image/s]

Downloading Episode: 105:   0%|          | 0/119 [00:00<?, ?image/s]

Downloading Episode: 106:   0%|          | 0/101 [00:00<?, ?image/s]

Downloading Episode: 107:   0%|          | 0/90 [00:00<?, ?image/s]

Downloading Episode: 108:   0%|          | 0/88 [00:00<?, ?image/s]

Downloading Episode: 109:   0%|          | 0/98 [00:00<?, ?image/s]

Downloading Episode: 110:   0%|          | 0/87 [00:00<?, ?image/s]

Downloading Episode: 111:   0%|          | 0/87 [00:00<?, ?image/s]

Downloading Episode: 112:   0%|          | 0/110 [00:00<?, ?image/s]

Downloading Episode: 113:   0%|          | 0/102 [00:00<?, ?image/s]

Downloading Episode: 114:   0%|          | 0/97 [00:00<?, ?image/s]

Downloading Episode: 115:   0%|          | 0/85 [00:00<?, ?image/s]

Downloading Episode: 116:   0%|          | 0/124 [00:00<?, ?image/s]

Downloading Episode: 117:   0%|          | 0/137 [00:00<?, ?image/s]

Downloading Episode: 118:   0%|          | 0/90 [00:00<?, ?image/s]

Downloading Episode: 119:   0%|          | 0/115 [00:00<?, ?image/s]

Downloading Episode: 120:   0%|          | 0/92 [00:00<?, ?image/s]

Downloading Episode: 121:   0%|          | 0/99 [00:00<?, ?image/s]

Downloading Episode: 122:   0%|          | 0/106 [00:00<?, ?image/s]

Downloading Episode: 123:   0%|          | 0/89 [00:00<?, ?image/s]

Downloading Episode: 124:   0%|          | 0/111 [00:00<?, ?image/s]

Downloading Episode: 125:   0%|          | 0/116 [00:00<?, ?image/s]

Downloading Episode: 126:   0%|          | 0/110 [00:00<?, ?image/s]

Downloading Episode: 127:   0%|          | 0/115 [00:00<?, ?image/s]

Downloading Episode: 128:   0%|          | 0/128 [00:00<?, ?image/s]

Downloading Episode: 129:   0%|          | 0/118 [00:00<?, ?image/s]

Downloading Episode: 130:   0%|          | 0/153 [00:00<?, ?image/s]

Downloading Episode: 131:   0%|          | 0/121 [00:00<?, ?image/s]

Downloading Episode: 132:   0%|          | 0/137 [00:00<?, ?image/s]

Downloading Episode: 133:   0%|          | 0/121 [00:00<?, ?image/s]

Downloading Episode: 134:   0%|          | 0/116 [00:00<?, ?image/s]

Downloading Episode: 135:   0%|          | 0/147 [00:00<?, ?image/s]

Downloading Episode: 136:   0%|          | 0/141 [00:00<?, ?image/s]

Downloading Episode: 137:   0%|          | 0/119 [00:00<?, ?image/s]

Downloading Episode: 138:   0%|          | 0/95 [00:00<?, ?image/s]

Downloading Episode: 139:   0%|          | 0/121 [00:00<?, ?image/s]

Downloading Episode: 140:   0%|          | 0/101 [00:00<?, ?image/s]

Downloading Episode: 141:   0%|          | 0/118 [00:00<?, ?image/s]

Downloading Episode: 142:   0%|          | 0/116 [00:00<?, ?image/s]

Downloading Episode: 143:   0%|          | 0/120 [00:00<?, ?image/s]

Downloading Episode: 144:   0%|          | 0/95 [00:00<?, ?image/s]

Downloading Episode: 145:   0%|          | 0/100 [00:00<?, ?image/s]

Downloading Episode: 146:   0%|          | 0/123 [00:00<?, ?image/s]

Downloading Episode: 147:   0%|          | 0/137 [00:00<?, ?image/s]

Downloading Episode: 148:   0%|          | 0/81 [00:00<?, ?image/s]

Downloading Episode: 149:   0%|          | 0/97 [00:00<?, ?image/s]

Downloading Episode: 150:   0%|          | 0/103 [00:00<?, ?image/s]

Downloading Episode: 151:   0%|          | 0/94 [00:00<?, ?image/s]

Downloading Episode: 152:   0%|          | 0/92 [00:00<?, ?image/s]

Downloading Episode: 153:   0%|          | 0/97 [00:00<?, ?image/s]

Downloading Episode: 154:   0%|          | 0/104 [00:00<?, ?image/s]

Downloading Episode: 155:   0%|          | 0/112 [00:00<?, ?image/s]

Downloading Episode: 156:   0%|          | 0/105 [00:00<?, ?image/s]

Downloading Episode: 157:   0%|          | 0/116 [00:00<?, ?image/s]

Downloading Episode: 158:   0%|          | 0/112 [00:00<?, ?image/s]

Downloading Episode: 159:   0%|          | 0/85 [00:00<?, ?image/s]

Downloading Episode: 160:   0%|          | 0/95 [00:00<?, ?image/s]

Downloading Episode: 161:   0%|          | 0/88 [00:00<?, ?image/s]

Downloading Episode: 162:   0%|          | 0/114 [00:00<?, ?image/s]

Downloading Episode: 163:   0%|          | 0/132 [00:00<?, ?image/s]

Downloading Episode: 164:   0%|          | 0/105 [00:00<?, ?image/s]

Downloading Episode: 165:   0%|          | 0/111 [00:00<?, ?image/s]

Downloading Episode: 166:   0%|          | 0/111 [00:00<?, ?image/s]

Downloading Episode: 167:   0%|          | 0/101 [00:00<?, ?image/s]

Downloading Episode: 168:   0%|          | 0/90 [00:00<?, ?image/s]

Downloading Episode: 169:   0%|          | 0/104 [00:00<?, ?image/s]

Downloading Episode: 170:   0%|          | 0/113 [00:00<?, ?image/s]

Downloading Episode: 171:   0%|          | 0/91 [00:00<?, ?image/s]

Downloading Episode: 172:   0%|          | 0/109 [00:00<?, ?image/s]

Downloading Episode: 173:   0%|          | 0/119 [00:00<?, ?image/s]

Downloading Episode: 174:   0%|          | 0/124 [00:00<?, ?image/s]

Downloading Episode: 175:   0%|          | 0/117 [00:00<?, ?image/s]

Downloading Episode: 176:   0%|          | 0/120 [00:00<?, ?image/s]

Downloading Episode: 177:   0%|          | 0/124 [00:00<?, ?image/s]

Downloading Episode: 178:   0%|          | 0/140 [00:00<?, ?image/s]

Downloading Episode: 179:   0%|          | 0/125 [00:00<?, ?image/s]

Downloading Episode: 180:   0%|          | 0/145 [00:00<?, ?image/s]

Downloading Episode: 181:   0%|          | 0/113 [00:00<?, ?image/s]

Downloading Episode: 182:   0%|          | 0/99 [00:00<?, ?image/s]

Downloading Episode: 183:   0%|          | 0/130 [00:00<?, ?image/s]

Downloading Episode: 184:   0%|          | 0/103 [00:00<?, ?image/s]

Downloading Episode: 185:   0%|          | 0/104 [00:00<?, ?image/s]

Downloading Episode: 186:   0%|          | 0/102 [00:00<?, ?image/s]

Downloading Episode: 187:   0%|          | 0/85 [00:00<?, ?image/s]

Downloading Episode: 188:   0%|          | 0/124 [00:00<?, ?image/s]

Downloading Episode: 189:   0%|          | 0/100 [00:00<?, ?image/s]

Downloading Episode: 190:   0%|          | 0/106 [00:00<?, ?image/s]

Downloading Episode: 191:   0%|          | 0/113 [00:00<?, ?image/s]

Downloading Episode: 192:   0%|          | 0/123 [00:00<?, ?image/s]

Downloading Episode: 193:   0%|          | 0/102 [00:00<?, ?image/s]

Downloading Episode: 194:   0%|          | 0/105 [00:00<?, ?image/s]

Downloading Episode: 195:   0%|          | 0/152 [00:00<?, ?image/s]

Downloading Episode: 196:   0%|          | 0/98 [00:00<?, ?image/s]

Downloading Episode: 197:   0%|          | 0/98 [00:00<?, ?image/s]

Downloading Episode: 198:   0%|          | 0/90 [00:00<?, ?image/s]

Downloading Episode: 199:   0%|          | 0/116 [00:00<?, ?image/s]

Downloading Episode: 200:   0%|          | 0/100 [00:00<?, ?image/s]

Downloading Episode: 201:   0%|          | 0/105 [00:00<?, ?image/s]

Downloading Episode: 202:   0%|          | 0/119 [00:00<?, ?image/s]

Downloading Episode: 203:   0%|          | 0/113 [00:00<?, ?image/s]

Downloading Episode: 204:   0%|          | 0/114 [00:00<?, ?image/s]

Downloading Episode: 205:   0%|          | 0/110 [00:00<?, ?image/s]

Downloading Episode: 206:   0%|          | 0/95 [00:00<?, ?image/s]

Downloading Episode: 207:   0%|          | 0/81 [00:00<?, ?image/s]

Downloading Episode: 208:   0%|          | 0/110 [00:00<?, ?image/s]

Downloading Episode: 209:   0%|          | 0/154 [00:00<?, ?image/s]

Downloading Episode: 210:   0%|          | 0/97 [00:00<?, ?image/s]

Downloading Episode: 211:   0%|          | 0/89 [00:00<?, ?image/s]

Downloading Episode: 212:   0%|          | 0/93 [00:00<?, ?image/s]

Downloading Episode: 213:   0%|          | 0/92 [00:00<?, ?image/s]

Downloading Episode: 214:   0%|          | 0/86 [00:00<?, ?image/s]

Downloading Episode: 215:   0%|          | 0/97 [00:00<?, ?image/s]

Downloading Episode: 216:   0%|          | 0/99 [00:00<?, ?image/s]

Downloading Episode: 217:   0%|          | 0/70 [00:00<?, ?image/s]

Downloading Episode: 218:   0%|          | 0/104 [00:00<?, ?image/s]

Downloading Episode: 219:   0%|          | 0/90 [00:00<?, ?image/s]

Downloading Episode: 220:   0%|          | 0/83 [00:00<?, ?image/s]

Downloading Episode: 221:   0%|          | 0/138 [00:00<?, ?image/s]

Downloading Episode: 222:   0%|          | 0/85 [00:00<?, ?image/s]

Downloading Episode: 223:   0%|          | 0/91 [00:00<?, ?image/s]

Downloading Episode: 224:   0%|          | 0/125 [00:00<?, ?image/s]

Downloading Episode: 225:   0%|          | 0/118 [00:00<?, ?image/s]

Downloading Episode: 226:   0%|          | 0/94 [00:00<?, ?image/s]

Downloading Episode: 227:   0%|          | 0/119 [00:00<?, ?image/s]

Downloading Episode: 228:   0%|          | 0/96 [00:00<?, ?image/s]

Downloading Episode: 229:   0%|          | 0/104 [00:00<?, ?image/s]

Downloading Episode: 230:   0%|          | 0/103 [00:00<?, ?image/s]

Downloading Episode: 231:   0%|          | 0/108 [00:00<?, ?image/s]

Downloading Episode: 232:   0%|          | 0/126 [00:00<?, ?image/s]

Downloading Episode: 233:   0%|          | 0/92 [00:00<?, ?image/s]

Downloading Episode: 234:   0%|          | 0/92 [00:00<?, ?image/s]

Downloading Episode: 235:   0%|          | 0/101 [00:00<?, ?image/s]

Downloading Episode: 236:   0%|          | 0/103 [00:00<?, ?image/s]

Downloading Episode: 237:   0%|          | 0/101 [00:00<?, ?image/s]

Downloading Episode: 238:   0%|          | 0/106 [00:00<?, ?image/s]

Downloading Episode: 239:   0%|          | 0/104 [00:00<?, ?image/s]

Downloading Episode: 240:   0%|          | 0/108 [00:00<?, ?image/s]

Downloading Episode: 241:   0%|          | 0/89 [00:00<?, ?image/s]

Downloading Episode: 242:   0%|          | 0/88 [00:00<?, ?image/s]

Downloading Episode: 243:   0%|          | 0/103 [00:00<?, ?image/s]

Downloading Episode: 244:   0%|          | 0/119 [00:00<?, ?image/s]

Downloading Episode: 245:   0%|          | 0/115 [00:00<?, ?image/s]

Downloading Episode: 246:   0%|          | 0/115 [00:00<?, ?image/s]

Downloading Episode: 247:   0%|          | 0/105 [00:00<?, ?image/s]

Downloading Episode: 248:   0%|          | 0/123 [00:00<?, ?image/s]

Downloading Episode: 249:   0%|          | 0/114 [00:00<?, ?image/s]

Downloading Episode: 250:   0%|          | 0/118 [00:00<?, ?image/s]

Downloading Episode: 251:   0%|          | 0/88 [00:00<?, ?image/s]

Downloading Episode: 252:   0%|          | 0/102 [00:00<?, ?image/s]

Development

https://www.webtoons.com/en/action/omniscient-reader/list?title_no=2154

https://www.webtoons.com/en/action/omniscient-reader/episode-0-prologue/viewer?title_no=2154&episode_no=1

In [None]:
import re
import os
import zipfile
import httpx
from bs4 import BeautifulSoup
from PIL import Image
from io import BytesIO

# Reference URLs used while developing this cell:
# https://www.webtoons.com/en/action/omniscient-reader/episode-1-prologue/viewer?title_no=2154&episode_no=1
# https://www.webtoons.com/en/action/omniscient-reader/list?title_no=2154

# Series list page; `url` is the address the first request below goes to.
url_series = "https://www.webtoons.com/en/action/omniscient-reader/list?title_no=2154"
url = url_series

# Episode whose viewer page is opened to collect the full episode list.
episode_no = 1

# Header values under their own names.
header_user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
header_ua_platform = '"Linux"'
header_ua_mobile = "?0"
header_ua = '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"'

# Request headers sent with every HTTP request of this cell.
headers = {}
headers["sec-ch-ua"] = header_ua
headers["sec-ch-ua-mobile"] = header_ua_mobile
headers["sec-ch-ua-platform"] = header_ua_platform
headers["user-agent"] = header_user_agent

# Earlier variant of the headers with a fixed referer, kept for reference:
# headers = {
#     "referer": "https://www.webtoons.com/en/action/omniscient-reader/episode-0-prologue/viewer?title_no=2154&episode_no=1",
#     "sec-ch-ua": '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
#     "sec-ch-ua-mobile": "?0",
#     "sec-ch-ua-platform": '"Linux"',
#     "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
# }

# Earlier approach that built the viewer URL by hand and used requests,
# kept for reference:
# title_no = re.search(r'title_no=(\d+)', url_series).group(1)
# url_chapter = url_series.split('/list')[0] + '/x/viewer?title_no=' + title_no + '&episode_no=' + str(episode_no)
# response = requests.get(url_chapter)
# response.raise_for_status()  # Raise an error for bad HTTP responses
# soup = BeautifulSoup(response.text, 'html.parser')

def httpx_get_soup(url, headers) -> BeautifulSoup:
    """
    Fetches the content of a URL and returns a BeautifulSoup object.

    Args:
        url: Address of the page to request.
        headers: Mapping of HTTP headers sent with the GET request.

    Returns:
        The response text parsed with the ``html.parser`` backend.

    Raises:
        SystemExit: With exit code 1 when the response status is not 200,
            after printing the URL and status code.
    """
    with httpx.Client() as client:
        response = client.get(url, headers=headers)
        succeeded = response.status_code == 200
        if not succeeded:
            print(f"Failed to retrieve the page: {url}\nStatus code: {response.status_code}")
            # `exit()` is meant for the interactive shell; raising SystemExit
            # aborts here with the same exit code.
            raise SystemExit(1)
        return BeautifulSoup(response.text, 'html.parser')

def get_episodes(soup, regex) -> dict:
    """
    Extracts episode links from the BeautifulSoup object.

    Args:
        soup: Parsed page offering ``find_all('a', href=<compiled regex>)``.
        regex: Pattern an anchor's ``href`` must match to be considered.

    Returns:
        Dict of ``{episode number (int): link}`` in document order, where the
        number comes from the link's ``episode_no=<digits>`` part. A later
        link with the same number replaces an earlier one; links without
        ``episode_no`` are left out.
    """
    href_pattern = re.compile(regex)
    dat = {}
    for link in (a['href'] for a in soup.find_all('a', href=href_pattern)):
        found = re.search(r'episode_no=(\d+)', link)
        if found:
            # Only links that carry an episode number can be keyed.
            dat[int(found.group(1))] = link
    return dat

# Fetch the series list page and collect the episode links shown on it
soup = httpx_get_soup(url, headers)
episodes = get_episodes(soup, r'.+&episode_no=\d+')
if not episodes:
    # min() below would otherwise fail with an unhelpful ValueError.
    print("Failed to find any episode links on the series page.")
    raise SystemExit(1)
first_episode_no = min(episodes.keys())

# Open the viewer page of the lowest listed episode (same approach as the
# finished cell above) instead of a hard-coded number that may not be listed,
# then read the episode links from that page.
url = episodes[first_episode_no]
soup = httpx_get_soup(url, headers)
episodes = get_episodes(soup, r'https://www\.webtoons\.com.+viewer\?title_no=\d+')
episodes = dict(sorted(episodes.items(), key=lambda item: item[0]))

for episode_no, link in episodes.items():
    print(f"Episode {episode_no}: {link}")

# ="https://www.webtoons.com/en/action/omniscient-reader/episode-189/viewer?title_no=2154
# print(soup.prettify())

Episode 1: https://www.webtoons.com/en/action/omniscient-reader/episode-0-prologue/viewer?title_no=2154&episode_no=1
Episode 2: https://www.webtoons.com/en/action/omniscient-reader/episode-1/viewer?title_no=2154&episode_no=2
Episode 3: https://www.webtoons.com/en/action/omniscient-reader/episode-2/viewer?title_no=2154&episode_no=3
Episode 4: https://www.webtoons.com/en/action/omniscient-reader/episode-3/viewer?title_no=2154&episode_no=4
Episode 5: https://www.webtoons.com/en/action/omniscient-reader/episode-4/viewer?title_no=2154&episode_no=5
Episode 6: https://www.webtoons.com/en/action/omniscient-reader/episode-5/viewer?title_no=2154&episode_no=6
Episode 7: https://www.webtoons.com/en/action/omniscient-reader/episode-6/viewer?title_no=2154&episode_no=7
Episode 8: https://www.webtoons.com/en/action/omniscient-reader/episode-7/viewer?title_no=2154&episode_no=8
Episode 9: https://www.webtoons.com/en/action/omniscient-reader/episode-8/viewer?title_no=2154&episode_no=9
Episode 10: https:/

In [None]:
import pillow_avif
import tempfile
# from tqdm import tqdm
from tqdm.notebook import tqdm

# Single episode used to try out the download function.
url_chapter = "https://www.webtoons.com/en/action/omniscient-reader/episode-0-prologue/viewer?title_no=2154&episode_no=1"
url = url_chapter

# Output folder (created if missing) and the archive written into it.
folder = "data"
os.makedirs(folder, exist_ok=True)
path_cbz = os.path.join(folder, "omniscient_reader.cbz")

def download_cbz(url, path_cbz, img_format="avif"):
    """
    Download all content images of one episode page and pack them into a CBZ.

    The page is fetched with the module-level ``httpx_get_soup`` and
    ``headers``. Content images are the ``<img class="_images">`` elements
    whose ``data-url`` points at a jpg/jpeg/png on
    ``webtoon-phinf.pstatic.net``.

    Args:
        url: Episode viewer URL; also sent as ``referer`` for image requests.
        path_cbz: Destination path of the archive.
        img_format: ``"avif"`` stores AVIF; any other value stores JPEG and
            is used verbatim as the file extension.

    Raises:
        SystemExit: With exit code 1 when an image request does not return
            status 200. Nothing is written to ``path_cbz`` in that case.
    """
    # Step 1: Fetch the page content as soup
    soup = httpx_get_soup(url, headers)
    # Step 2: Find all image links of content images
    regex = r'https://webtoon-phinf\.pstatic\.net.*\.(jpg|jpeg|png).*'
    links = soup.find_all('img', attrs={'data-url': re.compile(regex), 'class': '_images'})
    # Step 3: Download images
    # Per-call copy so the shared module-level `headers` is not mutated.
    request_headers = {**headers, 'referer': url}
    save_format = "AVIF" if img_format == "avif" else "JPEG"
    # At least three digits; wider when an episode has 1000+ images so the
    # lexicographic sort below still matches page order.
    width = max(3, len(str(len(links))))
    with tempfile.TemporaryDirectory() as temp_dir:
        with httpx.Client() as client:
            for n, img in enumerate(tqdm(links, desc="Downloading images", unit="image"), start=1):
                response = client.get(img['data-url'], headers=request_headers)
                if response.status_code != 200:
                    print(f"Failed to download image: {response.status_code}")
                    # `exit()` is an interactive-shell helper; raise directly.
                    raise SystemExit(1)
                image = Image.open(BytesIO(response.content))
                if save_format == "JPEG" and image.mode not in ("RGB", "L"):
                    # JPEG cannot store alpha or palette images.
                    image = image.convert("RGB")
                file_name = f"{str(n).zfill(width)}.{img_format}"
                image.save(os.path.join(temp_dir, file_name), format=save_format)
        # Step 4: Create new CBZ with downloaded images. Write to a side file
        # and rename, so an interrupted run never leaves a truncated archive
        # at `path_cbz`.
        path_partial = f"{path_cbz}.part"
        with zipfile.ZipFile(path_partial, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
            for filename in sorted(os.listdir(temp_dir)):
                zipf.write(os.path.join(temp_dir, filename), arcname=filename)
        os.replace(path_partial, path_cbz)

# Try the downloader on the single test episode configured above.
download_cbz(url=url, path_cbz=path_cbz, img_format="avif")

In [None]:
# Episode viewer pages used to try out the batch download.
urls = []
urls.append("https://www.webtoons.com/en/action/omniscient-reader/episode-0-prologue/viewer?title_no=2154&episode_no=1")
urls.append("https://www.webtoons.com/en/action/omniscient-reader/episode-0-prologue/viewer?title_no=2154&episode_no=2")

def download_cbz_all(urls, folder, img_format="avif", prefix="omniscient_reader"):
    """
    Downloads multiple CBZ files from the given URLs and saves them to the specified folder.

    Each archive is named ``<prefix>_<episode_no>.cbz``, where the number is
    taken from the ``episode_no=<digits>`` part of the URL.

    Args:
        urls: Iterable of episode viewer URLs.
        folder: Directory the archives are written to; created if missing.
        img_format: Passed through to ``download_cbz``.
        prefix: File-name prefix of the archives. The default reproduces the
            previously hard-coded names.
    """
    os.makedirs(folder, exist_ok=True)
    episode_pattern = re.compile(r'episode_no=(\d+)')
    for url in tqdm(urls, desc="Processing URLs", unit="url"):
        match = episode_pattern.search(url)
        if match is None:
            # Without an episode number there is no file name to write to;
            # report it and carry on with the remaining URLs.
            print(f"Skipping URL without episode_no: {url}")
            continue
        path_cbz = os.path.join(folder, f"{prefix}_{match.group(1)}.cbz")
        download_cbz(url, path_cbz, img_format)

# Download every test episode into the output folder.
download_cbz_all(urls=urls, folder=folder, img_format="avif")