In [1]:
import requests
from bs4 import BeautifulSoup
import os
import subprocess
import re

# Page URL template; "{}" is replaced with the module number.
BASE_URL = "https://vtucircle.com/bcs601-module-{}-textbook/"
# Headers sent with every HTTP request issued through `requests`.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (compatible; vtucircle-downloader/1.0)"
}
# Folder the PDFs are written to. Set the VTU_DOWNLOAD_DIR environment variable
# to redirect downloads; when it is unset or empty the original location is used.
DOWNLOAD_DIR = os.environ.get("VTU_DOWNLOAD_DIR") or "/home/rgsuhas/Desktop/sem-6"
# Created up front so later writes cannot fail on a missing folder.
os.makedirs(DOWNLOAD_DIR, exist_ok=True)

def get_download_url(module_num, timeout=30):
    """
    Fetch a module's textbook page and return its first Google Drive link.

    Args:
        module_num: Value substituted into BASE_URL to build the page URL.
        timeout: Seconds to wait on the server, passed to requests.get().

    Returns:
        The href of the first <a> tag whose href contains 'drive.google.com',
        or None when the page has no such link.

    Raises:
        requests.HTTPError: the page responded with an error status.
        requests.RequestException: connection failure or timeout.
    """
    url = BASE_URL.format(module_num)
    # Without a timeout, requests waits indefinitely on a stalled server.
    resp = requests.get(url, headers=HEADERS, timeout=timeout)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # Anchors are scanned in document order; the first match wins.
    for a in soup.find_all("a", href=True):
        href = a["href"]
        if "drive.google.com" in href:
            return href
    return None

def extract_drive_id(url):
    """
    Pull the file ID out of a Google Drive link.

    Two URL shapes are tried in order: a '/file/d/<id>' path segment, then an
    'id=<id>' query parameter. The first shape that matches supplies the ID.

    Raises:
        ValueError: if the URL fits neither shape.
    """
    id_patterns = (
        r"/file/d/([a-zA-Z0-9_-]+)",
        r"[?&]id=([a-zA-Z0-9_-]+)",
    )
    for pattern in id_patterns:
        found = re.search(pattern, url)
        if found is not None:
            return found.group(1)
    raise ValueError("Unable to extract Google Drive file ID from URL.")

def download_file(url, dest_path, timeout=30):
    """
    Save the file behind `url` to `dest_path`.

    URLs containing 'drive.google.com' are handed to the `gdown` command-line
    tool; every other URL is streamed to disk with requests.

    Args:
        url: Download link.
        dest_path: Path of the file to write.
        timeout: Seconds requests waits on the server for a direct download.
            Not applied to the gdown subprocess.

    Returns:
        True when the download completed, False when the Google Drive branch
        failed (that failure is printed rather than raised).

    Raises:
        requests.RequestException: the direct download failed (error status,
            connection problem, or timeout).
    """
    if "drive.google.com" in url:
        print(" → Downloading from Google Drive using gdown…")
        try:
            file_id = extract_drive_id(url)
            # NOTE(review): newer gdown releases reportedly mark --id as deprecated — confirm before upgrading.
            subprocess.run(["gdown", "--id", file_id, "-O", dest_path], check=True)
            print(f"Downloaded (Google Drive): {dest_path}")
            return True
        except Exception as e:
            # Best-effort: report the problem and let the caller continue.
            print(" ❌ gdown failed:", e)
            return False
    else:
        print(" → Downloading direct file…")
        written = 0
        # The context manager releases the streamed connection even on error.
        with requests.get(url, headers=HEADERS, stream=True, timeout=timeout) as resp:
            resp.raise_for_status()
            with open(dest_path, "wb") as f:
                for chunk in resp.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
                        # Count what was really written; the content-length
                        # header can be absent.
                        written += len(chunk)
        print(f"Downloaded: {dest_path} ({written} bytes)")
        return True

if __name__ == "__main__":
    # range(1, 6) walks module numbers 1 through 5.
    for n in range(1, 6):
        print(f"\n[Module {n}] Fetching page…")
        try:
            dl_url = get_download_url(n)
            if dl_url:
                print(" → Found download link:", dl_url)
                fname = os.path.join(DOWNLOAD_DIR, f"BCS601_Module_{n}.pdf")
                download_file(dl_url, fname)
            else:
                print(" ⚠️ No download link found for module", n)
        except requests.RequestException as e:
            # One unreachable page or failed download must not stop the
            # remaining modules from being attempted.
            print(f" ❌ Module {n} failed:", e)



[Module 1] Fetching page…
 ⚠️ No download link found for module 1

[Module 2] Fetching page…
 ⚠️ No download link found for module 2

[Module 3] Fetching page…
 ⚠️ No download link found for module 3

[Module 4] Fetching page…
 ⚠️ No download link found for module 4

[Module 5] Fetching page…
 ⚠️ No download link found for module 5


In [4]:
pip install gdown


Defaulting to user installation because normal site-packages is not writeable
Collecting gdown
  Downloading gdown-5.2.0-py3-none-any.whl.metadata (5.8 kB)
Collecting PySocks!=1.5.7,>=1.5.6 (from requests[socks]->gdown)
  Downloading PySocks-1.7.1-py3-none-any.whl.metadata (13 kB)
Downloading gdown-5.2.0-py3-none-any.whl (18 kB)
Downloading PySocks-1.7.1-py3-none-any.whl (16 kB)
Installing collected packages: PySocks, gdown
Successfully installed PySocks-1.7.1 gdown-5.2.0
Note: you may need to restart the kernel to use updated packages.


In [2]:
import requests
from bs4 import BeautifulSoup
import os
import subprocess
import re

# Page URL template; "{}" is replaced with the module number.
BASE_URL = "https://vtucircle.com/bcs601-module-{}-textbook/"
# Headers sent with every HTTP request issued through `requests`.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (compatible; vtucircle-downloader/1.0)"
}
# Folder the PDFs are written to. Set the VTU_DOWNLOAD_DIR environment variable
# to redirect downloads; when it is unset or empty the original location is used.
DOWNLOAD_DIR = os.environ.get("VTU_DOWNLOAD_DIR") or "/home/rgsuhas/Desktop/sem-6"
# Created up front so later writes cannot fail on a missing folder.
os.makedirs(DOWNLOAD_DIR, exist_ok=True)

def get_download_url(module_num, timeout=30):
    """
    Fetch a module's textbook page and return its first Google Drive link.

    Args:
        module_num: Value substituted into BASE_URL to build the page URL.
        timeout: Seconds to wait on the server, passed to requests.get().

    Returns:
        The href of the first <a> tag whose href contains 'drive.google.com',
        or None when the page has no such link.

    Raises:
        requests.HTTPError: the page responded with an error status.
        requests.RequestException: connection failure or timeout.
    """
    url = BASE_URL.format(module_num)
    # Without a timeout, requests waits indefinitely on a stalled server.
    resp = requests.get(url, headers=HEADERS, timeout=timeout)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # Anchors are scanned in document order; the first match wins.
    for a in soup.find_all("a", href=True):
        href = a["href"]
        if "drive.google.com" in href:
            return href
    return None

def extract_drive_id(url):
    """
    Pull the file ID out of a Google Drive link.

    Two URL shapes are tried in order: a '/file/d/<id>' path segment, then an
    'id=<id>' query parameter. The first shape that matches supplies the ID.

    Raises:
        ValueError: if the URL fits neither shape.
    """
    id_patterns = (
        r"/file/d/([a-zA-Z0-9_-]+)",
        r"[?&]id=([a-zA-Z0-9_-]+)",
    )
    for pattern in id_patterns:
        found = re.search(pattern, url)
        if found is not None:
            return found.group(1)
    raise ValueError("Unable to extract Google Drive file ID from URL.")

def download_file(url, dest_path, timeout=30):
    """
    Save the file behind `url` to `dest_path`.

    URLs containing 'drive.google.com' are handed to the `gdown` command-line
    tool; every other URL is streamed to disk with requests.

    Args:
        url: Download link.
        dest_path: Path of the file to write.
        timeout: Seconds requests waits on the server for a direct download.
            Not applied to the gdown subprocess.

    Returns:
        True when the download completed, False when the Google Drive branch
        failed (that failure is printed rather than raised).

    Raises:
        requests.RequestException: the direct download failed (error status,
            connection problem, or timeout).
    """
    if "drive.google.com" in url:
        print(" → Downloading from Google Drive using gdown…")
        try:
            file_id = extract_drive_id(url)
            # NOTE(review): newer gdown releases reportedly mark --id as deprecated — confirm before upgrading.
            subprocess.run(["gdown", "--id", file_id, "-O", dest_path], check=True)
            print(f"Downloaded (Google Drive): {dest_path}")
            return True
        except Exception as e:
            # Best-effort: report the problem and let the caller continue.
            print(" ❌ gdown failed:", e)
            return False
    else:
        print(" → Downloading direct file…")
        written = 0
        # The context manager releases the streamed connection even on error.
        with requests.get(url, headers=HEADERS, stream=True, timeout=timeout) as resp:
            resp.raise_for_status()
            with open(dest_path, "wb") as f:
                for chunk in resp.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
                        # Count what was really written; the content-length
                        # header can be absent.
                        written += len(chunk)
        print(f"Downloaded: {dest_path} ({written} bytes)")
        return True

if __name__ == "__main__":
    # range(1, 6) walks module numbers 1 through 5.
    for n in range(1, 6):
        print(f"\n[Module {n}] Fetching page…")
        try:
            dl_url = get_download_url(n)
            if dl_url:
                print(" → Found download link:", dl_url)
                fname = os.path.join(DOWNLOAD_DIR, f"BCS601_Module_{n}.pdf")
                download_file(dl_url, fname)
            else:
                print(" ⚠️ No download link found for module", n)
        except requests.RequestException as e:
            # One unreachable page or failed download must not stop the
            # remaining modules from being attempted.
            print(f" ❌ Module {n} failed:", e)



[Module 1] Fetching page…
 ⚠️ No download link found for module 1

[Module 2] Fetching page…
 ⚠️ No download link found for module 2

[Module 3] Fetching page…
 ⚠️ No download link found for module 3

[Module 4] Fetching page…
 ⚠️ No download link found for module 4

[Module 5] Fetching page…
 ⚠️ No download link found for module 5
