# In [1]:
import os
from urllib.parse import unquote, urljoin, urlsplit

import requests
from bs4 import BeautifulSoup

# Step 1: Set the URL of the website containing the PDF and image files
url = "https://ieltsfever.org/academic-reading/"

# File extensions (lower case) that identify the files to download.
PDF_EXTENSIONS = ('.pdf',)
IMAGE_EXTENSIONS = ('.jpg', '.png')

# Local file names longer than this are shortened to exactly this length.
MAX_FILE_NAME_LENGTH = 100

# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would block the script forever.
REQUEST_TIMEOUT = 30


def link_path(link):
    """Return the decoded path component of *link* (no query or fragment).

    Falls back to the raw link when it cannot be parsed as a URL, so that a
    single malformed attribute on the page does not abort the whole run.
    """
    try:
        return unquote(urlsplit(link).path)
    except ValueError:
        return link


def has_extension(link, extensions):
    """Return True if *link* points at a file with one of *extensions*.

    The comparison is case-insensitive and is made against both the raw link
    and its path component, so ``report.PDF`` and ``report.pdf?ver=2`` are
    recognised in addition to plain ``report.pdf``.
    """
    suffixes = tuple(extension.lower() for extension in extensions)
    candidates = (link.lower(), link_path(link).lower())
    return any(candidate.endswith(suffixes) for candidate in candidates)


def local_file_name(link):
    """Return the file name under which the target of *link* is saved.

    The name is the last segment of the decoded URL path, so query strings,
    fragments and percent-escapes do not end up in the file name. Names
    longer than MAX_FILE_NAME_LENGTH keep their first 50 and last 47
    characters (the tail preserves the extension) joined by '...'.
    """
    name = os.path.basename(link_path(link))
    if name in ('', '.', '..'):
        # Nothing usable in the path (e.g. the link is only a query string).
        name = 'download'
    if len(name) > MAX_FILE_NAME_LENGTH:
        name = name[:50] + '...' + name[-47:]
    return name


def find_links(soup, tag_name, attribute, extensions):
    """Return the unique *attribute* values of *tag_name* tags that match.

    Tags without the attribute are ignored. Page order is preserved and
    repeated links are returned once so no file is downloaded twice.
    """
    values = (tag.get(attribute) for tag in soup.find_all(tag_name))
    matching = [value for value in values if value and has_extension(value, extensions)]
    return list(dict.fromkeys(matching))


def download_file(link, directory):
    """Download *link* (resolved against ``url``) into *directory*.

    Returns the path of the saved file, or None when the request failed.
    A failed download is reported and skipped so that one broken link does
    not stop the remaining downloads, and an HTTP error page is never
    written to disk as if it were the requested file.
    """
    target = urljoin(url, link)
    try:
        response = requests.get(target, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as error:
        print(f'Skipping {target}: {error}')
        return None
    path = os.path.join(directory, local_file_name(link))
    with open(path, 'wb') as f:
        f.write(response.content)
    return path


# Plain statements (rather than a main() function) so that the names below
# stay available as globals when this runs as a notebook cell.
if __name__ == "__main__":
    # Step 2: Parse the HTML content of the website and extract the URLs of the PDF and image files
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all the links that end with .pdf or .jpg/.png
    pdf_links = find_links(soup, 'a', 'href', PDF_EXTENSIONS)
    image_links = find_links(soup, 'img', 'src', IMAGE_EXTENSIONS)

    # Step 3: Download the PDF and image files and save them to a directory on your local machine
    os.makedirs('pdf_documents', exist_ok=True)
    os.makedirs('image_documents', exist_ok=True)

    for link in pdf_links:
        download_file(link, 'pdf_documents')

    for link in image_links:
        download_file(link, 'image_documents')
