In [None]:
import requests
from bs4 import BeautifulSoup
import os
import urllib

def download_images(url, output_folder, max_images=100, timeout=10):
    """Download the images referenced by ``<img>`` tags on a single page.

    The page at ``url`` is fetched and parsed, and the ``src`` of at most
    ``max_images`` ``<img>`` tags is downloaded into ``output_folder`` as
    ``image_<n>.jpg``, where ``<n>`` is the 1-based position of the tag among
    the tags examined. The ``.jpg`` extension is used regardless of the
    actual image format.

    Args:
        url: Address of the page to scrape.
        output_folder: Directory that receives the files; created if missing.
        max_images: Maximum number of ``<img>`` tags to examine.
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        requests.RequestException: If the page itself cannot be fetched.
            Failures on individual images are reported and skipped.
    """
    # Imported locally: the enclosing cell only does ``import urllib``, which
    # does not guarantee that the ``urllib.parse`` submodule is loaded.
    from urllib.parse import urljoin, urlparse

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Fetch the HTML content of the webpage
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find and download images
    image_tags = soup.find_all('img', limit=max_images)
    for i, img_tag in enumerate(image_tags):
        img_url = img_tag.get('src')
        if not img_url:
            continue

        # ``src`` is frequently relative ("/logo.png") or protocol-relative
        # ("//cdn.example.com/x.png"); resolve it against the page address.
        try:
            img_url = urljoin(url, img_url)
            scheme = urlparse(img_url).scheme
        except ValueError:
            # Malformed URL in the markup; nothing sensible to fetch.
            continue
        if scheme not in ('http', 'https'):
            # e.g. inline ``data:`` URIs, which requests cannot fetch.
            continue

        try:
            img_response = requests.get(img_url, timeout=timeout)
        except requests.RequestException as exc:
            # One broken image should not abort the remaining downloads.
            print(f'Skipped: {img_url} ({exc})')
            continue
        if not img_response.ok:
            # Do not save an HTTP error page under an image filename.
            print(f'Skipped: {img_url} (HTTP {img_response.status_code})')
            continue

        img_filename = os.path.join(output_folder, f'image_{i + 1}.jpg')
        with open(img_filename, 'wb') as img_file:
            img_file.write(img_response.content)
        print(f'Downloaded: {img_filename}')

# Demo: save the images found on a single page into a local folder.
output_directory = 'downloaded_images'
url_to_scrape = 'https://example.com'

download_images(url=url_to_scrape, output_folder=output_directory)

In [None]:
import requests
from bs4 import BeautifulSoup
import os
from urllib.parse import urljoin

def download_images_recursive(url, output_folder, max_depth=2, max_images=100):
    """Crawl from ``url`` and download the images found along the way.

    Starting at ``url`` (depth 0), every page is parsed for ``<img>`` tags,
    whose ``src`` is downloaded, and for ``<a href>`` links, which are
    followed depth-first until ``max_depth`` is exceeded. Each page is
    visited at most once.

    Files are written to ``output_folder`` as ``image_<n>.jpg`` where ``<n>``
    is a counter shared by the whole crawl, so images from different pages
    never overwrite each other. The ``.jpg`` extension is used regardless of
    the actual image format.

    Args:
        url: Address of the page the crawl starts from.
        output_folder: Directory that receives the files; created if missing.
        max_depth: Largest link distance from ``url`` that is still crawled.
        max_images: Maximum number of images saved over the whole crawl; the
            crawl stops as soon as this many files have been written.

    Raises:
        requests.RequestException: If the starting page cannot be fetched.
            Failures on linked pages or individual images are reported and
            skipped.
    """
    # Imported locally: the enclosing cell only imports ``urljoin``.
    from urllib.parse import urldefrag, urlparse

    request_timeout = 10  # seconds; without it a stalled server hangs the crawl
    os.makedirs(output_folder, exist_ok=True)
    visited_urls = set()
    saved_count = 0

    def resolve(base_url, reference):
        """Return ``reference`` as an absolute http(s) URL, or None."""
        try:
            # Dropping the fragment keeps "page#a" and "page#b" from being
            # treated as two different pages.
            absolute, _fragment = urldefrag(urljoin(base_url, reference))
            scheme = urlparse(absolute).scheme
        except ValueError:
            return None
        # Filters out mailto:, javascript:, data: and similar targets.
        return absolute if scheme in ('http', 'https') else None

    def save_image(img_url):
        """Download one image; report and skip it on any failure."""
        nonlocal saved_count
        try:
            img_response = requests.get(img_url, timeout=request_timeout)
        except requests.RequestException as exc:
            print(f'Skipped: {img_url} ({exc})')
            return
        if not img_response.ok:
            # Do not save an HTTP error page under an image filename.
            print(f'Skipped: {img_url} (HTTP {img_response.status_code})')
            return

        saved_count += 1
        img_filename = os.path.join(output_folder, f'image_{saved_count}.jpg')
        with open(img_filename, 'wb') as img_file:
            img_file.write(img_response.content)
        print(f'Downloaded: {img_filename}')

    def download_images_internal(current_url, depth):
        if (depth > max_depth
                or saved_count >= max_images
                or current_url in visited_urls):
            return

        visited_urls.add(current_url)
        try:
            response = requests.get(current_url, timeout=request_timeout)
        except requests.RequestException as exc:
            if depth == 0:
                # The starting page is the caller's own input: fail loudly.
                raise
            print(f'Skipped page: {current_url} ({exc})')
            return
        soup = BeautifulSoup(response.text, 'html.parser')

        for img_tag in soup.find_all('img'):
            if saved_count >= max_images:
                return
            img_src = img_tag.get('src')
            if not img_src:
                continue
            img_url = resolve(current_url, img_src)
            if img_url is not None:
                save_image(img_url)

        # Follow links recursively
        for link in soup.find_all('a', href=True):
            if saved_count >= max_images:
                return
            next_url = resolve(current_url, link['href'])
            if next_url is not None:
                download_images_internal(next_url, depth + 1)

    download_images_internal(url, 0)

# Demo: crawl outwards from the Wikipedia main page and collect images.
output_directory = 'downloaded_images_recursive'
url_to_scrape = 'https://en.wikipedia.org/wiki/Main_Page'

download_images_recursive(url=url_to_scrape, output_folder=output_directory)


In [None]:
import requests
from bs4 import BeautifulSoup
import os
from urllib.parse import urljoin

def download_images_with_keywords(url, output_folder, keywords, max_depth=2, max_images=100):
    """Crawl from ``url`` and download images whose ``alt`` text matches.

    Starting at ``url`` (depth 0), every page is searched for ``<img>`` tags
    whose ``alt`` attribute contains at least one of ``keywords``
    (case-insensitive substring match). Matching images are downloaded, and
    ``<a href>`` links are followed depth-first until ``max_depth`` is
    exceeded. Each page is visited at most once.

    Files are written to ``output_folder`` as ``<kw1>_<kw2>_..._<n>.jpg``
    where ``<n>`` is a counter shared by the whole crawl, so images from
    different pages never overwrite each other. The ``.jpg`` extension is
    used regardless of the actual image format.

    Args:
        url: Address of the page the crawl starts from.
        output_folder: Directory that receives the files; created if missing.
        keywords: Strings to look for in the ``alt`` text; they are also
            joined with ``_`` to form the filename prefix.
        max_depth: Largest link distance from ``url`` that is still crawled.
        max_images: Maximum number of images saved over the whole crawl; the
            crawl stops as soon as this many files have been written.

    Raises:
        requests.RequestException: If the starting page cannot be fetched.
            Failures on linked pages or individual images are reported and
            skipped.
    """
    # Imported locally: the enclosing cell only imports ``urljoin``.
    from urllib.parse import urldefrag, urlparse

    request_timeout = 10  # seconds; without it a stalled server hangs the crawl
    os.makedirs(output_folder, exist_ok=True)
    visited_urls = set()
    saved_count = 0

    keywords = list(keywords)
    # Hoisted: lower-case the keywords once rather than for every <img> tag.
    lowered_keywords = [keyword.lower() for keyword in keywords]
    filename_prefix = "_".join(keywords)

    def alt_matches(alt_text):
        """True when the alt text contains any keyword, ignoring case."""
        if not alt_text:
            return False
        alt_lower = alt_text.lower()
        return any(keyword in alt_lower for keyword in lowered_keywords)

    def resolve(base_url, reference):
        """Return ``reference`` as an absolute http(s) URL, or None."""
        try:
            # Dropping the fragment keeps "page#a" and "page#b" from being
            # treated as two different pages.
            absolute, _fragment = urldefrag(urljoin(base_url, reference))
            scheme = urlparse(absolute).scheme
        except ValueError:
            return None
        # Filters out mailto:, javascript:, data: and similar targets.
        return absolute if scheme in ('http', 'https') else None

    def save_image(img_url):
        """Download one image; report and skip it on any failure."""
        nonlocal saved_count
        try:
            img_response = requests.get(img_url, timeout=request_timeout)
        except requests.RequestException as exc:
            print(f'Skipped: {img_url} ({exc})')
            return
        if not img_response.ok:
            # Do not save an HTTP error page under an image filename.
            print(f'Skipped: {img_url} (HTTP {img_response.status_code})')
            return

        saved_count += 1
        # Construct a unique filename based on search keywords and a
        # crawl-wide running index
        img_filename = os.path.join(output_folder, f'{filename_prefix}_{saved_count}.jpg')
        with open(img_filename, 'wb') as img_file:
            img_file.write(img_response.content)
        print(f'Downloaded: {img_filename}')

    def download_images_internal(current_url, depth):
        if (depth > max_depth
                or saved_count >= max_images
                or current_url in visited_urls):
            return

        visited_urls.add(current_url)
        try:
            response = requests.get(current_url, timeout=request_timeout)
        except requests.RequestException as exc:
            if depth == 0:
                # The starting page is the caller's own input: fail loudly.
                raise
            print(f'Skipped page: {current_url} ({exc})')
            return
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find images with alt attribute containing any of the keywords
        for img_tag in soup.find_all('img', {'alt': alt_matches}):
            if saved_count >= max_images:
                return
            img_src = img_tag.get('src')
            if not img_src:
                continue
            img_url = resolve(current_url, img_src)
            if img_url is not None:
                save_image(img_url)

        # Follow links recursively
        for link in soup.find_all('a', href=True):
            if saved_count >= max_images:
                return
            next_url = resolve(current_url, link['href'])
            if next_url is not None:
                download_images_internal(next_url, depth + 1)

    download_images_internal(url, 0)

# Demo: crawl from a Wikipedia article, keeping only images whose alt text
# mentions one of the search keywords.
search_keywords = ['elon', 'musk']
output_directory = 'downloaded_elon_musk_images'
url_to_scrape = 'https://en.wikipedia.org/wiki/Elon_Musk'

download_images_with_keywords(
    url=url_to_scrape,
    output_folder=output_directory,
    keywords=search_keywords,
)
