In [1]:
#download images
import os
import requests
import csv
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Function to download images from URLs
def download_image(image_url, output_dir, title, timeout=30):
    """Download one image and save it into ``output_dir``.

    The file is named after the last path segment of ``image_url``.
    Failures are reported on stdout rather than raised, so one bad image
    does not abort a batch.

    Args:
        image_url: Absolute URL of the image to fetch.
        output_dir: Existing directory the image is written into.
        title: Caption associated with the image. Accepted for interface
            compatibility; it is not used when saving the file.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        The path of the saved file, or ``None`` if the download failed.
    """
    try:
        response = requests.get(image_url, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to download image from: {image_url}")
            return None

        image_name = os.path.basename(image_url)
        image_path = os.path.join(output_dir, image_name)
        with open(image_path, 'wb') as f:
            f.write(response.content)
        print(f"Image downloaded: {image_path}")
        return image_path
    except Exception as e:
        # Best-effort by design: report and carry on with the next image.
        print(f"Error downloading image: {e}")
        return None

# Function to extract and download images
def extract_and_download_images(url, output_dir, timeout=30):
    """Fetch a web page and download every ``.jpg`` image it references.

    Only ``<img>`` tags whose ``src`` ends in ``.jpg`` are considered.
    Relative ``src`` values are resolved against ``url``.

    Args:
        url: Address of the page to scan.
        output_dir: Directory handed to ``download_image`` for each image.
        timeout: Seconds to wait for the page before giving up.

    Returns:
        None. Progress and failures are reported on stdout.
    """
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            # Previously a bad status was silently ignored, which looked
            # identical to "page has no images".
            print(f"Failed to fetch page: {url} (status {response.status_code})")
            return

        soup = BeautifulSoup(response.content, 'html.parser')
        for img_element in soup.find_all('img'):
            img_url = img_element.get('src')
            if not img_url or not img_url.endswith('.jpg'):
                continue
            img_url = urljoin(url, img_url)
            title = img_element.get('alt', '')
            download_image(img_url, output_dir, title)
    except Exception as e:
        print(f"An error occurred: {e}")

# Output directory for downloaded images.
# exist_ok=True makes the cell safe to re-run and avoids the race between
# checking for the directory and creating it.
output_directory = "pikwizard_images"
os.makedirs(output_directory, exist_ok=True)

# URL of the Pikwizard page to scan for images
url = "https://pikwizard.com/most-popular/sports-backgrounds-images/"

# Extract and download every .jpg referenced by the page
extract_and_download_images(url, output_directory)

Image downloaded: pikwizard_images\42dc903754199f8ccec6a2a571cbf89a.jpg
Image downloaded: pikwizard_images\896f26132e7fe5b8649259ab0cdf8114.jpg
Image downloaded: pikwizard_images\568e9e3d8434f7ecb13562e24b7e680a.jpg
Image downloaded: pikwizard_images\9d0989c264738fee794aa469288f6731.jpg
Image downloaded: pikwizard_images\11495840d5d724ae10e1e525fa310fff.jpg
Image downloaded: pikwizard_images\881946ee69edfb1e197ee81c6083a460.jpg
Image downloaded: pikwizard_images\66d2690058f99fd503dc3f1a3a07ff5f.jpg
Image downloaded: pikwizard_images\6567d21c791f980c972254110bdc4edf.jpg
Image downloaded: pikwizard_images\240_F_336411417_DtEpDXNacEM9Ay6Af1DDPq2rODrJa5zd.jpg
Image downloaded: pikwizard_images\44d62a1163af51d0065fe6cca22d48e7.jpg
Image downloaded: pikwizard_images\877eff0bb70bf424dad0aba7201b2e36.jpg
Image downloaded: pikwizard_images\b594eb62c3416aeed8353de2c0b49e18.jpg
Image downloaded: pikwizard_images\ebf62ac0cbe2779901ab260f2c5bed9b.jpg
Image downloaded: pikwizard_images\2d9efb68394b3

In [2]:
#download images and extract metadata
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from PIL import Image
import csv

# Function to download images from URLs
def download_image(image_url, output_dir, title, timeout=30):
    """Download one image into ``output_dir`` and return where it was saved.

    The file is named after the last path segment of ``image_url``.
    Failures are reported on stdout rather than raised, so one bad image
    does not abort a batch.

    Args:
        image_url: Absolute URL of the image to fetch.
        output_dir: Existing directory the image is written into.
        title: Caption associated with the image. Accepted for interface
            compatibility; it is not used when saving the file.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        The path of the saved file, or ``None`` if the download failed.
    """
    try:
        response = requests.get(image_url, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to download image from: {image_url}")
            return None

        image_name = os.path.basename(image_url)
        image_path = os.path.join(output_dir, image_name)
        with open(image_path, 'wb') as f:
            f.write(response.content)
        print(f"Image downloaded: {image_path}")
        return image_path
    except Exception as e:
        # Best-effort by design: report and carry on with the next image.
        print(f"Error downloading image: {e}")
        return None

# Function to extract and download images
def extract_and_download_images(url, output_dir, timeout=30):
    """Download every ``.jpg`` referenced by a page and collect its metadata.

    Only ``<img>`` tags whose ``src`` ends in ``.jpg`` are considered.
    Relative ``src`` values are resolved against ``url``. Images that fail
    to download are skipped.

    Args:
        url: Address of the page to scan.
        output_dir: Directory handed to ``download_image`` for each image.
        timeout: Seconds to wait for the page before giving up.

    Returns:
        A list with one dict per downloaded image, holding the keys
        ``"Title"`` (the tag's ``alt`` text), ``"URL"`` (the resolved image
        URL) and ``"Info"`` (the result of ``get_image_metadata``). The list
        is empty when the page cannot be fetched, so callers can always
        iterate over the result.
    """
    image_metadata = []
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to fetch page: {url} (status {response.status_code})")
            return image_metadata

        soup = BeautifulSoup(response.content, 'html.parser')
        for img_element in soup.find_all('img'):
            img_url = img_element.get('src')
            if not img_url or not img_url.endswith('.jpg'):
                continue
            img_url = urljoin(url, img_url)
            title = img_element.get('alt', '')
            image_path = download_image(img_url, output_dir, title)
            if image_path:
                image_info = get_image_metadata(image_path)
                image_metadata.append({"Title": title, "URL": img_url, "Info": image_info})
    except Exception as e:
        print(f"An error occurred: {e}")
    # Return whatever was collected, even after an error part-way through,
    # instead of falling off the end and handing the caller None.
    return image_metadata

# Function to get image metadata
def get_image_metadata(image_path):
    """Read basic properties of an image file.

    Args:
        image_path: Path of the image to open.

    Returns:
        A dict with the keys ``"format"``, ``"mode"`` and ``"size"`` taken
        from the opened image, or an empty dict if the file could not be
        read (the error is printed).
    """
    # Extend this tuple to report more attributes of the opened image.
    wanted_fields = ("format", "mode", "size")
    try:
        with Image.open(image_path) as picture:
            return {field: getattr(picture, field) for field in wanted_fields}
    except Exception as err:
        print(f"Error getting image metadata: {err}")
        return {}

# Output directory for downloaded images.
# exist_ok=True makes the cell safe to re-run and avoids the race between
# checking for the directory and creating it.
output_directory = "pikwizard_images"
os.makedirs(output_directory, exist_ok=True)

# URL of the Pikwizard page to scan for images
url = "https://pikwizard.com/most-popular/burst-photo/"

# Extract and download images. Fall back to an empty list so that a failed
# page fetch still produces a header-only CSV instead of a TypeError below.
image_metadata = extract_and_download_images(url, output_directory) or []

# Write metadata to a CSV file
csv_file_path = "image_metadata.csv"
csv_header = ["Title", "URL", "Info", "File"]
with open(csv_file_path, mode='w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=csv_header)
    writer.writeheader()
    for metadata in image_metadata:
        # Work on a copy so the collected metadata is not mutated by writing it.
        info = dict(metadata["Info"])
        # The Info dict carries no "filename" entry today, which used to leave
        # the File column blank. Images are saved under the basename of their
        # URL, so derive the file name from that; an explicit "filename" entry
        # is still honoured if one is present.
        file_source = info.pop("filename", "") or metadata["URL"]
        writer.writerow({
            "Title": metadata["Title"],
            "URL": metadata["URL"],
            "Info": info,
            "File": os.path.basename(file_source),
        })

print("Metadata saved to:", csv_file_path)


Image downloaded: pikwizard_images\584a20bfaa6a32622b04fe4c59b4d666.jpg
Image downloaded: pikwizard_images\17b2c3b59725e6e154ecc55fcf1fe111.jpg
Image downloaded: pikwizard_images\240_F_711360308_EFhrUk5JOmSbYlc8lcIJs0m1rNs8vXnm.jpg
Image downloaded: pikwizard_images\2773e4c1c81230ae1596f6382a628ffa.jpg
Image downloaded: pikwizard_images\240_F_668149291_sCSC65dvkW0ZRDF34grzSdtuFCIusMxS.jpg
Image downloaded: pikwizard_images\a7a5447624f582c55cb7ee04b4ede69d.jpg
Image downloaded: pikwizard_images\458b7201d968b5de982cb3011cc663bc.jpg
Image downloaded: pikwizard_images\240_F_688489481_PGjxqDQwbAC2Ae2UJimQzUOCg6i11hQ5.jpg
Image downloaded: pikwizard_images\b8a7978e52f15b875a08a95b460222cc.jpg
Image downloaded: pikwizard_images\1fbd62e64b8416f0ffbe0428c31b02bb.jpg
Image downloaded: pikwizard_images\240_F_637257056_Gx6R0XRaSyuwNsx2SV5ZgTv8yY2FakgR.jpg
Image downloaded: pikwizard_images\342c0f8f385ed3fa72ecdf39a1da04e3.jpg
Image downloaded: pikwizard_images\2bdfc39b0a83d9a7c82a5524f7eb430f.jpg
