In [1]:
import pandas as pd 
import os
import requests
from duckduckgo_search import DDGS
from PIL import Image # pillows python library for image editing 

For this exercise we will play around with APIs a bit. API stands for Application Programming Interface. It is like a user interface, but for programs: it lets you interact with a software service in a programmatic way. It is how software talks to other software to get information. The data is usually exchanged in JSON format.

In this notebook we will be using DuckDuckGo's API to search the web for images programmatically. Luckily for us, DuckDuckGo is free to use and someone wrote a neat Python wrapper package to interact with the API. For the documentation, see https://pypi.org/project/duckduckgo-search/

In [8]:
# First we need to run the query. The options below play the same role as
# the filters offered by the search interface in the browser.
search_options = {
    "keywords": "butterfly",
    "region": "wt-wt",
    "safesearch": "off",
    "size": None,
    "color": "Monochrome",
    "type_image": None,
    "layout": None,
    "license_image": None,
    "max_results": 10,
}

# `results` is consumed by the download cell further down.
results = DDGS().images(**search_options)


In [3]:
# Next we need to actually download the images into the current directory.
# Let's make a folder for the outputs (exist_ok makes re-running this cell safe)
images = "downloaded_images"
os.makedirs(images, exist_ok=True)

# Loop through the resulting images
for idx, result in enumerate(results):
    image_url = result['image'] # get the URL of the images
    try:
        # requests has no default timeout, so without one a single stalled
        # server would block this cell forever.
        response = requests.get(image_url, timeout=10)
        response.raise_for_status()  # Check if the request was successful

        # Define a filename; the index keeps the search rank of the image
        filename = os.path.join(images, f"image_{idx}.jpg")
        with open(filename, "wb") as f:
            f.write(response.content)
        print(f"Downloaded {filename}")
    except Exception as e:
        # Best effort on purpose: report the failure and move on to the next image
        print(f"Failed to download image {idx}: {e}")

Downloaded downloaded_images/image_0.jpg
Downloaded downloaded_images/image_1.jpg
Downloaded downloaded_images/image_2.jpg
Downloaded downloaded_images/image_3.jpg
Failed to download image 4: 403 Client Error: Forbidden for url: https://www.publicdomainpictures.net/pictures/120000/velka/monarch-butterfly-2-1433183647wMO.jpg
Downloaded downloaded_images/image_5.jpg
Downloaded downloaded_images/image_6.jpg
Downloaded downloaded_images/image_7.jpg
Downloaded downloaded_images/image_8.jpg
Downloaded downloaded_images/image_9.jpg


Analysis

Now we can analyse the images. We will try out two different methods for quantitative image analysis. 

First we will try to stack the images with some opacity to see what the results look like. 

In [6]:
# Get the list of image files, here we can also load a specific category of images.
# Sorting by (length, name) puts image_2.jpg before image_10.jpg; a plain
# alphabetical sort would not, and the stacking order affects the result.
image_files = sorted(
    (f for f in os.listdir(images) if f.endswith('.jpg')),
    key=lambda name: (len(name), name),
)
if not image_files:
    raise ValueError(f"No .jpg files found in {images!r}; run the download cell first.")

# We will do this via a loop. Loops need a starting place, so we first transform one image, then we go through the rest of the list
base_path = os.path.join(images, image_files[0]) # get the path of the first image
with Image.open(base_path) as first_file: # the with-block closes the file once the pixels are copied
    base_image = first_file.convert("RGBA") # get the first image
alpha_mask = Image.new("L", base_image.size, int(0.2 * 255)) # create a mask to set the opacity, now it is 0.2 that is 20%
base_image.putalpha(alpha_mask) # apply the mask to the first image

width, height = base_image.size # get the size of the first image, we will resize the rest to this size

# Create an accumulator starting with the base image
composite = base_image.copy()

# Loop over the images, note these are the same steps as above
for image_file in image_files[1:]:
    image_path = os.path.join(images, image_file) # get the path
    with Image.open(image_path) as overlay_file:
        overlay = overlay_file.convert("RGBA") # get the image
    # Resize the overlay to match the base image if needed
    if overlay.size != (width, height):
        overlay = overlay.resize((width, height))

    overlay.putalpha(alpha_mask) # apply the mask (same size as the overlay after resizing)

    # Overlay onto the current composite image
    composite = Image.alpha_composite(composite, overlay)

# Save and show the final composite image
composite.save("stacked_images.png")
composite.show()


Next we can wrap all this in a function to run an experiment. Here we will study the difference in image results between Russia and the US for the search term "Pride".

In [7]:
def _download_images(results, download_dir, timeout=10):
    """Save each result's image into ``download_dir`` and return the saved paths in result order."""
    os.makedirs(download_dir, exist_ok=True)

    saved_paths = []
    for idx, result in enumerate(results):
        image_url = result['image']  # Get the URL for the image
        try:
            # requests never times out by default; bound the wait so one
            # stalled host cannot hang the whole experiment.
            response = requests.get(image_url, timeout=timeout)
            response.raise_for_status()  # Ensure we got a valid response

            # The index in the filename records the search rank
            filename = os.path.join(download_dir, f"image_{idx}.jpg")
            with open(filename, "wb") as f:
                f.write(response.content)
            print(f"Downloaded {filename}")
            saved_paths.append(filename)
        except Exception as e:
            # Best effort on purpose: report and continue with the next image
            print(f"Failed to download image {idx}: {e}")
    return saved_paths


def _stack_images(image_paths, opacity):
    """Alpha-composite the images in the given order; return None if none of them can be opened."""
    composite = None
    for image_path in image_paths:
        try:
            # The with-block closes the file; convert() returns an independent copy
            with Image.open(image_path) as image_file:
                layer = image_file.convert("RGBA")
        except OSError as e:
            # Files are saved as .jpg without checking the response content,
            # so a file may not be an image at all. Skip it instead of aborting.
            print(f"Skipping unreadable image {image_path}: {e}")
            continue

        # The first readable image fixes the canvas size; later ones are resized to it
        if composite is not None and layer.size != composite.size:
            layer = layer.resize(composite.size)

        # Give the whole layer a uniform opacity
        layer.putalpha(Image.new("L", layer.size, int(opacity * 255)))

        composite = layer if composite is None else Image.alpha_composite(composite, layer)
    return composite


def create_composite_image(keywords, region="wt-wt", safesearch="moderate", size=None,
                           color=None, type_image=None, layout=None,
                           license_image=None, max_results=10, download_dir="downloaded_images",
                           composite_filename="stacked_images.png", opacity=0.2):
    """Search for images, download them and stack them into one semi-transparent composite.

    Only the images downloaded by this call are stacked, in the order the
    search returned them. Files left in ``download_dir`` by earlier runs are
    ignored, so results from different searches do not mix.

    Parameters
    ----------
    keywords, region, safesearch, size, color, type_image, layout, license_image, max_results
        Forwarded unchanged to ``DDGS().images``.
    download_dir : str
        Folder the images are written to as ``image_<idx>.jpg``; created if missing.
    composite_filename : str
        Path the composite image is saved to.
    opacity : float
        Opacity of every layer; converted to an alpha value with ``int(opacity * 255)``.

    Returns
    -------
    PIL.Image.Image
        The RGBA composite, sized like the first readable image.

    Raises
    ------
    ValueError
        If no image could be downloaded, or none of the downloaded files
        could be opened as an image.
    """
    # Run the image search
    results = DDGS().images(
        keywords=keywords,
        region=region,
        safesearch=safesearch,
        size=size,
        color=color,
        type_image=type_image,
        layout=layout,
        license_image=license_image,
        max_results=max_results,
    )

    # Download the images, remembering exactly which files this run produced
    image_paths = _download_images(results, download_dir)
    if not image_paths:
        raise ValueError("No images were downloaded.")

    # Stack them in search-rank order
    composite = _stack_images(image_paths, opacity)
    if composite is None:
        raise ValueError("None of the downloaded files could be opened as an image.")

    # Save the composite image and display it
    composite.save(composite_filename)
    composite.show()

    return composite

Downloaded downloaded_images/image_0.jpg
Downloaded downloaded_images/image_1.jpg
Downloaded downloaded_images/image_2.jpg
Downloaded downloaded_images/image_3.jpg
Failed to download image 4: 403 Client Error: Forbidden for url: https://www.publicdomainpictures.net/pictures/120000/velka/monarch-butterfly-2-1433183647wMO.jpg
Downloaded downloaded_images/image_5.jpg
Downloaded downloaded_images/image_6.jpg
Downloaded downloaded_images/image_7.jpg
Downloaded downloaded_images/image_8.jpg
Downloaded downloaded_images/image_9.jpg


In [13]:
# Options shared by both regional searches, so the two composites differ only by region
pride_options = dict(color=None, max_results=20, opacity=0.2)

# Results as served to the Russian region
composite_img_ru = create_composite_image(
    "pride",
    region="ru-ru",
    download_dir="downloaded_images_ru",
    composite_filename="stacked_images_RU.png",
    **pride_options,
)

# Results as served to the US (English) region
composite_img_us = create_composite_image(
    "pride",
    region="us-en",
    download_dir="downloaded_images_us",
    composite_filename="stacked_images_US.png",
    **pride_options,
)


Downloaded downloaded_images_ch/image_0.jpg
Downloaded downloaded_images_ch/image_1.jpg
Downloaded downloaded_images_ch/image_2.jpg
Failed to download image 3: 403 Client Error: Forbidden for url: https://www.allianceforeatingdisorders.com/wp-content/uploads/2020/06/pride-flag.jpg
Downloaded downloaded_images_ch/image_4.jpg
Downloaded downloaded_images_ch/image_5.jpg
Downloaded downloaded_images_ch/image_6.jpg
Downloaded downloaded_images_ch/image_7.jpg
Downloaded downloaded_images_ch/image_8.jpg
Downloaded downloaded_images_ch/image_9.jpg
Downloaded downloaded_images_ch/image_10.jpg
Downloaded downloaded_images_ch/image_11.jpg
Downloaded downloaded_images_ch/image_12.jpg
Downloaded downloaded_images_ch/image_13.jpg
Downloaded downloaded_images_ch/image_14.jpg
Downloaded downloaded_images_ch/image_15.jpg
Downloaded downloaded_images_ch/image_16.jpg
Downloaded downloaded_images_ch/image_17.jpg
Downloaded downloaded_images_ch/image_18.jpg
Downloaded downloaded_images_ch/image_19.jpg
Down

Now we can compare the results. Some images are the same, but we receive them in a different order. This results in different composites, because the order in which we stack the pictures matters for visibility.

Next we could try grouping the images, or something else using the data we loaded, if we want to. But I think this is a good start to demonstrate metaimage analysis.