# Image analysis

## Image stacking (from Rogers 2021, p.6)

In [None]:
%pip install pandas requests duckduckgo_search # PIL is already installed here!

In [None]:
import os
import pandas as pd 
import requests
from PIL import Image # pillows python library for image editing 
from duckduckgo_search import DDGS

For this exercise we will play around with APIs a bit. API stands for Application Programming Interface. It is like a user interface, but for programs. It lets you interact with a software service in a programmatic way. It is how software talks to other software to get information. The data is usually exchanged in JSON format. APIs are often used for data collection by digital methods researchers. For example, both 4CAT and Communalytic rely on APIs for many of their data collection features.

In this notebook, we use DuckDuckGo's API to search the web for images programmatically. DuckDuckGo is a search engine like Google Search, but focused on public values. Luckily for us, DuckDuckGo is free to use and someone wrote a well-maintained Python package to interact with the DuckDuckGo API. For the documentation see https://pypi.org/project/duckduckgo-search/

In [None]:
# Step one: run the query. The options below mirror the filters you would
# find in the search interface of a web browser. Try changing some of them
# and see how the results change!
search_options = {
    "keywords": "butterfly",
    "region": "wt-wt",
    "safesearch": "off",
    "size": None,
    "color": "Monochrome",
    "type_image": None,
    "layout": None,
    "license_image": None,
    "max_results": 10,
}

# Unpack the options as keyword arguments for the image search.
results = DDGS().images(**search_options)

In [None]:
# Next, we need to actually download the images into the current directory.

# Let's make a folder for the outputs!
images = "downloaded_images"

# exist_ok=True means: create the folder, but do nothing if it is already there.
os.makedirs(images, exist_ok=True)

# Loop through the resulting images
for idx, result in enumerate(results):
    image_url = result['image'] # Get the URL of the image
    try:
        # Download the image from the web address stored in image_url.
        # Without a timeout, requests waits indefinitely on a server that
        # never answers, which would freeze this cell. 10 means: give up on
        # this image if the server stays silent for 10 seconds.
        response = requests.get(image_url, timeout=10)
        response.raise_for_status()  # Check if the request was successful

        # Define a filename: the number is the position in the search results
        filename = os.path.join(images, f"image_{idx}.jpg")
        with open(filename, "wb") as f:
            f.write(response.content)
        print(f"Downloaded {filename}")
    except Exception as e:
        # Best effort: report the problem and carry on with the next image.
        print(f"Failed to download image {idx}: {e}")

### Analysis

Now we can analyse the images. We will try out two different methods for quantitative image analysis. 

First, we will try to stack the images with some opacity to see what the results look like. 

In [None]:
# Get the list of image files sorted by name. Here, we can also load a specific category of images (only JPEGS, for example).
# NOTE: sorted() compares names as text, so "image_10.jpg" comes before "image_2.jpg".
# With up to 10 images (image_0 ... image_9) the text order equals the numeric order.
image_files = sorted([f for f in os.listdir(images) if f.endswith('.jpg')])

# Stop with a clear message if there is nothing to stack (for example when every download failed).
if not image_files:
    raise ValueError(f"No .jpg images found in the folder {images}")

# We will do this via a loop. Loops need a starting place, so we first transform one image. Then, we go through the whole list.
base_path = os.path.join(images, image_files[0]) # Get the path of the first image
base_image = Image.open(base_path).convert("RGBA") # Get the first image, with an alpha (transparency) channel
alpha_mask = Image.new("L", base_image.size, int(0.2 * 255)) # Create a mask to set the opacity, now it is 0.2 that is 20%
# You can adjust the parameter 0.2 above to see if other values result in a better looking final image.
base_image.putalpha(alpha_mask) # Apply the mask to the first image

width, height = base_image.size # Get the size of the first image: we will use this to resize the rest to this size

# Create an accumulator starting with the base image
composite = base_image.copy()

# Loop over the images. Note that these are the same steps as above!
for image_file in image_files[1:]:
    image_path = os.path.join(images, image_file) # Get the path
    overlay = Image.open(image_path).convert("RGBA") # Get the image
    # Resize the overlay to match the base image if needed
    # (the mask and the composite both have the size of the first image)
    if overlay.size != (width, height):
        overlay = overlay.resize((width, height))

    overlay.putalpha(alpha_mask) # Apply the mask

    # Overlay onto the current composite image
    composite = Image.alpha_composite(composite, overlay)

# Save and show the final composite image
composite.save("stacked_images.png")
display(composite)


Next, we can wrap all this in a function to run an experiment. Here, we will study the difference in image results between Russia and the US for the search term "Pride".

In [None]:
def create_composite_image(keywords, region="wt-wt", safesearch="moderate", size=None,
                           color=None, type_image=None, layout=None,
                           license_image=None, max_results=10, download_dir="downloaded_images",
                           composite_filename="stacked_images.png", opacity=0.2):
    """Search for images, download them and stack them into one composite picture.

    The images are stacked in the order of the search results: the first
    result is the bottom layer and every later result is laid on top of it.
    Only files downloaded during this call are stacked; other files already
    present in ``download_dir`` are ignored.

    Args:
        keywords, region, safesearch, size, color, type_image, layout,
        license_image, max_results: passed unchanged to ``DDGS().images``.
        download_dir: folder in which the downloaded files are stored as
            ``image_<position>.jpg``; created if it does not exist.
        composite_filename: path where the stacked picture is saved.
        opacity: opacity given to every layer, between 0 (invisible) and
            1 (opaque).

    Returns:
        The composite picture as an RGBA ``PIL.Image.Image`` with the size
        of the first usable image.

    Raises:
        ValueError: if no image could be downloaded, or if none of the
            downloaded files could be opened as an image.
    """
    # Run the image search
    results = DDGS().images(
        keywords=keywords,
        region=region,
        safesearch=safesearch,
        size=size,
        color=color,
        type_image=type_image,
        layout=layout,
        license_image=license_image,
        max_results=max_results,
    )

    # Create the download directory if it doesn't exist
    os.makedirs(download_dir, exist_ok=True)

    # Download the images. We remember each saved file, in search-result
    # order, so that the stacking below neither picks up leftovers from an
    # earlier run nor depends on how the file names sort as text
    # ("image_10.jpg" sorts before "image_2.jpg").
    downloaded_files = []
    for idx, result in enumerate(results):
        image_url = result['image']  # Get the URL for the image
        try:
            # The timeout keeps one silent server from freezing the notebook.
            response = requests.get(image_url, timeout=10)
            response.raise_for_status()  # Ensure we got a valid response

            # Define the filename
            filename = os.path.join(download_dir, f"image_{idx}.jpg")
            with open(filename, "wb") as f:
                f.write(response.content)
        except Exception as e:
            # Best effort: report the failure and continue with the next result.
            print(f"Failed to download image {idx}: {e}")
        else:
            downloaded_files.append(filename)
            print(f"Downloaded {filename}")

    if not downloaded_files:
        raise ValueError("No images were downloaded.")

    composite = None
    alpha_mask = None
    for image_path in downloaded_files:
        # A server can answer successfully with something that is not an
        # image (an error page, for instance). Pillow reports such files
        # with an OSError; skip them instead of aborting the whole stack.
        try:
            layer = Image.open(image_path).convert("RGBA")
        except OSError as e:
            print(f"Skipping unreadable image {image_path}: {e}")
            continue

        if composite is None:
            # The first usable image is the base: it fixes the output size.
            # All layers end up with this size, so one mask serves them all.
            alpha_mask = Image.new("L", layer.size, int(opacity * 255))
            layer.putalpha(alpha_mask)
            composite = layer
            continue

        # Resize the layer to match the base image dimensions if needed
        if layer.size != composite.size:
            layer = layer.resize(composite.size)

        # Apply the same opacity to the layer and put it on top of the stack
        layer.putalpha(alpha_mask)
        composite = Image.alpha_composite(composite, layer)

    if composite is None:
        raise ValueError("None of the downloaded files could be opened as an image.")

    # Save the composite image and display it
    composite.save(composite_filename)
    display(composite)

    return composite

In [None]:
# Same query, same settings; only the region (and the output locations) differ.

# Results as served for Russia
composite_img_ru = create_composite_image(
    "pride",
    region="ru-ru",
    color=None,
    download_dir="downloaded_images_ru",
    composite_filename="stacked_images_RU.png",
    max_results=20,
    opacity=0.2,
)

# Results as served for the United States
composite_img_us = create_composite_image(
    "pride",
    region="us-en",
    color=None,
    download_dir="downloaded_images_us",
    composite_filename="stacked_images_US.png",
    max_results=20,
    opacity=0.2,
)


Now, we can compare the results. A few images are the same, but we receive them in a different order. This results in different pictures, as the order in which we stack the pictures matters for visibility.

In this notebook, we introduced one way to make metapictures for visual analysis. We used the pillow library (PIL) for image manipulation. A next step could be clustering the images according to their various properties. Alternatively, techniques in this notebook can be repurposed to use your own images from your dataset. How to go about that?

In [None]:
# This is a revised version of the function above, adapted to be used on your own images.
# Note: it will only work if you have a folder "local_images" in your file browser to the left,
# and that folder has a number of JPEG images that you want to stack!
def create_composite_image_from_a_local_folder(download_dir="local_images",
                           composite_filename="stacked_local_images.png", opacity=0.2):
    """Stack the JPEG images of a local folder into one composite picture.

    The files are stacked in name order: the first one is the bottom layer
    and determines the size of the result; the others are resized to that
    size and laid on top. The result is saved and displayed.

    Args:
        download_dir: folder containing the images. Files ending in
            ``.jpg`` or ``.jpeg`` (upper or lower case) are used.
        composite_filename: path where the stacked picture is saved.
        opacity: opacity given to every layer, between 0 (invisible) and
            1 (opaque).

    Raises:
        ValueError: if the folder contains no JPEG files.
    """
    # Get the list of images in the local folder (sorted by name).
    # Comparing the lower-cased name also accepts "PHOTO.JPG" or "scan.jpeg".
    image_files = sorted(
        f for f in os.listdir(download_dir)
        if f.lower().endswith(('.jpg', '.jpeg'))
    )
    if not image_files:
        raise ValueError(f"No images found in the local folder: {download_dir}")

    # Open the first image as the base and convert it to RGBA
    base_path = os.path.join(download_dir, image_files[0])
    base_image = Image.open(base_path).convert("RGBA")
    width, height = base_image.size

    # Create an alpha mask based on the provided opacity. Every layer is
    # brought to the size of the base image, so this one mask fits them all.
    alpha_mask = Image.new("L", base_image.size, int(opacity * 255))
    base_image.putalpha(alpha_mask)

    # Initialize the composite with the base image
    composite = base_image.copy()

    # Loop over the remaining images and composite them one by one
    for image_file in image_files[1:]:
        image_path = os.path.join(download_dir, image_file)
        overlay = Image.open(image_path).convert("RGBA")

        # Resize overlay to match the base image dimensions if needed
        if overlay.size != (width, height):
            overlay = overlay.resize((width, height))

        # Apply the same opacity mask to the overlay
        overlay.putalpha(alpha_mask)

        # Composite the overlay image onto the current composite image
        composite = Image.alpha_composite(composite, overlay)

    # Save the composite image and display it
    composite.save(composite_filename)
    display(composite)

In [None]:
# Run with the defaults: read the images from the folder "local_images",
# save the result as "stacked_local_images.png" and display it.
create_composite_image_from_a_local_folder()