In [2]:
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from datetime import datetime
from dateutil.relativedelta import relativedelta 
import requests
import os

In [3]:
# Start a Chrome session and open the Vogue Archive login page.
# The visible code submits no credentials, so signing in presumably happens
# by hand in the opened browser window — TODO confirm.
login_url = "https://archive.vogue.com/login"
driver = webdriver.Chrome()
driver.get(login_url)

In [8]:
# Keyword this scrape is about. It is not referenced by any code visible in
# this notebook, so it presumably documents the query typed into the site.
search_term = "Bloom"

# Inclusive archive window, both written as "MONTH YEAR"; these two values are
# what gets handed to generate_date_range().
start_date, end_date = "JANUARY 1900", "DECEMBER 2022"

# Helper function to generate a list of months and years in range
def generate_date_range(start, end):
    """Return one "Month Year" label per month from ``start`` through ``end``.

    Args:
        start: First month of the range, parseable with ``"%B %Y"``
            (full month name followed by a four-digit year).
        end: Last month of the range, in the same format. It is included
            in the result.

    Returns:
        A list of strings formatted with ``"%B %Y"``, in chronological order.
        The list is empty when ``end`` falls before ``start``.

    Raises:
        ValueError: If either argument does not match ``"%B %Y"``.
    """
    month_format = "%B %Y"
    cursor = datetime.strptime(start, month_format)
    last_month = datetime.strptime(end, month_format)
    one_month = relativedelta(months=1)

    labels = []
    while cursor <= last_month:
        labels.append(cursor.strftime(month_format))
        # Step forward one calendar month at a time.
        cursor = cursor + one_month
    return labels

# Generate the range of dates
# NOTE(review): date_range is not consulted by the filter below. It is kept
# in case other cells rely on it — confirm before removing.
date_range = generate_date_range(start_date, end_date)

# Grab every element carrying the "spread" class from whatever page the driver
# is currently showing. No navigation or search is performed in the visible
# code, so this presumably relies on the browser having been logged in and
# pointed at the search results by hand — TODO confirm.
bloom = driver.find_elements(By.CLASS_NAME, "spread")

# One {"src": ..., "date": ...} record per usable element.
all_image_bloom = []
for img in bloom:
    src = img.get_attribute("src")
    alt = img.get_attribute("alt")

    # Skip elements that lack either attribute. The previous condition
    # (`src and alt in alt`) was always true for a string `alt` and raised
    # TypeError when `alt` was None, so a single element without alt text
    # aborted the whole collection.
    if not src or not alt:
        continue

    # The date is taken as the text after the last " - " within the part of
    # `alt` that precedes the first "|".
    date_part = alt.split('|')[0].split(' - ')[-1].strip()
    all_image_bloom.append({"src": src, "date": date_part})

# Directory that receives the downloaded image files.
output_dir = "downloaded_images"
os.makedirs(output_dir, exist_ok=True)

csv_file = "all_image_bloom.csv"

# Seconds to wait for the server before giving up on a request. Without a
# timeout a single stalled connection would block the loop indefinitely.
request_timeout = 30

# Characters that are path separators or invalid in file names on common
# platforms; each one is replaced with "_" when building a file name.
unsafe_filename_chars = '/\\:*?"<>|'

# Lower-cased file names already assigned during this run. Compared
# case-insensitively so that e.g. "JUNE_2015.jpg" and "June_2015.jpg" do not
# overwrite each other on case-insensitive file systems.
used_file_names = set()

# Open the CSV file for writing
with open(csv_file, mode="w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    # Write the header
    writer.writerow(["Date", "Image URL", "File Name"])

    # Loop through all images in `all_image_bloom`
    for image in all_image_bloom:
        src = image["src"]

        # Make date filename-friendly. The date comes from page text, so
        # separators and reserved characters are neutralised as well, and an
        # empty result falls back to a placeholder.
        date = image["date"].replace(" ", "_").replace(",", "")
        date = "".join("_" if ch in unsafe_filename_chars else ch for ch in date)
        date = date or "undated"

        # Several images can share one date; append a counter so that later
        # downloads do not silently replace earlier ones.
        file_name = f"{date}.jpg"
        duplicate_index = 2
        while file_name.lower() in used_file_names:
            file_name = f"{date}_{duplicate_index}.jpg"
            duplicate_index += 1
        used_file_names.add(file_name.lower())

        try:
            # Download the image
            response = requests.get(src, timeout=request_timeout)
            response.raise_for_status()  # Raise HTTPError for bad responses

            # Save the image
            with open(os.path.join(output_dir, file_name), "wb") as img_file:
                img_file.write(response.content)

            print(f"Downloaded: {file_name}")

            writer.writerow([image["date"], src, file_name])
        except (requests.exceptions.RequestException, OSError) as e:
            # Network errors, HTTP errors, timeouts and local write failures
            # are all recorded in the CSV so that one bad image does not stop
            # the remaining downloads.
            print(f"Failed to download {src}. Error: {e}")
            writer.writerow([image["date"], src, "Download failed"])

print(f"All images downloaded and metadata saved to {csv_file}")

Downloaded: JUNE_2017.jpg
Downloaded: May_29_1902.jpg
Downloaded: MARCH_15_1970.jpg
Downloaded: JULY_2013.jpg
Downloaded: NOVEMBER_2014.jpg
Downloaded: June_2015.jpg
Downloaded: JANUARY_1998.jpg
Downloaded: AUGUST_1999.jpg
Downloaded: OCTOBER_2016.jpg
Downloaded: April_1_1926.jpg
Downloaded: March_1_1926.jpg
Downloaded: October_15_1923.jpg
Downloaded: APRIL_15_1938.jpg
Downloaded: December_1_1920.jpg
Downloaded: DECEMBER_1_1927.jpg
Downloaded: April_1_1918.jpg
Downloaded: January_15_1918.jpg
Downloaded: February_1_1921.jpg
Downloaded: September_1_1920.jpg
Downloaded: November_1_1920.jpg
Downloaded: MAY_15_1947.jpg
All images downloaded and metadata saved to all_image_bloom.csv
