In [135]:
import cv2

# Scrape Images

In [58]:
import requests
from bs4 import BeautifulSoup
import os 

url = "https://archive.vogue.com/issue/20211101"
r = requests.get(url)

soup = BeautifulSoup(r.text, 'html.parser')


images = soup.find('img', class_='bndwgt__issuecover_main')

print(images)

<img alt="Issue: - NOVEMBER 2021 | Vogue" class="bndwgt__issuecover_main" src="https://vogueprod.blob.core.windows.net/vogueoutput20211101thumbnails/Covers/0x600/20211101.jpg"/>


# Save images

In [131]:
import datetime 
from PIL import Image 
from io import BytesIO

# Define the base URL of the website 
# "https://vogueprod.blob.core.windows.net/vogueoutput20231201thumbnails/Covers/0x600/20231201.jpg" 
base_url = "https://vogueprod.blob.core.windows.net/vogueoutput{date}thumbnails/Covers/0x600/{date}.jpg" 

# Define the starting and end years we want to use 
start_year = 2000
end_year = 2023

# create a directory to save the changes 
output_directory = "all_pics/scraped/vogue"
os.makedirs(output_directory, exist_ok=True)

# Loop over every year 
for year in range (start_year, end_year+1):
    for month in range(1,13):
        # create a datetime object for the first day of the month 
        first_day_of_month = datetime.date(year, month,1)
        
        # format the date as YYMM01
        formatted_date = first_day_of_month.strftime("%Y%m01") #strftime converts as datetime object into string
        
        # construct the image URL 
        image_url = base_url.format(date=formatted_date)
        
        # get the image data
        response = requests.get(image_url)
        if response.status_code == 200:
            # open the image using PIL 
            image = Image.open(BytesIO(response.content))
            
            # Define the file path to save the image 
            file_path = os.path.join(output_directory, f"vogue{formatted_date}.jpg")
            
            # Save the image as JPG file 
            image.save(file_path)
            
            # Close the image 
            image.close()
            
            # print a message that the images have been saved 
            #print(f"Saved:vogue{formatted_date}")
            
            # print an error message
        else:
            print(f"Error in saving: vogue{formatted_date}: {response.status_code}")

Error in saving: vogue20200701: 404
Error in saving: vogue20210701: 404
Error in saving: vogue20220701: 404
Error in saving: vogue20230701: 404


# Detect Faces from images

In [140]:
def detect_faces(image, cascade_path="haarcascade_frontalface_default.xml"):
    face_cascade = cv2.CascadeClassifier(cascade_path)
    grayscale_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(
            grayscale_image,
            scaleFactor=1.1,
            minNeighbors=5)
    return faces

def get_faces_in_image(image_path, company, image_number):
    image = cv2.imread(image_path)
    print(image_path)
    faces = detect_faces(image)
    face_number = 0
    for (x, y, width, height) in faces:
        cropped = image[y:y + height, x:x + width]
        # Finding the year of the image 
        year = int(image_path.split('/')[-1][5:9]) #first 4 characters after the company name is the year
        file_name = "all_pics/cropped_faces/{0}/{1}{2:08d}_{3}_{4}.jpg".format(company, company, image_number, face_number, year)
        cv2.imwrite(file_name, cropped)
        face_number += 1
    
    # were there any faces found?
    return (face_number != 0)
    
# def get_all_faces(company):
#     directory = "all_pics/scraped/{}".format(company)
#     output_directory = "all_pics/cropped_faces/{}".format(company)
#     os.makedirs(output_directory, exist_ok=True)  # Added error handling for directory creation
#     image_number = 0
#     for image in os.listdir(directory):
#         image_path = "{}/{}".format(directory, image)
#         found_faces = get_faces_in_image(image_path, company, image_number)
#         if (found_faces):
#             image_number += 1
#     return image_number

def get_all_faces(company):
    directory = "all_pics/scraped/{}".format(company)
    output_directory = "all_pics/cropped_faces/{}".format(company)
    os.makedirs(output_directory, exist_ok=True)  # Added error handling for directory creation
    image_number = 0
    for image in os.listdir(directory):
        image_path = "{}/{}".format(directory, image)
        # Skip directories and non-image files
        if not os.path.isfile(image_path) or not image.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue
        found_faces = get_faces_in_image(image_path, company, image_number)
        if (found_faces):
            image_number += 1
    return image_number


In [141]:
faces = get_all_faces("vogue")
print("Found {} faces in Vogue covers".format(faces))

all_pics/scraped/vogue/vogue20080201.jpg
all_pics/scraped/vogue/vogue20000401.jpg
all_pics/scraped/vogue/vogue20180201.jpg
all_pics/scraped/vogue/vogue20100401.jpg
all_pics/scraped/vogue/vogue20030501.jpg
all_pics/scraped/vogue/vogue20130501.jpg
all_pics/scraped/vogue/vogue20060901.jpg
all_pics/scraped/vogue/vogue20160901.jpg
all_pics/scraped/vogue/vogue20050801.jpg
all_pics/scraped/vogue/vogue20150801.jpg
all_pics/scraped/vogue/vogue20120101.jpg
all_pics/scraped/vogue/vogue20020101.jpg
all_pics/scraped/vogue/vogue20190601.jpg
all_pics/scraped/vogue/vogue20090601.jpg
all_pics/scraped/vogue/vogue20061001.jpg
all_pics/scraped/vogue/vogue20161001.jpg
all_pics/scraped/vogue/vogue20051101.jpg
all_pics/scraped/vogue/vogue20151101.jpg
all_pics/scraped/vogue/vogue20201201.jpg
all_pics/scraped/vogue/vogue20061201.jpg
all_pics/scraped/vogue/vogue20161201.jpg
all_pics/scraped/vogue/vogue20231101.jpg
all_pics/scraped/vogue/vogue20201001.jpg
all_pics/scraped/vogue/vogue20110201.jpg
all_pics/scraped

all_pics/scraped/vogue/vogue20160601.jpg
all_pics/scraped/vogue/vogue20060601.jpg
all_pics/scraped/vogue/vogue20230501.jpg
all_pics/scraped/vogue/vogue20200401.jpg
all_pics/scraped/vogue/vogue20091001.jpg
all_pics/scraped/vogue/vogue20191001.jpg
all_pics/scraped/vogue/vogue20070201.jpg
all_pics/scraped/vogue/vogue20170201.jpg
all_pics/scraped/vogue/vogue20040301.jpg
all_pics/scraped/vogue/vogue20140301.jpg
all_pics/scraped/vogue/vogue20220101.jpg
all_pics/scraped/vogue/vogue20090901.jpg
all_pics/scraped/vogue/vogue20190901.jpg
all_pics/scraped/vogue/vogue20101201.jpg
all_pics/scraped/vogue/vogue20001201.jpg
all_pics/scraped/vogue/vogue20160101.jpg
all_pics/scraped/vogue/vogue20060101.jpg
all_pics/scraped/vogue/vogue20230201.jpg
all_pics/scraped/vogue/vogue20200301.jpg
all_pics/scraped/vogue/vogue20011101.jpg
all_pics/scraped/vogue/vogue20111101.jpg
all_pics/scraped/vogue/vogue20021001.jpg
all_pics/scraped/vogue/vogue20121001.jpg
all_pics/scraped/vogue/vogue20070501.jpg
all_pics/scraped

In [144]:
def rename(company):
    # Define the directories
    input_directory = "all_pics/cropped_faces/{}".format(company)
    output_directory = "all_pics/cropped_faces_renamed1/{}".format(company)

    # Create the output directory if it doesn't exist
    os.makedirs(output_directory, exist_ok=True)

    count = 0
    for image in os.listdir(input_directory):
        # Define the file paths
        input_image_path = os.path.join(input_directory, image)
        
        # Skip non-image files
        if not image.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):
            print("Skipping non-image file:", input_image_path)
            continue
            
        # Extract the year from the filename
        year = int(image.split('_')[-1].split('.')[0])  # Assuming the year is the last part before the extension

        # Define the output file path
        output_image_path = os.path.join(output_directory, "{}{:08d}_{}.jpg".format(company, count, year))

        # Read the image
        image_data = cv2.imread(input_image_path)
        
        # Check if the image data is valid
        if image_data is not None:
            # Write the image to the output directory with the new name
            cv2.imwrite(output_image_path, image_data)
            count += 1
        else:
            print("Error: Unable to read image:", input_image_path)

In [145]:
rename("vogue")

Skipping non-image file: all_pics/cropped_faces/vogue/.DS_Store


In [146]:
def take_average(image):
    average_row_color = np.average(image, axis=0)
    average_color = np.average(average_row_color, axis=0)
    color_block = np.zeros((100, 100, 3), dtype=np.uint8)
    color_block[:, :] = average_color
    return color_block  

def get_average_colors(company):
    directory = "all_pics/cropped_faces_renamed1/{}".format(company)
    for image in os.listdir(directory):
        image_path = "{}/{}".format(directory, image)
        face = cv2.imread(image_path)
        average_color = take_average(face)
        file_name = "all_pics/average_color/{}/{}".format(company, image)
        cv2.imwrite(file_name, average_color)

In [147]:
get_average_colors("vogue")

# Identify skin and calculate skin color (one image)

In [148]:
from sklearn.cluster import KMeans
from collections import Counter

def cluster_face(image, white_threshold):
    
    # save a patch of pixels from the center of the pic to identify the skin label
    (height, width, three) = image.shape
    (center_x, center_y) = (width // 2, height // 2)
    patch = image[center_x - 10 : center_x + 10, center_y - 10 : center_y + 10]
    flattened_patch = patch.transpose(2, 0, 1). reshape(3, -1).transpose()
    
    # remove white pixels, and fit a classifier
    flattened = image.transpose(2, 0, 1).reshape(3, -1).transpose()
    filtered = np.array([pixel for pixel in flattened if pixel.sum() < white_threshold])
    k_means = KMeans(n_clusters=2)
    k_means.fit(filtered)
    
    # identify the skin label
    patch_labels = k_means.predict(flattened_patch)
    skin_label = Counter(patch_labels).most_common()[0][0]
    
    # save the average skin pixel
    all_labels = k_means.predict(filtered)
    skin_mask = (all_labels == skin_label)
    skin_pixels = filtered[skin_mask]
    average_color = np.average(skin_pixels, axis=0)
    return average_color

In [150]:
# EXAMPLE

# Load the image using OpenCV
image_path = "all_pics/cropped_faces_renamed1/vogue/vogue00000011_2014.jpg"  # Replace this with the path to your image file
image = cv2.imread(image_path)

# Set the white threshold (you may need to adjust this value)
white_threshold = 1000

# Call the cluster_face function
average_color = cluster_face(image, white_threshold)

print("Average skin color:", average_color)

Average skin color: [153.9437748  174.1520551  227.63292656]


  super()._check_params_vs_input(X, default_n_init=10)


# Identify skin and calculate skin color for all images

In [151]:
import csv       
             
def all_cluster_face(folder_path, company_name, white_threshold, output_csv):
    skin_colors = []
    for image_file in os.listdir(folder_path):
        if image_file.endswith(".jpg") or image_file.endswith(".jpeg") or image_file.endswith(".png"):
            image_path = os.path.join(folder_path, image_file)
            image = cv2.imread(image_path)
            if image is not None:
                average_color = cluster_face(image, white_threshold)
                # Extract year from the filename
                year = int(image_file.split('_')[-1].split('.')[0])  # Adjusted to split at the correct underscore
                skin_colors.append((image_file, company_name, year, *average_color))
            else:
                print("Error loading image:", image_path)
    
    # Save skin color values to a CSV file
    with open(output_csv, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Image Name', 'Company Name', 'Year', 'Blue', 'Green', 'Red'])
        writer.writerows(skin_colors)

## Save skin color values in a CSV file

In [152]:
folder_path = "all_pics/cropped_faces_renamed1/vogue"
company_name = 'vogue'
white_threshold = 1000
output_csv = "skintones.csv"
all_cluster_face(folder_path, company_name, white_threshold, output_csv)

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super().

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super().

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super().

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super().

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super().

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super().

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super().

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super().

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super().

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super().