In [None]:
from datasets import load_dataset
import requests

# CIFAR-100 fine-grained class names, read from the dataset's label metadata.
ds = load_dataset("uoft-cs/cifar100")
label_names = ds["train"].features["fine_label"].names

# ImageNet class names: the listing is plain text with one label per line.
url = "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"
labels_response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page as labels.
labels_response.raise_for_status()
labels = labels_response.text.strip().split("\n")

# Candidate object names: the union of both label vocabularies.
objects = set(labels).union(set(label_names))


In [None]:
import pandas as pd

# Location of the McRae dataset file (tab-separated), relative to the working directory.
MCRAE_PATH = "CONCS_FEATS_concstats_brm.txt"

df = pd.read_csv(MCRAE_PATH, sep="\t")

In [None]:
import openai

def get_typical_color(object_name, model_version="gpt-4o"):
    """
    Ask a chat model for the most common color of an object.

    Args:
        object_name: Name of the object to ask about; interpolated into the prompt.
        model_version: Model identifier passed to the chat-completions call.

    Returns:
        The model's reply, stripped of surrounding whitespace and lower-cased.
        The prompt requests a one-word color, or 'multiple' when no single
        color is typical; the reply itself is not validated here.

    Raises:
        RuntimeError: If the OPENAI_API_KEY environment variable is not set.
    """
    import os

    # Read the key from the environment so it never has to be stored in the notebook.
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError("Set the OPENAI_API_KEY environment variable before calling get_typical_color.")

    client = openai.OpenAI(api_key=api_key)

    # The second sentence must describe the *absence* of a single typical color;
    # otherwise the instruction contradicts the first sentence.
    prompt = (
        f"What is the most common color of a '{object_name}'? "
        f"If there is no single most common color (clothing for example), just reply with 'multiple'. "
        f"Give only one-word answer."
    )

    response = client.chat.completions.create(
        model=model_version,
        messages=[
            {"role": "system", "content": "You're a visual and color expert."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=10,
        # temperature=0 / top_p=1 request the least random sampling.
        temperature=0,
        top_p=1
    )

    return response.choices[0].message.content.strip().lower()


In [None]:
import pandas as pd

# Query a typical color once for every distinct concept in the McRae table.
concepts = df["Concept"].unique()
color_results = {name: get_typical_color(name) for name in concepts}

# Tabulate the answers: one row per concept.
color_df = pd.DataFrame(
    {
        "Concept": list(color_results.keys()),
        "Typical_Color": list(color_results.values()),
    }
)


In [None]:
# Count how often each distinct answer string occurs in the model's replies.
color_df["Typical_Color"].value_counts()

In [None]:
# Drop concepts answered with 'multiple' (with or without a trailing period).
# regex=False treats "." as a literal period; under regex matching "." matches
# every character, which would blank every answer and filter nothing.
color_df = color_df[color_df["Typical_Color"].str.replace(".", "", regex=False) != "multiple"]

In [None]:
# Concept names that remain after the 'multiple' answers were removed.
color_df["Concept"].values

In [None]:
# Keep only label names that do not already have a color from the concept table.
# The comparison is an exact, case-sensitive string match.
objects = objects.difference(color_df["Concept"].values)

In [None]:
# Number of label names that still need a color query.
len(objects)

In [None]:
import pandas as pd

# Query a typical color for every remaining CIFAR/ImageNet label name.
concepts = [*objects]
color_results = {}

for name in concepts:
    answer = get_typical_color(name)
    print(answer)  # progress trace: one reply per line
    color_results[name] = answer

# Tabulate the answers: one row per label name.
color_df2 = pd.DataFrame(
    [(name, answer) for name, answer in color_results.items()],
    columns=["Concept", "Typical_Color"],
)


In [None]:
# Stack both color tables. ignore_index=True renumbers the rows so the result
# does not carry duplicate index labels from the two inputs.
# Note: this rebinds `df`, which previously held the raw McRae table.
df = pd.concat([color_df, color_df2], ignore_index=True)

In [None]:
# Drop rows answered with 'multiple' (with or without a trailing period).
# regex=False treats "." as a literal period rather than "any character".
df = df[df["Typical_Color"].str.replace(".", "", regex=False) != "multiple"]

In [None]:
# Remove literal periods from the answers (e.g. "red." -> "red").
# regex=False is required: as a regex, "." would delete every character.
df["Typical_Color"] = df["Typical_Color"].str.replace(".", "", regex=False)

In [None]:
# Map spelling variants and near-synonyms onto canonical color names.
# Replacements are substring-based and applied in the order listed.
color_synonyms = [
    ("gray", "grey"),
    ("golden", "gold"),
    ("olive", "green"),
    ("reddish", "red"),
    ("yellowish", "yellow"),
    ("chestnut", "brown"),
    ("beige", "tan"),
]

normalized_colors = df["Typical_Color"]
for variant, canonical in color_synonyms:
    normalized_colors = normalized_colors.str.replace(variant, canonical)
df["Typical_Color"] = normalized_colors




In [None]:
# Answer frequencies after period stripping and synonym normalization.
df["Typical_Color"].value_counts()

In [None]:
# Color vocabulary that is kept; rows with any other answer are discarded.
valid_colors = [
    "white",
    "black",
    "brown",
    "green",
    "grey",
    "red",
    "silver",
    "yellow",
    "orange",
    "blue",
    "pink",
    "gold",
    "brass",
    "tan",
    "burgundy",
    "purple",
]

has_valid_color = df["Typical_Color"].isin(valid_colors)
df = df[has_valid_color]


In [None]:
# Display the concept/color table after filtering to the valid color vocabulary.
df

In [None]:
import requests
import pandas as pd

import os

# Google Custom Search credentials. They are read from the environment so real
# keys never need to be pasted into the notebook; the placeholder strings are
# only a fallback and must be replaced (or the variables set) before searching.
API_KEY = os.environ.get("GOOGLE_API_KEY", "YOUR GOOGLE API KEY")  # API key
CX_ID = os.environ.get("GOOGLE_CX_ID", "YOUR GOOGLE CX ID")  # search engine (CX) ID


In [None]:
import requests
import pandas as pd
import os
from PIL import Image
from io import BytesIO
import matplotlib.pyplot as plt

# Create a folder to store downloaded images
os.makedirs("downloaded_images", exist_ok=True)

# Function to fetch and download images
def fetch_and_download_images(object_name, color, num_results=3,
                              output_dir="downloaded_images", timeout=30):
    """
    Search Google Custom Search for images of a colored object and save them.

    Args:
        object_name: Object to search for.
        color: Color used in the search query and in the saved file names.
        num_results: Maximum number of search hits to request and download.
        output_dir: Directory the images are written to (created if missing).
        timeout: Per-request timeout in seconds.

    Returns:
        (image_urls, image_paths): the result links returned by the search and
        the local paths of the files that were actually written. A link whose
        download fails has no entry in image_paths, so the two lists are not
        guaranteed to align. Returns ([], []) when the search itself fails.

    Uses the module-level CX_ID and API_KEY. Errors are printed rather than
    raised so a single bad concept does not abort a DataFrame-wide apply.
    """
    query = f"a {color} {object_name} on a white background"
    # Passing the query via `params` lets requests URL-encode it, so names
    # containing '&', '#', '+', etc. cannot corrupt the request.
    search_params = {
        "q": query,
        "cx": CX_ID,
        "key": API_KEY,
        "searchType": "image",
        "num": num_results,
    }

    image_urls = []
    image_paths = []

    try:
        os.makedirs(output_dir, exist_ok=True)
        response = requests.get(
            "https://www.googleapis.com/customsearch/v1",
            params=search_params,
            timeout=timeout,
        )
        data = response.json()

        if "items" not in data:
            # Surface an API-side failure (if the payload reports one) instead
            # of silently returning nothing.
            if "error" in data:
                print(f"Search API error for {color} {object_name}: {data['error']}")
            return image_urls, image_paths

        image_urls = [item["link"] for item in data["items"][:num_results]]
        # Sanitize filename
        safe_name = f"{color}_{object_name}".replace(" ", "_").replace("/", "_")

        for i, image_url in enumerate(image_urls):
            # Handle each download on its own so one dead link does not
            # discard the images that were already saved.
            try:
                image_response = requests.get(image_url, timeout=timeout)
                if image_response.status_code != 200:
                    continue
                image_path = os.path.join(output_dir, f"{safe_name}_{i+1}.jpg").replace(os.sep, "/")
                with open(image_path, "wb") as f:
                    f.write(image_response.content)
                image_paths.append(image_path)
            except (requests.RequestException, OSError) as e:
                print(f"Error downloading {image_url}: {e}")
        return image_urls, image_paths
    except Exception as e:
        print(f"Error fetching images for {color} {object_name}: {e}")
        return [], []

# Run the image search for every row and store the returned URL list and
# file-path list in two new columns.
def _fetch_images_for_row(row):
    found = fetch_and_download_images(row["Concept"], row["Typical_Color"])
    return pd.Series(found)

df[["image_url", "image_path"]] = df.apply(_fetch_images_for_row, axis=1)


In [None]:
from PIL import Image
import matplotlib.pyplot as plt

# Show every downloaded image, one figure per file.
for _, record in df.iterrows():
    paths = record["image_path"]
    if not paths:  # nothing was downloaded for this row
        continue

    for image_path in paths:
        print(f"Displaying: {image_path}")  # Print file name

        # A file that cannot be opened or rendered is reported and skipped.
        try:
            image = Image.open(image_path)
            plt.figure()
            plt.imshow(image)
            plt.axis("off")
            plt.show()
        except Exception as e:
            print(f"Error displaying {image_path}: {e}")


In [None]:
# Step 1: one row per downloaded image.
# explode() turns each entry of the 'image_path' list into its own row and
# repeats the other columns; 'gpt_response' starts out empty and is filled in
# by the evaluation loop.
df_expanded = (
    df.explode("image_path")
    .reset_index(drop=True)
    .assign(gpt_response=None)
)

df_expanded

In [None]:
# Step 2: Define the function to query the GPT API for each image

import openai
import base64

def evaluate_image_with_gpt(image_path, object_name, color, model_version="gpt-4o"):
    """
    Ask a vision-capable chat model four numbered questions about one image.

    Args:
        image_path: Path of the image file to send (read as raw bytes).
        object_name: Object the image is expected to show.
        color: Color the object is expected to have.
        model_version: Model identifier passed to the chat-completions call.

    Returns:
        The model's reply text, unparsed. The questions ask whether the image
        shows the colored object, whether the background is white, whether it
        is an illustration or realistic, and for a 1-10 realism rating.

    Raises:
        RuntimeError: If the OPENAI_API_KEY environment variable is not set.
        OSError: If the image file cannot be read.

    SECURITY: a secret key used to be hard-coded here. It has been removed;
    that key should be treated as compromised and revoked.
    """
    import os

    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError("Set the OPENAI_API_KEY environment variable before calling evaluate_image_with_gpt.")

    client = openai.OpenAI(api_key=api_key)

    # Encode image to base64 so it can be embedded in a data URL
    with open(image_path, "rb") as image_file:
        image_data = base64.b64encode(image_file.read()).decode("utf-8")

    # Define the questions (the numbering is what the score parser keys on)
    questions = [
        f"1. Is this an image of a {color} {object_name}? Answer with 'yes' or 'no'.",
        "2. Is this image on a white background? Answer with 'yes' or 'no'.",
        "3. Is this image an illustration (cartoon, clipart, painting) or a realistic image? Answer with 'illustration' or 'realistic'.",
        f"4. On a scale of 1 through 10, 1 being not realistic and 10 being realistic, how realistic is this {object_name}? Answer with the number.'"
    ]

    # Send request to GPT API. The data URL always declares image/jpeg,
    # regardless of the actual file contents.
    response = client.chat.completions.create(
        model=model_version,
        messages=[
            {"role": "system", "content": "Analyze the image and answer the following questions."},
            {"role": "user", "content": [
                {"type": "text", "text": "\n".join(questions)},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}}
            ]}
        ],
        max_tokens=1000,
        temperature=0,
        top_p=0
    )

    return response.choices[0].message.content


In [None]:
# Display the one-row-per-image table before the evaluation loop fills 'gpt_response'.
df_expanded

In [None]:
# Step 3: evaluate every image and record the reply next to it.
# A failed call is stored as an "Error: ..." string so the loop keeps going.
for idx, record in df_expanded.iterrows():
    path = record["image_path"]
    concept_name = record["Concept"]
    concept_color = record["Typical_Color"]

    try:
        outcome = evaluate_image_with_gpt(path, concept_name, concept_color)
    except Exception as e:
        outcome = f"Error: {e}"

    df_expanded.at[idx, "gpt_response"] = outcome


In [None]:
# Inspect the stored replies (or the "Error: ..." strings written by the loop).
df_expanded["gpt_response"]

In [None]:
# Persist the evaluated table; index=False leaves the row index out of the file.
# NOTE(review): list-valued cells (e.g. 'image_url') are presumably written as
# their string form and will come back as strings when the CSV is reloaded.
df_expanded.to_csv("gpt_ranked_images.csv", index = False)

In [None]:
import pandas as pd 

# Reload the saved evaluations, rebinding df_expanded to the on-disk copy.
df_expanded  = pd.read_csv("gpt_ranked_images.csv")

In [None]:
import re

# Function to calculate score based on GPT response
def calculate_score(response):
    """
    Convert a numbered GPT image evaluation into a single score.

    The reply is expected to contain numbered answers such as
    "1. yes", "2. no", "3. realistic", "4. 8" (any letter case).

    Args:
        response: Reply text; non-string values (e.g. NaN) are accepted.

    Returns:
        0 when `response` is not a string, when fewer than four numbered
        answers are found, or when answer 1 is "no". Otherwise the sum of:
        10 if answer 2 is "yes", 10 if answer 3 is "realistic", and the
        integer value of answer 4 when it is numeric.
    """
    if not isinstance(response, str):
        return 0  # Handle cases where response is not a string

    # The prompt asks for lowercase 'yes'/'no', so matching must ignore case.
    found = re.findall(
        r"\d+\.\s(yes|no|illustration|realistic|\d+)",
        response,
        flags=re.IGNORECASE,
    )

    if len(found) < 4:
        return 0  # If responses are incomplete, return 0

    # Only the first four answers are scored; extra numbered lines are ignored
    # rather than breaking the unpacking.
    answer_1, answer_2, answer_3, answer_4 = (answer.lower() for answer in found[:4])

    # If answer to Q1 is "no", the image does not show the target: score is 0
    if answer_1 == "no":
        return 0

    score = 0
    if answer_2 == "yes":
        score += 10
    if answer_3 == "realistic":
        score += 10
    if answer_4.isdigit():
        score += int(answer_4)

    return score

# Score every stored reply; the result goes into a new 'score' column.
df_expanded["score"] = df_expanded["gpt_response"].map(calculate_score)

In [None]:
# Inspect the per-image scores produced by calculate_score.
df_expanded["score"]

In [None]:
# Work on a copy and turn list-valued cells into tuples so that every
# grouping column is hashable.
df_expanded_fixed = df_expanded.copy()

def _list_to_tuple(value):
    return tuple(value) if isinstance(value, list) else value

for col in df_expanded_fixed.columns:
    holds_lists = any(isinstance(value, list) for value in df_expanded_fixed[col])
    if holds_lists:
        df_expanded_fixed[col] = df_expanded_fixed[col].apply(_list_to_tuple)

# Group on every column except the per-image ones.
per_image_columns = ["image_path", "gpt_response", "score"]
grouping_keys = list(df_expanded_fixed.columns.difference(per_image_columns))

# Row label of the highest-scoring image within each group.
best_score_idx = (
    df_expanded_fixed
    .groupby(grouping_keys)["score"]
    .idxmax()
)

# Keep only those winning rows.
df_best_scores = df_expanded_fixed.loc[best_score_idx].reset_index(drop=True)



In [None]:
# Lower-case both name lists so the comparison is case-insensitive.
# NOTE(review): `df` must have a 'correct_object' column here, which no
# visible cell creates — confirm which table `df` refers to at this point.
objects = df["correct_object"].str.lower().tolist()
concepts = df_best_scores["Concept"].str.lower().tolist()

# Collect every (object, concept) pair whose names are identical.
overlaps = [
    (obj, concept)
    for obj in objects
    for concept in concepts
    if obj == concept
]

# Tabulate the pairs for inspection.
overlap_df = pd.DataFrame(overlaps, columns=["correct_object", "Concept"])


In [None]:
# Remove concepts that already exist as objects. overlap_df holds lower-cased
# names, so 'Concept' is lower-cased for the comparison as well; otherwise
# capitalized concepts would never be matched.
df_best_scores = df_best_scores[~df_best_scores["Concept"].str.lower().isin(overlap_df["Concept"].values)]

In [None]:
# Lower-case both name lists so the comparison is case-insensitive.
# NOTE(review): `df` must have a 'correct_object' column here, which no
# visible cell creates — confirm which table `df` refers to at this point.
objects = df["correct_object"].str.lower().tolist()
concepts = df_best_scores["Concept"].str.lower().tolist()

# Collect every pair where the object name occurs inside the concept name
# (e.g. an object name contained in a longer, more specific concept).
overlaps = [
    (obj, concept)
    for obj in objects
    for concept in concepts
    if obj in concept
]

# Tabulate the pairs for inspection.
overlap_df = pd.DataFrame(overlaps, columns=["correct_object", "Concept"])


In [None]:
# Display the substring-overlap pairs for manual review.
overlap_df

In [None]:
# Spot check: list the overlap rows recorded for a single concept.
overlap_df[overlap_df["Concept"] == 'american lobster']

In [None]:
# Hand-picked concepts to remove, written in lower case because they are
# compared against lower-cased concept names.
drop_list = [
    'african elephant',
    'indian elephant',
    'american black bear',
    'brown bear',
    'ice bear',
    'king penguin',
    'bell pepper',
    'grand piano',
    'oak_tree',
    'great grey owl',
    'soup bowl',
    'grey fox',
    'red fox',
    'keyboard',
    'american alligator',
    'common iguana',
    'pine_tree',
    'mud turtle',
    'custard apple',
    'american lobster',
    'school bus',
    'willow_tree',
]

In [None]:
# Row count before the drop_list filter is applied.
len(df_best_scores)

In [None]:
# Remove the hand-picked concepts (case-insensitive match against drop_list).
is_dropped = df_best_scores["Concept"].str.lower().isin(drop_list)
df_best_scores = df_best_scores[~is_dropped]

In [None]:
# Keep a single row per concept name.
# NOTE(review): drop_duplicates keeps the first occurrence by default, which is
# not necessarily the highest-scoring row for that concept — confirm intended.
df_best_scores = df_best_scores.drop_duplicates("Concept")

In [None]:
# Concepts whose best image scored 0. The explicit copy makes df_failed an
# independent frame, so the later column assignment on it neither triggers
# SettingWithCopyWarning nor depends on df_best_scores.
df_failed = df_best_scores[df_best_scores["score"] == 0].copy()

In [3]:
# ROUND TWO: retry the image search for concepts whose best first-round image scored 0.

In [None]:
import requests
import pandas as pd
import os
from PIL import Image
from io import BytesIO
import matplotlib.pyplot as plt

# Create a folder to store downloaded images
os.makedirs("downloaded_images", exist_ok=True)

# Function to fetch image URLs and download multiple images
def fetch_and_download_images(object_name, color, num_results=3, timeout=30):
    """
    Retry variant of the image search: the query omits the color.

    This definition replaces the earlier function of the same name for every
    cell that runs after it.

    Args:
        object_name: Object to search for.
        color: Not part of the query; only used in file names and messages.
        num_results: Maximum number of search hits to request and download.
        timeout: Per-request timeout in seconds.

    Returns:
        (image_urls, image_paths): the result links returned by the search and
        the local paths of the files that were actually written. A link whose
        download fails has no entry in image_paths. Returns ([], []) when the
        search itself fails.

    Uses the module-level CX_ID and API_KEY. Errors are printed, not raised.
    """
    query = f"a {object_name} on a white background"
    # Passing the query via `params` lets requests URL-encode it, so names
    # containing '&', '#', '+', etc. cannot corrupt the request.
    search_params = {
        "q": query,
        "cx": CX_ID,
        "key": API_KEY,
        "searchType": "image",
        "num": num_results,
    }

    image_urls = []
    image_paths = []

    try:
        response = requests.get(
            "https://www.googleapis.com/customsearch/v1",
            params=search_params,
            timeout=timeout,
        )
        data = response.json()

        if "items" not in data:
            # Surface an API-side failure (if the payload reports one).
            if "error" in data:
                print(f"Search API error for {color} {object_name}: {data['error']}")
            return image_urls, image_paths

        # Get first 'num_results' images
        image_urls = [item["link"] for item in data["items"][:num_results]]

        # Only '/' is replaced: it would otherwise be read as a directory
        # separator. Spaces are kept so file names stay as before.
        file_stem = f"{color}_{object_name}".replace("/", "_")

        for i, image_url in enumerate(image_urls):
            # Handle each download on its own so one dead link does not
            # discard the images that were already saved.
            try:
                image_response = requests.get(image_url, timeout=timeout)
                if image_response.status_code != 200:
                    continue
                image_path = f"downloaded_images/{file_stem}_{i+1}.jpg"
                with open(image_path, "wb") as f:
                    f.write(image_response.content)
                image_paths.append(image_path)  # Store the file path
            except (requests.RequestException, OSError) as e:
                print(f"Error downloading {image_url}: {e}")
        return image_urls, image_paths  # Return both lists
    except Exception as e:
        print(f"Error fetching images for {color} {object_name}: {e}")
        return [], []
# Re-run the search for every failed concept and overwrite its URL and
# file-path columns with the new results.
def _refetch_images_for_row(row):
    found = fetch_and_download_images(row["Concept"], row["Typical_Color"])
    return pd.Series(found)

df_failed[["image_url", "image_path"]] = df_failed.apply(_refetch_images_for_row, axis=1)



In [None]:
# One row per re-downloaded image; the 'gpt_response' column is reset to empty
# so the retry evaluation starts from scratch.
df_expanded_failed = (
    df_failed.explode("image_path")
    .reset_index(drop=True)
    .assign(gpt_response=None)
)

df_expanded_failed

In [None]:
# Evaluate every retry image and record the reply next to it.
# A failed call is stored as an "Error: ..." string so the loop keeps going.
for idx, record in df_expanded_failed.iterrows():
    path = record["image_path"]
    concept_name = record["Concept"]
    concept_color = record["Typical_Color"]

    try:
        outcome = evaluate_image_with_gpt(path, concept_name, concept_color)
    except Exception as e:
        outcome = f"Error: {e}"

    df_expanded_failed.at[idx, "gpt_response"] = outcome


In [None]:
# Score the retry replies with the same scoring function as round one.
df_expanded_failed["score"] = df_expanded_failed["gpt_response"].map(calculate_score)

In [None]:
# Inspect the per-image scores of the retry round.
df_expanded_failed["score"]

In [None]:
from PIL import Image
import matplotlib.pyplot as plt

# Show each retry image together with its score (one image path per row).
for _, record in df_expanded_failed.iterrows():
    image_path = record["image_path"]
    if not image_path:  # no file recorded for this row
        continue

    print(f"Displaying: {image_path}")  # Print file name
    score = record["score"]
    print(f"Score: {score}")

    # A file that cannot be opened or rendered is reported and skipped.
    try:
        image = Image.open(image_path)
        plt.figure()
        plt.imshow(image)
        plt.axis("off")
        plt.show()
    except Exception as e:
        print(f"Error displaying {image_path}: {e}")


In [None]:
# Number of retry images before the manual exclusions.
len(df_expanded_failed)

In [None]:
# Retry images excluded by hand, identified by their exact file path.
rejected_paths = [
    "downloaded_images/black_binoculars_1.jpg",
    "downloaded_images/brown_espresso_2.jpg",
    "downloaded_images/grey_vault_1.jpg",
    "downloaded_images/brown_worm fence_3.jpg",
]

is_rejected = df_expanded_failed["image_path"].isin(rejected_paths)
df_expanded_failed = df_expanded_failed[~is_rejected]


In [None]:
# Work on a copy and turn list-valued cells into tuples so that every
# grouping column is hashable.
df_expanded_failed = df_expanded_failed.copy()

def _retry_list_to_tuple(value):
    return tuple(value) if isinstance(value, list) else value

for col in df_expanded_failed.columns:
    holds_lists = any(isinstance(value, list) for value in df_expanded_failed[col])
    if holds_lists:
        df_expanded_failed[col] = df_expanded_failed[col].apply(_retry_list_to_tuple)

# Group on every column except the per-image ones.
per_image_columns = ["image_path", "gpt_response", "score"]
grouping_keys = list(df_expanded_failed.columns.difference(per_image_columns))

# Row label of the highest-scoring image within each group.
best_score_idx = (
    df_expanded_failed
    .groupby(grouping_keys)["score"]
    .idxmax()
)

# Keep only those winning rows.
df_best_scores_failed = df_expanded_failed.loc[best_score_idx].reset_index(drop=True)
df_best_scores_failed

In [None]:
# Number of concepts whose best retry image still scored 0.
len(df_best_scores_failed[df_best_scores_failed["score"] == 0])

In [None]:
# Final table: first-round winners with a non-zero score, followed by the
# best retry result for each concept that failed in round one.
first_round_passed = df_best_scores[df_best_scores["score"] != 0]
df_final = pd.concat([first_round_passed, df_best_scores_failed])

In [None]:
from PIL import Image
import matplotlib.pyplot as plt

# Show each selected image together with its score (one image path per row).
for _, record in df_final.iterrows():
    image_path = record["image_path"]
    if not image_path:  # no file recorded for this row
        continue

    print(f"Displaying: {image_path}")  # Print file name
    score = record["score"]
    print(f"Score: {score}")

    # A file that cannot be opened or rendered is reported and skipped.
    try:
        image = Image.open(image_path)
        plt.figure()
        plt.imshow(image)
        plt.axis("off")
        plt.show()
    except Exception as e:
        print(f"Error displaying {image_path}: {e}")


In [None]:
# Duplicate the concept and color columns under the 'correct_*' names.
for alias, source_column in (
    ("correct_object", "Concept"),
    ("correct_answer", "Typical_Color"),
):
    df_final[alias] = df_final[source_column]

In [None]:
# Column names present in both `df` and `df_final`.
columns = list(set(df.columns) & set(df_final.columns))

In [None]:
import numpy as np
# Define thresholds
black_threshold = 30  # a pixel counts as black when all RGB channels are <= this
black_ratio_threshold = 0.7  # fraction of a row that must be black
blue_color = (14, 119, 176)  # The specific blue color to detect
blue_tolerance = 30  # allowed per-channel deviation from blue_color
blue_ratio_threshold = 0.7  # fraction of a row that must be blue


def _band_rows(image_array):
    """Flag the rows of an (H, W, 3) RGB array that are mostly black or mostly blue.

    Returns a boolean array of length H; an entry is True when the share of
    black pixels reaches black_ratio_threshold or the share of blue pixels
    reaches blue_ratio_threshold.
    """
    # Work on a signed copy: uint8 subtraction wraps around (5 - 14 -> 247),
    # which would hide every pixel darker than the blue reference.
    pixels = image_array.astype(np.int16)
    width = pixels.shape[1]

    black_mask = np.all(pixels <= black_threshold, axis=2)
    blue_mask = np.all(np.abs(pixels - np.array(blue_color)) <= blue_tolerance, axis=2)

    black_rows = black_mask.sum(axis=1) / width >= black_ratio_threshold
    blue_rows = blue_mask.sum(axis=1) / width >= blue_ratio_threshold
    return black_rows | blue_rows


# List to store images with black or blue in the last row
valid_images = []

# NOTE(review): this iterates `df`, not `df_final` — confirm which table holds
# the image paths that are meant to be cleaned.
for index, row in df.iterrows():
    image_path = row.get("image_path")

    if not image_path:
        continue  # Skip if no image path is provided

    try:
        # The context manager closes the file handle; convert() returns an
        # independent in-memory copy, so the file can be overwritten later.
        with Image.open(image_path) as source_image:
            image = source_image.convert("RGB")

        # Only the last pixel row decides whether the image has a bottom band.
        last_row = np.array(image)[-1:, :, :]
        if _band_rows(last_row)[0]:
            valid_images.append((image_path, image))  # Store path and image object

    except Exception as e:
        print(f"Error processing {image_path}: {e}")

# Display valid images
if valid_images:
    for _, img in valid_images:
        display(img)
else:
    print("No images found whose last row is mostly black or blue.")

# List to store cropped images with their paths
cropped_images = []

for image_path, image in valid_images:
    try:
        band_rows = _band_rows(np.array(image))

        # Rows that carry real content (neither mostly black nor mostly blue).
        content_rows = np.flatnonzero(~band_rows)

        # If every row is a band row, cropping would leave nothing: keep the
        # image unchanged.
        if content_rows.size == 0:
            print(f"Skipping {image_path} to avoid empty image.")
            cropped_images.append((image_path, image))
            continue

        # Cut just below the lowest content row, removing the trailing bands.
        crop_end = int(content_rows[-1]) + 1
        cropped_image = image.crop((0, 0, image.width, crop_end))

        cropped_images.append((image_path, cropped_image))

    except Exception as e:
        print(f"Error processing {image_path}: {e}")


In [None]:

# Preview the cropped results.
if not cropped_images:
    print("No images were cropped.")
else:
    for _, img in cropped_images:
        display(img)

# Overwrite each original file with its cropped version.
for image_path, cropped_image in cropped_images:
    try:
        cropped_image.save(image_path)
    except Exception as e:
        print(f"Error saving cropped image {image_path}: {e}")
    else:
        print(f"Saved cropped image to {image_path}")


In [None]:
# Write the final selection to disk; index=False leaves the row index out of the file.
df_final.to_csv("final_images.csv", index=False)