In [None]:
import pandas as pd

# Load the CSV file into a DataFrame (read_csv's default comma separator is used).
df = pd.read_csv('../data/smolvlm_m2_sft.csv')

# Summarise the table: dimensions, column names, then a preview of the top rows.
print(
    f"Shape: {df.shape}",
    f"Columns: {list(df.columns)}",
    "\nFirst few rows:",
    sep="\n",
)
display(df.head())

In [None]:
# Values of the first CSV column, converted to strings.
yes_no_labels = df.iloc[:, 0].astype(str).tolist()
# Headers of every column after the first, converted to strings.
male_female_labels = df.columns.astype(str).tolist()[1:]

# Show both label lists. The first list is taken from a column (iloc[:, 0]),
# not from a row, so the heading says so.
print("\nFirst column values:")
print(yes_no_labels)
print("\nColumn headers:")
print(male_female_labels)

In [None]:
# Load the known yes / no / other label lists: one entry per line with the
# surrounding whitespace stripped (blank lines are kept as empty strings).
# UTF-8 is requested explicitly so decoding does not depend on the platform
# default and matches how load_phrases() reads the label files.
with open('../data/labels/yes_labels.txt', 'r', encoding='utf-8') as file:
    yes_labels = [line.strip() for line in file]

with open('../data/labels/no_labels.txt', 'r', encoding='utf-8') as file:
    no_labels = [line.strip() for line in file]

with open('../data/labels/other_labels_yn.txt', 'r', encoding='utf-8') as file:
    other_labels_yn = [line.strip() for line in file]

In [None]:
# Collect the yes/no labels that appear in none of the three known-label lists.
# The lists are merged into a single set first, so each lookup is a hash probe
# instead of a scan over three lists. Order and duplicates of yes_no_labels are
# preserved in the result.
known_yes_no_labels = set(yes_labels) | set(no_labels) | set(other_labels_yn)
yes_no_missing_labels = [label for label in yes_no_labels if label not in known_yes_no_labels]

if yes_no_missing_labels:
    print("\nY/N: Missing labels from *_labels.txt:")
    print(yes_no_missing_labels)
    print(len(yes_no_missing_labels), "missing labels found.")
else:
    print("\nAll yes - no labels are present.")

In [None]:
# Load the known male / female / other label lists: one entry per line with the
# surrounding whitespace stripped (blank lines are kept as empty strings).
# UTF-8 is requested explicitly so decoding does not depend on the platform
# default and matches how load_phrases() reads these same files.
with open('../data/labels/male_labels.txt', 'r', encoding='utf-8') as file:
    male_labels = [line.strip() for line in file]

with open('../data/labels/female_labels.txt', 'r', encoding='utf-8') as file:
    female_labels = [line.strip() for line in file]

with open('../data/labels/other_labels_mf.txt', 'r', encoding='utf-8') as file:
    other_labels_mf = [line.strip() for line in file]

In [None]:
# Collect the male/female labels that appear in none of the three known-label
# lists. The lists are merged into a single set first, so each lookup is a hash
# probe instead of a scan over three lists. Order of male_female_labels is
# preserved in the result.
known_mf_labels = set(male_labels) | set(female_labels) | set(other_labels_mf)
mf_missing_labels = [label for label in male_female_labels if label not in known_mf_labels]

if mf_missing_labels:
    print("\nM/F: Missing labels from *_labels.txt:")
    print(mf_missing_labels)
    print(len(mf_missing_labels), "missing labels")
else:
    print("\nAll male - female labels are present.")

In [None]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Load the sentence-transformer model by name. Its encode() method produces the
# embeddings for the reference phrases and for each text passed to classify().
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

def load_phrases(filename):
    """Read a UTF-8 text file and return its non-blank lines.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with one string per line, in file order, with leading and
        trailing whitespace removed. Lines that are empty after stripping
        are left out.
    """
    phrases = []
    with open(filename, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            phrase = raw_line.strip()
            if phrase:
                phrases.append(phrase)
    return phrases

In [None]:
# Load labeled examples
# yes_phrases = load_phrases('../data/labels/yes_labels.txt')
# no_phrases = load_phrases('../data/labels/no_labels.txt')
# other_phrases = load_phrases('../data/labels/other_labels_yn.txt')

# print(f"\nLoaded {len(yes_phrases)} yes phrases, {len(no_phrases)} no phrases, and {len(other_phrases)} other phrases.")

# Reference phrases for the male / female / other classes, read with load_phrases()
# (blank lines dropped, whitespace stripped).
male_phrases, female_phrases, other_mf_phrases = map(
    load_phrases,
    (
        '../data/labels/male_labels.txt',
        '../data/labels/female_labels.txt',
        '../data/labels/other_labels_mf.txt',
    ),
)

print(f"\nLoaded {len(male_phrases)} male phrases, {len(female_phrases)} female phrases, and {len(other_mf_phrases)} other phrases.")

In [None]:
# Embed all phrases
# yes_embeddings = model.encode(yes_phrases, convert_to_tensor=True).cpu().numpy()
# no_embeddings = model.encode(no_phrases, convert_to_tensor=True).cpu().numpy()
# other_embeddings = model.encode(other_phrases, convert_to_tensor=True).cpu().numpy()

# def classify(text, threshold=0.85):
#     input_embedding = model.encode([text], convert_to_tensor=True).cpu().numpy()

#     # Calculate cosine similarities
#     yes_sim = cosine_similarity(input_embedding, yes_embeddings).max()
#     no_sim = cosine_similarity(input_embedding, no_embeddings).max()
#     other_sim = cosine_similarity(input_embedding, other_embeddings).max()

#     scores = {'yes': yes_sim, 'no': no_sim, 'other': other_sim}
#     best_label = max(scores, key=scores.get)

#     # Optional: apply threshold to avoid spurious classifications
#     if scores[best_label] < threshold:
#         return 'not confident'
#     return best_label

# Encode each reference phrase list with the sentence-transformer model and keep
# the result as a NumPy array on the CPU, ready for cosine_similarity().
male_embeddings, female_embeddings, other_embeddings = (
    model.encode(phrase_list, convert_to_tensor=True).cpu().numpy()
    for phrase_list in (male_phrases, female_phrases, other_mf_phrases)
)

def classify(text, threshold=0.05):
    """Assign `text` to 'male', 'female' or 'other' by embedding similarity.

    The text is encoded with the module-level `model`, and its cosine similarity
    to every reference embedding of a class is averaged into one score per class.

    Args:
        text: The string to classify.
        threshold: Minimum lead the best class score must have over the
            second-best score for the result to be accepted.

    Returns:
        The name of the highest-scoring class, or 'not confident' when the gap
        between the two best scores is smaller than `threshold`.
    """
    query_embedding = model.encode([text], convert_to_tensor=True).cpu().numpy()

    def mean_similarity(reference_embeddings):
        # Average cosine similarity between the single query and all references.
        return cosine_similarity(query_embedding, reference_embeddings).mean(axis=1)[0]

    scores = {
        'male': mean_similarity(male_embeddings),
        'female': mean_similarity(female_embeddings),
        'other': mean_similarity(other_embeddings),
    }

    # Accept the winner only if it clearly beats the runner-up.
    best_score, runner_up_score = sorted(scores.values(), reverse=True)[:2]
    if best_score - runner_up_score < threshold:
        return 'not confident'

    return max(scores, key=scores.get)

In [None]:
#Example usage
# embedded_yes_no_responses = {'yes': [], 'no': [], 'other': [], 'not confident': []}
# for ex in yes_no_missing_labels:
#     label = classify(ex)
#     embedded_yes_no_responses[label].append(ex)

# print("\nClassified examples:")
# print(f"Yes: {len(embedded_yes_no_responses['yes'])}, No: {len(embedded_yes_no_responses['no'])}, Other: {len(embedded_yes_no_responses['other'])}, Not confident: {len(embedded_yes_no_responses['not confident'])}")

# Sort every unmatched male/female label into the bucket classify() returns for it.
embedded_male_female_responses = {
    bucket: [] for bucket in ('male', 'female', 'other', 'not confident')
}
print(len(mf_missing_labels), "missing labels to classify.")
for candidate in mf_missing_labels:
    embedded_male_female_responses[classify(candidate)].append(candidate)

print("\nClassified examples:")
print(f"Male: {len(embedded_male_female_responses['male'])}, Female: {len(embedded_male_female_responses['female'])}, Other: {len(embedded_male_female_responses['other'])}, Not confident: {len(embedded_male_female_responses['not confident'])}")

In [None]:
import anthropic
import os

# Take the API key from the environment. Nothing is written to os.environ here:
# assigning a value in this cell would overwrite a key that is already exported
# (an empty string makes every request fail) and would tempt pasting a secret
# into the notebook file.
anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY")
if not anthropic_api_key:
    raise RuntimeError(
        "ANTHROPIC_API_KEY is not set. Export it in the environment that starts "
        "the Jupyter kernel before running this cell."
    )

client = anthropic.Anthropic(api_key=anthropic_api_key)

In [None]:
import time
from anthropic import RateLimitError

In [None]:
# Ask the LLM to classify every yes/no label the embedding classifier could not
# place ('not confident'). final_yes_no_responses ends up with exactly one entry
# per label, in the same order, so it can be zipped with the label list.
from types import SimpleNamespace

try:
    yes_no_pending_labels = embedded_yes_no_responses['not confident']
except NameError:
    # The yes/no embedding step has not been run in this kernel; skip instead of
    # aborting the whole notebook.
    yes_no_pending_labels = []
    print("embedded_yes_no_responses is not defined; run the yes/no embedding classification first. Skipping yes/no LLM classification.")

final_yes_no_responses = []
for i, label in enumerate(yes_no_pending_labels):
    # Reset per label so a failed call can never reuse the previous label's reply.
    message = None
    while True:
        try:
            message = client.messages.create(
                model="claude-3-5-haiku-20241022",
                max_tokens=10,
                # NOTE(review): sampling at temperature > 0 means labels can differ between runs.
                temperature=0.8,
                system="""You are an annotator tasked with classifying image labels. Return one of three options only:
                 "yes" if the label indicates there could be a person in the image. 
                 "no" if the label indicates there is no person.
                 "other" if it’s ambiguous or unclear, only if necessary.
                 The labels may include objects, multiple choice, or chain of thought thinking. Look for the keyword 'Answer' where applicable. Return a one-word answer only, do not apologize or explain your reasoning.""",
                messages=[
                    {
                        "role": "user",
                        "content": [
                            # {
                            #     "type": "text",
                            #     "text": "<examples>\n<example>\n<example_description>\nThe labels indicate there is a person in this picture.\n</example_description>\n<STRING>\n'A: Yes B: No C: Not possible D: Yes The answer is D: Yes.'\n</STRING>\n<ideal_output>\nyes\n</ideal_output>\n</example>\n<example>\n<example_description>\nThese labels indicate there is no person in the picture.\n</example_description>\n<STRING>\nA. Yes. B. No. C. Not possible. C. No. C. No. C. No. C. No. C. No.\n</STRING>\n<ideal_output>\nno\n</ideal_output>\n</example>\n<example>\n<example_description>\nThis label is unclear and confusing as to whether or not a person is present.\n</example_description>\n<STRING>\nA. Yes. B. No.\n</STRING>\n<ideal_output>\nother\n</ideal_output>\n</example>\n</examples>\n\n"
                            # },
                            {
                                "type": "text",
                                "text": f"The labeller is asked the question 'is there a person in the picture?'. Look for the keyword 'Answer' or decipher the chain of thought into a reasonable conclusion. Only use \"yes\", \"no\", or \"other\". CLASSIFY THIS: {label}. "
                            }
                        ]
                    }
                ]
            )
            break  # success, leave the retry loop
        except anthropic.RateLimitError:
            print("Rate limit hit, sleeping for 5 seconds...")
            time.sleep(5)
        except Exception as e:
            print(f"Unexpected error on item {i}: {e}")
            break  # give up on this label; it is recorded as "error" below
    time.sleep(0.5)  # Sleep between calls to reduce chance of limit

    if message is None:
        # Keep the list index-aligned with the labels: store a stand-in with the
        # same shape as message.content (a sequence whose first item has .text).
        response_content = [SimpleNamespace(text="error")]
    else:
        response_content = message.content
    final_yes_no_responses.append(response_content)

    # print progress
    if i % 10 == 0:
        print(f"Processed {i + 1} out of {len(yes_no_pending_labels)} labels.")
        print(f"Current response: {response_content}")

In [None]:
# Ask the LLM to classify every male/female label the embedding classifier could
# not place ('not confident'). final_mf_responses ends up with exactly one entry
# per label, in the same order, so it can be zipped with the label list.
from types import SimpleNamespace

mf_pending_labels = embedded_male_female_responses['not confident']

final_mf_responses = []
for i, label in enumerate(mf_pending_labels):
    # Reset per label so a failed call can never reuse the previous label's reply.
    message = None
    while True:
        try:
            message = client.messages.create(
                model="claude-3-5-haiku-20241022",
                max_tokens=10,
                # NOTE(review): sampling at temperature > 0 means labels can differ between runs.
                temperature=1,
                # The two gender definitions were swapped in the earlier prompt
                # ("male" described a woman and "female" a man); they now match.
                system="""You are an annotator tasked with classifying image labels. Return one of three options only:
                 "male" if the label indicates there is a man in the image.
                 "female" if the label indicates there is a woman in the image.
                 "other" if it’s ambiguous or unclear, only if necessary.
                 The labels may include objects, multiple choice, or chain of thought thinking. Look for the keyword 'Answer' where applicable. Return a one-word answer only, do not apologize or explain your reasoning.""",
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": f"The labeller is asked the question 'What is the gender of the person in this picture?'. Look for the keyword 'Answer' or decipher the chain of thought into a reasonable conclusion. Only use \"male\", \"female\", or \"other\" in your response. CLASSIFY THIS: {label}."
                            }
                        ]
                    }
                ]
            )
            break  # success, leave the retry loop
        except anthropic.RateLimitError:
            print("Rate limit hit, sleeping for 5 seconds...")
            time.sleep(5)
        except Exception as e:
            print(f"Unexpected error on item {i}: {e}")
            break  # give up on this label; it is recorded as "error" below
    time.sleep(0.5)  # Sleep between calls to reduce chance of limit

    if message is None:
        # Keep the list index-aligned with the labels: store a stand-in with the
        # same shape as message.content (a sequence whose first item has .text).
        response_content = [SimpleNamespace(text="error")]
    else:
        response_content = message.content
    final_mf_responses.append(response_content)

    # print progress
    if i % 10 == 0:
        print(f"Processed {i + 1} out of {len(mf_pending_labels)} labels.")
        print(f"Current response: {response_content}")

In [None]:
# response_dictionary = {}
# for label, response in zip(embedded_yes_no_responses['not confident'], final_yes_no_responses):
#     response = response[0].text.strip()
#     if response in response_dictionary:
#         response_dictionary[response].append(label)
#     else:
#         response_dictionary[response] = [label]

# Group the 'not confident' labels by the answer the LLM gave for each of them.
# The reply text is normalised (trimmed, lower-cased, surrounding quotes and
# periods removed) so variants such as "Male" or "male." are filed under the
# 'male' key that the merge and file-writing cells look up; with the raw text
# as key those labels would be left out of the output files.
response_dictionary = {}
for label, response in zip(embedded_male_female_responses['not confident'], final_mf_responses):
    answer = response[0].text.strip().lower().strip('"\'.').strip()
    response_dictionary.setdefault(answer, []).append(label)

In [None]:
# Inspect the distinct answers that came back, then preview the label groups
# (at most the first five groups).
print("\nResponse dictionary keys:")
print([*response_dictionary])
print([*response_dictionary.values()][:5])

In [None]:
#merge responses dictionary with embedded_yes_no_responses
# if 'yes' not in response_dictionary:
#     response_dictionary['yes'] = []
# if 'no' not in response_dictionary:
#     response_dictionary['no'] = []
# if 'other' not in response_dictionary:
#     response_dictionary['other'] = []
# response_dictionary['yes'].extend(embedded_yes_no_responses['yes'])
# response_dictionary['no'].extend(embedded_yes_no_responses['no'])
# response_dictionary['other'].extend(embedded_yes_no_responses['other'])

# Fold the labels the embedding classifier placed with confidence into the
# LLM-derived groups, creating any missing group first.
# Labels already present in a group are skipped, so re-running this cell does
# not append the same labels a second time.
for gender_key in ('male', 'female', 'other'):
    merged_labels = response_dictionary.setdefault(gender_key, [])
    already_merged = set(merged_labels)
    merged_labels.extend(
        label
        for label in embedded_male_female_responses[gender_key]
        if label not in already_merged
    )


In [None]:
# Write the yes, no, other labels to their files, one label per line.
# response_dictionary is shared with the male/female pipeline, whose merge step
# always creates the 'male' and 'female' keys. When those keys are present the
# dictionary holds gender results, and writing its 'other' group here would put
# male/female labels into other_labels_yn.txt, so nothing is written.
yes_no_outputs = (
    ('yes', '../data/tmp_labels/yes_labels.txt'),
    ('no', '../data/tmp_labels/no_labels.txt'),
    ('other', '../data/tmp_labels/other_labels_yn.txt'),
)

if 'male' in response_dictionary or 'female' in response_dictionary:
    print("\nresponse_dictionary holds male/female results; yes/no label files were not written.")
else:
    for answer_key, output_path in yes_no_outputs:
        if answer_key in response_dictionary:
            # UTF-8 matches the encoding load_phrases() uses to read label files.
            with open(output_path, 'w', encoding='utf-8') as file:
                for label in response_dictionary[answer_key]:
                    file.write(f"{label}\n")

    print("\nYes/No labels have been processed and saved to tmp_labels directory.")


In [None]:
# Write the male, female, other labels to their files, one label per line.
# response_dictionary is shared with the yes/no pipeline, so its 'other' group
# is only written here when the dictionary actually holds gender results, i.e.
# when it has a 'male' or 'female' key. Otherwise yes/no labels would end up in
# other_labels_mf.txt.
male_female_outputs = (
    ('male', '../data/tmp_labels/male_labels.txt'),
    ('female', '../data/tmp_labels/female_labels.txt'),
    ('other', '../data/tmp_labels/other_labels_mf.txt'),
)

if 'male' in response_dictionary or 'female' in response_dictionary:
    for answer_key, output_path in male_female_outputs:
        if answer_key in response_dictionary:
            # UTF-8 matches the encoding load_phrases() uses to read label files.
            with open(output_path, 'w', encoding='utf-8') as file:
                for label in response_dictionary[answer_key]:
                    file.write(f"{label}\n")

    print("\nMale/Female labels have been processed and saved to tmp_labels directory.")
else:
    print("\nresponse_dictionary holds no male/female results; male/female label files were not written.")
