In [1]:
pip install pandas torch torchvision transformers Pillow



In [34]:
import base64
from io import BytesIO

import pandas as pd
import requests
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

# Load CLIP model and processor.
# Both are created from one checkpoint name so the tokenizer/image
# preprocessing can never drift apart from the model weights.
CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(CLIP_CHECKPOINT)
processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)

# Define categories with prompts.
# Maps each category label to the text prompts that describe it.
# classify_image() scores an image against every prompt and sums the
# per-prompt probabilities within each category, so a label's score depends
# on all of its prompts.
# NOTE(review): every category currently lists four prompts; because scores
# are summed (not averaged), adding prompts to only one category would tilt
# results toward it.
# The "safe for children" key is compared by exact string when the safety
# thresholds are applied, so renaming it requires updating that code too.
categories = {
    "safe for children": [
        "This image is suitable for children.",
        "This image contains friendly and educational content.",
        "This image is child-appropriate.",
        "This image contains no harmful or inappropriate content."
    ],
    "violence": [
        "This image depicts violence.",
        "This image contains weapons.",
        "This image depicts fights or bullying.",
        "This image contains blood or gore."
    ],
    "sexual content": [
        "This image contains nudity.",
        "This image is sexually explicit.",
        "This image is pornographic.",
        "This image contains inappropriate sexual content."
    ],
    "scary or shocking": [
        "This image is scary or disturbing.",
        "This image depicts horror or fear.",
        "This image is dark and creepy.",
        "This image is shocking or disgusting."
    ]
}

# Function to process image from "Image URL" (Base64 or HTTP URL)
def process_image(src, timeout=10):
    """Load an image from a data URI or an HTTP(S) URL and return it as RGB.

    Args:
        src: Either a ``data:image/...;base64,<payload>`` URI or a string
            starting with ``http``.
        timeout: Seconds to wait for the HTTP server before giving up.
            Only used for URL sources.

    Returns:
        The decoded image, converted to RGB mode.

    Raises:
        ValueError: If ``src`` is not a string, is a data URI without a
            payload, is neither a data URI nor an HTTP URL, or the URL does
            not answer with HTTP 200.
        requests.RequestException: If the HTTP request itself fails
            (connection error, timeout, ...).
    """
    # Non-string sources (e.g. the float NaN pandas produces for an empty CSV
    # cell) get a clear error instead of an AttributeError on ``startswith``.
    if not isinstance(src, str):
        raise ValueError(f"Unsupported image source: {src!r}")

    if src.startswith("data:image"):  # Base64-encoded image
        # Everything after the first comma is the Base64 payload.
        _header, separator, base64_data = src.partition(",")
        if not separator or not base64_data:
            raise ValueError("Malformed data URI: missing Base64 payload.")
        image_data = base64.b64decode(base64_data)
    elif src.startswith("http"):  # URL of the image
        # A timeout keeps one unresponsive server from stalling a whole batch.
        response = requests.get(src, timeout=timeout)
        if response.status_code != 200:
            raise ValueError(f"Failed to fetch image from URL: {src}")
        # ``content`` is the fully-read, content-decoded body; unlike the raw
        # stream it handles gzip/deflate responses and releases the connection.
        image_data = response.content
    else:
        raise ValueError("Unsupported image format.")

    return Image.open(BytesIO(image_data)).convert("RGB")


In [35]:

# Function to classify an image using CLIP
def classify_image(image):
    """Score ``image`` against every prompt in ``categories`` using CLIP.

    Relies on the module-level ``categories`` mapping, ``processor`` and
    ``model``.

    Args:
        image: The image to classify, in a form accepted by ``processor``.

    Returns:
        A tuple ``(most_likely_label, confidence, category_probs_sum)``:
        the category whose prompts received the largest total probability,
        that total, and a dict mapping every category to its total. The
        per-prompt probabilities come from a softmax over all prompts, so the
        totals add up to 1 across categories.
    """
    # Flatten prompts, remembering which category each prompt belongs to
    prompts = []
    labels = []
    for category, descriptions in categories.items():
        prompts.extend(descriptions)
        labels.extend([category] * len(descriptions))

    # Process inputs for CLIP
    inputs = processor(text=prompts, images=image, return_tensors="pt", padding=True)

    # Inference only: disabling autograd avoids building a gradient graph
    # (and holding its memory) for every classified image.
    with torch.no_grad():
        outputs = model(**inputs)
        # Row 0 is the single input image; softmax runs across the prompts.
        probs = outputs.logits_per_image.softmax(dim=1)[0].tolist()

    # Calculate summed probabilities per category
    category_probs_sum = {}
    for label, prob in zip(labels, probs):
        category_probs_sum[label] = category_probs_sum.get(label, 0) + prob

    # Find the category with the maximum summed probability
    most_likely_label = max(category_probs_sum, key=category_probs_sum.get)
    confidence = category_probs_sum[most_likely_label]

    return most_likely_label, confidence, category_probs_sum

In [38]:

def classify_csv(csv_path, output_csv_path, url_column="Image URL"):
    """Classify every image listed in a metadata CSV and save the results.

    Reusable for any metadata CSV with the same layout; only the paths (and,
    if needed, the image column name) change between runs.

    Args:
        csv_path: Path of the input CSV.
        output_csv_path: Path the annotated CSV is written to.
        url_column: Column holding each image source, passed to
            ``process_image``.

    Returns:
        The input DataFrame with these columns added: ``most_likely_label``,
        ``confidence``, ``category_probs``, ``safe_60``, ``safe_70``,
        ``safe_80`` and ``safe_50_60_flags``.

    Raises:
        KeyError: If ``url_column`` is not a column of the CSV.
    """
    frame = pd.read_csv(csv_path)

    # Fail fast on a wrong column name; otherwise every row would be caught
    # below and silently written out as "Error".
    if url_column not in frame.columns:
        raise KeyError(f"Column {url_column!r} not found in {csv_path}")

    labels = []
    confidences = []
    categories_probs = []
    failures = 0

    # Classify each image. This is deliberately best-effort: one unreadable
    # image is reported and recorded as "Error" rather than aborting the run.
    for i, src in frame[url_column].items():
        try:
            label, confidence, category_probs = classify_image(process_image(src))
        except Exception as e:
            print(f"Error processing row {i}: {e}")
            label, confidence, category_probs = "Error", 0, {}
            failures += 1
        labels.append(label)
        confidences.append(confidence)
        categories_probs.append(category_probs)

    if failures:
        print(f"{failures} of {len(frame)} rows could not be classified.")

    # Add results to the DataFrame
    frame["most_likely_label"] = labels
    frame["confidence"] = confidences
    frame["category_probs"] = categories_probs

    # Apply thresholds once, on whole columns. Rows labelled "Error" are never
    # "safe for children", so all of their flags come out False.
    is_safe = frame["most_likely_label"] == "safe for children"
    confidence_col = frame["confidence"]
    frame["safe_60"] = is_safe & (confidence_col >= 0.6)
    frame["safe_70"] = is_safe & (confidence_col >= 0.7)
    frame["safe_80"] = is_safe & (confidence_col >= 0.8)
    # Column name kept as-is so previously written CSVs stay comparable.
    frame["safe_50_60_flags"] = is_safe & (confidence_col >= 0.5) & (confidence_col < 0.6)

    # Save the updated DataFrame to a new CSV file
    frame.to_csv(output_csv_path, index=False)
    print(f"Classification completed and saved to {output_csv_path}.")
    return frame


# Classify the images from the "filter" metadata CSV
csv_path = "/content/drive/MyDrive/filter_image_metadata.csv"  # Replace with your actual CSV file path
output_csv_path = "/content/drive/MyDrive/filter_classified_images_with_thresholds.csv"
data = classify_csv(csv_path, output_csv_path)

Classification completed and saved to /content/drive/MyDrive/filter_classified_images_with_thresholds.csv.


In [39]:
# Preview the first rows; a bare last expression uses the notebook's rich table display.
data.head(5)

  Entity Type  Entity Harmful Type Harmful Term     Search Term  \
0        game  Roblox     violence      torture  Roblox torture   
1        game  Roblox     violence      torture  Roblox torture   
2        game  Roblox     violence      torture  Roblox torture   
3        game  Roblox     violence      torture  Roblox torture   
4        game  Roblox     violence      torture  Roblox torture   

  SafeSearch Mode               File Name  \
0          filter  Roblox torture_image_1   
1          filter  Roblox torture_image_2   
2          filter  Roblox torture_image_3   
3          filter  Roblox torture_image_4   
4          filter  Roblox torture_image_5   

                                           Image URL     X    Y  Width  \
0  data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...    20  161    292   
1  data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...   332  161    293   
2  data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQA...   645  161    293   
3  data:image/jpeg;base64,/9j/

In [None]:

# Read the image metadata that is going to be classified
csv_path = "/content/drive/MyDrive/unfilter_image_metadata.csv"  # Replace with your actual CSV file path
data = pd.read_csv(csv_path)

# Per-row classification results, collected in row order
labels = []
confidences = []
categories_probs = []

# Classify each image; a row that fails is reported and recorded as "Error"
for i, row in data.iterrows():
    try:
        image = process_image(row["Image URL"])  # Replace "Image URL" with your column name
        label, confidence, category_probs = classify_image(image)
    except Exception as e:
        print(f"Error processing row {i}: {e}")
        label, confidence, category_probs = "Error", 0, {}

    labels.append(label)
    confidences.append(confidence)
    categories_probs.append(category_probs)

# Threshold flags, derived from the collected labels and confidences.
# "Error" rows never match the safe label, so their flags are all False.
safe_60_flags = [
    lbl == "safe for children" and conf >= 0.6
    for lbl, conf in zip(labels, confidences)
]
safe_70_flags = [
    lbl == "safe for children" and conf >= 0.7
    for lbl, conf in zip(labels, confidences)
]
safe_80_flags = [
    lbl == "safe for children" and conf >= 0.8
    for lbl, conf in zip(labels, confidences)
]
safe_50_60_flags = [
    lbl == "safe for children" and 0.5 <= conf < 0.6
    for lbl, conf in zip(labels, confidences)
]

# Attach every result column to the DataFrame, in output order
new_columns = {
    "most_likely_label": labels,
    "confidence": confidences,
    "category_probs": categories_probs,
    "safe_60": safe_60_flags,
    "safe_70": safe_70_flags,
    "safe_80": safe_80_flags,
    "safe_50_60_flags": safe_50_60_flags,
}
for column_name, column_values in new_columns.items():
    data[column_name] = column_values

# Write the annotated DataFrame out as a new CSV file
output_csv_path = "/content/drive/MyDrive/unfilter_classified_images_with_thresholds.csv"
data.to_csv(output_csv_path, index=False)
print(f"Classification completed and saved to {output_csv_path}.")

In [1]:
# Mount Google Drive at /content/drive.
# The CSV paths used by the classification cells live under
# /content/drive/MyDrive, so this cell has to be executed before them
# (its execution count is In [1] even though it appears last in the notebook).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
