In [None]:

!pip install --force-reinstall "numpy<2.0" "pillow<11.0"
!pip install -U torch torchvision opencv-python easyocr exifread
!pip install -U transformers==4.44.2 huggingface_hub==0.24.6
!pip install -U gradio==4.44.0



Collecting numpy<2.0
  Using cached numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[31mERROR: Operation cancelled by user[0m[31m
[0m[31mERROR: Operation cancelled by user[0m[31m


In [None]:

import cv2
import torch
import numpy as np
import exifread
import easyocr
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
import gradio as gr

In [None]:

from google.colab import files

# =============================
# 3) Load Models
# =============================
# CLIP for semantic content detection
# Model and processor must come from the same checkpoint, so name it once.
_CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"
clip_model = CLIPModel.from_pretrained(_CLIP_CHECKPOINT)
clip_processor = CLIPProcessor.from_pretrained(_CLIP_CHECKPOINT)

# EasyOCR reader (English only) used for text detection
ocr_reader = easyocr.Reader(["en"])

# =============================
# 4) EXIF Metadata Extractor
# =============================
def extract_metadata(image_path):
    """Return the location/time EXIF tags found in an image file.

    The file is opened in binary mode and handed to ``exifread.process_file``.
    Only "GPS GPSLatitude", "GPS GPSLongitude" and "Image DateTime" are looked
    up; each tag that is present is returned as ``str(value)``. Tags that are
    absent are simply omitted, so an image carrying none of them yields ``{}``.
    """
    wanted = ("GPS GPSLatitude", "GPS GPSLongitude", "Image DateTime")
    with open(image_path, "rb") as handle:
        parsed = exifread.process_file(handle)
    return {name: str(parsed[name]) for name in wanted if name in parsed}

# =============================
# 5) OCR Text Extractor
# =============================
def extract_text_from_image(image_path):
    """Run the shared OCR reader on an image and return the strings it found.

    Each result from ``ocr_reader.readtext`` contributes its element at
    index 1 (the text); everything else in the result is discarded. The
    returned list keeps the reader's order and is empty when nothing is read.
    """
    texts = []
    for detection in ocr_reader.readtext(image_path):
        texts.append(detection[1])
    return texts

# =============================
# 6) CLIP Content Detection
# =============================
def analyze_with_clip(image_path):
    """Score an image against a fixed set of privacy-relevant CLIP prompts.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        dict mapping each short label ("face", "office/logo", "document",
        "group", "street sign") to a float. The values are a softmax over
        these five prompts only, so they always sum to 1: a high value means
        "most similar of the five", not "definitely present in the image".
    """
    # Keep every short label next to the prompt it stands for, so the two
    # cannot drift out of step the way two parallel lists can.
    label_prompts = [
        ("face", "a person face"),
        ("office/logo", "an office with logo"),
        ("document", "a document"),
        ("group", "a group of people"),
        ("street sign", "a street sign"),
    ]
    labels = [label for label, _ in label_prompts]
    prompts = [prompt for _, prompt in label_prompts]

    # Pre-process inside the context manager so the underlying file is
    # closed deterministically, even if the processor raises.
    with Image.open(image_path) as image:
        inputs = clip_processor(
            text=prompts,
            images=image,
            return_tensors="pt",
            padding=True,
        )

    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        outputs = clip_model(**inputs)

    # logits_per_image has one row per image; a single image was passed.
    probs = outputs.logits_per_image.softmax(dim=1)[0].tolist()
    return dict(zip(labels, probs))

# =============================
# 7) Risk Scoring System
# =============================
# Points each kind of evidence can add to the exposure score.
# "metadata" and "ocr_text" are flat amounts; the remaining keys are CLIP
# labels whose weight is scaled by the predicted probability.
RISK_WEIGHTS = {
    "metadata": 30,
    "ocr_text": 25,
    "face": 35,
    "office/logo": 20,
    "document": 40,
    "group": 15,
    "street sign": 15,
}

def compute_image_exposure_score(evidence, clip_threshold=0.5):
    """Turn collected evidence into an exposure score capped at 100.

    Args:
        evidence: dict that may contain
            "metadata"      - any truthy value adds RISK_WEIGHTS["metadata"];
            "ocr_text"      - any truthy value adds RISK_WEIGHTS["ocr_text"];
            "clip_analysis" - mapping of label -> probability.
        clip_threshold: a CLIP label only counts when its probability is
            strictly greater than this value (default 0.5).

    Returns:
        {"exposure_score": number, "details": [(source, points), ...]} where
        the score is ``min(100, round(total, 2))`` and ``details`` lists each
        contribution in the order it was added.

    CLIP labels that have no entry in RISK_WEIGHTS are ignored rather than
    raising KeyError, and a missing or None "clip_analysis" is treated as
    empty.
    """
    total = 0
    details = []

    # Flat contributions: presence alone is what counts.
    for key in ("metadata", "ocr_text"):
        if evidence.get(key):
            total += RISK_WEIGHTS[key]
            details.append((key, RISK_WEIGHTS[key]))

    # Probability-scaled contributions from CLIP, strong matches only.
    for label, prob in (evidence.get("clip_analysis") or {}).items():
        weight = RISK_WEIGHTS.get(label)
        if weight is not None and prob > clip_threshold:
            contribution = weight * prob
            total += contribution
            details.append((label, round(contribution, 2)))

    score = min(100, round(total, 2))
    return {"exposure_score": score, "details": details}

# =============================
# 8) Main Analyzer
# =============================
def analyze_image(image_path):
    """Collect every kind of evidence for one image and attach its score.

    Runs, in order, the EXIF extractor, the OCR extractor, the CLIP analysis
    and the scorer. "metadata" and "ocr_text" are only stored when their
    extractor returned something truthy; "clip_analysis" and "score" are
    always present in the returned dict.
    """
    evidence = {}

    # Optional evidence: keep the key out of the dict when nothing was found.
    optional_sources = (
        ("metadata", extract_metadata),
        ("ocr_text", extract_text_from_image),
    )
    for key, extractor in optional_sources:
        found = extractor(image_path)
        if found:
            evidence[key] = found

    # CLIP predictions are recorded unconditionally.
    evidence["clip_analysis"] = analyze_with_clip(image_path)

    # The scorer sees everything gathered so far; its result is stored last.
    evidence["score"] = compute_image_exposure_score(evidence)
    return evidence

# =============================
# 9) Colab Upload and Analyze
# =============================
# Colab-only entry point: ask for one or more files, then report on each.
uploaded = files.upload()  # opens file picker

for filename in uploaded:
    print(f"\nAnalyzing {filename}...\n")
    try:
        result = analyze_image(filename)
        report = result["score"]

        print(f"Exposure Score: {report['exposure_score']}/100\n")
        print("Details:")
        for source, points in report["details"]:
            print(f"- {source} → +{points}")

        # Raw evidence is shown only when the analyzer recorded it.
        if "metadata" in result:
            print(f"\nMetadata: {result['metadata']}")
        if "ocr_text" in result:
            print(f"OCR Text: {result['ocr_text']}")

    except Exception as e:
        # Best effort per file: report the failure and move on to the next.
        print(f"An error occurred: {e}")






Saving Screenshot 2025-06-09 183150.png to Screenshot 2025-06-09 183150.png

Analyzing Screenshot 2025-06-09 183150.png...





Exposure Score: 52.4/100

Details:
- ocr_text → +25
- face → +27.4
OCR Text: ['HEllO', '0l3', '17', 'SWITCHES', 'Iu', '15']
