In [1]:
!pip install transformers datasets


Collecting datasets
  Downloading datasets-2.20.0-py3-none-any.whl.metadata (19 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-16.1.0-cp38-cp38-win_amd64.whl.metadata (3.1 kB)
Collecting pyarrow-hotfix (from datasets)
  Downloading pyarrow_hotfix-0.6-py3-none-any.whl.metadata (3.6 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Using cached dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting pandas (from datasets)
  Downloading pandas-2.0.3-cp38-cp38-win_amd64.whl.metadata (18 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.4.1-cp38-cp38-win_amd64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py38-none-any.whl.metadata (7.1 kB)
Collecting fsspec<=2024.5.0,>=2023.1.0 (from fsspec[http]<=2024.5.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.5.0-py3-none-any.whl.metadata (11 kB)
Collecting aiohttp (from datasets)
  Downloading aiohttp-3.9.5-cp38-cp38-win_amd64.whl.metadata (7.7 kB)
Collecting a

In [10]:
import cv2
import pytesseract
import string
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from transformers import VisionEncoderDecoderModel, ViTFeatureExtractor
import torch
from PIL import Image
import os

# Set up Tesseract OCR.
# The executable location can be overridden with the TESSERACT_CMD environment variable;
# when it is unset, the original default Windows install path is used.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    "TESSERACT_CMD",
    r'C:\Program Files\Tesseract-OCR\tesseract.exe',
)


In [3]:
def preprocess_image(image):
    """Prepare an image for OCR by smoothing, grayscaling and binarizing it.

    Args:
        image: Image array in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2GRAY``).

    Returns:
        The single-channel thresholded image produced by ``cv2.threshold``.
    """
    # Edge-preserving smoothing: diameter 5, sigmaColor 55, sigmaSpace 60.
    smoothed = cv2.bilateralFilter(image, 5, 55, 60)
    grayscale = cv2.cvtColor(smoothed, cv2.COLOR_BGR2GRAY)
    # Binary threshold: pixels brighter than 240 become 255, the rest 0.
    # cv2.threshold returns (threshold_used, image); only the image is needed.
    binarized = cv2.threshold(grayscale, 240, 255, cv2.THRESH_BINARY)[1]
    return binarized


In [4]:
def extract_text_from_image(image_path):
    """Run OCR on an image file and return its text reduced to ASCII letters, digits and spaces.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        The OCR text with every character that is not an ASCII letter, a digit
        or a space (including newlines) replaced by a single space.

    Raises:
        OSError: If ``cv2.imread`` cannot load the image.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with a clear message instead of deep inside OpenCV.
        raise OSError(f"Could not read image file: {image_path}")

    text = pytesseract.image_to_string(preprocess_image(image))

    # A set gives constant-time membership checks. Newlines are not in the
    # allowed set, so they are turned into spaces by the same rule.
    allowed_chars = set(string.ascii_letters + string.digits + " ")
    return "".join(char if char in allowed_chars else " " for char in text)


In [5]:
# Hugging Face checkpoint used for hate speech classification
model_name = "Hate-speech-CNERG/dehatebert-mono-english"

# Fetch the sequence-classification weights and the matching tokenizer
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Wrap both in a ready-to-call text-classification pipeline
classifier = pipeline(task="text-classification", model=model, tokenizer=tokenizer)


tokenizer_config.json:   0%|          | 0.00/152 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config.json:   0%|          | 0.00/1.23k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/872k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/669M [00:00<?, ?B/s]

In [11]:
def detect_hate_speech(text):
    """Classify a piece of text as hateful or not with the module-level model.

    Args:
        text: Text to classify. It is tokenized with truncation to at most
            512 tokens.

    Returns:
        A ``(classification, confidence)`` tuple. ``classification`` is
        ``"hateful"`` when the softmax probability at index 1 exceeds 0.5,
        otherwise ``"not hateful"``. ``confidence`` is that probability for
        ``"hateful"`` and one minus it for ``"not hateful"``.
    """
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    # Inference only: disable gradient tracking so no autograd graph is built.
    with torch.no_grad():
        outputs = model(**inputs)
    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
    # Assumes index 1 is the "hateful" class - TODO confirm against model.config.id2label.
    hate_confidence = predictions[0][1].item()
    classification = "hateful" if hate_confidence > 0.5 else "not hateful"
    confidence = hate_confidence if classification == "hateful" else 1 - hate_confidence
    return classification, confidence

In [12]:
# Checkpoint for image captioning (a ViT + GPT-2 pairing, judging by its name)
caption_model_name = "nlpconnect/vit-gpt2-image-captioning"

# Fetch the tokenizer, the image feature extractor and the encoder-decoder model
caption_tokenizer = AutoTokenizer.from_pretrained(caption_model_name)
caption_feature_extractor = ViTFeatureExtractor.from_pretrained(caption_model_name)
caption_model = VisionEncoderDecoderModel.from_pretrained(caption_model_name)

def generate_image_caption(image_path):
    """Generate a text caption for an image with the module-level captioning model.

    Args:
        image_path: Path of the image file to open with PIL.

    Returns:
        The decoded caption string (special tokens skipped) for the first
        generated sequence.
    """
    # Open inside a context manager so the file handle is closed as soon as
    # the RGB copy has been made, instead of lingering until garbage collection.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")
    pixel_values = caption_feature_extractor(images=image, return_tensors="pt").pixel_values
    output_ids = caption_model.generate(pixel_values)
    caption = caption_tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return caption


config.json:   0%|          | 0.00/4.61k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


pytorch_model.bin:   0%|          | 0.00/982M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/228 [00:00<?, ?B/s]



tokenizer_config.json:   0%|          | 0.00/241 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/120 [00:00<?, ?B/s]

In [13]:
def analyze_meme_for_toxicity(image_path, output_file):
    """Classify a meme's OCR text, its generated caption and both combined, and log the results.

    Args:
        image_path: Path of the meme image to analyze.
        output_file: Path of the text report; the results are appended to it.

    Returns:
        A dict mapping ``"extracted_text"``, ``"image_caption"`` and
        ``"combined_text"`` to the ``(classification, confidence)`` tuple
        returned by ``detect_hate_speech`` for that text.
    """
    extracted_text = extract_text_from_image(image_path)
    image_caption = generate_image_caption(image_path)
    combined_text = extracted_text + " " + image_caption

    # Analyze individually and combined
    results = {
        "extracted_text": detect_hate_speech(extracted_text),
        "image_caption": detect_hate_speech(image_caption),
        "combined_text": detect_hate_speech(combined_text),
    }

    # Write results to file. UTF-8 is forced so that image names with
    # characters outside the platform's default encoding cannot break the write.
    with open(output_file, "a", encoding="utf-8") as file:
        file.write(f"Image File Name: {os.path.basename(image_path)}\n")
        for text_type, (classification, confidence) in results.items():
            file.write(f"{text_type.capitalize()} - Classification: {classification}, Confidence: {confidence:.4f}\n")
        file.write("\n")

    return results


In [14]:
def analyze_folder_for_toxicity(folder_path, output_file="detection_results.txt"):
    """Analyze every PNG/JPG/JPEG image in a folder and write one report file.

    Args:
        folder_path: Directory whose entries are scanned (not recursively).
        output_file: Path of the report. It is emptied first, then each image's
            results are appended. Defaults to ``"detection_results.txt"``.
    """
    # Ensure the output file is empty before starting
    with open(output_file, "w", encoding="utf-8"):
        pass

    # os.listdir returns entries in arbitrary order; sort them so the report
    # order is the same on every run.
    for filename in sorted(os.listdir(folder_path)):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            image_path = os.path.join(folder_path, filename)
            analyze_meme_for_toxicity(image_path, output_file)


In [15]:
# Example folder path. Set the MEME_FOLDER environment variable to analyze a
# different directory; when it is unset, the original example path is used.
folder_path = os.environ.get(
    "MEME_FOLDER",
    r'C:\Users\rohit\Desktop\hate\hateful_memes\img\validation\hateful',
)

# Analyze the folder for toxicity
analyze_folder_for_toxicity(folder_path)


The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.
