In [None]:
import cv2
import torch
import torch.nn as nn
import clip
import numpy as np
from torchvision import transforms

# Load the pretrained CLIP ViT-B/32 backbone. clip.load returns the model
# together with its matching image preprocessing pipeline (not a tokenizer).
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load('ViT-B/32', device)

# Define the classification head that scores the 512-wide CLIP image features
num_classes = 2  # Replace with the number of classes in your dataset
classifier_head = nn.Linear(512, num_classes).to(device)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint that was saved on a GPU machine still loads on a CPU-only one.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
classifier_head.load_state_dict(torch.load('nature_model.pt', map_location=device))
classifier_head.eval()  # inference mode

# Define the mapping between class indices and class labels
class_mapping = {
    0: "Islamophobic Image",
    1: "Non Islamophobic Image",
}

# Define the transformation to convert an image to a tensor and normalize it
# channel-wise with mean 0.5 / std 0.5.
# NOTE(review): this is not the normalization of the `preprocess` pipeline
# returned by clip.load above; keep it only if the classifier head was trained
# on features produced with this same transform -- confirm against training code.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Function to predict the class of an image
def predict_class(image_path):
    """Classify the image stored at *image_path* and return its label.

    The image is read with OpenCV, resized to 224x224, converted and
    normalized by the module-level ``transform``, embedded with the CLIP
    image encoder and finally scored by ``classifier_head``.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        The ``class_mapping`` label of the highest-scoring class.

    Raises:
        OSError: If OpenCV cannot read an image from ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than inside cv2.resize.
        raise OSError(
            f"Could not read an image from {image_path!r} "
            "(missing file or unsupported format)"
        )

    # NOTE(review): cv2.imread yields BGR channel order, whereas CLIP's own
    # preprocessing feeds RGB. Left unchanged so inference keeps matching
    # however the classifier head was trained -- confirm against training code.
    image = cv2.resize(image, (224, 224))  # Resize image to fit CLIP model input shape
    batch = transform(image).unsqueeze(0).to(device)  # To tensor, add batch dimension

    # The whole forward pass is inference-only, so no autograd graph is needed.
    with torch.no_grad():
        image_features = model.encode_image(batch)
        # Match the classifier's weight dtype (the CLIP encoder may emit half precision)
        image_features = image_features.to(classifier_head.weight.dtype)
        logits = classifier_head(image_features)

    # Softmax is monotonic, so the arg-max of the logits is the predicted class.
    predicted_class_index = torch.argmax(logits, dim=1).item()
    return class_mapping[predicted_class_index]

# Test the prediction function: classify one sample image and print its label.
# This runs at module level, i.e. every time the cell/script is executed.
image_path = 'cnn_dataset/Islamophobic image/islamophobic (1).jpg'  # Replace with the path to your test image
predicted_class = predict_class(image_path)  # label string taken from class_mapping

print(f"Predicted Class: {predicted_class}")
