In [71]:
import pandas as pd

df = pd.DataFrame({
    'image_path': [f'C:/Users/rathe/Desktop/deepfake_images/{Y}.png' for Y in range(0,50)]+[f"C:/Users/rathe/Desktop/Human_Faces_Dataset/Real_Images/0000{X}.jpg" for X in range(10,60)],
    'label': [1 for Y in range(0,50)]+[0 for X in range(0,50)]  # 1 = Fake, 0 = Real
})


In [13]:
def soft_voting_predict(image_path, models):
    import numpy as np
    probs_list = [predict_image(model, extractor, image_path) for model, extractor in models]
    avg_probs = np.mean(probs_list, axis=0)
    return int(np.argmax(avg_probs))


In [14]:
def predict_image(model, extractor, image_path):
    from PIL import Image
    import torch

    image = Image.open(image_path).convert("RGB")
    inputs = extractor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.softmax(outputs.logits, dim=1).numpy()[0]
    return probs


In [15]:
from transformers import AutoModelForImageClassification, AutoFeatureExtractor

def load_all_models(model_names):
    loaded = []
    for name in model_names:
        model = AutoModelForImageClassification.from_pretrained(name)
        extractor = AutoFeatureExtractor.from_pretrained(name)
        model.eval()
        loaded.append((model, extractor))
    return loaded


In [23]:
from transformers import AutoModelForImageClassification, AutoFeatureExtractor
import torch
import numpy as np
from PIL import Image

class ModelPipeline:
    def __init__(self, model_name):
        self.model = AutoModelForImageClassification.from_pretrained(model_name)
        self.extractor = AutoFeatureExtractor.from_pretrained(model_name)
        self.model.eval()

        # Extract and flip label map if needed
        config = self.model.config
        self.label_map = config.id2label
        self.flip = self.label_map[0].upper() == "FAKE"  # If 0 is FAKE, we need to flip

    def predict_proba(self, image_path):
        image = Image.open(image_path).convert("RGB")
        inputs = self.extractor(images=image, return_tensors="pt")
        with torch.no_grad():
            outputs = self.model(**inputs)
            probs = torch.softmax(outputs.logits, dim=1).numpy()[0]
        return probs if not self.flip else probs[::-1]  # Flip if label mapping is reversed

    def predict(self, image_path):
        return int(np.argmax(self.predict_proba(image_path)))


In [27]:
from collections import Counter

class EnsemblePipeline:
    def __init__(self, pipelines, voting='soft'):
        self.pipelines = pipelines
        self.voting = voting

    def predict(self, image_path):
        if self.voting == 'soft':
            all_probs = [p.predict_proba(image_path) for p in self.pipelines]
            avg_probs = np.mean(all_probs, axis=0)
            return int(np.argmax(avg_probs))
        elif self.voting == 'hard':
            all_preds = [p.predict(image_path) for p in self.pipelines]
            return Counter(all_preds).most_common(1)[0][0]
        else:
            raise ValueError("Voting must be 'soft' or 'hard'")


In [72]:
import pandas as pd
from sklearn.metrics import classification_report

# Load your data

# Define model names
model_names = [
    "dima806/deepfake_vs_real_image_detection",
    "prithivMLmods/Deep-Fake-Detector-Model",
    "DaMsTaR/Detecto-DeepFake_Image_Detector",
    "DarkVision/Deepfake_detection_image",
    "Wvolf/ViT_Deepfake_Detection",
    "thembululwa/deepfake_detection",
    "prithivMLmods/Deep-Fake-Detector-v2-Model-ONNX"
]

# Create individual pipelines
pipelines = [ModelPipeline(name) for name in model_names]

# Create ensemble
ensemble = EnsemblePipeline(pipelines, voting='soft')  # or 'hard'

# Predict
df['ensemble_pred'] = df['image_path'].apply(ensemble.predict)

# Evaluate
print(classification_report(df['label'], df['ensemble_pred']))




              precision    recall  f1-score   support

           0       0.94      0.94      0.94        50
           1       0.94      0.94      0.94        50

    accuracy                           0.94       100
   macro avg       0.94      0.94      0.94       100
weighted avg       0.94      0.94      0.94       100



In [75]:
reports = {}
for model_name in model_names:
    print(f"\n==== Model: {model_name} ====")
    try:
        pipeline = ModelPipeline(model_name)
        y_true = df["label"].tolist()
        y_pred = [pipeline.predict(path) for path in df["image_path"]]

        report = classification_report(y_true, y_pred, target_names=["FAKE", "REAL"])
        reports[model_name] = report
        print(report)
    except Exception as e:
        print(f"Error with model {model_name}: {e}")


==== Model: dima806/deepfake_vs_real_image_detection ====




              precision    recall  f1-score   support

        FAKE       0.90      0.94      0.92        50
        REAL       0.94      0.90      0.92        50

    accuracy                           0.92       100
   macro avg       0.92      0.92      0.92       100
weighted avg       0.92      0.92      0.92       100


==== Model: prithivMLmods/Deep-Fake-Detector-Model ====




              precision    recall  f1-score   support

        FAKE       0.49      0.98      0.66        50
        REAL       0.00      0.00      0.00        50

    accuracy                           0.49       100
   macro avg       0.25      0.49      0.33       100
weighted avg       0.25      0.49      0.33       100


==== Model: DaMsTaR/Detecto-DeepFake_Image_Detector ====




              precision    recall  f1-score   support

        FAKE       1.00      0.96      0.98        50
        REAL       0.96      1.00      0.98        50

    accuracy                           0.98       100
   macro avg       0.98      0.98      0.98       100
weighted avg       0.98      0.98      0.98       100


==== Model: DarkVision/Deepfake_detection_image ====




              precision    recall  f1-score   support

        FAKE       1.00      0.96      0.98        50
        REAL       0.96      1.00      0.98        50

    accuracy                           0.98       100
   macro avg       0.98      0.98      0.98       100
weighted avg       0.98      0.98      0.98       100


==== Model: Wvolf/ViT_Deepfake_Detection ====




              precision    recall  f1-score   support

        FAKE       1.00      0.96      0.98        50
        REAL       0.96      1.00      0.98        50

    accuracy                           0.98       100
   macro avg       0.98      0.98      0.98       100
weighted avg       0.98      0.98      0.98       100


==== Model: fabar1/vit-detection-celebdf-deepfake ====




              precision    recall  f1-score   support

        FAKE       0.44      0.78      0.56        50
        REAL       0.00      0.00      0.00        50

    accuracy                           0.39       100
   macro avg       0.22      0.39      0.28       100
weighted avg       0.22      0.39      0.28       100


==== Model: emobobas/celebrity_deepfake_detection ====


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

        FAKE       0.00      0.00      0.00        50
        REAL       0.50      1.00      0.67        50

    accuracy                           0.50       100
   macro avg       0.25      0.50      0.33       100
weighted avg       0.25      0.50      0.33       100


==== Model: HrutikAdsare/deepfake-detector-faceforensics ====




              precision    recall  f1-score   support

        FAKE       0.27      0.36      0.31        50
        REAL       0.03      0.02      0.02        50

    accuracy                           0.19       100
   macro avg       0.15      0.19      0.17       100
weighted avg       0.15      0.19      0.17       100


==== Model: Hemg/Deepfake-Detection ====




              precision    recall  f1-score   support

        FAKE       0.49      0.96      0.65        50
        REAL       0.00      0.00      0.00        50

    accuracy                           0.48       100
   macro avg       0.24      0.48      0.32       100
weighted avg       0.24      0.48      0.32       100


==== Model: thembululwa/deepfake_detection ====




              precision    recall  f1-score   support

        FAKE       0.68      1.00      0.81        50
        REAL       1.00      0.54      0.70        50

    accuracy                           0.77       100
   macro avg       0.84      0.77      0.76       100
weighted avg       0.84      0.77      0.76       100



In [76]:
reports

{'dima806/deepfake_vs_real_image_detection': '              precision    recall  f1-score   support\n\n        FAKE       0.90      0.94      0.92        50\n        REAL       0.94      0.90      0.92        50\n\n    accuracy                           0.92       100\n   macro avg       0.92      0.92      0.92       100\nweighted avg       0.92      0.92      0.92       100\n',
 'prithivMLmods/Deep-Fake-Detector-Model': '              precision    recall  f1-score   support\n\n        FAKE       0.49      0.98      0.66        50\n        REAL       0.00      0.00      0.00        50\n\n    accuracy                           0.49       100\n   macro avg       0.25      0.49      0.33       100\nweighted avg       0.25      0.49      0.33       100\n',
 'DaMsTaR/Detecto-DeepFake_Image_Detector': '              precision    recall  f1-score   support\n\n        FAKE       1.00      0.96      0.98        50\n        REAL       0.96      1.00      0.98        50\n\n    accuracy          