In [1]:
!pip install pillow torch torchvision transformers opencv-python





In [2]:
import torch
from transformers import DetrImageProcessor, DetrForObjectDetection
from PIL import Image
import cv2
import imageio
import numpy as np

# Load the DETR model and image processor
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
model.eval()


2024-11-28 12:19:25.286058: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-28 12:19:25.314270: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-28 12:19:25.349385: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-28 12:19:25.359065: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-28 12:19:25.382597: I tensorflow/core/platform/cpu_feature_guar

DetrForObjectDetection(
  (model): DetrModel(
    (backbone): DetrConvModel(
      (conv_encoder): DetrConvEncoder(
        (model): FeatureListNet(
          (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
          (bn1): DetrFrozenBatchNorm2d()
          (act1): ReLU(inplace=True)
          (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
          (layer1): Sequential(
            (0): Bottleneck(
              (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (bn1): DetrFrozenBatchNorm2d()
              (act1): ReLU(inplace=True)
              (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
              (bn2): DetrFrozenBatchNorm2d()
              (drop_block): Identity()
              (act2): ReLU(inplace=True)
              (aa): Identity()
              (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      

In [3]:
from PIL import Image

def extract_frames_from_webp(webp_path):
    frames = []
    with Image.open(webp_path) as webp:
        try:
            while True:
                frame = webp.convert("RGB")  # Convert to RGB to ensure compatibility
                frames.append(frame)
                webp.seek(webp.tell() + 1)  # Move to the next frame
        except EOFError:
            pass  # End of frames
    return frames


In [4]:
def is_blurred(image):
    # Convert image to grayscale
    gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
    # Compute Laplacian variance
    laplacian_var = cv2.Laplacian(gray, cv2.CV_64F).var()
    return laplacian_var < 100  # Threshold for blur detection


In [5]:
def detect_humans_in_frame(frame):
    # Ensure the frame is in RGB format
    frame_np = np.array(frame)
    print(f"Frame shape: {frame_np.shape}")  # Debugging print to check dimensions
    
    # Preprocess the image
    inputs = processor(images=frame, return_tensors="pt")
    
    # Perform inference
    outputs = model(**inputs)
    
    # Extract detection boxes and labels
    logits = outputs.logits[0]
    boxes = outputs.pred_boxes[0]
    probabilities = logits.softmax(-1)[:, :-1]  # Exclude the "no object" class
    max_prob, labels = probabilities.max(dim=1)
    
    # Filter results for 'person' class (label 1 in COCO)
    person_indices = torch.where(labels == 1)[0]
    person_boxes = boxes[person_indices]
    person_scores = max_prob[person_indices]
    
    return person_scores, person_boxes



In [22]:
def process_webp_with_detr(webp_path):
    frames = extract_frames_from_webp(webp_path)
    results = []

    for i, frame in enumerate(frames):
        # Check if the frame is blurred
        if is_blurred(frame):
            print(f"Frame {i}: Blurry")
            results.append("Blurry")
            # continue
        
        # Detect humans
        scores, boxes = detect_humans_in_frame(frame)
        if len(scores) > 0 and max(scores).item() > 0.8:  # Confidence threshold
            print(f"Frame {i}: Good human detection with score {max(scores).item():.2f}")
            results.append("Good")
        else:
            print(f"Frame {i}: No clear human detected")
            results.append("Bad")

    # Aggregate final results
    if results.count("Good") > len(results) // 2:
        return "Good WebP"
    else:
        return "Bad WebP"



In [33]:
webp_path = "/home/growlt257/Desktop/28_November_test_images/blur_dog.png"
result = process_webp_with_detr(webp_path)
print(f"Final result for WebP: {result}")

Frame 0: Blurry
Frame shape: (271, 327, 3)
Frame 0: No clear human detected
Final result for WebP: Bad WebP


In [9]:
import timm
print(timm.__version__)  # This should print the version of `timm`


1.0.11
