In [None]:
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
from PIL import Image

In [None]:
import cv2
import torch
import torchvision
from torchvision import transforms
from skimage import io
import dlib

# Object Identification with YOLOv3

In [None]:
# Paths
# Every asset lives under a single project folder. Set the PHOTO_ANALYSIS_DIR
# environment variable to run the notebook from another location; when it is
# unset, the original location below is used, so existing setups are unaffected.
base_dir = os.environ.get(
    "PHOTO_ANALYSIS_DIR",
    "/Users/kyriakospapadopoulos/Desktop/University/Big Blue Data Academy/Personal/Projects/API_Projects/Reddit/Photograph_Analysis",
)
weights_path = os.path.join(base_dir, "YOLOv3", "yolov3.weights")  # YOLOv3 pre-trained weights
config_path = os.path.join(base_dir, "YOLOv3", "yolov3.cfg")  # YOLOv3 network definition
directory = os.path.join(base_dir, "photographs_top_1000")  # folder of images to analyze

In [None]:
# Load YOLO model
def load_yolo_model(weights_path, config_path):
    """Load a YOLO network with OpenCV's DNN module and find its output layers.

    Args:
        weights_path: Path to the pre-trained weights file (e.g. ``yolov3.weights``).
        config_path: Path to the network configuration file (e.g. ``yolov3.cfg``).

    Returns:
        A ``(net, output_layers)`` tuple: the loaded network and the list of
        names of its unconnected output layers, in the form expected by
        ``net.forward``.
    """
    net = cv2.dnn.readNet(weights_path, config_path)
    layer_names = net.getLayerNames()
    # getUnconnectedOutLayers() yields 1-based layer ids. Depending on the
    # OpenCV build they arrive either as a flat array or as an (N, 1) array;
    # flattening first handles both, whereas indexing the name list with a
    # one-element array raises TypeError.
    out_ids = np.array(net.getUnconnectedOutLayers()).flatten()
    output_layers = [layer_names[int(i) - 1] for i in out_ids]
    return net, output_layers

### 'load_yolo_model' function breakdown
---
- **`weights_path`**: This parameter specifies the file path to the YOLO model's pre-trained weights file (e.g., `yolov3.weights`).

- **`config_path`**: This parameter specifies the file path to the YOLO model's configuration file (e.g., `yolov3.cfg`). The configuration file contains the model's architecture and other settings.

- **`cv2.dnn.readNet(weights_path, config_path)`**:
  - This line loads the YOLO model using OpenCV's deep neural network (DNN) module. It takes the pre-trained weights and configuration files as inputs to create a network object (`net`) that can be used for object detection.

- **`layer_names = net.getLayerNames()`**:
  - This line retrieves the names of all the layers in the YOLO model. The layer names are stored in a list called `layer_names`. These names are necessary to identify which layers are output layers for detection.

- **`output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]`**:
  - This line identifies the output layers of the YOLO model. The `net.getUnconnectedOutLayers()` function returns the indices of the layers whose outputs are not consumed by any other layer (i.e., the output layers). These indices are 1-based, so `i - 1` converts each one into a position in `layer_names`. The result is stored in the `output_layers` list.

- **`return net, output_layers`**:
  - The function returns two objects: `net`, which is the loaded YOLO model, and `output_layers`, which is a list of the names of the output layers. These are used in subsequent steps to perform object detection with YOLO.
---

In [None]:
# Class-name lists already read from disk, keyed by file path, so the labels
# file is parsed once per session instead of once per image.
_YOLO_CLASS_NAMES_CACHE = {}


def _load_yolo_class_names(path):
    """Return the class labels listed one per line in ``path`` (cached per path)."""
    if path not in _YOLO_CLASS_NAMES_CACHE:
        with open(path, "r") as f:
            _YOLO_CLASS_NAMES_CACHE[path] = [line.strip() for line in f]
    return _YOLO_CLASS_NAMES_CACHE[path]


# Detect objects in an image with Non-Maximum Suppression (NMS) and prevent duplicate assignments
def detect_objects_yolo(net, output_layers, image, confidence_threshold=0.5, nms_threshold=0.4,
                        classes=None, class_names_path="coco.names"):
    """Run YOLO on one image and return the distinct class labels it detects.

    Args:
        net: Network returned by ``load_yolo_model``.
        output_layers: Output layer names returned by ``load_yolo_model``.
        image: Image array whose first two dimensions are (height, width),
            e.g. the result of ``cv2.imread``.
        confidence_threshold: A detection is kept only when its best class
            score is strictly greater than this value.
        nms_threshold: Overlap threshold handed to ``cv2.dnn.NMSBoxes``.
        classes: Optional list mapping class id -> label. When ``None`` the
            labels are read from ``class_names_path``.
        class_names_path: Labels file, one label per line, used when
            ``classes`` is not given. Defaults to ``"coco.names"`` in the
            current working directory.

    Returns:
        Alphabetically sorted list of unique labels that survive NMS; an empty
        list when nothing passes the confidence threshold. Sorting keeps the
        output identical between runs, which plain set iteration does not.
    """
    height, width = image.shape[:2]
    # 0.00392 ~= 1/255 rescales pixels to [0, 1]; the positional True is
    # blobFromImage's swapRB flag.
    blob = cv2.dnn.blobFromImage(image, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    if classes is None:
        classes = _load_yolo_class_names(class_names_path)

    class_ids = []
    confidences = []
    boxes = []

    # Each detection row is [cx, cy, w, h, objectness, score_0, score_1, ...]
    # with the box expressed as fractions of the image size.
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = int(np.argmax(scores))
            confidence = float(scores[class_id])
            if confidence <= confidence_threshold:
                continue
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)
            # NMSBoxes wants top-left corner + size, not centre + size.
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)
            boxes.append([x, y, w, h])
            confidences.append(confidence)
            class_ids.append(class_id)

    if not boxes:
        return []

    # NOTE(review): NMSBoxes receives only boxes and scores, not class ids, so a
    # box of one class can suppress an overlapping box of a different class.
    indices = cv2.dnn.NMSBoxes(boxes, confidences, confidence_threshold, nms_threshold)
    # The result may be an empty tuple, a flat array or an (N, 1) array
    # depending on the OpenCV build; normalise to a flat integer array.
    kept = np.array(indices, dtype=int).flatten()

    detected_objects = {classes[class_ids[i]] for i in kept}
    return sorted(detected_objects)


### 'detect_objects_yolo' function breakdown
---
- **`height, width, channels = image.shape`**:
  - This line extracts the dimensions of the input image (height, width, and the number of color channels).

- **`blob = cv2.dnn.blobFromImage(image, 0.00392, (416, 416), (0, 0, 0), True, crop=False)`**:
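  - The image is preprocessed into a "blob": a 4-dimensional array (batch, channels, height, width) in the layout the network expects. Pixel values are scaled by 0.00392 (≈ 1/255) to normalize them to the range [0, 1], the image is resized to 416x416 pixels without cropping, no mean is subtracted (`(0, 0, 0)`), and the `True` argument swaps the red and blue channels because OpenCV loads images as BGR. This blob is the input to the YOLO model.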

- **`net.setInput(blob)`**:
  - The blob is fed into the YOLO network as input for object detection.

- **`outs = net.forward(output_layers)`**:
  - The network performs a forward pass, outputting the results from the specified output layers. These results contain information about detected objects.

- **`class_ids`, `confidences`, `boxes`**:
  - These lists are initialized to store the class IDs of detected objects, their confidence scores, and the bounding box coordinates, respectively.

- **`with open("coco.names", "r") as f:`**:
  - The class labels (e.g., "person," "bicycle," "car") are loaded from the "coco.names" file into a list called `classes`.

- **`detected_objects = set()`**:
  - A set is used to store detected object labels. The use of a set helps avoid duplicate entries.

- **Loop over each detection**:
  - The outer loop iterates over the detections returned by the model. The inner loop processes each detection, extracting the class ID and confidence score. If the confidence is above the specified threshold (`confidence_threshold`), the bounding box coordinates are calculated and stored in the `boxes` list, while confidence scores and class IDs are stored in `confidences` and `class_ids`.

- **Apply Non-Maximum Suppression (NMS)**:
  - `cv2.dnn.NMSBoxes(boxes, confidences, confidence_threshold, nms_threshold)` applies Non-Maximum Suppression to filter out overlapping bounding boxes, keeping only the most confident ones. This step helps to reduce duplicate detections of the same object.

- **`detected_objects.add(classes[class_ids[i]])`**:
  - For each remaining detection after NMS, the class label is added to the `detected_objects` set, ensuring that each object type is only added once.

- **`return list(detected_objects)`**:
  - The set `detected_objects` is converted back to a list before being returned. This list contains the unique object labels detected in the image.
---

In [None]:
# Analyze all images in the directory and save results incrementally
def analyze_with_yolo(directory, weights_path, config_path, save_interval=100):
    """Run YOLO on every image in ``directory`` and write the labels to a CSV.

    The results go to ``detection_results.csv`` inside ``directory`` with the
    columns ``'File Name'`` and ``'YOLO results'`` (a comma-separated label
    string). The file is rewritten periodically so an interrupted run keeps
    most of its progress.

    Args:
        directory: Folder containing the images; also receives the CSV.
        weights_path: YOLO weights file passed to ``load_yolo_model``.
        config_path: YOLO config file passed to ``load_yolo_model``.
        save_interval: Write the CSV whenever the loop index is a non-zero
            multiple of this value. ``0`` or ``None`` disables the
            intermediate saves; the final save always happens.

    Returns:
        None. Images that cannot be decoded are reported and skipped.
    """
    net, output_layers = load_yolo_model(weights_path, config_path)

    output_csv = os.path.join(directory, "detection_results.csv")
    columns = ['File Name', 'YOLO results']
    image_extensions = (".jpg", ".jpeg", ".png", ".bmp")  # Adjust based on your file types

    # Rows are collected in a plain list and turned into a DataFrame only when
    # saving: DataFrame.append no longer exists in pandas 2.x and copied the
    # whole frame on every call.
    records = []

    def save_progress():
        pd.DataFrame(records, columns=columns).to_csv(output_csv, index=False)

    # sorted() gives a reproducible row order; os.listdir order is arbitrary.
    filenames = sorted(os.listdir(directory))
    for idx, filename in enumerate(tqdm(filenames, desc="Running YOLO")):
        # Compare in lower case so "IMG_1.JPG" is not silently ignored.
        if filename.lower().endswith(image_extensions):
            image_path = os.path.join(directory, filename)
            image = cv2.imread(image_path)

            if image is None:
                print(f"Failed to load image: {image_path}")
                continue

            detected_objects = detect_objects_yolo(net, output_layers, image)
            records.append({
                'File Name': filename,
                'YOLO results': ", ".join(detected_objects)
            })

        # Periodically save progress (idx counts every directory entry)
        if save_interval and idx % save_interval == 0 and idx != 0:
            save_progress()

    # Final save
    save_progress()
    print("Analysis complete. Results saved to detection_results.csv.")

### 'analyze_with_yolo' function breakdown
---
- **`net, output_layers = load_yolo_model(weights_path, config_path)`**:
  - The YOLO model is loaded using the `load_yolo_model` function. This function returns the network (`net`) and the names of the output layers (`output_layers`), which are necessary for object detection.

- **`results_df = pd.DataFrame(columns=['File Name', 'YOLO results'])`**:
  - An empty pandas DataFrame `results_df` is initialized with columns `'File Name'` and `'YOLO results'`. This DataFrame will store the detection results for each image.

- **Loop through all files in the directory**:
  - The function iterates over all files in the specified `directory` using a `tqdm` progress bar for visual feedback. Only files with image extensions (e.g., `.jpg`, `.jpeg`, `.png`, `.bmp`) are processed.

- **`image_path = os.path.join(directory, filename)`**:
  - For each image file, the full path is constructed by joining the directory path with the filename.

- **`image = cv2.imread(image_path)`**:
  - The image is loaded using OpenCV's `cv2.imread` function. If the image fails to load (e.g., due to corruption), a warning message is printed, and the loop continues to the next image.

- **`detected_objects = detect_objects_yolo(net, output_layers, image)`**:
  - The function `detect_objects_yolo` is called to detect objects in the image using the YOLO model. This function returns a list of detected object labels.

- **Append results to the DataFrame**:
  - The detection results, including the filename and a comma-separated string of detected objects, are appended to the DataFrame `results_df`.

- **Periodically save progress**:
  - Whenever the loop index reaches a non-zero multiple of `save_interval` (the index counts every directory entry, not only image files), the current state of the DataFrame is saved to a CSV file (`detection_results.csv`) in the specified directory. This ensures that progress is not lost if the process is interrupted.

- **Final save**:
  - After all images have been processed, the final version of the DataFrame is saved to `detection_results.csv`. A message is printed to confirm that the analysis is complete and the results have been saved.
---

In [None]:
# Run the YOLO pass over the photo folder using the paths configured above.
analyze_with_yolo(directory=directory, weights_path=weights_path, config_path=config_path)

# Object Identification with Faster R-CNN

In [None]:
# CSV file inside the image folder that the Faster R-CNN step reads and updates.
results_filename = "detection_results.csv"
df_path = os.path.join(directory, results_filename)

In [None]:
# Load the pre-trained Faster R-CNN model
def load_faster_rcnn_model():
    """Return a pre-trained Faster R-CNN (ResNet-50 + FPN) in evaluation mode.

    Newer torchvision releases select pre-trained weights through a weights
    enum and warn about the deprecated ``pretrained=True`` flag. The enum is
    used when the installed torchvision provides it; otherwise the legacy flag
    is used so older installations keep working.

    Returns:
        The detection model with ``eval()`` already applied.
    """
    detection = torchvision.models.detection
    weights_enum = getattr(detection, "FasterRCNN_ResNet50_FPN_Weights", None)
    if weights_enum is not None:
        model = detection.fasterrcnn_resnet50_fpn(weights=weights_enum.DEFAULT)
    else:
        # Older torchvision: only the boolean flag is available.
        model = detection.fasterrcnn_resnet50_fpn(pretrained=True)
    model.eval()  # inference mode
    return model

### 'load_faster_rcnn_model' function breakdown
---
- **`model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)`**:
  - This line loads a pre-trained Faster R-CNN model with a ResNet-50 backbone and Feature Pyramid Network (FPN) architecture. The `pretrained=True` argument specifies that the model should be loaded with weights pre-trained on a dataset like COCO, which allows it to detect common objects without additional training.

- **`model.eval()`**:
  - This sets the model to evaluation mode using the `eval()` method. In evaluation mode, certain layers like dropout and batch normalization behave differently compared to training mode. This is essential when using the model for inference (i.e., making predictions on new data).

- **`return model`**:
  - The function returns the loaded and configured Faster R-CNN model, ready to be used for object detection on new images.
---

In [None]:
def load_coco_labels():
    """Build the class-id -> label lookup used to name Faster R-CNN predictions.

    The ids are not contiguous: 12, 26, 29, 30, 45, 66, 68, 69, 71 and 83 have
    no label. The table is therefore written as runs of consecutive ids, each
    run given by its first id and the labels that follow it.

    Returns:
        dict mapping integer class id to its label string (80 entries).
    """
    id_runs = (
        (1, ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
             'truck', 'boat', 'traffic light', 'fire hydrant')),
        (13, ('stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
              'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe')),
        (27, ('backpack', 'umbrella')),
        (31, ('handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
              'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
              'surfboard', 'tennis racket', 'bottle')),
        (46, ('wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana',
              'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog',
              'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed')),
        (67, ('dining table',)),
        (70, ('toilet',)),
        (72, ('tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
              'microwave', 'oven', 'toaster', 'sink', 'refrigerator')),
        (84, ('book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
              'toothbrush')),
    )
    return {
        first_id + offset: name
        for first_id, names in id_runs
        for offset, name in enumerate(names)
    }

In [None]:
def detect_objects_faster_rcnn(model, image_path, model_labels, confidence_threshold=0.5):
    """Run the detector on one image file and return the distinct labels found.

    Args:
        model: Callable detection model; it is given a batch of one image
            tensor and must return a sequence whose first element is a mapping
            with ``'labels'`` and ``'scores'`` tensors.
        image_path: Path of the image to analyze.
        model_labels: dict mapping class id -> label, e.g. from
            ``load_coco_labels``.
        confidence_threshold: Predictions are kept only when their score is
            strictly greater than this value.

    Returns:
        Alphabetically sorted list of unique labels. Ids missing from
        ``model_labels`` are reported as ``"Unknown label <id>"``. Sorting keeps
        the output identical between runs, which plain set iteration does not.
    """
    # The context manager closes the underlying file once the pixels are
    # decoded; convert() returns a separate in-memory image that stays usable.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")

    transform = transforms.Compose([transforms.ToTensor()])
    image_tensor = transform(image).unsqueeze(0)  # add the batch dimension

    with torch.no_grad():  # inference only, no gradients needed
        predictions = model(image_tensor)

    # tolist() yields plain Python ints/floats, which are natural dict keys
    # and format cleanly in the fallback label.
    labels = predictions[0]['labels'].tolist()
    scores = predictions[0]['scores'].tolist()

    detected_objects = {
        model_labels.get(label, f"Unknown label {label}")
        for label, score in zip(labels, scores)
        if score > confidence_threshold
    }
    return sorted(detected_objects)

### 'detect_objects_faster_rcnn' function breakdown
---
- **`image = Image.open(image_path).convert("RGB")`**:
  - The image is loaded from the specified `image_path` using the PIL library's `Image.open` function. The `convert("RGB")` method ensures that the image is in RGB format, which is required for the model.

- **`transform = transforms.Compose([transforms.ToTensor()])`**:
  - A transformation pipeline is created using `transforms.Compose`, which converts the image into a PyTorch tensor. The `ToTensor()` transform scales the pixel values to a range of [0, 1] and rearranges the dimensions to match the format expected by the model.

- **`image_tensor = transform(image).unsqueeze(0)`**:
  - The image is transformed into a tensor and then unsqueezed to add a batch dimension (i.e., changing the shape from `[C, H, W]` to `[1, C, H, W]`), as the model expects a batch of images as input.

- **`with torch.no_grad():`**:
  - This context manager disables gradient computation, which is unnecessary during inference, reducing memory usage and speeding up the process.

- **`predictions = model(image_tensor)`**:
  - The Faster R-CNN model is applied to the image tensor to generate predictions. The output is a list with one dictionary per input image; each dictionary contains the predicted bounding boxes, labels, and confidence scores.

- **`labels = predictions[0]['labels'].numpy()`**:
  - The predicted class labels for the objects detected in the image are extracted from the model's output and converted to a NumPy array for easier manipulation.

- **`scores = predictions[0]['scores'].numpy()`**:
  - The confidence scores associated with each detected object are also extracted and converted to a NumPy array.

- **`detected_objects = set()`**:
  - A set is initialized to store the detected object labels. The use of a set ensures that each object type is only added once, avoiding duplicates.

- **Loop through labels and scores**:
  - The function iterates over each pair of label and score. If the confidence score exceeds the specified `confidence_threshold`, the corresponding label is added to the `detected_objects` set.

- **`detected_objects.add(model_labels.get(label, f"Unknown label {label}"))`**:
  - For each detected object, the label is looked up in the `model_labels` dictionary to retrieve the human-readable label (e.g., "cat", "dog"). If the label is not found in the dictionary, a placeholder text like `"Unknown label {label}"` is added instead.

- **`return list(detected_objects)`**:
  - The set of detected objects is converted back to a list for consistency before being returned by the function. This list contains the unique object labels detected in the image.
---

In [None]:
def analyze_with_faster_rcnn(directory, model, df_path, save_interval=100):
    """Add Faster R-CNN labels to the CSV produced by the YOLO step.

    Reads ``df_path``, runs ``detect_objects_faster_rcnn`` on each file listed
    in its ``'File Name'`` column, stores the comma-separated labels in the
    ``'Faster R-CNN results'`` column and writes the CSV back to ``df_path``.

    Args:
        directory: Folder containing the image files named in the CSV.
        model: Detection model forwarded to ``detect_objects_faster_rcnn``.
        df_path: Path of the CSV to read and overwrite.
        save_interval: Write the CSV whenever the row index is a non-zero
            multiple of this value. ``0`` or ``None`` disables the
            intermediate saves; the final save always happens.

    Raises:
        FileNotFoundError: If ``df_path`` does not exist.
    """
    # Load the existing DataFrame with YOLO results
    if not os.path.exists(df_path):
        raise FileNotFoundError(f"The specified DataFrame file does not exist: {df_path}")
    df = pd.read_csv(df_path)

    # Load COCO labels once
    model_labels = load_coco_labels()

    result_column = 'Faster R-CNN results'
    if result_column not in df.columns:
        df[result_column] = ""
    else:
        # Empty cells saved by an earlier run are read back as NaN, which can
        # leave the column with a float dtype. Convert it to an object column
        # of strings so label text can be assigned without a dtype mismatch.
        existing = df[result_column].astype(object)
        df[result_column] = existing.where(existing.notna(), "")

    # Process each image in the DataFrame
    rows = df['File Name'].items()
    for idx, filename in tqdm(rows, total=df.shape[0], desc="Running Faster R-CNN"):
        # str() keeps a missing (NaN) file name from crashing os.path.join;
        # such a row simply fails the existence check below.
        image_path = os.path.join(directory, str(filename))

        if os.path.exists(image_path):
            detected_objects = detect_objects_faster_rcnn(model, image_path, model_labels)
            df.at[idx, result_column] = ", ".join(detected_objects)

        if save_interval and idx % save_interval == 0 and idx != 0:
            # Save progress periodically
            df.to_csv(df_path, index=False)

    # Save the final updated DataFrame
    df.to_csv(df_path, index=False)
    print(f"Faster R-CNN analysis complete. Results saved to {df_path}")

### 'analyze_with_faster_rcnn' function breakdown
---
- **`if os.path.exists(df_path): df = pd.read_csv(df_path)`**:
  - The function checks if the specified DataFrame file (`df_path`) exists. If it does, the DataFrame is loaded using `pd.read_csv(df_path)`, which contains results from previous YOLO analysis. If the file does not exist, a `FileNotFoundError` is raised.

- **`model_labels = load_coco_labels()`**:
  - The COCO labels (i.e., human-readable class names) are loaded once using the `load_coco_labels()` function. These labels are used to map the model's predicted class IDs to descriptive names.

- **`if 'Faster R-CNN results' not in df.columns: df['Faster R-CNN results'] = ""`**:
  - The function checks if the DataFrame already contains a column for storing Faster R-CNN results. If the column does not exist, it is added and initialized with empty strings for all rows.

- **`for idx, row in tqdm(df.iterrows(), total=df.shape[0], desc="Running Faster R-CNN"):`**:
  - The function iterates over each row in the DataFrame using `iterrows()` and `tqdm` to display a progress bar. Each row corresponds to an image file previously analyzed by YOLO.

- **`image_path = os.path.join(directory, row['File Name'])`**:
  - For each row in the DataFrame, the full path to the corresponding image is constructed by joining the `directory` path with the filename stored in the `'File Name'` column.

- **`if os.path.exists(image_path): detected_objects = detect_objects_faster_rcnn(model, image_path, model_labels)`**:
  - If the image file exists at the specified path, the function calls `detect_objects_faster_rcnn()` to perform object detection using the Faster R-CNN model. The detected objects are returned as a list.

- **`df.at[idx, 'Faster R-CNN results'] = ", ".join(detected_objects)`**:
  - The detected objects are stored in the `'Faster R-CNN results'` column of the DataFrame for the corresponding row. The detected object labels are joined into a comma-separated string.

- **`if idx % save_interval == 0 and idx != 0: df.to_csv(df_path, index=False)`**:
  - Every `save_interval` iterations, the function saves the current state of the DataFrame to the specified CSV file (`df_path`). This ensures that progress is saved periodically, reducing the risk of data loss in case of an interruption.

- **`df.to_csv(df_path, index=False)`**:
  - After all images have been processed, the final version of the updated DataFrame is saved to the specified CSV file. A message is printed to confirm that the Faster R-CNN analysis is complete and the results have been saved.
---

In [None]:
# Instantiate the Faster R-CNN detector, then the class-id -> label lookup
model = load_faster_rcnn_model()
model_labels = load_coco_labels()

In [None]:
# Second pass: add Faster R-CNN labels to the CSV written by the YOLO step.
analyze_with_faster_rcnn(directory, model, df_path)

---
## Models for future consideration.

### 1. **OpenAI's CLIP**
   - **Description**: CLIP can understand and classify images based on natural language prompts. It's highly versatile and can recognize a wide range of objects and scenes.
   - **Use Case**: It is more versatile than the detectors used here and can match images to descriptive text prompts such as "a city skyline," "a mountain landscape," or "a person at the beach." These richer descriptions would be useful if NLP analysis is later performed on the results.
   - **Link**: [CLIP on GitHub](https://github.com/openai/CLIP)

### 2. **Google Vision API (Free Tier)**
   - **Description**: While primarily a cloud service, Google Vision API offers a free tier with limited usage. It can identify a vast array of objects and scenes without any fine-tuning required.
   - **Use Case**: Upload images to the API and receive labels such as "city," "tree," "building," "sea," and more. It can generate many more labels than the models I am using right now.
   - **Link**: [Google Cloud Vision API](https://cloud.google.com/vision)
---