In [1]:
import socket
import pandas as pd
import requests
import cv2
import os 
import time
import numpy as np
import torch

def dns_lookup(host):
    """
    Resolve a host name to an IP address and report the outcome on stdout.

    Args:
        host (str): Host name (or address literal) passed to
            ``socket.gethostbyname``.

    Returns:
        The resolved address as returned by ``socket.gethostbyname``, or
        None when resolution fails with ``socket.gaierror``.
    """
    try:
        resolved = socket.gethostbyname(host)
    except socket.gaierror as err:
        # Resolution failures are reported rather than propagated.
        print(f"Failed to resolve {host}: {err}")
        return None

    print(f"Resolved {host} to IP address: {resolved}")
    return resolved

def detect_clothes(image, model, class_names, threshold=0.5):
    """
    Report whether the model finds a clothing-related object in an image.

    The model is called on the image and the first entry of the result's
    ``xyxy`` attribute is scanned. A detection counts when the label looked
    up from its class id (element 5) is one of the accepted labels and its
    confidence (element 4) is strictly greater than ``threshold``.

    Args:
        image (numpy.array): The image to run detection on.
        model (torch.nn.Module): The YOLO model; called as ``model(image)``.
        class_names (list): Class names indexed by the model's class ids.
        threshold (float): Confidence a detection must exceed.

    Returns:
        bool: True if at least one qualifying detection exists, False otherwise.
    """
    # Adjust based on the model's class names
    accepted_labels = ("person", "clothing", "dress", "shirt", "pants")

    candidates = model(image).xyxy[0]
    return any(
        class_names[int(box[5])] in accepted_labels and box[4] > threshold
        for box in candidates
    )

def download_images(df, batch_size, delay, model, class_names, timeout=10):
    """
    Download images listed in a DataFrame, keeping only those with clothes.

    For each row the image is fetched over HTTP, decoded with OpenCV and
    passed to ``detect_clothes``. When clothes are detected the image is
    resized to 256x256 and written to ``images/original_images/<id>.jpg``.
    Rows whose target file already exists are skipped without a request.
    A failure on one row is printed and processing continues with the next.

    Args:
        df (pd.DataFrame): Dataframe with an "image" column (the URL) and an
            "id" column (used for the output file name).
        batch_size (int): Number of HTTP requests to attempt before pausing.
            A value <= 0 disables pausing.
        delay (int): The delay time in seconds between batches.
        model (torch.nn.Module): The YOLO model for detecting clothes.
        class_names (list): List of class names for the YOLO model.
        timeout (float): Seconds to wait on each HTTP request before giving
            up, so a stalled server cannot hang the whole run.
    """
    output_dir = "images/original_images"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    # Throttle on requests actually attempted, not on the DataFrame index
    # label, which need not be a 0..n-1 integer range.
    requests_made = 0

    for _, row in df.iterrows():
        image_url = row["image"]
        image_id = row["id"]
        file_path = f"{output_dir}/{image_id}.jpg"

        # Already saved by an earlier run: no request, no throttling needed.
        if os.path.exists(file_path):
            continue

        # Best-effort per image: report any problem and move on.
        try:
            response = requests.get(image_url, timeout=timeout)

            if response.status_code != 200:
                print(f"Error for image {image_id}: HTTP status {response.status_code}")
            else:
                # Convert the raw bytes to a numpy buffer and decode it.
                nparr = np.frombuffer(response.content, np.uint8)
                img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)

                if img is None:
                    # imdecode signals undecodable data by returning None.
                    print(f"Error for image {image_id}: could not decode image data")
                elif detect_clothes(img, model, class_names):
                    img = cv2.resize(img, (256, 256))
                    # imwrite reports failure through its return value.
                    if cv2.imwrite(file_path, img):
                        print(f"Downloaded image {image_id}")
                    else:
                        print(f"Error for image {image_id}: could not write {file_path}")
                else:
                    print(f"No clothes detected in image {image_id}")
        except Exception as e:
            print(f"Error for image {image_id}: {str(e)}")

        # Count failed attempts too: they still hit the remote server.
        requests_made += 1
        if batch_size > 0 and requests_made % batch_size == 0:
            time.sleep(delay)

def main():
    """
    Load the YOLOv5 model and run the image download over ``only_posts.csv``.

    The model is fetched through ``torch.hub``; its ``names`` attribute is
    used as the class-name lookup. Images are requested in batches of 10
    with a 10 second pause between batches.
    """
    detector = torch.hub.load("ultralytics/yolov5", "yolov5s", pretrained=True)
    labels = detector.names

    posts = pd.read_csv("only_posts.csv")
    download_images(
        df=posts,
        batch_size=10,
        delay=10,
        model=detector,
        class_names=labels,
    )

# Run the pipeline only when this module is executed as the main program,
# not when it is imported.
if __name__ == "__main__":
    main()


Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to C:\Users\amish/.cache\torch\hub\master.zip


[31m[1mrequirements:[0m Ultralytics requirements ['pillow>=10.3.0', 'requests>=2.32.0'] not found, attempting AutoUpdate...
Collecting pillow>=10.3.0
  Downloading pillow-10.4.0-cp310-cp310-win_amd64.whl.metadata (9.3 kB)
Collecting requests>=2.32.0
  Downloading requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Downloading pillow-10.4.0-cp310-cp310-win_amd64.whl (2.6 MB)
   ---------------------------------------- 2.6/2.6 MB 1.7 MB/s eta 0:00:00
Downloading requests-2.32.3-py3-none-any.whl (64 kB)
   ---------------------------------------- 64.9/64.9 kB 3.4 MB/s eta 0:00:00
Installing collected packages: requests, pillow
  Attempting uninstall: requests
    Found existing installation: requests 2.31.0
    Uninstalling requests-2.31.0:
      Successfully uninstalled requests-2.31.0
  Attempting uninstall: pillow
    Found existing installation: Pillow 9.5.0
    Uninstalling Pillow-9.5.0:
      Successfully uninstalled Pillow-9.5.0
Successfully installed pillow-10.4.0 requests-2.32.3

YOLOv5  2024-7-12 Python-3.10.7 torch-2.3.1+cpu CPU

Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt to yolov5s.pt...
100%|█████████████████████████████████████████████████████████████████████████████| 14.1M/14.1M [00:04<00:00, 3.59MB/s]

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


Downloaded image 11
Downloaded image 12
Downloaded image 13
No clothes detected in image 14
No clothes detected in image 15
Downloaded image 16
Downloaded image 17
Downloaded image 18
Downloaded image 20
Downloaded image 23
Downloaded image 24
Downloaded image 27
Downloaded image 28
No clothes detected in image 29
Downloaded image 36
No clothes detected in image 37
Downloaded image 38
Downloaded image 39
Downloaded image 41
Downloaded image 44
Downloaded image 45
No clothes detected in image 47
No clothes detected in image 48
Downloaded image 49
Downloaded image 51
Downloaded image 54
Downloaded image 56
Downloaded image 58
Downloaded image 60
Downloaded image 63
Downloaded image 71
Downloaded image 77
No clothes detected in image 78
No clothes detected in image 79
No clothes detected in image 80
Downloaded image 81
Downloaded image 82
Downloaded image 83
Downloaded image 84
Downloaded image 85
Downloaded image 86
Downloaded image 87
Downloaded image 88
Downloaded image 89
Downloaded i