In [2]:
import os
import cv2
import numpy as np
import kagglehub
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pickle

# Function to detect humans using HOGDescriptor
def detect_humans(image):
    hog = cv2.HOGDescriptor()
    hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    boxes, weights = hog.detectMultiScale(gray_image, winStride=(8, 8), padding=(8, 8), scale=1.05)
    return len(boxes)

# Download dataset using KaggleHub
image_path = kagglehub.dataset_download("constantinwerner/human-detection-dataset")
print("Path to dataset files:", image_path)

# Define dataset path
dataset_version_path = '/root/.cache/kagglehub/datasets/constantinwerner/human-detection-dataset/versions/5'
image_folder = os.path.join(dataset_version_path, 'human detection dataset')

# Initialize feature and label arrays
X = []
y = []

# Verify dataset folder exists
if os.path.exists(image_folder):
    print(f"Dataset folder found at: {image_folder}")
    subfolders = [f for f in os.listdir(image_folder) if os.path.isdir(os.path.join(image_folder, f))]
    print("Subfolders in dataset:", subfolders)

    processed_images = set()

    for subfolder in subfolders:
        subfolder_path = os.path.join(image_folder, subfolder)
        image_files = os.listdir(subfolder_path)
        print(f"Processing {len(image_files)} images in subfolder: {subfolder}")

        for i, image_file in enumerate(image_files):
            image_full_path = os.path.join(subfolder_path, image_file)

            if image_full_path in processed_images:
                continue

            image = cv2.imread(image_full_path)
            if image is None:
                print(f"Warning: Failed to load image from {image_full_path}")
            else:
                # Process image and append features/labels
                human_count = detect_humans(image)
                X.append([human_count])
                y.append(1 if human_count > 0 else 0)
                processed_images.add(image_full_path)

    # Check if data was collected successfully
    if len(X) > 0 and len(y) > 0:
        print(f"Data collection complete. Total samples: {len(X)}.")

        # Split data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
        print(f"Training samples: {len(X_train)}, Testing samples: {len(X_test)}")

        # Train Random Forest model
        clf = RandomForestClassifier(n_estimators=100, random_state=42)
        clf.fit(X_train, y_train)
        print("Model training complete.")

        # Evaluate model
        y_pred = clf.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        print(f"Model Accuracy: {accuracy:.2f}")
        print("Confusion Matrix:")
        print(confusion_matrix(y_test, y_pred))
        print("Classification Report:")
        print(classification_report(y_test, y_pred))

        # Save the trained model
        model_filename = "human_detection_ml_model.pkl"
        with open(model_filename, "wb") as model_file:
            pickle.dump(clf, model_file)
        print(f"Model saved to {model_filename}.")

    else:
        print("No data was collected. Please verify the dataset path and structure.")

else:
    print(f"Dataset folder does not exist at {image_folder}. Please verify the path.")


Path to dataset files: /root/.cache/kagglehub/datasets/constantinwerner/human-detection-dataset/versions/5
Dataset folder found at: /root/.cache/kagglehub/datasets/constantinwerner/human-detection-dataset/versions/5/human detection dataset
Subfolders in dataset: ['1', '0']
Processing 559 images in subfolder: 1


KeyboardInterrupt: 