In [None]:
import os

import cv2
import kagglehub
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split

# Locations of the YOLOv3 weights, network configuration and class-name list.
weights_path = '/content/yolov3.weights'
config_path = '/content/yolov3.cfg'
classes_path = 'coco.names'

# Load the network once at module level; detect_humans() reuses it.
net = cv2.dnn.readNet(weights_path, config_path)
layer_names = net.getLayerNames()
# getUnconnectedOutLayers() yields 1-based layer indices. Depending on the
# OpenCV version the result is shaped (N,) or (N, 1), so flatten it before
# indexing to work with either layout.
output_layers = [
    layer_names[i - 1]
    for i in np.asarray(net.getUnconnectedOutLayers()).flatten()
]

# One class name per line; the line position is the class id used when
# decoding detections, so blank lines are stripped but deliberately kept.
with open(classes_path, 'r') as f:
    classes = [line.strip() for line in f]

def detect_humans(image, conf_threshold=0.5, nms_threshold=0.4):
    """Count the people YOLO detects in *image*.

    The image is run through the module-level ``net``. Every detection whose
    best-scoring class is ``'person'`` with a score above *conf_threshold* is
    kept as a candidate, and overlapping candidates are then merged with
    non-maximum suppression so that a single person is not counted once per
    anchor/scale.

    Args:
        image: Image array as returned by ``cv2.imread`` (height and width
            are taken from ``image.shape[:2]``).
        conf_threshold: Minimum class score for a detection to be considered.
        nms_threshold: Overlap (IoU) threshold passed to
            ``cv2.dnn.NMSBoxes``; candidates overlapping a stronger box by
            more than this are discarded.

    Returns:
        Number of distinct person detections remaining after suppression.
    """
    height, width = image.shape[:2]
    blob = cv2.dnn.blobFromImage(image, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outputs = net.forward(output_layers)

    boxes = []
    confidences = []
    for output in outputs:
        for detection in output:
            # Entries from index 5 onward are the per-class scores.
            scores = detection[5:]
            class_id = int(np.argmax(scores))
            confidence = float(scores[class_id])
            if confidence > conf_threshold and classes[class_id] == 'person':
                # The first four entries are the box centre and size relative
                # to the image (standard YOLO output layout); convert to the
                # pixel [x, y, w, h] form that NMSBoxes expects.
                center_x = detection[0] * width
                center_y = detection[1] * height
                box_w = detection[2] * width
                box_h = detection[3] * height
                boxes.append([
                    int(center_x - box_w / 2),
                    int(center_y - box_h / 2),
                    int(box_w),
                    int(box_h),
                ])
                confidences.append(confidence)

    if not boxes:
        return 0

    # Collapse duplicate boxes that cover the same person.
    kept = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
    return len(kept)

# Download the dataset (or reuse the cached copy) and remember where it lives.
image_path = kagglehub.dataset_download("constantinwerner/human-detection-dataset")
print("Path to dataset files:", image_path)

# Use the directory kagglehub reported instead of a hard-coded cache path, so
# the script keeps working when the cache location or dataset version differs.
dataset_version_path = image_path
dataset_files = os.listdir(dataset_version_path)
print("Dataset files and folders:", dataset_files)

# The images are expected inside this subdirectory of the download.
image_folder = os.path.join(dataset_version_path, 'human detection dataset')

# Build a small dataset from the first images of every subfolder, train a
# random forest on the detected person counts and report its accuracy and
# confusion matrix.

# Upper bound on how many images are processed per subfolder.
MAX_IMAGES_PER_SUBFOLDER = 5

if os.path.exists(image_folder):
    subfolders = os.listdir(image_folder)
    print("Subfolders in image folder:", subfolders)

    X = []
    y = []

    processed_images = set()

    for subfolder in subfolders:
        subfolder_path = os.path.join(image_folder, subfolder)

        if not os.path.isdir(subfolder_path):
            print(f"Skipping {subfolder_path}: not a directory.")
            continue

        # os.listdir() returns entries in arbitrary order; sort them so the
        # sampled images are the same on every run.
        image_files = sorted(os.listdir(subfolder_path))
        print(f"Files in {subfolder_path}: {image_files}")

        for image_file in image_files[:MAX_IMAGES_PER_SUBFOLDER]:
            image_full_path = os.path.join(subfolder_path, image_file)

            if image_full_path in processed_images:
                continue

            image = cv2.imread(image_full_path)
            if image is None:
                # cv2.imread signals an unreadable file by returning None.
                print(f"Failed to load image from {image_full_path}")
                continue

            human_count = detect_humans(image)
            print(f'Number of humans detected in {image_full_path}: {human_count}')

            X.append([human_count])
            # NOTE(review): the label is derived from the same detector output
            # that is used as the feature, so the classifier only learns
            # "count > 0". If the subfolder names encode the ground truth,
            # they are probably the intended labels -- confirm.
            y.append(1 if human_count > 0 else 0)

            processed_images.add(image_full_path)

    # train_test_split needs at least two samples to produce non-empty
    # train and test partitions.
    if len(X) >= 2:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

        clf = RandomForestClassifier(n_estimators=100, random_state=42)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        print(f'Accuracy of the classifier: {accuracy:.2f}')

        # Only computed when a model was actually trained; y_test and y_pred
        # do not exist on the other paths.
        cm = confusion_matrix(y_test, y_pred)
        print("Confusion Matrix:")
        print(cm)
    else:
        print("No data to train the model. Please check your dataset.")
else:
    print(f"Image folder does not exist at {image_folder}. Please verify the path.")