<a href="https://colab.research.google.com/github/lorenzopaoria/Smoking-detection-and-distance-analysis/blob/main/distance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Find the distance between smoker and not

In [7]:
!pip install torch torchvision
!pip install opencv-python
!git clone https://github.com/DepthAnything/Depth-Anything-V2.git
%cd Depth-Anything-V2
!pip install -r requirements.txt

Cloning into 'Depth-Anything-V2'...
remote: Enumerating objects: 142, done.[K
remote: Counting objects: 100% (75/75), done.[K
remote: Compressing objects: 100% (41/41), done.[K
remote: Total 142 (delta 45), reused 34 (delta 34), pack-reused 67 (from 2)[K
Receiving objects: 100% (142/142), 45.17 MiB | 37.63 MiB/s, done.
Resolving deltas: 100% (48/48), done.
/content/Depth-Anything-V2/Depth-Anything-V2/Depth-Anything-V2


In [8]:
import torch
from depth_anything_v2.dpt import DepthAnythingV2
import cv2
import numpy as np
from dataclasses import dataclass
from typing import List, Tuple
import math
import os
import json

In [9]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [10]:
# carica il modello Depth-Anything-V2
depth_model = DepthAnythingV2()
depth_model.load_state_dict(torch.load('/content/drive/MyDrive/pth_depth_estimation_large/depth_anything_v2_vitl.pth'))
depth_model.eval()

  depth_model.load_state_dict(torch.load('/content/drive/MyDrive/pth_depth_estimation_large/depth_anything_v2_vitl.pth'))


DepthAnythingV2(
  (pretrained): DinoVisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14))
      (norm): Identity()
    )
    (blocks): ModuleList(
      (0-23): 24 x NestedTensorBlock(
        (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
        (attn): MemEffAttention(
          (qkv): Linear(in_features=1024, out_features=3072, bias=True)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=1024, out_features=1024, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (ls1): LayerScale()
        (drop_path1): Identity()
        (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (act): GELU(approximate='none')
          (fc2): Linear(in_features=4096, out_features=1024, bias=True)
          (drop): Dropout(p=0.0, inplace=Fal

In [11]:
@dataclass
class Person:
    x1: int
    y1: int
    x2: int
    y2: int
    is_smoking: bool
    confidence: float

In [12]:
def calculate_center_point(person: Person) -> Tuple[float, float]:
    """calcola il punto centrale di una bounding box"""
    center_x = (person.x1 + person.x2) / 2
    center_y = (person.y1 + person.y2) / 2
    return (center_x, center_y)

In [13]:
def calculate_depth_map(image):
    """genera una mappa di profondità per l'immagine utilizzando Depth-Anything-V2"""
    image_tensor = torch.from_numpy(image).permute(2, 0, 1).unsqueeze(0).float() / 255.0
    with torch.no_grad():
        depth_map = depth_model(image_tensor)
    return depth_map.squeeze().cpu().numpy()

In [14]:
def calculate_3d_distance(p1: Person, p2: Person, depth_map, focal_length: float, image_width: float):
    """calcola la distanza 3D tra due persone utilizzando la mappa di profondità"""
    c1 = calculate_center_point(p1)
    c2 = calculate_center_point(p2)

    # Ottieni la profondità media per ciascuna persona
    depth1 = np.mean(depth_map[int(c1[1]):int(c2[1]), int(c1[0]):int(c2[0])])
    depth2 = np.mean(depth_map[int(c2[1]):int(c2[1]), int(c2[0]):int(c2[0])])

    # Calcola le coordinate 3D
    x1 = (c1[0] - image_width / 2) * depth1 / focal_length
    y1 = depth1
    x2 = (c2[0] - image_width / 2) * depth2 / focal_length
    y2 = depth2

    # Distanza euclidea 3D
    distance = math.sqrt((x2 - x1)**2 + (y2 - y1)**2)
    return distance

In [15]:
def find_smoker_nonsmoker_distances(people: List[Person], depth_map, focal_length: float, image_width: float) -> List[Tuple[Person, Person, float]]:
    """trova tutte le distanze 3D tra fumatori e non fumatori"""
    smokers = [p for p in people if p.is_smoking]
    non_smokers = [p for p in people if not p.is_smoking]
    distances = []

    for smoker in smokers:
        for non_smoker in non_smokers:
            distance = calculate_3d_distance(smoker, non_smoker, depth_map, focal_length, image_width)
            distances.append((smoker, non_smoker, distance))

    return distances

In [16]:
def load_detections_from_json(json_path: str) -> List[Person]:
    """carica le detection dal file JSON e le converte in oggetti Person"""
    with open(json_path, 'r') as f:
        data = json.load(f)

    people = []
    for detection in data['detections']:
        # Classe 2 rappresenta il fumatore, 1 il non fumatore
        is_smoking = detection['class'] == 2
        is_not_smoking = detection['class'] == 1
        bbox = detection['bbox']
        people.append(Person(
            x1=int(bbox[0]),
            y1=int(bbox[1]),
            x2=int(bbox[2]),
            y2=int(bbox[3]),
            is_smoking=is_smoking,
            confidence=detection['confidence']
        ))

    return people

In [17]:
def process_and_save_image(image_path: str, people: List[Person], output_dir: str, focal_length: float) -> None:
    """processa un'immagine disegnando le distanze 3D tra i centri delle bounding box"""
    os.makedirs(output_dir, exist_ok=True)

    # Carica l'immagine
    image = cv2.imread(image_path)
    if image is None:
        print(f"Errore nel caricamento dell'immagine: {image_path}")
        return

    # Genera la mappa di profondità
    depth_map = calculate_depth_map(image)

    # Calcola le distanze 3D
    distances = find_smoker_nonsmoker_distances(people, depth_map, focal_length, image.shape[1])

    # Definizione colori
    YELLOW = (0, 255, 255)  # BGR per giallo (centri)
    BROWN = (42, 42, 165)   # BGR per marrone (linee distanza)
    RED = (0, 0, 255)       # BGR per rosso (fumatori)
    BLUE = (255, 0, 0)      # BGR per blu (non fumatori)

    # Disegna le bounding box e i centri
    for person in people:
        color = RED if person.is_smoking else BLUE
        cv2.rectangle(image, (int(person.x1), int(person.y1)), (int(person.x2), int(person.y2)), color, 2)
        center = calculate_center_point(person)
        cv2.circle(image, (int(center[0]), int(center[1])), 3, YELLOW, -1)

    # Disegna le distanze
    for smoker, non_smoker, distance in distances:
        s_center = calculate_center_point(smoker)
        ns_center = calculate_center_point(non_smoker)
        cv2.line(image, (int(s_center[0]), int(s_center[1])), (int(ns_center[0]), int(ns_center[1])), BROWN, 2)
        mid_point = ((s_center[0] + ns_center[0]) // 2, (s_center[1] + ns_center[1]) // 2)
        cv2.putText(image, f"{distance:.2f}m", (int(mid_point[0]), int(mid_point[1])), cv2.FONT_HERSHEY_SIMPLEX, 0.7, BROWN, 2)

    output_path = os.path.join(output_dir, f"distances_{os.path.basename(image_path)}")
    cv2.imwrite(output_path, image)
    print(f"Distanze 3D calcolate per {image_path}")

In [18]:
def main():
    base_dir = '/content/drive/MyDrive/test_trained_person'
    output_dir = '/content/drive/MyDrive/distance_img_process'
    focal_length = 1000  # da calibrare in base alla videocamera

    # Processa tutte le immagini nella cartella
    images_dir = os.path.join(base_dir, 'images')
    coordinates_dir = os.path.join(base_dir, 'coordinates')

    for filename in os.listdir(images_dir):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            image_path = os.path.join(images_dir, filename)
            json_name = f"{os.path.splitext(filename)[0]}.json"
            json_path = os.path.join(coordinates_dir, json_name)

            if os.path.exists(json_path):
                try:
                    people = load_detections_from_json(json_path)
                    process_and_save_image(image_path, people, output_dir, focal_length)
                    print(f"Processata immagine: {filename}")
                except Exception as e:
                    print(f"Errore nel processare {filename}: {str(e)}")
            else:
                print(f"File JSON non trovato per {filename}")

if __name__ == "__main__":
    main()

Errore nel processare trained_40.jpg: Input image height 1153 is not a multiple of patch height 14
Errore nel processare trained_41.jpg: Input image height 1151 is not a multiple of patch height 14
Errore nel processare trained_42.jpg: Input image height 1152 is not a multiple of patch height 14
Errore nel processare trained_43.jpg: Input image height 1153 is not a multiple of patch height 14
Errore nel processare trained_44.jpg: Input image height 1152 is not a multiple of patch height 14
Errore nel processare trained_45.jpg: Input image height 1153 is not a multiple of patch height 14
Errore nel processare trained_46.jpg: Input image height 1153 is not a multiple of patch height 14
Errore nel processare trained_47.jpg: Input image height 1152 is not a multiple of patch height 14
Errore nel processare trained_48.jpg: Input image height 1153 is not a multiple of patch height 14
Errore nel processare trained_49.jpg: Input image height 1153 is not a multiple of patch height 14
Errore nel