# Dependencies

In [33]:
# !pip install ultralytics
from ultralytics import YOLO
from PIL import Image
import matplotlib.pyplot as plt
import os
import pandas as pd

In [22]:
model = YOLO('yolov8s-worldv2.pt')  # You can use 'yolov8.pt' or another YOLO version

# Input

In [23]:
import pandas as pd
train_df = pd.read_csv('./NYC_500.csv')

### Load amenities from certain region

In [24]:
import osmnx as ox

# Define the bounding box or location
place_name = "New York City, USA"  # Example area
tags = {'amenity': ['post_box', 'bicycle_parking', 'bench', 'fast_food', 'waste_basket']}  # Fetch all amenities

# Fetch amenities in the defined area
gdf = ox.features_from_place(place_name, tags)

# Extract latitude and longitude from the geometries
gdf['latitude'] = gdf.geometry.centroid.y
gdf['longitude'] = gdf.geometry.centroid.x

# Filter for relevant columns
amenities_lat_lon_df = gdf[['amenity', 'latitude', 'longitude']]

# Display the DataFrame
print(amenities_lat_lon_df)


  gdf['latitude'] = gdf.geometry.centroid.y


                                 amenity   latitude  longitude
element_type osmid                                            
node         42538083              bench  40.673352 -73.970702
             349323821         fast_food  40.762515 -73.976690
             357588583   bicycle_parking  40.661771 -73.992858
             357618584          post_box  40.682496 -73.962688
             357618608          post_box  40.643833 -73.979460
...                                  ...        ...        ...
way          1340286561        fast_food  40.707104 -73.954532
             1340573916        fast_food  40.692865 -73.757128
             1342240422  bicycle_parking  40.723798 -74.007727
relation     4549669           fast_food  40.709871 -73.859180
             17722706              bench  40.753779 -74.007661

[35988 rows x 3 columns]



  gdf['longitude'] = gdf.geometry.centroid.x


In [25]:
# import pandas as pd
# from geopy.distance import geodesic

# # Initialize an empty list to store the results
# result_list = []

# # Iterate over the rows of the train_df DataFrame
# for _, row in train_df.iterrows():
#     picture_coords = (row['lat'], row['lon'])
    
#     # Filter amenities within 30 meters radius
#     amenities_in_radius = amenities_lat_lon_df[
#         amenities_lat_lon_df.apply(
#             lambda x: geodesic(picture_coords, (x['latitude'], x['longitude'])).meters <= 30,
#             axis=1
#         )
#     ]
    
#     # Create a list of amenities with their details
#     amenities_list = [
#         {
#             'amenity': amenity_row['amenity'],
#             'latitude': amenity_row['latitude'],
#             'longitude': amenity_row['longitude']
#         }
#         for _, amenity_row in amenities_in_radius.iterrows()
#     ]
    
#     # Append the results to the result_list
#     result_list.append({
#         'id': row['bubbleId'],
#         'latitude': row['lat'],
#         'longitude': row['lon'],
#         'amenities': amenities_list
#     })

# # Create a new DataFrame from the results
# final_df = pd.DataFrame(result_list)

# # Display the resulting DataFrame
# print(final_df)


In [26]:
import pandas as pd
from geopy.distance import geodesic
import math

result_list = []

# Define approximate degree deltas for ~30 meters:
# Latitude: ~111 km per degree, so 30 m ~ 0.00027 degrees
lat_delta = 0.0003  # A slightly larger buffer than 0.00027 for safety

# We'll dynamically compute longitude delta to be safe:
# longitude degrees change with latitude, so we compute for each point.
def get_lon_delta(lat):
    return 0.0003 / math.cos(math.radians(lat))  # small approximate buffer

for index, row in train_df.iterrows():
    pic_lat = row['lat']
    pic_lon = row['lon']

    # Compute bounding box around the picture
    lon_delta = get_lon_delta(pic_lat)
    lat_min = pic_lat - lat_delta
    lat_max = pic_lat + lat_delta
    lon_min = pic_lon - lon_delta
    lon_max = pic_lon + lon_delta

    # Filter amenities by bounding box
    subset = amenities_lat_lon_df[
        (amenities_lat_lon_df['latitude'] >= lat_min) &
        (amenities_lat_lon_df['latitude'] <= lat_max) &
        (amenities_lat_lon_df['longitude'] >= lon_min) &
        (amenities_lat_lon_df['longitude'] <= lon_max)
    ]
    
    # Now compute geodesic distance on this filtered subset
    pic_coords = (pic_lat, pic_lon)
    amenities_in_radius = []
    for _, amenity_row in subset.iterrows():
        amenity_coords = (amenity_row['latitude'], amenity_row['longitude'])
        dist = geodesic(pic_coords, amenity_coords).meters
        if dist <= 30:
            amenities_in_radius.append({
                'amenity': amenity_row['amenity'],
                'latitude': amenity_row['latitude'],
                'longitude': amenity_row['longitude']
            })
    
    if amenities_in_radius:
        result_list.append({
            'id': row['bubbleId'],
            'latitude': pic_lat,
            'longitude': pic_lon,
            'amenities': amenities_in_radius
        })

# Create a new DataFrame from the results
final_df = pd.DataFrame(result_list)
print(final_df)


                   id   latitude  longitude  \
0    1033310130003330  40.777104 -73.963860   
1    1033303222012112  40.763575 -73.977703   
2     210002012111211  40.709206 -74.017376   
3    1101202230002123  40.736916 -74.001339   
4     210001333221122  40.747358 -74.005428   
..                ...        ...        ...   
149  1033312220332010  40.746594 -73.982245   
150   210003303023110  40.761672 -73.976693   
151   210003301111123  40.758504 -73.970898   
152  1101202312120133  40.707779 -74.007839   
153  1101123003022331  40.711888 -74.003873   

                                             amenities  
0    [{'amenity': 'waste_basket', 'latitude': 40.77...  
1    [{'amenity': 'fast_food', 'latitude': 40.76360...  
2    [{'amenity': 'bench', 'latitude': 40.7093396, ...  
3    [{'amenity': 'waste_basket', 'latitude': 40.73...  
4    [{'amenity': 'bench', 'latitude': 40.7473199, ...  
..                                                 ...  
149  [{'amenity': 'bicycle_parking',

## Amenities with distances :

In [27]:
from geopy.distance import geodesic
import pandas as pd

# Initialize a list to store the results
distance_results = []

# Iterate over each row in the DataFrame
for _, row in final_df.iterrows():
    # Get the ID point coordinates
    id_coords = (row['latitude'], row['longitude'])
    
    # Prepare a list to store amenities with distances
    amenities_with_distances = []
    
    # Iterate over the amenities for this ID
    for amenity in row['amenities']:
        amenity_coords = (amenity['latitude'], amenity['longitude'])
        distance = geodesic(id_coords, amenity_coords).meters  # Calculate distance in meters
        
        # Add distance information to the amenity dictionary
        amenities_with_distances.append({
            'amenity': amenity['amenity'],
            'latitude': amenity['latitude'],
            'longitude': amenity['longitude'],
            'distance_from_id': distance  # Add distance value
        })
    
    # Append the updated row to the results
    distance_results.append({
        'latitude': row['latitude'],
        'longitude': row['longitude'],
        'id': row['id'],
        'amenities': amenities_with_distances
    })

# Create a new DataFrame with distances
final_with_distances_df = pd.DataFrame(distance_results)

# Display the updated DataFrame
print(final_with_distances_df)


      latitude  longitude                id  \
0    40.777104 -73.963860  1033310130003330   
1    40.763575 -73.977703  1033303222012112   
2    40.709206 -74.017376   210002012111211   
3    40.736916 -74.001339  1101202230002123   
4    40.747358 -74.005428   210001333221122   
..         ...        ...               ...   
149  40.746594 -73.982245  1033312220332010   
150  40.761672 -73.976693   210003303023110   
151  40.758504 -73.970898   210003301111123   
152  40.707779 -74.007839  1101202312120133   
153  40.711888 -74.003873  1101123003022331   

                                             amenities  
0    [{'amenity': 'waste_basket', 'latitude': 40.77...  
1    [{'amenity': 'fast_food', 'latitude': 40.76360...  
2    [{'amenity': 'bench', 'latitude': 40.7093396, ...  
3    [{'amenity': 'waste_basket', 'latitude': 40.73...  
4    [{'amenity': 'bench', 'latitude': 40.7473199, ...  
..                                                 ...  
149  [{'amenity': 'bicycle_parking',

In [28]:
print(str(final_with_distances_df.amenities[0]))

[{'amenity': 'waste_basket', 'latitude': 40.7769217, 'longitude': -73.9640435, 'distance_from_id': 25.49109292574657}]


# Model

In [29]:
from transformers import AutoProcessor, AutoModelForCausalLM
import requests
import copy
import torch

In [30]:
model_id = 'microsoft/Florence-2-large-ft'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).eval()
model = model.to(device)
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
device

device(type='cpu')

In [31]:
def run_inference(image, task_prompt, text_input=None):
    if text_input is None:
        prompt = task_prompt
    else:
        prompt = task_prompt + text_input
    inputs = processor(text=prompt, images=image, return_tensors="pt").to(device)
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        early_stopping=False,
        do_sample=False,
        num_beams=3,
    )
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(
        generated_text,
        task=task_prompt,
        image_size=(image.width, image.height)
    )
    return parsed_answer

In [38]:
suffixes = {
    'front': '01_x2.jpg',
    'left': '10_x2.jpg',
    'back': '03_x2.jpg',
    'right': '02_x2.jpg'
}
# Base folder and subfolders
folders = ['front', 'left', 'back', 'right']
base_folder = "./NYC_500/"  # Adjust this path
folder_paths = {f: os.path.join(base_folder, f) for f in folders}

detection_results = []

task_prompt = '<OPEN_VOCABULARY_DETECTION>'

for _, row in final_with_distances_df.iterrows():
    image_id = row['id']

    # The "amenities" column is assumed to be a list of dicts like:
    # [{'amenity': 'waste_basket', 'latitude': ..., 'longitude': ..., 'distance_from_id': ...}, ... ]
    amenities_list = row['amenities']

    # Extract just the amenity names
    amenities_to_detect = [d['amenity'] for d in amenities_list if 'amenity' in d]

    # Skip if there are no amenities to detect
    if not amenities_to_detect:
        continue

    # Construct the text input (e.g. "waste_basket bench bicycle_parking ...")
    text_input = " ".join(amenities_to_detect)

    for folder, suffix in suffixes.items():
        # Construct image path
        image_filename = f"{image_id}{suffix}"
        image_path = os.path.join(folder_paths[folder], image_filename)

        # Check if the file exists
        if not os.path.exists(image_path):
            continue

        # Load image
        with Image.open(image_path) as img:
            # Run Florence 2 inference
            parsed_answer = run_inference(img, task_prompt, text_input=text_input)

            # Extract the open vocabulary detection data
            ovd_data = parsed_answer.get("<OPEN_VOCABULARY_DETECTION>", {})

            # Focus on bboxes and bboxes_labels only
            bboxes = ovd_data.get("bboxes", [])
            bboxes_labels = ovd_data.get("bboxes_labels", [])

            # If the length of bboxes and bboxes_labels are mismatched, handle or skip
            if len(bboxes) != len(bboxes_labels):
                print(f"Warning: Mismatched bboxes and labels for {image_id} in {folder}.")
                continue

            # Go through each bounding box
            for bbox, label in zip(bboxes, bboxes_labels):
                # bbox is [x_min, y_min, x_max, y_max]
                x_min, y_min, x_max, y_max = bbox

                # Append to detection results
                detection_results.append({
                    'id': image_id,
                    'folder': folder,
                    'label': label,
                    'confidence': None,  # If Florence doesn't provide a confidence score
                    'x_min': x_min,
                    'y_min': y_min,
                    'x_max': x_max,
                    'y_max': y_max
                })

# Convert detection results to a DataFrame
detection_df = pd.DataFrame(detection_results)

# Display or save the detection results
print(detection_df)

                   id folder  \
0    1101203010232100   back   
1    1101203010232100  right   
2    1033303303313002   back   
3    1101203010312330  front   
4    1101203010312330   left   
..                ...    ...   
97   1101202312120133   back   
98   1101123003022331  front   
99   1101123003022331   left   
100  1101123003022331   back   
101  1101123003022331  right   

                                                 label confidence       x_min  \
0                                   bench waste_basket       None  120.576004   
1                                   bench waste_basket       None  434.432007   
2    bicycle_parking post_box waste_basket bicycle_...       None  136.448013   
3    bench bench bench bench waste_basket bench ben...       None   31.488001   
4    bench bench bench bench waste_basket bench ben...       None  327.936005   
..                                                 ...        ...         ...   
97   waste_basket waste_basket bench bench bench

In [44]:
detection_df

Unnamed: 0,id,folder,label,confidence,x_min,y_min,x_max,y_max
0,1101203010232100,back,bench waste_basket,,120.576004,430.336029,375.552032,511.232025
1,1101203010232100,right,bench waste_basket,,434.432007,298.240021,472.320007,325.888000
2,1033303303313002,back,bicycle_parking post_box waste_basket bicycle_...,,136.448013,446.720032,409.856018,511.232025
3,1101203010312330,front,bench bench bench bench waste_basket bench ben...,,31.488001,314.624023,110.848007,365.824005
4,1101203010312330,left,bench bench bench bench waste_basket bench ben...,,327.936005,366.336029,511.232025,511.232025
...,...,...,...,...,...,...,...,...
97,1101202312120133,back,waste_basket waste_basket bench bench bench wa...,,121.600006,395.520020,511.232025,511.232025
98,1101123003022331,front,bench,,257.792023,272.640015,272.128021,284.928009
99,1101123003022331,left,bench,,0.256000,312.576019,263.936005,371.968018
100,1101123003022331,back,bench,,314.624023,279.808014,371.968018,307.456024


In [None]:
import os
import matplotlib.pyplot as plt
from PIL import Image

# The suffixes you provided for each folder (direction)
suffixes = {
    'front': '01_x2.jpg',
    'left': '10_x2.jpg',
    'back': '03_x2.jpg',
    'right': '02_x2.jpg'
}

def plot_all_detections(detection_df, base_folder="./NYC_500/"):
    """
    Plots all bounding box detections from detection_df using Matplotlib.
    Each row in detection_df must have these columns:
      - id
      - folder (one of: front, left, back, right)
      - label
      - confidence
      - x_min, y_min, x_max, y_max

    base_folder is where your directional subfolders (front, left, back, right) live.
    """
    # Group the DataFrame by (id, folder) so that we can plot all boxes
    # belonging to the same image on a single figure.
    grouped = detection_df.groupby(['id', 'folder'], as_index=False)

    for (image_id, folder), group_rows in grouped:
        # Build the full path to the image, using the suffixes dict
        suffix = suffixes.get(folder, "")
        image_filename = f"{image_id}{suffix}"
        image_path = os.path.join(base_folder, folder, image_filename)

        # Check if the image exists
        if not os.path.exists(image_path):
            print(f"Image not found: {image_path}")
            continue

        # Load the image
        img = Image.open(image_path)
        fig, ax = plt.subplots(figsize=(10, 8))
        ax.imshow(img)

        # Plot each detection (bounding box)
        for _, row in group_rows.iterrows():
            x_min, y_min, x_max, y_max = row['x_min'], row['y_min'], row['x_max'], row['y_max']

            # Draw the rectangle
            rect = plt.Rectangle(
                (x_min, y_min),
                (x_max - x_min),
                (y_max - y_min),
                fill=False,
                edgecolor='red',
                linewidth=2
            )
            ax.add_patch(rect)

            # Create a label string
            label_str = str(row['label'])
            if row['confidence'] is not None:
                # In case your confidence is None or not numeric, handle gracefully
                try:
                    label_str += f" ({float(row['confidence']):.2f})"
                except ValueError:
                    # Confidence not numeric
                    pass

            # Place label text just above bounding box
            ax.text(
                x_min,
                y_min - 5,
                label_str,
                color='red',
                fontsize=10,
                bbox=dict(facecolor='yellow', alpha=0.5, edgecolor='none')
            )

        # Optional title
        plt.title(f"Detections for ID = {image_id}, Folder = {folder}")
        plt.axis('off')
        plt.show()


plot_all_detections(detection_df, base_folder="./NYC_500/")

### YOLO World Model (deprecated)

In [None]:
from ultralytics import YOLO
import pandas as pd
import os

# Load the YOLO model
model = YOLO('yolov8s-worldv2.pt')  # Use the specialized model

# Define suffixes for each folder
suffixes = {
    'front': '01_x2.jpg',
    'left': '10_x2.jpg',
    'back': '03_x2.jpg',
    'right': '02_x2.jpg'
}

# Paths to folders
folders = ['front', 'left', 'back', 'right']
base_folder = "./NYC_500/"  # Adjust this to your folder structure
folder_paths = {f: os.path.join(base_folder, f) for f in folders}

# Target amenities
target_amenities = {'post_box', 'bicycle_parking', 'bench', 'waste_basket'}

# Results storage
detection_results = []

# Iterate over IDs in the dataset
for _, row in final_with_distances_df.iterrows():
    image_id = row['id']
    
    for folder, suffix in suffixes.items():
        # Construct the image filename with suffix
        image_filename = f"{image_id}{suffix}"
        image_path = os.path.join(folder_paths[folder], image_filename)  # Path to the image
        
        # Check if the image exists
        if os.path.exists(image_path):
            # Run YOLO inference
            results = model(image_path)
            
            # Process detection results
            for result in results:
                for box in result.boxes.data.tolist():
                    class_id = int(box[5])  # Class ID
                    label = model.names[class_id]  # Get label from class ID
                    
                    if label in target_amenities:
                        detection_results.append({
                            'id': image_id,
                            'folder': folder,  # Indicate the source folder
                            'label': label,
                            'confidence': box[4],  # Confidence score
                            'x_min': box[0],
                            'y_min': box[1],
                            'x_max': box[2],
                            'y_max': box[3]
                        })

# Convert results to a DataFrame
detection_df = pd.DataFrame(detection_results)

# Save or display the results
print(detection_df)

In [34]:
print(detection_df)

                 id folder  label  confidence       x_min       y_min  \
0  1033312313110312   left  bench    0.370045  448.528168  301.353638   
1  1033321030321131   back  bench    0.509984  338.523834  343.717712   
2  1033312221032230  right  bench    0.308314   89.116806  320.609619   
3  1033302311321313   left  bench    0.591969    0.000000  316.001801   
4  1033302311321313   back  bench    0.421387  450.672546  314.855133   
5  1101203010311011  right  bench    0.292347  364.066101  341.255371   
6  1033311311331233  right  bench    0.264469   14.500369  292.043610   
7  1033303100003211  right  bench    0.520286  143.815155  308.001282   

        x_max       y_max  
0  482.541443  325.961548  
1  399.499603  381.025726  
2  121.303612  337.528137  
3   21.816593  346.805328  
4  512.000000  344.377533  
5  446.445007  482.324127  
6   40.253227  319.526703  
7  194.154160  322.889587  


# Inference