# Dependencies

In [2]:
# !pip install ultralytics
from ultralytics import YOLO
from PIL import Image
import matplotlib.pyplot as plt

In [3]:
model = YOLO('yolov8s-worldv2.pt')  # You can use 'yolov8.pt' or another YOLO version

# Input

In [4]:
import pandas as pd
train_df = pd.read_csv('./NYC_500.csv')

### Load amenities from certain region

In [5]:
import osmnx as ox

# Define the bounding box or location
place_name = "New York City, USA"  # Example area
tags = {'amenity': ['post_box', 'bicycle_parking', 'bench', 'fast_food', 'waste_basket']}  # Fetch all amenities

# Fetch amenities in the defined area
gdf = ox.features_from_place(place_name, tags)

# Extract latitude and longitude from the geometries
gdf['latitude'] = gdf.geometry.centroid.y
gdf['longitude'] = gdf.geometry.centroid.x

# Filter for relevant columns
amenities_lat_lon_df = gdf[['amenity', 'latitude', 'longitude']]

# Display the DataFrame
print(amenities_lat_lon_df)

  gdf.loc[:, "geometry"] = gdf["geometry"].make_valid()

  gdf['latitude'] = gdf.geometry.centroid.y

  gdf['longitude'] = gdf.geometry.centroid.x


                            amenity   latitude  longitude
element id                                               
node    42538083              bench  40.673352 -73.970702
        349323821         fast_food  40.762515 -73.976690
        357588583   bicycle_parking  40.661771 -73.992858
        357618584          post_box  40.682496 -73.962688
        357618608          post_box  40.643833 -73.979460
...                             ...        ...        ...
way     1319909118            bench  40.720378 -74.010841
        1319909119            bench  40.720343 -74.010985
        1319909120            bench  40.720334 -74.010894
        1319909121            bench  40.720327 -74.010823
        1340286561        fast_food  40.707104 -73.954532

[35912 rows x 3 columns]


In [None]:
# import pandas as pd
# from geopy.distance import geodesic

# # Initialize an empty list to store the results
# result_list = []

# # Iterate over the rows of the train_df DataFrame
# for _, row in train_df.iterrows():
#     picture_coords = (row['lat'], row['lon'])
    
#     # Filter amenities within 30 meters radius
#     amenities_in_radius = amenities_lat_lon_df[
#         amenities_lat_lon_df.apply(
#             lambda x: geodesic(picture_coords, (x['latitude'], x['longitude'])).meters <= 30,
#             axis=1
#         )
#     ]
    
#     # Create a list of amenities with their details
#     amenities_list = [
#         {
#             'amenity': amenity_row['amenity'],
#             'latitude': amenity_row['latitude'],
#             'longitude': amenity_row['longitude']
#         }
#         for _, amenity_row in amenities_in_radius.iterrows()
#     ]
    
#     # Append the results to the result_list
#     result_list.append({
#         'id': row['bubbleId'],
#         'latitude': row['lat'],
#         'longitude': row['lon'],
#         'amenities': amenities_list
#     })

# # Create a new DataFrame from the results
# final_df = pd.DataFrame(result_list)

# # Display the resulting DataFrame
# print(final_df)


KeyboardInterrupt: 

In [None]:
import pandas as pd
from geopy.distance import geodesic

# Initialize an empty list to store the results
result_list = []

# Iterate over the rows of the train_df DataFrame
for _, row in train_df.iterrows():
    picture_coords = (row['lat'], row['lon'])
    
    # Filter amenities within 30 meters radius
    amenities_in_radius = amenities_lat_lon_df[
        amenities_lat_lon_df.apply(
            lambda x: geodesic(picture_coords, (x['latitude'], x['longitude'])).meters <= 30,
            axis=1
        )
    ]
    
    # Create a list of amenities with their details
    amenities_list = [
        {
            'amenity': amenity_row['amenity'],
            'latitude': amenity_row['latitude'],
            'longitude': amenity_row['longitude']
        }
        for _, amenity_row in amenities_in_radius.iterrows()
    ]
    
    # Append the result only if amenities are present
    if amenities_list:
        result_list.append({
            'id': row['bubbleId'],
            'latitude': row['lat'],
            'longitude': row['lon'],
            'amenities': amenities_list
        })

# Create a new DataFrame from the results
final_df = pd.DataFrame(result_list)

# Display the resulting DataFrame
print(final_df)



KeyboardInterrupt: 

## Amenities with distances :

In [20]:
from geopy.distance import geodesic
import pandas as pd

# Initialize a list to store the results
distance_results = []

# Iterate over each row in the DataFrame
for _, row in final_df.iterrows():
    # Get the ID point coordinates
    id_coords = (row['latitude'], row['longitude'])
    
    # Prepare a list to store amenities with distances
    amenities_with_distances = []
    
    # Iterate over the amenities for this ID
    for amenity in row['amenities']:
        amenity_coords = (amenity['latitude'], amenity['longitude'])
        distance = geodesic(id_coords, amenity_coords).meters  # Calculate distance in meters
        
        # Add distance information to the amenity dictionary
        amenities_with_distances.append({
            'amenity': amenity['amenity'],
            'latitude': amenity['latitude'],
            'longitude': amenity['longitude'],
            'distance_from_id': distance  # Add distance value
        })
    
    # Append the updated row to the results
    distance_results.append({
        'latitude': row['latitude'],
        'longitude': row['longitude'],
        'id': row['id'],
        'amenities': amenities_with_distances
    })

# Create a new DataFrame with distances
final_with_distances_df = pd.DataFrame(distance_results)

# Display the updated DataFrame
print(final_with_distances_df)


      latitude  longitude                id  \
0    40.777104 -73.963860  1033310130003330   
1    40.763575 -73.977703  1033303222012112   
2    40.709206 -74.017376   210002012111211   
3    40.736916 -74.001339  1101202230002123   
4    40.747358 -74.005428   210001333221122   
..         ...        ...               ...   
149  40.746594 -73.982245  1033312220332010   
150  40.761672 -73.976693   210003303023110   
151  40.758504 -73.970898   210003301111123   
152  40.707779 -74.007839  1101202312120133   
153  40.711888 -74.003873  1101123003022331   

                                             amenities  
0    [{'amenity': 'waste_basket', 'latitude': 40.77...  
1    [{'amenity': 'fast_food', 'latitude': 40.76360...  
2    [{'amenity': 'bench', 'latitude': 40.7093396, ...  
3    [{'amenity': 'waste_basket', 'latitude': 40.73...  
4    [{'amenity': 'bench', 'latitude': 40.7473199, ...  
..                                                 ...  
149  [{'amenity': 'bicycle_parking',

In [22]:
final_with_distances_df.to_csv("Distances_etc.csv", index=False)

# Model

In [None]:
from ultralytics import YOLO
import pandas as pd
import os

# Load the YOLO model
model = YOLO('yolov8s-worldv2.pt')  # Use the specialized model

# Paths to folders
folders = ['front', 'left', 'back', 'right']
base_folder = "./NYC_500/"  # Adjust this to your folder structure
folder_paths = {f: os.path.join(base_folder, f) for f in folders}

# Target amenities
target_amenities = {'bench', 'bicycle', 'parking_meter', 'trash_bin', 'fast_food'}

# Results storage
detection_results = []

# Iterate over IDs in the dataset
for _, row in final_with_distances_df.iterrows():
    image_id = row['id']
    
    # Iterate over the four folders
    for folder, folder_path in folder_paths.items():
        image_path = os.path.join(folder_path, f"{image_id}.jpg")  # Path to the image
        
        # Check if the image exists
        if os.path.exists(image_path):
            # Run YOLO inference
            results = model(image_path)
            
            # Process detection results
            for result in results:
                for box in result.boxes.data.tolist():
                    class_id = int(box[5])  # Class ID
                    label = model.names[class_id]  # Get label from class ID
                    
                    if label in target_amenities:
                        detection_results.append({
                            'id': image_id,
                            'folder': folder,  # Indicate the source folder
                            'label': label,
                            'confidence': box[4],  # Confidence score
                            'x_min': box[0],
                            'y_min': box[1],
                            'x_max': box[2],
                            'y_max': box[3]
                        })

# Convert results to a DataFrame
detection_df = pd.DataFrame(detection_results)

# Save or display the results
print(detection_df)
# detection_df.to_csv("detection_results_by_folder.csv", index=False)

In [10]:
from ultralytics import YOLO
import pandas as pd
import os
import json

# Load the YOLO model
model = YOLO('yolov8s-worldv2.pt')  # Use the specialized model

#Load Dataset
final_with_distances_df = pd.read_csv("./Distances_etc.csv")

# Paths to folders
folders = ['front', 'left', 'back', 'right']
base_folder = "./NYC_500/"  # Adjust this to your folder structure
folder_paths = {f: os.path.join(base_folder, f) for f in folders}

# Define suffixes for each folder
suffixes = {
    'front': '01_x2.jpg',
    'left': '10_x2.jpg',
    'back': '03_x2.jpg',
    'right': '02_x2.jpg'
}

# # Target amenities
target_amenities = {'bench', 'bicycle_parking', 'post_box', 'waste_basket', 'fast_food', 'bicycle'}

# Results storage
detection_results = []

# Iterate over IDs in the dataset
for _, row in final_with_distances_df.iterrows():
    image_id = row['id']
    
    # Iterate over the four folders
    for folder, suffix in suffixes.items():
        # Construct the image filename with suffix
        image_filename = f"{image_id}{suffix}"
        image_path = os.path.join(folder_paths[folder], image_filename)  # Path to the image
        
        # Check if the image exists
        if os.path.exists(image_path):
            # Run YOLO inference
            results = model(image_path)
            
            # Process detection results
            for result in results:
                for box in result.boxes.data.tolist():
                    class_id = int(box[5])  # Class ID
                    label = model.names[class_id]  # Get label from class ID
                    
                    if label in target_amenities:
                        detection_results.append({
                            'id': image_id,
                            'folder': folder,  # Indicate the source folder
                            'label': label,
                            'confidence': box[4],  # Confidence score
                            'x_min': box[0],
                            'y_min': box[1],
                            'x_max': box[2],
                            'y_max': box[3]
                        })

# Convert results to a DataFrame
detection_df = pd.DataFrame(detection_results)

# Save or display the results
print(detection_df)
detection_df.to_csv("detection_results_by_folder.csv", index=False)



image 1/1 e:\Data5M\NYC_500\front\103331013000333001_x2.jpg: 640x640 1 car, 1 toilet, 26.0ms
Speed: 5.0ms preprocess, 26.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 e:\Data5M\NYC_500\left\103331013000333010_x2.jpg: 640x640 2 buss, 27.0ms
Speed: 4.0ms preprocess, 27.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 e:\Data5M\NYC_500\back\103331013000333003_x2.jpg: 640x640 4 cars, 1 bus, 1 truck, 1 traffic light, 28.0ms
Speed: 4.0ms preprocess, 28.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 e:\Data5M\NYC_500\right\103331013000333002_x2.jpg: 640x640 3 persons, 1 car, 1 bus, 1 traffic light, 23.0ms
Speed: 4.0ms preprocess, 23.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 e:\Data5M\NYC_500\front\103330322201211201_x2.jpg: 640x640 2 cars, 1 toilet, 14.0ms
Speed: 4.0ms preprocess, 14.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

image

# Inference