# Preparing the Data Before Object Detection

## Setup Data Pre-processing


In [1]:
from PIL import Image
import matplotlib.pyplot as plt
import os
import pandas as pd
import osmnx as ox
from shapely.geometry import Point
import math

In [2]:
train_df_1 = pd.read_csv('./G340890_50m_panoid(2).csv')
train_df_1

Unnamed: 0,panoid,lat,lon,var1,heading,var3,var4,year,month
0,004g0nBjiAoWzLLLCT252A,40.862831,-73.907227,33.506706,271.509120,88.789510,1.265002,2022,11
1,004WOqNXaDG1GWxw1aq0kA,40.880501,-73.900636,7.838017,24.733295,90.740005,0.193049,2019,8
2,005FEssqWDoX8qyMsk-7ig,40.854582,-73.854778,30.942800,149.051450,88.802890,1.954567,2015,10
3,005Gu_dGcXTN_PuTJgNAuw,40.899717,-73.843935,36.128685,71.163790,94.162575,359.729920,2018,9
4,005NcqQBhcNxQxN57-zbjQ,40.778504,-73.925649,47.360750,329.386470,91.243830,359.905270,2011,9
...,...,...,...,...,...,...,...,...,...
341848,ZzZOHUzJ5GU8QpQXNpMElA,40.814607,-73.926367,3.481557,299.488500,89.482290,0.835001,2022,10
341849,ZZzsNFVbkbO66_wiCP4dcg,40.817899,-73.916987,12.972485,200.212940,91.807170,1.023868,2017,10
341850,zZzsPdZ9PjRdLSjGvnxYNA,40.850479,-73.883429,25.637087,110.750470,89.122990,357.758760,2017,10
341851,zZZT2DAeoaAGtijlNHq2nQ,40.831027,-73.832043,5.560051,230.695860,90.181076,358.741300,2021,9


## Sample Data

In [3]:
train_df = train_df_1.iloc[:1000].copy()
train_df = train_df.drop(columns = ["var1","var3","var4", "year", "month"])
train_df

Unnamed: 0,panoid,lat,lon,heading
0,004g0nBjiAoWzLLLCT252A,40.862831,-73.907227,271.509120
1,004WOqNXaDG1GWxw1aq0kA,40.880501,-73.900636,24.733295
2,005FEssqWDoX8qyMsk-7ig,40.854582,-73.854778,149.051450
3,005Gu_dGcXTN_PuTJgNAuw,40.899717,-73.843935,71.163790
4,005NcqQBhcNxQxN57-zbjQ,40.778504,-73.925649,329.386470
...,...,...,...,...
995,0AREivS7d4-CKwuGVhhr_A,40.841958,-73.845833,132.483170
996,0Ar-HhiEu76GcBuViO-CNw,40.822442,-73.889417,359.154750
997,0ariJjj0hpbGPh2ita081A,40.829600,-73.915229,23.549890
998,0aRlhbDTDXjTlmhYCoVFWA,40.815147,-73.849237,345.883060


## Fetching OSM Data for Traffic Singals and Stop Signs

In [4]:
# Define OSM tags for traffic signals and stop signs
tags = {'highway': ['traffic_signals', 'stop']}

# Fetch OSM features around the given place
place_name = "New York City, USA"
osm_data = ox.features_from_place(place_name, tags)

# Extract latitude, longitude, and amenity type from the OSM data
osm_data['latitude'] = osm_data.geometry.centroid.y
osm_data['longitude'] = osm_data.geometry.centroid.x
osm_data = osm_data[['highway', 'latitude', 'longitude']]
osm_data.head()


  osm_data['latitude'] = osm_data.geometry.centroid.y

  osm_data['longitude'] = osm_data.geometry.centroid.x


Unnamed: 0_level_0,Unnamed: 1_level_0,highway,latitude,longitude
element,id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
node,42421728,traffic_signals,40.798048,-73.960044
node,42421731,traffic_signals,40.798654,-73.961474
node,42421737,traffic_signals,40.799244,-73.962873
node,42421741,traffic_signals,40.800429,-73.965691
node,42421745,traffic_signals,40.801398,-73.967996


## Defining Utility Functions to calculate Distances, Bearing and Relative Directions

### Distances between two points : 

In [5]:
# Haversine Distance calculation
def haversine(lat1, lon1, lat2, lon2):
    R = 6371e3  # Radius of Earth in meters
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    d_phi = math.radians(lat2 - lat1)
    d_lambda = math.radians(lon2 - lon1)
    a = math.sin(d_phi / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(d_lambda / 2) ** 2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return R * c

In [6]:
# GeoDesic distance calculation
from geopy.distance import geodesic

def calculate_geodesic_distance(lat1, lon1, lat2, lon2):
    """
    Calculate geodesic distance between two points (lat1, lon1) and (lat2, lon2).
    
    Args:
        lat1 (float): Latitude of the first point.
        lon1 (float): Longitude of the first point.
        lat2 (float): Latitude of the second point.
        lon2 (float): Longitude of the second point.
        
    Returns:
        float: Distance in meters between the two points.
    """
    point1 = (lat1, lon1)
    point2 = (lat2, lon2)
    return geodesic(point1, point2).meters

### Bearing : Computes the bearing (in degrees) between the reference point and each amenity using the longitude and latitude differences.

In [7]:
def calculate_bearing(lat1, lon1, lat2, lon2):
    d_lon = math.radians(lon2 - lon1)
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    y = math.sin(d_lon) * math.cos(phi2)
    x = math.cos(phi1) * math.sin(phi2) - math.sin(phi1) * math.cos(phi2) * math.cos(d_lon)
    bearing = math.degrees(math.atan2(y, x))
    return (bearing + 360) % 360

### Relative Direction based on Bearing, Heading and Camera FOV(60)

In [8]:
def relative_direction(bearing, heading, fov=60):
    lower_bound = (heading - fov / 2) % 360
    upper_bound = (heading + fov / 2) % 360

    if lower_bound <= bearing <= upper_bound:
        return 'front'
    elif (lower_bound - 180) % 360 <= bearing <= (upper_bound - 180) % 360:
        return 'back'
    elif bearing < lower_bound or bearing > upper_bound:
        return 'left'
    else:
        return 'right'

## Processing DataFrame to get Ground Truth

In [9]:
# ground_truths = []

# for idx, row in train_df.iterrows():
#     lat, lon, heading = row['lat'], row['lon'], row['heading']
    
#     nearby_amenities = []
#     for _, amenity in osm_data.iterrows():
#         amenity_lat, amenity_lon = amenity['latitude'], amenity['longitude']
#         distance = haversine(lat, lon, amenity_lat, amenity_lon)
        
#         if distance <= 30:  # Within 30 meters
#             bearing = calculate_bearing(lat, lon, amenity_lat, amenity_lon)
#             direction = relative_direction(bearing, heading)
#             nearby_amenities.append({
#                 'amenity': amenity['highway'],
#                 'latitude': amenity_lat,
#                 'longitude': amenity_lon,
#                 'distance': distance,
#                 'direction': direction
#             })
    
#     # Add to ground truth column
#     ground_truths.append(nearby_amenities)

# # Append ground truth to the original DataFrame
# train_df['ground_truth'] = ground_truths

In [12]:
# Using GeoDesic
ground_truths = []
for idx, row in train_df.iterrows():
    lat, lon, heading = row['lat'], row['lon'], row['heading']
    
    nearby_amenities = []
    for _, amenity in osm_data.iterrows():
        amenity_lat, amenity_lon = amenity['latitude'], amenity['longitude']
        distance = calculate_geodesic_distance(lat, lon, amenity_lat, amenity_lon)
        
        if distance <= 30:  # Within 30 meters
            bearing = calculate_bearing(lat, lon, amenity_lat, amenity_lon)  # Replace this with your existing bearing function
            direction = relative_direction(bearing, heading)  # Replace with your relative direction function
            nearby_amenities.append({
                'amenity': amenity['highway'],
                'latitude': amenity_lat,
                'longitude': amenity_lon,
                'distance': distance,
                'direction': direction
            })
    
    # Add to ground truth column
    ground_truths.append(nearby_amenities)

# Append ground truth to the original DataFrame
train_df['ground_truth'] = ground_truths

In [19]:
train_df.head(25)

Unnamed: 0,panoid,lat,lon,heading,ground_truth
0,004g0nBjiAoWzLLLCT252A,40.862831,-73.907227,271.50912,[]
1,004WOqNXaDG1GWxw1aq0kA,40.880501,-73.900636,24.733295,"[{'amenity': 'traffic_signals', 'latitude': 40..."
2,005FEssqWDoX8qyMsk-7ig,40.854582,-73.854778,149.05145,[]
3,005Gu_dGcXTN_PuTJgNAuw,40.899717,-73.843935,71.16379,[]
4,005NcqQBhcNxQxN57-zbjQ,40.778504,-73.925649,329.38647,[]
5,006Cxar1gyEQl27o7DV3yg,40.874148,-73.837941,338.3539,[]
6,006LADBKM1-MyIpjTaAUzQ,40.815919,-73.910161,90.40368,[]
7,007hvZzBhadNByfcR2jcrQ,40.871807,-73.888174,43.721085,[]
8,007K3-zfhsN3--DhrHfvWA,40.887121,-73.906492,16.769444,[]
9,009hzrAM3YZGq6wAXaNSsg,40.876852,-73.834052,8.579955,[]


In [20]:
# Specify the output file path
output_file = "train_with_ground_truth.csv"

# Save the DataFrame to a CSV file
# Ensure `index=False` to avoid saving the DataFrame index as a column
train_df.to_csv(output_file, index=False)

print(f"DataFrame saved to {output_file}")

DataFrame saved to train_with_ground_truth.csv


## Cleaning the Final Dataset

In [6]:
clean_df = pd.read_csv("./train_with_ground_truth.csv")
clean_df = clean_df[clean_df['ground_truth'].notna() & (clean_df['ground_truth'] != '') & (clean_df['ground_truth'].str.strip() != '')]
print(clean_df)

                     panoid        lat        lon     heading  \
0    004g0nBjiAoWzLLLCT252A  40.862831 -73.907227  271.509120   
1    004WOqNXaDG1GWxw1aq0kA  40.880501 -73.900636   24.733295   
2    005FEssqWDoX8qyMsk-7ig  40.854582 -73.854778  149.051450   
3    005Gu_dGcXTN_PuTJgNAuw  40.899717 -73.843935   71.163790   
4    005NcqQBhcNxQxN57-zbjQ  40.778504 -73.925649  329.386470   
..                      ...        ...        ...         ...   
995  0AREivS7d4-CKwuGVhhr_A  40.841958 -73.845833  132.483170   
996  0Ar-HhiEu76GcBuViO-CNw  40.822442 -73.889417  359.154750   
997  0ariJjj0hpbGPh2ita081A  40.829600 -73.915229   23.549890   
998  0aRlhbDTDXjTlmhYCoVFWA  40.815147 -73.849237  345.883060   
999  0aRoIlL3Q6kchtU-ONbk6A  40.860558 -73.914252   45.375774   

                                          ground_truth  
0                                                   []  
1    [{'amenity': 'traffic_signals', 'latitude': 40...  
2                                               

In [7]:
type(clean_df['ground_truth'])

pandas.core.series.Series

In [8]:
print(clean_df['ground_truth'].iloc[0])


[]


In [12]:
clean_df = clean_df[clean_df['ground_truth'] != '[]'].reset_index(drop=True)

In [13]:
clean_df

Unnamed: 0,panoid,lat,lon,heading,ground_truth
0,004WOqNXaDG1GWxw1aq0kA,40.880501,-73.900636,24.733295,"[{'amenity': 'traffic_signals', 'latitude': 40..."
1,00AH1hMHVhIKYF7Dn52gvQ,40.844090,-73.922783,15.979967,"[{'amenity': 'traffic_signals', 'latitude': 40..."
2,00BG2n9PnEFethnMWWu3Zg,40.832972,-73.861525,113.119350,"[{'amenity': 'traffic_signals', 'latitude': 40..."
3,00Dg2wxRvswsNV3cPJ3Fyg,40.839885,-73.909212,24.931100,"[{'amenity': 'stop', 'latitude': 40.8398157, '..."
4,00dWray38mckOF8zp5GdrQ,40.851627,-73.895493,197.573760,"[{'amenity': 'traffic_signals', 'latitude': 40..."
...,...,...,...,...,...
317,0A_-MT5rfMdCLbFLD0z85g,40.865135,-73.840472,90.195915,"[{'amenity': 'stop', 'latitude': 40.8650355, '..."
318,0an0NmniwBdkt3pBWUdbzQ,40.875100,-73.912424,121.658270,"[{'amenity': 'stop', 'latitude': 40.8751267, '..."
319,0ANdDUMOB1wT7twXrr_idQ,40.823026,-73.904487,106.163610,"[{'amenity': 'traffic_signals', 'latitude': 40..."
320,0aq-gOlForlcOnJAwPf-yg,40.828769,-73.888058,359.614720,"[{'amenity': 'traffic_signals', 'latitude': 40..."


In [14]:
# Specify the output file path
output_file = "train_with_ground_truth.csv"

# Save the DataFrame to a CSV file
# Ensure `index=False` to avoid saving the DataFrame index as a column
clean_df.to_csv(output_file, index=False)

print(f"DataFrame saved to {output_file}")

DataFrame saved to train_with_ground_truth.csv
