# Landmarks Data Preparation

This script finds distances between landmarks in landmarks_manual.csv and the cameras that saw them. It also labels each landmark with what pixel those landmarks would appear in within the preprocessed image fed into SmokeyNet. The purpose is to prepare data to be used for testing as tiles are used as inputs for location_estimator, not pixel coordinates. This should be run whenever a new landmark is added to landmarks_manual.csv.

## Import Packages

In [2]:
import pandas as pd
from haversine import haversine, Unit
import cv2

ModuleNotFoundError: No module named 'pandas'

## Find Landmark Distances

In [3]:
lm_df = pd.read_csv("../../../data/raw/landmarks_manual.csv")
cam_meta = pd.read_csv("../../../data/processed/camera_metadata_hpwren.csv")

lm_df = lm_df.merge(cam_meta[['camera_abbrev', 'direction', 'lat', 'long']], left_on=['camera_abbrev', 'direction'], right_on=['camera_abbrev', 'direction'], how='left')
lm_df = lm_df.drop_duplicates().reset_index(drop=True)

lm_df['distance'] = lm_df.apply(lambda x: haversine((x['lat_x'], x['long_y']), (x['lat_y'], x['long_y'])), axis=1)
lm_df = lm_df.rename(columns={"lat_x":"lat", "long_x":"long"})
del lm_df['lat_y']
del lm_df['long_y']

## Find Tile For Each Landmark

In [4]:
def find_tile(x_pix, y_pix, x_tiles = 9, y_tiles = 5, img_size = (3072, 2048), dsize = (1856, 1392), cropped_size = (1856, 1040), tile_size = (224, 224), overlap = 20):
    x_ratio = img_size[0]/dsize[0]
    y_ratio = img_size[1]/dsize[1]
    step_x = (tile_size[0]/2) * x_ratio
    step_y = (tile_size[1]/2) * y_ratio
    y_start = img_size[1] - (img_size[1] * cropped_size[1]/dsize[1])

    x_centers = [step_x + i*(step_x*2 - overlap*x_ratio) for i in range(x_tiles)]
    y_centers = [step_y + y_start + i*(step_y*2 - overlap*y_ratio) for i in range(y_tiles)]
    
    x_scores = [abs(x_cent - x_pix) for x_cent in x_centers]
    y_scores = [abs(y_cent - y_pix) for y_cent in y_centers]
    
    return (y_scores.index(min(y_scores))*(x_tiles)) + (x_scores.index(min(x_scores)))

In [5]:
lm_df['tile'] = lm_df.apply(lambda x: find_tile(x['x_pixel'], x['y_pixel'], img_size = (x['x_res'], x['y_res'])), axis=1)
lm_df.head()

Unnamed: 0,landmark,camera_abbrev,direction,lat,long,x_pixel,y_pixel,x_res,y_res,intersection,distance,tile
0,white building - southwest,om,south,32.593892,-116.845919,2900,1715,3072,2048,0,0.096758,35
1,white building - southwest,om,west,32.593892,-116.845919,423,1685,3072,2048,0,0.096758,28
2,white building - south,om,south,32.594298,-116.844774,1780,1930,3072,2048,0,0.051656,41
3,communication tower building - east,om,north,32.595487,-116.844141,2545,1725,3072,2048,0,0.08056,34
4,large bush,om,east,32.599053,-116.839254,318,1420,3072,2048,0,0.477133,18


## Write New Data Back Out to Dataset

In [7]:
lm_df.to_csv("../../../data/raw/landmarks_manual.csv", index=False)