In [87]:
"""
** Crop Extractor for Project Sidewalk **

Given label metadata from the Project Sidewalk database, this script will
extract JPEG crops of the features that have been labeled. The required metadata
may be obtained by running the SQL query in "samples/getFullLabelList.sql" on the
Sidewalk database, and exporting the results in CSV format. You must supply the
path to the CSV file containing this data below. You can find an example of what
this file should look like in "samples/labeldata.csv".

Additionally, you should have downloaded original panorama
images from Street View using DownloadRunner.py. You will need to supply the
path to the folder containing these files.

"""
import pandas as pd
import csv
import GSVImage
import fnmatch
import logging
from utilities import *
from PIL import Image, ImageDraw
from matplotlib.pyplot import imshow, figure, gcf, gca, show
import numpy as np

import matplotlib.image as mpimg
from tqdm.auto import tqdm


# Mark the center of the crop?
mark_center = True

logging.basicConfig(filename='crop.log', level=logging.DEBUG)

try:
    from xml.etree import cElementTree as ET
except ImportError as e:
    from xml.etree import ElementTree as ET

def add_mark_to_image(x, y, img_path):
    image = mpimg.imread(img_path)
    pts = np.array([[x,y]])

    plt.imshow(image)
    plt.scatter(pts[:, 0], pts[:, 1], marker="x", color="red", s=200)
    plt.show()
    
    
def predict_crop_size(sv_image_y):
    """
    # Calculate distance from point to image center
    dist_to_center = math.sqrt((x-im_width/2)**2 + (y-im_height/2)**2)
    # Calculate distance from point to center of left edge
    dist_to_left_edge = math.sqrt((x-0)**2 + (y-im_height/2)**2)
    # Calculate distance from point to center of right edge
    dist_to_right_edge = math.sqrt((x - im_width) ** 2 + (y - im_height/2) ** 2)

    min_dist = min([dist_to_center, dist_to_left_edge, dist_to_right_edge])

    crop_size = (4.0/15.0)*min_dist + 200

    print("Min dist was "+str(min_dist))
    """
    crop_size = 0
    distance = max(0, 19.80546390 + 0.01523952 * sv_image_y)

    if distance > 0:
        crop_size = 8725.6 * (distance ** -1.192)
    if crop_size > 1500 or distance == 0:
        crop_size = 1500
    if crop_size < 50:
        crop_size = 50

    return crop_size

def make_single_crop(path_to_image, sv_image_x, sv_image_y, PanoYawDeg, output_filename, draw_mark=False):
    """
    Makes a crop around the object of interest
    :param path_to_image: where the GSV pano is stored
    :param sv_image_x: position
    :param sv_image_y: position
    :param PanoYawDeg: heading
    :param output_filename: name of file for saving
    :param draw_mark: if a dot should be drawn in the centre of the object/image
    :return: none
    """
    im = Image.open(path_to_image)
    draw = ImageDraw.Draw(im)

    im_width = im.size[0]
    im_height = im.size[1]
    print(im_width, im_height)

    predicted_crop_size = predict_crop_size(sv_image_y)
    crop_width = predicted_crop_size
    crop_height = predicted_crop_size

    print('x before scaling: ', sv_image_x)
    print('y before scaling: ', sv_image_y)
    # Work out scaling factor based on image dimensions
    scaling_factor = im_width / 13312
    sv_image_x *= scaling_factor
    sv_image_y *= scaling_factor

    print('x after scaling: ', sv_image_x)
    print('y after scaling: ', sv_image_y)
    
    
    x = ((float(PanoYawDeg) / 360) * im_width + sv_image_x) % im_width
    y = im_height / 2 - sv_image_y

    
    print('x final: ', x)
    print('y final: ', y)
    
    add_mark_to_image(x,y,path_to_image)
    print('get_mark_location: ', get_mark_location(x,y,im_width,im_height))
    
    r = 10
    if draw_mark:
        draw.ellipse((x - r, y - r, x + r, y + r), fill=128)

    print("Plotting at " + str(x) + "," + str(y) + " using yaw " + str(PanoYawDeg))

    print(x, y)
    top_left_x = x - crop_width / 2
    top_left_y = y - crop_height / 2
    cropped_square = im.crop((top_left_x, top_left_y, top_left_x + crop_width, top_left_y + crop_height))
    cropped_square.save(output_filename)

    return


def bulk_extract_crops(path_to_db_export, path_to_gsv_scrapes, destination_dir, mark_label=False):
    csv_file = open(path_to_db_export)
    csv_f = csv.reader(csv_file)
    counter = 0
    no_metadata_fail = 0
    no_pano_fail = 0
    lables = []
    for row in csv_f:
        if counter == 0:
            counter += 1
            continue

        pano_id = row[0]
        sv_image_x = float(row[1])
        sv_image_y = float(row[2])
        label_type = int(row[3])
        photographer_heading = float(row[4])
        heading = float(row[5])
        label_id = int(row[7])

        pano_img_path = path_to_gsv_scrapes+'pano_'+pano_id + '.jpg'
        print('pano_img_path is ',pano_img_path)
        print("Photographer heading is " + str(photographer_heading))
        print("Viewer heading is " + str(heading))
        pano_yaw_deg = 180 - photographer_heading

        print("Yaw:" + str(pano_yaw_deg))

        # Extract the crop
        if os.path.exists(pano_img_path):
            counter += 1
            destination_folder = os.path.join(destination_dir, str(label_type))
            if not os.path.isdir(destination_folder):
                os.makedirs(destination_folder)

            crop_destination = destination_dir+ str(label_type)+ str(label_id) + ".jpg"

            make_single_crop(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg, crop_destination, draw_mark=mark_label)
            print("Successfully extracted crop to " + str(label_id) + ".jpg")
            logging.info(str(label_id) + ".jpg" + " " + pano_id + " " + str(sv_image_x)
                         + " " + str(sv_image_y) + " " + str(pano_yaw_deg) + " " + str(label_id))
            logging.info("---------------------------------------------------")
        else:
            no_pano_fail += 1
            print("Panorama image not found.")
            logging.warning("Skipped label id " + str(label_id) + " due to missing image.")

    print("Finished.")
    print(str(no_pano_fail) + " extractions failed because panorama image was not found.")
    print(str(no_metadata_fail) + " extractions failed because metadata was not found.")

def get_x_y_coord(path_to_image, sv_image_x, sv_image_y, PanoYawDeg):
    
    im = Image.open(path_to_image)
    draw = ImageDraw.Draw(im)

    im_width = im.size[0]
    im_height = im.size[1]
    
    predicted_crop_size = predict_crop_size(sv_image_y)
    crop_width = predicted_crop_size
    crop_height = predicted_crop_size

    # Work out scaling factor based on image dimensions
    scaling_factor = im_width / 13312
    sv_image_x *= scaling_factor
    sv_image_y *= scaling_factor

    x = ((float(PanoYawDeg) / 360) * im_width + sv_image_x) % im_width
    y = im_height / 2 - sv_image_y

    
    
    return get_mark_location(x,y,im_width,im_height)
    


def get_mark_location(x,y, img_w, img_h):
    diagonal1_coord_x1 = 0
    diagonal1_coord_y1 = 0
    diagonal1_coord_x2 = img_w
    diagonal1_coord_y2 = img_h
    
    
    diagonal2_coord_x1 = img_w
    diagonal2_coord_y1 = 0
    diagonal2_coord_x2 = 0
    diagonal2_coord_y2 = img_h
    
    
    diagonal1_slop = (float)(diagonal1_coord_y2-diagonal1_coord_y1)/(diagonal1_coord_x2-diagonal1_coord_x1)
    diagonal2_slop = (float)(diagonal2_coord_y2-diagonal2_coord_y1)/(diagonal2_coord_x2-diagonal2_coord_x1)
    
    point_slop1 = (float)(y-diagonal1_coord_y1)/(x-diagonal1_coord_x1)
    point_slop2 = (float)(y-diagonal2_coord_y1)/(x-diagonal2_coord_x1)
    
    above_d1 = False
    above_d2 = False
    
    if(diagonal1_slop > point_slop1):
        above_d1 = True
    
    if(diagonal2_slop > point_slop2):
        above_d2 = True
    
    location = 'Q'
    
    if(above_d1 and above_d2):
        location ='q1'
    
    elif(above_d1 and not above_d2):
        location ='q2'
    
    elif(not above_d1 and not above_d2):
        location ='q3'
    
    else:
        location = 'q4'
    
    return location

    
# *****************************************
# Update paths below                      *
# *****************************************

# Path to CSV data from database - Place in 'metadata'
pano_list_path = './pano_list_path.csv'
csv_export_path = "./github/sidewalk-cv-assets19/new_old_dataset_csvs/Train.csv"
# Path to panoramas downloaded using DownloadRunner.py. Reference correct directory
gsv_pano_path = "./test/"
# Path to location for saving the crops
destination_path = "./crops/"


pano_list_df = pd.read_csv(pano_list_path)
csv_f = pd.read_csv(csv_export_path)
counter = 0
no_metadata_fail = 0
no_pano_fail = 0
labels = []

for pano_id in tqdm(pano_list_df['Pano_ID']):
    df = csv_f.loc[csv_f['Pano_ID'] == pano_id]
    curb_ramps_right = '0' 
    curb_ramps_left = '0'
    curb_ramps_front = '0'
    curb_ramps_back = '0'
    
    missing_curb_ramp_right = '0'
    missing_curb_ramps_left = '0'
    missing_curb_ramp_front = '0'
    missing_curb_ramps_back = '0'
    
    
    obstructions_right = '0'
    obstructions_left = '0'
    obstructions_front = '0'
    obstructions_back = '0'
    
    surface_problems_right = '0'
    surface_problems_left = '0'
    surface_problems_front = '0'
    surface_problems_back = '0'
    

    if os.path.exists('G:/My Drive/ICS504/github/ICS-504/download_images/'+pano_id+'.jpg'):
        for i,row in df.iterrows():
            
            sv_image_x = float(row[1])
            sv_image_y = float(row[2])
            label_type = int(row[3])
            photographer_heading = float(row[4])
            heading = float(row[5])

            pano_yaw_deg = 180 - photographer_heading

            location = get_x_y_coord(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg)

            if (label_type == 1 and location == 'q1'):
                curb_ramps_front = '1'
            elif (label_type == 1 and location == 'q2'):
                curb_ramps_right = '1'
            elif (label_type == 1 and location == 'q3'):
                curb_ramps_bottom = '1'
            elif (label_type == 1 and location == 'q4'):
                curb_ramps_left = '1'

            elif (label_type == 2 and location == 'q1'):
                missing_curb_ramp_front = '1'
            elif (label_type == 2 and location == 'q2'):
                missing_curb_ramp_right = '1'
            elif (label_type == 2 and location == 'q3'):
                missing_curb_ramp_bottom = '1'
            elif (label_type == 2 and location == 'q4'):
                missing_curb_ramp_left = '1'

            elif (label_type == 3 and location == 'q1'):
                obstructions_front = '1'
            elif (label_type == 3 and location == 'q2'):
                obstructions_right = '1'
            elif (label_type == 3 and location == 'q3'):
                obstructions_bottom = '1'
            elif (label_type == 3 and location == 'q4'):
                obstructions_left = '1'

            elif (label_type == 4 and location == 'q1'):
                surface_problems_front = '1'
            elif (label_type == 4 and location == 'q2'):
                surface_problems_right = '1'
            elif (label_type == 4 and location == 'q3'):
                surface_problems_bottom = '1'
            elif (label_type == 4 and location == 'q4'):
                surface_problems_left = '1'

        label = curb_ramps_right+curb_ramps_left+curb_ramps_front+curb_ramps_back+missing_curb_ramp_right+missing_curb_ramps_left+missing_curb_ramp_front+missing_curb_ramps_back+obstructions_right+obstructions_left+obstructions_front+obstructions_back+surface_problems_right+surface_problems_left+surface_problems_front+surface_problems_back    
    else:
        label = 'no image'
        
    labels.append(label)    
# bulk_extract_crops('./Test.csv', gsv_pano_path, destination_path, mark_label=False)
# crop_box_helper(gsv_pano_path, csv_export_path) # not tested and validated with new code

pano_list_df['label'] = labels


  0%|          | 0/6885 [00:00<?, ?it/s]

In [88]:
pano_list_df.head
pano_list_df.to_csv('./pano_list_path3.csv')

In [97]:
import shutil
print(pano_list_df.label.unique())
for i,row in tqdm(pano_list_df.iterrows(), total=pano_list_df.shape[0]):
    old_file = row['path']
    new_file = 'G:/My Drive/ICS504/github/ICS-504/download_images/'+row['label']+'/'+row['Pano_ID']+'.jpg'
    if not os.path.isdir('G:/My Drive/ICS504/github/ICS-504/download_images/'+row['label']):
        print('creating dir: ',row['label'])
        os.makedirs('G:/My Drive/ICS504/github/ICS-504/download_images/'+row['label'])
    
    if os.path.exists(old_file):
        try:
            shutil.move(old_file, new_file)
        except:
            print("Something went wrong: ", row['Pano_ID'])

['0010000000000000' '0000000000000010' '0000000001100000'
 '0000000001000000' '0000001000000000' '0000000000000000'
 '0110000000000000' '0100000000000000' '0100001000000000'
 '0100000001100000' '0000000000100000' '0000001000100000'
 '0000001000000010' '0010001000000000' '0010000000000010'
 '0110000000000010' '0000000001100010' '0100000001000000'
 '0010000000100000' '0000000000100010' '0110000000100010'
 '0010000001000000' '0000000000100100' '0110001000000000'
 '0110000001000010' '0000000000000100' '0100000000000100'
 '0110000000100000' '0010001001000100' '0010000001000100'
 '0100000001000100' '0110001000100010' '0010000000000100'
 '0010001000100000' '0100000000100000' '0010000001100000'
 '0010001000000010' '0100000000000010' '0110000000000100'
 '0000000001000100' '0110000001100000' '0010001001000000'
 '0110000001000000' '0110001000100000' '0000000000000110'
 '0110001000000110' '0100001000000010' '0100001000100000'
 '0000001000000100' '0100000000100010' '0010000000100010'
 '001000100010

  0%|          | 0/6885 [00:00<?, ?it/s]

Something went wrong:  P2WxMntfQNLqOLx-mhfS4g
creating dir:  0100001000000000
creating dir:  0100000001100000
creating dir:  0000000000100000
creating dir:  0000001000100000
creating dir:  0000001000000010
creating dir:  0010001000000000
creating dir:  0010000000000010
creating dir:  0110000000000010
creating dir:  0000000001100010
creating dir:  0100000001000000
creating dir:  0010000000100000
creating dir:  0000000000100010
creating dir:  0110000000100010
creating dir:  0010000001000000
creating dir:  0000000000100100
creating dir:  0110001000000000
creating dir:  0110000001000010
creating dir:  0000000000000100
creating dir:  0100000000000100
creating dir:  0110000000100000
creating dir:  0010001001000100
creating dir:  0010000001000100
creating dir:  0100000001000100
creating dir:  0110001000100010
creating dir:  0010000000000100
creating dir:  0010001000100000
creating dir:  0100000000100000
creating dir:  0010000001100000
creating dir:  0010001000000010
creating dir:  01000000000

In [103]:
# Path to CSV data from database - Place in 'metadata'
pano_list_path = 'G:/My Drive/ICS504/github/ICS-504/download_images/pano_list.csv'
csv_export_path = "./github/sidewalk-cv-assets19/new_old_dataset_csvs/Val.csv"
# Path to panoramas downloaded using DownloadRunner.py. Reference correct directory
gsv_pano_path = "./test/"
# Path to location for saving the crops
destination_path = "./crops/"


pano_list_df = pd.read_csv(pano_list_path)
csv_f = pd.read_csv(csv_export_path)
counter = 0
no_metadata_fail = 0
no_pano_fail = 0
labels = []

for pano_id in tqdm(pano_list_df['Pano_ID']):
    df = csv_f.loc[csv_f['Pano_ID'] == pano_id]
    curb_ramps_right = '0' 
    curb_ramps_left = '0'
    curb_ramps_front = '0'
    curb_ramps_back = '0'
    
    missing_curb_ramp_right = '0'
    missing_curb_ramps_left = '0'
    missing_curb_ramp_front = '0'
    missing_curb_ramps_back = '0'
    
    
    obstructions_right = '0'
    obstructions_left = '0'
    obstructions_front = '0'
    obstructions_back = '0'
    
    surface_problems_right = '0'
    surface_problems_left = '0'
    surface_problems_front = '0'
    surface_problems_back = '0'
    

    if os.path.exists('G:/My Drive/ICS504/github/ICS-504/download_images/0000000000000000/'+pano_id+'.jpg'):
        for i,row in df.iterrows():
            
            sv_image_x = float(row[1])
            sv_image_y = float(row[2])
            label_type = int(row[3])
            photographer_heading = float(row[4])
            heading = float(row[5])

            pano_yaw_deg = 180 - photographer_heading

            location = get_x_y_coord(pano_img_path, sv_image_x, sv_image_y, pano_yaw_deg)

            if (label_type == 1 and location == 'q1'):
                curb_ramps_front = '1'
            elif (label_type == 1 and location == 'q2'):
                curb_ramps_right = '1'
            elif (label_type == 1 and location == 'q3'):
                curb_ramps_bottom = '1'
            elif (label_type == 1 and location == 'q4'):
                curb_ramps_left = '1'

            elif (label_type == 2 and location == 'q1'):
                missing_curb_ramp_front = '1'
            elif (label_type == 2 and location == 'q2'):
                missing_curb_ramp_right = '1'
            elif (label_type == 2 and location == 'q3'):
                missing_curb_ramp_bottom = '1'
            elif (label_type == 2 and location == 'q4'):
                missing_curb_ramp_left = '1'

            elif (label_type == 3 and location == 'q1'):
                obstructions_front = '1'
            elif (label_type == 3 and location == 'q2'):
                obstructions_right = '1'
            elif (label_type == 3 and location == 'q3'):
                obstructions_bottom = '1'
            elif (label_type == 3 and location == 'q4'):
                obstructions_left = '1'

            elif (label_type == 4 and location == 'q1'):
                surface_problems_front = '1'
            elif (label_type == 4 and location == 'q2'):
                surface_problems_right = '1'
            elif (label_type == 4 and location == 'q3'):
                surface_problems_bottom = '1'
            elif (label_type == 4 and location == 'q4'):
                surface_problems_left = '1'

        label = curb_ramps_right+curb_ramps_left+curb_ramps_front+curb_ramps_back+missing_curb_ramp_right+missing_curb_ramps_left+missing_curb_ramp_front+missing_curb_ramps_back+obstructions_right+obstructions_left+obstructions_front+obstructions_back+surface_problems_right+surface_problems_left+surface_problems_front+surface_problems_back    
    else:
        label = 'no image'
        
    labels.append(label)    
# bulk_extract_crops('./Test.csv', gsv_pano_path, destination_path, mark_label=False)
# crop_box_helper(gsv_pano_path, csv_export_path) # not tested and validated with new code

pano_list_df['label2'] = labels

  0%|          | 0/51597 [00:00<?, ?it/s]

In [104]:
pano_list_df.to_csv('./pano_list_path5.csv')

In [101]:
print(pano_list_df.label2.unique())

TypeError: 'Series' objects are mutable, thus they cannot be hashed