# Step 4. Sample locations for analysis

In [None]:
import os
import datetime

import numpy as np
import scipy
import fiona
import statistics
import math

import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString, shape, mapping, Point, Polygon, MultiPolygon
from shapely.ops import cascaded_union, transform
import pyproj

import matplotlib.pyplot as plt
from matplotlib import colors, cm, style
import matplotlib.patches as mpatches
# from descartes import PolygonPatch

import osmnx as ox
import networkx as nx

import rasterio
from rasterio import MemoryFile
from rasterio.plot import show
from rasterio.mask import mask
import json

import contextily as cx
import folium
from folium.features import DivIcon

import random

In [None]:
from getpass import getpass

import requests
from requests import Request, Session

import hashlib
import hmac
import base64
import urllib.parse as urlparse

from datetime import date
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

## Define city and other settings

#### Place |Country    |CRS
Amsterdam  | Rotterdam | The Hague    (The Netherlands)    EPSG:28992

Madrid     | Barcelona | Valencia     (Spain)              EPSG:25830

Stockholm  | Goteborg  | Malmo        (Sweden)             EPSG:3006

In [None]:
# City to process; the per-city branches further down compare against this exact string.
place_name = 'Barcelona'
# Projected CRS the geocoded place boundary is reprojected to.
# NOTE(review): EPSG:25830 is ETRS89 / UTM zone 30N, whereas Barcelona lies in
# UTM zone 31N (EPSG:25831) — confirm that using one CRS per country is intended.
local_crs = 'EPSG:25830'

In [None]:
# Folder layout: <export_folder>/OSM/<Place>_15Mar2023/{enriched,sampled}
export_folder = 'data'
# folder name per city: drop anything after a comma and remove spaces from the place name
_place_folder = '{}_15Mar2023'.format(place_name.split(',')[0].replace(' ', ''))
export_osm_sub_folder = os.path.join('OSM', _place_folder, 'enriched')
export_sampled_sub_folder = os.path.join('OSM', _place_folder, 'sampled')

In [None]:
# Geocode the place name with OSMnx and reproject the resulting GeoDataFrame to the local CRS.
place = ox.geocode_to_gdf(place_name).to_crs(local_crs)

In [None]:
# Fixed seed for reproducibility: passed as random_state to the pandas sampling calls
# and used here to seed Python's `random` module (random point generation).
random_state = 42
random.seed(random_state)

In [None]:
# Street View settings; only defined when processing Barcelona.
if place_name == 'Barcelona':
    # EPSG:4326 (WGS84 lat/lng); presumably the CRS used for Street View coordinates —
    # it is not referenced in this part of the notebook
    gsv_crs = 'EPSG:4326'
    
    # when True, the URL signing secret is requested as well
    obtain_imagery = False
    
    # credentials are prompted for interactively instead of being stored in the notebook
    api_key = getpass("API key: ")
    if obtain_imagery:
        secret = getpass("URL signing secret: ")
        
    # base URLs of the Street View metadata and image endpoints
    meta_base = 'https://maps.googleapis.com/maps/api/streetview/metadata?'
    pic_base = 'https://maps.googleapis.com/maps/api/streetview?'

## Read data

### Read OSM data enriched with NDVI and GSV columns: these are candidate locations

In [None]:
# Load the enriched OSM layers (one GeoJSON file per category) from the enriched folder.
enriched_dir = os.path.join(export_folder, export_osm_sub_folder)

pocket_greenspaces = gpd.read_file(os.path.join(enriched_dir, 'pocket_greenspaces_enriched.geojson'), driver='GeoJSON')
regular_greenspaces = gpd.read_file(os.path.join(enriched_dir, 'regular_greenspaces_enriched.geojson'))
squares = gpd.read_file(os.path.join(enriched_dir, 'squares_enriched.geojson'))
playspaces = gpd.read_file(os.path.join(enriched_dir, 'playspaces_enriched.geojson'))
streets = gpd.read_file(os.path.join(enriched_dir, 'streets_enriched.geojson'))
parkings = gpd.read_file(os.path.join(enriched_dir, 'parkings_enriched.geojson'))

In [None]:
def ensure_unique_panoramas(gdf):
    """Return ``gdf`` in shuffled order with at most one row per panorama id.

    The rows are shuffled first (seeded with the notebook-level ``random_state``),
    so the row that survives for a duplicated ``gsv_pano_id`` is the first one in
    the shuffled order rather than the first one in the input.
    """
    shuffled = gdf.sample(frac=1, random_state=random_state)
    return shuffled.drop_duplicates(subset=['gsv_pano_id'], keep='first')

In [None]:
def check_data(gdf):
    """Reduce the places of one category to valid sampling candidates.

    A place is kept only when
    - all NDVI statistics (mean, median, min, max, stdev, var) are present,
    - ``gsv_suitability`` equals the string 'True',
    - ``gsv_pano_id``, ``gsv_lat`` and ``gsv_lng`` are present and are neither the
      string 'False' nor the boolean False,
    - ``category`` and the computed geometry area are known.

    The result gets an ``area`` column (geometry area), ``ndvi_mean`` cast to
    float, and is de-duplicated on panorama id via ``ensure_unique_panoramas``.
    The input frame is not modified.
    """
    # candidates only when: NDVI data present, GSV available
    ndvi_columns = ['ndvi_mean', 'ndvi_median', 'ndvi_min', 'ndvi_max', 'ndvi_stdev', 'ndvi_var']
    gdf = gdf.dropna(subset=ndvi_columns)

    gdf = gdf[gdf.gsv_suitability == 'True']
    # missing panoramas may be stored as NaN, as the string 'False' or as the
    # boolean False; apply the same three checks to every GSV column
    for column in ('gsv_pano_id', 'gsv_lat', 'gsv_lng'):
        values = gdf[column]
        gdf = gdf[values.notna() & (values != 'False') & (values != False)]  # noqa: E712

    # independent copy, so the column assignments below do not write into a
    # slice of the caller's frame
    gdf = gdf.copy()
    gdf['area'] = gdf.geometry.area
    gdf['ndvi_mean'] = gdf['ndvi_mean'].astype(float)

    # verify again also type and area known
    gdf = gdf[gdf.category.notna()]
    gdf = gdf[gdf['area'].notna()]

    return ensure_unique_panoramas(gdf)

In [None]:
# alternative candidate playspaces for Barcelona manual sampling
# (copied before check_data is applied to playspaces, so this copy is still unfiltered)
if place_name == 'Barcelona':
    alternative_playspaces = playspaces.copy()

In [None]:
# Keep only valid candidates per category: complete NDVI statistics, a usable
# GSV panorama and a unique panorama id (see check_data).
pocket_greenspaces = check_data(pocket_greenspaces)
regular_greenspaces = check_data(regular_greenspaces)
squares = check_data(squares)
playspaces = check_data(playspaces)
streets = check_data(streets)
parkings = check_data(parkings)

## Sample from candidates and generate iframes

In [None]:
# sample size: up to 8 places per NDVI quantile group within each category
# (take_sample is called once per quantile group with this n)
n_sample = 8

In [None]:
# Working copies of the candidates per category; the sampling sections below
# overwrite these with whatever remains after sampling.
candidate_pocket_greenspaces = pocket_greenspaces.copy()
candidate_regular_greenspaces = regular_greenspaces.copy()
candidate_squares = squares.copy()
candidate_playspaces = playspaces.copy()
candidate_streets = streets.copy()
candidate_parkings = parkings.copy()

# all candidates of all categories stacked in one frame (index labels are not reset)
candidate_all = pd.concat([
    candidate_pocket_greenspaces, 
    candidate_regular_greenspaces, 
    candidate_squares,
    candidate_playspaces,
    candidate_streets,
    candidate_parkings])

In [None]:
def compose_iframe_string(row):
    """Build the HTML snippet that embeds the row's Street View panorama.

    Reads ``row.gsv_pano_id``, ``row.gsv_lat`` and ``row.gsv_lng`` and returns a
    centred ``<div>`` wrapping an 800x450 ``<iframe>``, terminated by a comma
    and a newline.

    NOTE(review): the panorama id is inserted into the embed URL as-is. The
    manually added Barcelona panoramas carry a '1s' prefix in their id while
    the sampled ones do not — confirm which form the embed URL expects.
    """
    embed_url = (
        'https://www.google.com/maps/embed?pb=!4v1663685727477!6m8!1m7'
        '!{}!2m2!1d{}!2d{}'
        '!3f234.88633007037052!4f0.4221917676174485!5f0.7820865974627469'
    ).format(row.gsv_pano_id, row.gsv_lat, row.gsv_lng)

    iframe_tag = (
        '<iframe height="450" loading="lazy" src="{}" '
        'style="border:0;"width="800"></iframe>'
    ).format(embed_url)

    return '<div style="text-align: center;">{}<br /></div>,\n'.format(iframe_tag)

In [None]:
def take_sample(gdf, quantile, n, print_iframes=True):
    """Draw up to ``n`` places from one NDVI quantile group.

    Parameters
    ----------
    gdf : candidates with an ``ndvi_quantile`` column.
    quantile : label of the quantile group to sample from (e.g. 'lower').
    n : maximum number of places to draw; when the group holds ``n`` rows or
        fewer, the whole group is returned.
    print_iframes : when True, print category, quantile, index label, panorama
        id and the iframe HTML of every drawn place.

    Returns
    -------
    A new frame with the drawn rows and an added ``gsv_iframe_html`` column.
    When nothing could be drawn, a message is printed and the empty selection
    is returned without that column. The input frame is never modified.
    """
    # select the group once instead of re-evaluating the filter per branch
    in_quantile = gdf[gdf.ndvi_quantile == quantile]

    if len(in_quantile) > n:
        drawn = in_quantile.sample(n=n, random_state=random_state)
    else:
        # copy: the column assignment below must not write into a slice of gdf
        drawn = in_quantile.copy()

    if drawn.empty:
        print('no more places could be sampled')
        return drawn

    # compose and print iframes
    drawn['gsv_iframe_html'] = drawn.apply(compose_iframe_string, axis=1)
    if print_iframes:
        # iterate over rows rather than index labels, which stays correct even
        # if an index label occurs more than once
        for label, row in drawn.iterrows():
            print(row['category'], row['ndvi_quantile'], label, row['gsv_pano_id'])
            print(row['gsv_iframe_html'])

    return drawn

In [None]:
def exclude_and_replace(sampled, pano_ids_to_exclude, candidates, print_iframes=True):
    """Drop rejected panoramas from the sample and draw replacements.

    Every excluded place is replaced by a place from the same NDVI quantile
    group, drawn with ``take_sample`` from the candidates that are still
    available. Fewer replacements are returned when a group is exhausted.

    Parameters
    ----------
    sampled : current sample (needs ``gsv_pano_id`` and ``ndvi_quantile``).
    pano_ids_to_exclude : panorama ids rejected in a manual check; ids that are
        not part of the sample only remove the matching candidates.
    candidates : remaining candidate pool the replacements are drawn from.
    print_iframes : passed on to ``take_sample``.

    Returns
    -------
    ``(sampled, remaining_candidates)``: the updated sample and the candidate
    pool without the excluded panoramas and without the replacements.
    """
    is_excluded = sampled.gsv_pano_id.isin(pano_ids_to_exclude)

    # replacements needed per quantile group, in order of first appearance;
    # iterating a set of strings would make the row order of the result depend
    # on the interpreter's hash seed
    needed_per_quantile = {}
    for quantile in sampled.loc[is_excluded, 'ndvi_quantile'].tolist():
        needed_per_quantile[quantile] = needed_per_quantile.get(quantile, 0) + 1

    # exclude these panoramas from the sample and from the candidates
    remaining_sampled = sampled[~is_excluded]
    remaining_candidates = candidates[~candidates.gsv_pano_id.isin(pano_ids_to_exclude)]

    # replacements per ndvi quantile; empty draws are skipped so that no empty
    # frame is ever concatenated
    replacement_frames = []
    for quantile, n_replacement in needed_per_quantile.items():
        drawn = take_sample(remaining_candidates, quantile, n_replacement, print_iframes=print_iframes)
        if len(drawn):
            replacement_frames.append(drawn)

    if not replacement_frames:
        return remaining_sampled, remaining_candidates

    replacements = pd.concat(replacement_frames)

    # omit also replacements from remaining candidates (explicit anti-join on
    # the panorama id, so unrelated rows are never touched)
    remaining_candidates = remaining_candidates[
        ~remaining_candidates.gsv_pano_id.isin(replacements.gsv_pano_id)]

    return pd.concat([remaining_sampled, replacements]), remaining_candidates

#### Pocket greenspaces

In [None]:
# work on a copy of the pocket greenspace candidates in this section
candidates = candidate_pocket_greenspaces.copy()

In [None]:
# set ndvi quantiles: quartile boundaries of the mean NDVI of these candidates
ndvi_quantiles = candidates['ndvi_mean'].quantile([0, 0.25, 0.50, 0.75, 1])
q25, q50, q75 = ndvi_quantiles[0.25], ndvi_quantiles[0.5], ndvi_quantiles[0.75]
mean_ndvi = candidates.ndvi_mean

# half-open groups: below q25, [q25, q50), [q50, q75), q75 and above
candidates.loc[mean_ndvi < q25, 'ndvi_quantile'] = 'lower'
candidates.loc[(mean_ndvi >= q25) & (mean_ndvi < q50), 'ndvi_quantile'] = 'second'
candidates.loc[(mean_ndvi >= q50) & (mean_ndvi < q75), 'ndvi_quantile'] = 'third'
candidates.loc[mean_ndvi >= q75, 'ndvi_quantile'] = 'upper'

In [None]:
# sample places: up to n_sample per NDVI quantile group, lowest group first
print_iframes = False
sampled = pd.concat([
    take_sample(candidates, quantile_name, n_sample, print_iframes)
    for quantile_name in ('lower', 'second', 'third', 'upper')])

# omit sampled places from remaining candidates: after stacking both frames a
# sampled panorama id occurs twice, and keep=False drops every duplicated id
pool_and_sample = pd.concat([candidates, sampled])
candidates = pool_and_sample.drop_duplicates(subset=['gsv_pano_id'], keep=False)

In [None]:
# after manual check, we exclude the following panoramas
if place_name == 'Barcelona':
    pano_ids_to_exclude = [
        'CAoSLEFGMVFpcE1NdEctMFRKUGtGcVhxZkxjUUpQMjJ4cEZVYU9xejlSQ01YR1Bv',
        'CAoSLEFGMVFpcE9oS2o1Qm5TS2ZaRUlQbkJUd0FQWjJLTlRxWnRXYVVMN3R0bmxF',
        'IBQwXcItjXij5rm3lrSkwQ',  # no longer?
        'CAoSLEFGMVFpcE9na0hvSE9jVTNTM1AxTWxTMmJlUFhIV0gzNU5OZjB3dmlwZnVw',
        'CAoSLEFGMVFpcE5OZXVHM2V2TlZyM1BVYzFCRDdaVlBFUG9lT1pWVVh6OEViWmN3',
        'H4-QZDuDYPN8iKFWBB1DCA',  # no longer?
        'wnOF19Np_h34c70HVwzjUA',
        'uZfTJELGuwVqa5Mf0Q04Sg',
        'zfpLLVyA7NUmTKohfokDyA']
    
elif place_name == 'Goteborg':
    pano_ids_to_exclude = [
        'oU6BbhSf9b6d6dLZ3-VquA',
        '0IG0xXCMxrbLBowtrq7bPg']

elif place_name == 'Rotterdam':
    pano_ids_to_exclude = [
        'NTQHbTFYmZU1B-cUmgkM6Q',
        '4HS6qgS6VFl-oheGUAXRUg',
        'CAoSLEFGMVFpcE0wNlYyQUxfZ1FrT2pETjAyYWlzX01wREtsSHAtRV96ZXgwUWxF',
        'CAoSLEFGMVFpcFBJZFZMOXRMUE5NaVJsX2U1c2otMU5sbG91dTM0LWh5c2Z2Unda']

else:
    # no manual check has been recorded for this city: exclude nothing instead
    # of failing with a NameError on the call below
    pano_ids_to_exclude = []
    
print_iframes = False
sampled, candidates = exclude_and_replace(sampled, pano_ids_to_exclude, candidates, print_iframes)

In [None]:
# after the final co-author check, we exclude the following panoramas
if place_name == 'Barcelona':
    pano_ids_to_exclude = []
    
elif place_name == 'Goteborg':
    pano_ids_to_exclude = []

elif place_name == 'Rotterdam':
    pano_ids_to_exclude = ['Ty732DL2KBUaxIT4lkTx4g']
    
print_iframes = True
sampled, candidates = exclude_and_replace(sampled, pano_ids_to_exclude, candidates, print_iframes)

In [None]:
# Debug switch: set to True to list every sampled pocket greenspace.
print_iframes = False
if print_iframes:
    for i in sampled.index:
        # category, NDVI quantile group, index label and panorama id, then the iframe HTML
        print(sampled.category[i], sampled.ndvi_quantile[i], i, sampled.gsv_pano_id[i])
        print(sampled.gsv_iframe_html[i])

In [None]:
# keep the final pocket greenspace sample under its own name
sampled_pocket_greenspaces = sampled.copy()

In [None]:
# store the pocket greenspace candidates that were neither sampled nor excluded
candidate_pocket_greenspaces = candidates.copy()

#### Regular greenspace - low, medium, high NDVI

In [None]:
# work on a copy of the regular greenspace candidates in this section
candidates = candidate_regular_greenspaces.copy()

In [None]:
# set ndvi quantiles: quartile boundaries of the mean NDVI of these candidates
ndvi_quantiles = candidates['ndvi_mean'].quantile([0, 0.25, 0.50, 0.75, 1])
q25, q50, q75 = ndvi_quantiles[0.25], ndvi_quantiles[0.5], ndvi_quantiles[0.75]
mean_ndvi = candidates.ndvi_mean

# half-open groups: below q25, [q25, q50), [q50, q75), q75 and above
candidates.loc[mean_ndvi < q25, 'ndvi_quantile'] = 'lower'
candidates.loc[(mean_ndvi >= q25) & (mean_ndvi < q50), 'ndvi_quantile'] = 'second'
candidates.loc[(mean_ndvi >= q50) & (mean_ndvi < q75), 'ndvi_quantile'] = 'third'
candidates.loc[mean_ndvi >= q75, 'ndvi_quantile'] = 'upper'

In [None]:
# sample places: up to n_sample per NDVI quantile group, lowest group first
print_iframes = False
sampled = pd.concat([
    take_sample(candidates, quantile_name, n_sample, print_iframes)
    for quantile_name in ('lower', 'second', 'third', 'upper')])

# omit sampled places from remaining candidates: after stacking both frames a
# sampled panorama id occurs twice, and keep=False drops every duplicated id
pool_and_sample = pd.concat([candidates, sampled])
candidates = pool_and_sample.drop_duplicates(subset=['gsv_pano_id'], keep=False)

In [None]:
# after manual check, we exclude the following panoramas
if place_name == 'Barcelona':
    pano_ids_to_exclude = [
        'nRtowiOk5nmdTyZZM9sXAA',
        'CAoSLEFGMVFpcE42aXRfTUc2TkVxbXc1NGZUVW8xeTFIcW9VOWRJR0hMVkxIM2dH',
        'CAoSLEFGMVFpcE9ISk56WnJqT3AzOFR3UVdNZ19PVEdtdXppb0NtelVxbGdBczRy',
        'CAoSLEFGMVFpcE56TTF4cjFCajUyVmFfaUtNRzRBeHBxbXVYWWlLcjljTm16a3JX',
        'zaSHT5HIsoMQRDdvStLe_w',
        'R3faCfC4a71sXJytEUrPpg',
        'clVKp2ERAnMN228vvLkn-w',
        'P2A2sJiTIMN6lgXjZFR3VA',
        'Up2fJQJP2aAxdLf6tBEV2Q']
    
elif place_name == 'Goteborg':
    pano_ids_to_exclude = [
        'FOOr0tex4yNTP8_NYI6n5A',
        'LOOjfkCZgR3rfLe435549Q',
        'UbIr3kBjK7hFy8K8rtdfaA',
        'gzoPujPqZ7EgrOfd5o6Yjw']

elif place_name == 'Rotterdam':
    pano_ids_to_exclude = [
        'gxZgkOvUxkYCPxHDXVK9Eg',
        'lQ4vNiyAUaBECFKuOO57WA',
        'hXo4MOvJlMATviBfYkBNUg',
        'CAoSLEFGMVFpcE1yR2F4M1VKazdWcW5NM0ZHWk5OS3FXMlA0b0tFbFRHSEEyQjRV']
    
print_iframes = False
sampled, candidates = exclude_and_replace(sampled, pano_ids_to_exclude, candidates, print_iframes)

In [None]:
# after a second manual check, we exclude the following panoramas
if place_name == 'Barcelona':
    pano_ids_to_exclude = [
        'DXyo_eZVigP8Nps-i6XQfw',
        'CAoSLEFGMVFpcE5nanl5X1c4YVNLZm5UVkxmMjFFamMwaWFLZndtalJvWEZIRGxs',
        'CAoSLEFGMVFpcE01U3lub3NLQkY0QjlSR3pvb3l3NU1SMTVsaFlZVTl1cGlpRnE2',
        'CJQNapyo1vm0VU3kp89iHA']
    
elif place_name == 'Goteborg':
    pano_ids_to_exclude = [
        'lZBiPLwkuubBQLhA1zNuYQ',
        '7SWCdDNsYyVS2KWdE_SIqA',
        'njROp2rHEgA3PjM1U9MiIg']

elif place_name == 'Rotterdam':
    pano_ids_to_exclude = []
    
print_iframes = False
sampled, candidates = exclude_and_replace(sampled, pano_ids_to_exclude, candidates, print_iframes)

In [None]:
# after a third manual check, we exclude the following panoramas
if place_name == 'Barcelona':
    pano_ids_to_exclude = [
        'zQBItDE4VMWujacbOcNYCA']
    
elif place_name == 'Goteborg':
    pano_ids_to_exclude = []

elif place_name == 'Rotterdam':
    pano_ids_to_exclude = []
    
print_iframes = False
sampled, candidates = exclude_and_replace(sampled, pano_ids_to_exclude, candidates, print_iframes)

In [None]:
# after the final co-author check, we exclude the following panoramas
if place_name == 'Barcelona':
    pano_ids_to_exclude = [
        'b39Rbx7G9B9kTQA2Tesf_w',
        'CAoSLEFGMVFpcE5qZnlIamwwM2h2X3dvVE1nbEJiZndlY05MblJSdGxpWHBEX1Br',
        'CAoSLEFGMVFpcE43TjRRam9FcXFLOTJlb3daLUhjVmxER1dJcjgtcXAtbWdwUTRf']
    
elif place_name == 'Goteborg':
    pano_ids_to_exclude = ['GcoIP9IxWhPFmREztodjxg']

elif place_name == 'Rotterdam':
    pano_ids_to_exclude = []
    
print_iframes = False
sampled, candidates = exclude_and_replace(sampled, pano_ids_to_exclude, candidates, print_iframes)

In [None]:
# after the second final co-author check, we exclude the following panoramas
if place_name == 'Barcelona':
    pano_ids_to_exclude = ['CAoSLEFGMVFpcE1IR2dYRWQ3RkRST1pfcE5fODB1aU9xNkVHZHlHcVd5aWMxZ1RY']
    
elif place_name == 'Goteborg':
    pano_ids_to_exclude = []

elif place_name == 'Rotterdam':
    pano_ids_to_exclude = []
    
print_iframes = False
sampled, candidates = exclude_and_replace(sampled, pano_ids_to_exclude, candidates, print_iframes)

In [None]:
# Debug switch: set to True to list every sampled regular greenspace.
print_iframes = False
if print_iframes:
    for i in sampled.index:
        # category, NDVI quantile group, index label and panorama id, then the iframe HTML
        print(sampled.category[i], sampled.ndvi_quantile[i], i, sampled.gsv_pano_id[i])
        print(sampled.gsv_iframe_html[i])

In [None]:
# keep the final regular greenspace sample under its own name
sampled_regular_greenspaces = sampled.copy()

In [None]:
# store the regular greenspace candidates that were neither sampled nor excluded
candidate_regular_greenspaces = candidates.copy()

#### Square - low, medium, high NDVI

In [None]:
# work on a copy of the square candidates in this section
candidates = candidate_squares.copy()

In [None]:
# set ndvi quantiles: quartile boundaries of the mean NDVI of these candidates
ndvi_quantiles = candidates['ndvi_mean'].quantile([0, 0.25, 0.50, 0.75, 1])
q25, q50, q75 = ndvi_quantiles[0.25], ndvi_quantiles[0.5], ndvi_quantiles[0.75]
mean_ndvi = candidates.ndvi_mean

# half-open groups: below q25, [q25, q50), [q50, q75), q75 and above
candidates.loc[mean_ndvi < q25, 'ndvi_quantile'] = 'lower'
candidates.loc[(mean_ndvi >= q25) & (mean_ndvi < q50), 'ndvi_quantile'] = 'second'
candidates.loc[(mean_ndvi >= q50) & (mean_ndvi < q75), 'ndvi_quantile'] = 'third'
candidates.loc[mean_ndvi >= q75, 'ndvi_quantile'] = 'upper'

In [None]:
# sample places: up to n_sample per NDVI quantile group, lowest group first
print_iframes = False
sampled = pd.concat([
    take_sample(candidates, quantile_name, n_sample, print_iframes)
    for quantile_name in ('lower', 'second', 'third', 'upper')])

# omit sampled places from remaining candidates: after stacking both frames a
# sampled panorama id occurs twice, and keep=False drops every duplicated id
pool_and_sample = pd.concat([candidates, sampled])
candidates = pool_and_sample.drop_duplicates(subset=['gsv_pano_id'], keep=False)

In [None]:
# after manual check, we exclude the following panoramas
if place_name == 'Barcelona':
    pano_ids_to_exclude = [
        'CAoSLEFGMVFpcE4yUjhST2xhQ3RUNVU3ci1Ea05XeTFLdnpMWFM5YWFCM2wyZHMy',
        'CAoSLEFGMVFpcE0wdU9ZRXBVZFhzSnAybWZpOHM4a2pKMmliZmdJVUdUT21hdzhv',
        'CAoSLEFGMVFpcE5ZYW83bXNmQ1hyem5aLWozdjJKVlR1dmVzWDcxUlNFWjN0TjBG',
        'CAoSLEFGMVFpcFB3WE1IdzYtekpRMkF5UUdNMVFrNEhON3paTDkxdXgtTWt3allr',
        'CAoSLEFGMVFpcFBWbkFnTXhjY1JtOUI5X09pNHhSbktsU0VCQkZiQnE2bnZfVUJW',
        'tAhWS_V4o4BqZB_N6s3bJg',
        'ghkll_liR1S2eh5rBhhPXg',
        'CAoSLEFGMVFpcE9CWExaQlp1b1F5ZnBUd0xRRkhZSVhFdl9sSnlTdndMaGRQZVI1']
    
elif place_name == 'Goteborg':
    pano_ids_to_exclude = [
        'CAoSLEFGMVFpcE9yTVZFdDhiNjRPZi1pYXVIVUxBcXpqeW8ycVRYd2VsSk5TTkp1',
        'CAoSLEFGMVFpcE5ZcGtGNnJ5NEFXMnpXOFg2dDhSSmUyWVd6Q3FXUXV2ZlkxRFB5',
        'CAoSLEFGMVFpcFBSem5zTDRwVkY2ZDd3NEFfQjhSZVhtdF9ZSnZQSEt5eC1XeHN5',
        'QiWXjBR7HE4vK6-lt5dlMw',
        'WyxryYTz8hSOkq0sp-UqZQ']

elif place_name == 'Rotterdam':
    pano_ids_to_exclude = [
        '29edRoSEI7Yd7Zepsw6WNg',
        'yKmkmDdvzAgYpzot-7qKKg',
        'CAoSLEFGMVFpcFBmSmFJTG5tc01UNDFqczBjRGlIZEkxdVk0eXNCcHB5MW9ldmFu',
        '4JsrjAWYZ9h1osgZWRaF5A',
        'CAoSLEFGMVFpcE16UlNHeU1jcENycVdXRUFkUko2NEVWVmo4dEw0aERzYUFTbFRz']
    
print_iframes = False
sampled, candidates = exclude_and_replace(sampled, pano_ids_to_exclude, candidates, print_iframes)

In [None]:
# after a second manual check, we exclude the following panoramas
if place_name == 'Barcelona':
    pano_ids_to_exclude = []
    
elif place_name == 'Goteborg':
    pano_ids_to_exclude = [
        'CAoSLEFGMVFpcE4ySHFkaDF2YXJWdUF1UHhXNGJsbjA2NUNLTWJUbmZibndPTEFq']

elif place_name == 'Rotterdam':
    pano_ids_to_exclude = [
        'CAoSLEFGMVFpcE1oT1Z4c0xtekpxbDdmRjNINmZVSjNEZmhtR0tqODJHQTJWeDY1',
        'CAoSLEFGMVFpcE9yX1Jvb2lHV3Z6ZHRPdVBBVzhyb0o2STYzSll3QWpjSUNidkVN']
    
print_iframes = False
sampled, candidates = exclude_and_replace(sampled, pano_ids_to_exclude, candidates, print_iframes)

In [None]:
# after the final co-author check, we exclude the following panoramas
if place_name == 'Barcelona':
    pano_ids_to_exclude = [
        'CAoSLEFGMVFpcE44XzQtVEhNZXotQ1RTT1lwWXBnNGhQam14Z1lsbm1FbHZsUzhr',
        'RX-Be6Eq1Uey5-kQ3MyiXw',
        'fl4WqvAjRgOXUHJg-_PXCA',
        'CAoSLEFGMVFpcE1Wa0d2Q1Q3N3RGaXctM3BYams2WGstNHZyUFZRQkRmTlZzYTBX']
    
elif place_name == 'Goteborg':
    pano_ids_to_exclude = [
        '5tZZ3XtmGcbK9gQQ3dxUTg',
        'veiSAJpwAW_IeOI3bzkEdQ',
        '6KGst5UHyYhIJiCKSPqedg']

elif place_name == 'Rotterdam':
    pano_ids_to_exclude = []
    
print_iframes = False
sampled, candidates = exclude_and_replace(sampled, pano_ids_to_exclude, candidates, print_iframes)

In [None]:
# after the second final co-author check, we exclude the following panoramas
if place_name == 'Barcelona':
    pano_ids_to_exclude = [
        'M7GXUvAVpKJt4V7KsEqZwg',
        'CgHLA8y-pzPVgeguraGX4g']
    
elif place_name == 'Goteborg':
    pano_ids_to_exclude = ['Xuo-8CxiNo8mhcD6dZk-oA']

elif place_name == 'Rotterdam':
    pano_ids_to_exclude = []
    
print_iframes = False
sampled, candidates = exclude_and_replace(sampled, pano_ids_to_exclude, candidates, print_iframes)

In [None]:
# Debug switch: set to True to list every sampled square.
print_iframes = False
if print_iframes:
    for i in sampled.index:
        # category, NDVI quantile group, index label and panorama id, then the iframe HTML
        print(sampled.category[i], sampled.ndvi_quantile[i], i, sampled.gsv_pano_id[i])
        print(sampled.gsv_iframe_html[i])

In [None]:
# keep the final square sample under its own name
sampled_squares = sampled.copy()

In [None]:
# store the square candidates that were neither sampled nor excluded
candidate_squares = candidates.copy()

#### Play & recreation - low, medium, high NDVI

In [None]:
# work on a copy of the playspace candidates in this section
candidates = candidate_playspaces.copy()

In [None]:
# set ndvi quantiles: quartile boundaries of the mean NDVI of these candidates
ndvi_quantiles = candidates['ndvi_mean'].quantile([0, 0.25, 0.50, 0.75, 1])
q25, q50, q75 = ndvi_quantiles[0.25], ndvi_quantiles[0.5], ndvi_quantiles[0.75]
mean_ndvi = candidates.ndvi_mean

# half-open groups: below q25, [q25, q50), [q50, q75), q75 and above
candidates.loc[mean_ndvi < q25, 'ndvi_quantile'] = 'lower'
candidates.loc[(mean_ndvi >= q25) & (mean_ndvi < q50), 'ndvi_quantile'] = 'second'
candidates.loc[(mean_ndvi >= q50) & (mean_ndvi < q75), 'ndvi_quantile'] = 'third'
candidates.loc[mean_ndvi >= q75, 'ndvi_quantile'] = 'upper'

In [None]:
# sample places: up to n_sample per NDVI quantile group, lowest group first
print_iframes = False
sampled = pd.concat([
    take_sample(candidates, quantile_name, n_sample, print_iframes)
    for quantile_name in ('lower', 'second', 'third', 'upper')])

# omit sampled places from remaining candidates: after stacking both frames a
# sampled panorama id occurs twice, and keep=False drops every duplicated id
pool_and_sample = pd.concat([candidates, sampled])
candidates = pool_and_sample.drop_duplicates(subset=['gsv_pano_id'], keep=False)

In [None]:
# after manual check, we exclude the following panoramas
if place_name == 'Barcelona':
    pano_ids_to_exclude = [
        'Z7YDsTPJJzQzTPb1nnzcmg',
        'CAoSLEFGMVFpcE9oS2o1Qm5TS2ZaRUlQbkJUd0FQWjJLTlRxWnRXYVVMN3R0bmxF',
        'sEc6G6maK-0eG4FVP4-lIA',
        'GrGx_fucjKKakpRDHI2Zhg',
        '3bmGneH2JRUNtk2LquCXkg',
        'CAoSLEFGMVFpcE5nS25fRWIxaEg0YmtrLWFOZEFHMzJTT1NKbFRXZnBFMkw1Sncy',
        'CAoSLEFGMVFpcE5maEVnYUZ0NFpJUl9FWFFYcFE0Rk5qMnBnRUNrMnctdTB4dkFn',
        'CAoSLEFGMVFpcE5wdW00WENBc2UtVDI3WmhUbDBtdTBJb0lDeGI1ZUptQXh6T212',
        'cZPvG2pvDiLJTPgvIqTBUw',
        'CAoSLEFGMVFpcE8yQWg2MzRBaWhjdk1OWnd1Wmc4ajZMN0thWUJNenpjd2ZrMDZm']
    
elif place_name == 'Goteborg':
    pano_ids_to_exclude = [
        'V2Mb2WDWl2x6rHHbokBJcQ',
        'bsaw5di6TZ-snYPBmAyoJg',
        'NVcBa4xkt7coQCKkvrYYLQ',
        'k-g-d7zif_p4mVo0Ur4FRA',
        '1chx4hDNz51MCbB5f57Yvg',
        'Lm88EYF11DuXTS-Yk0hwfg',
        'zTvjltgIl3jWJ6Shbf3j-Q']

elif place_name == 'Rotterdam':
    pano_ids_to_exclude = [
        'hyNh2B3bzI2IrcxWSBKnyQ',
        'BOAbdVrixcaNKp9BE2dXEQ',
        '2KSxuNpbbbiLL_J6xTBn9A',
        's5xFkvsTle5O1fN7_8S5vA']
    
print_iframes = False
sampled, candidates = exclude_and_replace(sampled, pano_ids_to_exclude, candidates, print_iframes)

In [None]:
# after a second manual check, we exclude the following panoramas
if place_name == 'Barcelona':
    pano_ids_to_exclude = []
    
elif place_name == 'Goteborg':
    pano_ids_to_exclude = [
        'CBXlXxsNmGz_cKhcIdQ9YA']

elif place_name == 'Rotterdam':
    pano_ids_to_exclude = []
    
print_iframes = False
sampled, candidates = exclude_and_replace(sampled, pano_ids_to_exclude, candidates, print_iframes)

In [None]:
# after a third manual check, we exclude the following panoramas
if place_name == 'Barcelona':
    pano_ids_to_exclude = []
    
elif place_name == 'Goteborg':
    pano_ids_to_exclude = [
        'aJhOYNJg85uiVcNQR8_R6g']

elif place_name == 'Rotterdam':
    pano_ids_to_exclude = []
    
print_iframes = False
sampled, candidates = exclude_and_replace(sampled, pano_ids_to_exclude, candidates, print_iframes)

In [None]:
# after the final co-author check, we exclude the following panoramas
if place_name == 'Barcelona':
    pano_ids_to_exclude = []
    
elif place_name == 'Goteborg':
    pano_ids_to_exclude = [
        '1M7w2rf6ZPRU8tkVy8rALQ',
        'nAzwYb-UmjKyXGSdJMAsdA',
        'LKhdmX5efGehsNCoye6wYA']

elif place_name == 'Rotterdam':
    pano_ids_to_exclude = [
        'gMfk5ce6uVjwBzT_EVvklQ',
        'sySrWw5ZLESc6U1lE6GvVw']
    
print_iframes = False
sampled, candidates = exclude_and_replace(sampled, pano_ids_to_exclude, candidates, print_iframes)

In [None]:
# after the second final co-author check, we exclude the following panoramas
if place_name == 'Barcelona':
    pano_ids_to_exclude = []
    
elif place_name == 'Goteborg':
    pano_ids_to_exclude = ['tJlTnZ0mTLfOAUvX9SFWEg']

elif place_name == 'Rotterdam':
    pano_ids_to_exclude = ['Ci6Igygu5a-k9cqU5NbS8g']
    
print_iframes = False
sampled, candidates = exclude_and_replace(sampled, pano_ids_to_exclude, candidates, print_iframes)

#### manually add alternatives for excluded images

In [None]:
if place_name == 'Barcelona':

    # Alternative pool for manual replacements: the unfiltered playspaces that
    # have a mean NDVI. Casting first turns the string 'nan' into a real NaN,
    # so one notna() filter removes both textual and genuinely missing values
    # (a comparison with the string 'nan' alone would keep real NaN rows).
    alternative_candidates = alternative_playspaces.copy()
    alternative_candidates['ndvi_mean'] = alternative_candidates['ndvi_mean'].astype(float)
    alternative_candidates = alternative_candidates[alternative_candidates.ndvi_mean.notna()].copy()

In [None]:
if place_name == 'Barcelona':

    # assign ndvi quantile group based on the original candidates: the
    # boundaries in ndvi_quantiles are reused, not recomputed
    alt_q25, alt_q50, alt_q75 = ndvi_quantiles[0.25], ndvi_quantiles[0.5], ndvi_quantiles[0.75]
    alt_mean_ndvi = alternative_candidates.ndvi_mean

    alternative_candidates.loc[alt_mean_ndvi < alt_q25, 'ndvi_quantile'] = 'lower'
    alternative_candidates.loc[
        (alt_mean_ndvi >= alt_q25) & (alt_mean_ndvi < alt_q50), 'ndvi_quantile'] = 'second'
    alternative_candidates.loc[
        (alt_mean_ndvi >= alt_q50) & (alt_mean_ndvi < alt_q75), 'ndvi_quantile'] = 'third'
    alternative_candidates.loc[alt_mean_ndvi >= alt_q75, 'ndvi_quantile'] = 'upper'

In [None]:
# For Barcelona, the sample does not contain enough playspaces.
# We therefore manually add alternatives for excluded playspace panoramas:
# for two places that did not pass our manual check, an alternative GSV panorama is available.

if place_name == 'Barcelona':
    
    # take the place by its original (excluded) panorama id, then overwrite the
    # GSV columns with the manually selected alternative panorama
    # NOTE(review): unlike the sampled ids, the replacement ids start with '1s' — confirm this is intended
    manual_1 = alternative_candidates[alternative_candidates.gsv_pano_id=='Z7YDsTPJJzQzTPb1nnzcmg'].copy()
    manual_1['gsv_year'], manual_1['gsv_month'], manual_1['gsv_pano_id'], manual_1['gsv_lat'], manual_1['gsv_lng'] = [
        '2020', '06', '1sCAoSLEFGMVFpcE4tUUNOZUtJTzJRSWllMDJJdi1Nakt5TFBjdWN5czZYREZVbW9i', '41.4122036', '2.1950182']
    
    manual_2 = alternative_candidates[alternative_candidates.gsv_pano_id=='CAoSLEFGMVFpcE5nS25fRWIxaEg0YmtrLWFOZEFHMzJTT1NKbFRXZnBFMkw1Sncy'].copy()
    manual_2['gsv_year'], manual_2['gsv_month'], manual_2['gsv_pano_id'], manual_2['gsv_lat'], manual_2['gsv_lng'] = [
        '2019', '06', '1suaabd4ZDju63-8QPbRiFTg', '41.37932026998091', '2.191328257807365']
    
    # build the iframe HTML for the manual additions and append them to the sample
    manual_1to2 = pd.concat([manual_1, manual_2])
    manual_1to2['gsv_iframe_html'] = manual_1to2.apply(lambda row: compose_iframe_string(row), axis=1, result_type="expand")
    
    sampled = pd.concat([sampled, manual_1to2])

In [None]:
if place_name == 'Barcelona':

    # number of sampled playspaces per NDVI quantile group, plus the size of the whole sample
    playspace_counts = [
        len(sampled[(sampled.category=='playspace') & (sampled.ndvi_quantile==quantile_name)])
        for quantile_name in ('lower', 'second', 'third', 'upper')]
    print('play/recreation grounds, sampled places:\n{} low + {} second + {} third + {} upper quantile ndvi\nTOTAL:{}'.format(
        *playspace_counts, len(sampled)))

#### manually add other alternatives for excluded images

In [None]:
def points_in_polygon(polygon, n, max_attempts=None):
    """Generate up to ``n`` random points within ``polygon``.

    Points are drawn uniformly from the bounding box of the geometry and kept
    when the geometry contains them (rejection sampling), using Python's
    ``random`` module.

    Parameters
    ----------
    polygon : geometry providing ``bounds``, ``contains`` and ``is_empty``.
    n : number of points wanted.
    max_attempts : upper bound on the number of draws; defaults to ``1000 * n``.
        Without a bound the loop never ends for a geometry that (almost) never
        contains a drawn point, e.g. a diagonal line or a very thin sliver.

    Returns
    -------
    A list of points; shorter than ``n`` (possibly empty) when the geometry is
    empty or the attempt limit is reached first.
    """
    # an empty geometry has no usable bounding box to draw from
    if n <= 0 or polygon.is_empty:
        return []
    if max_attempts is None:
        max_attempts = 1000 * n

    min_x, min_y, max_x, max_y = polygon.bounds
    points = []
    for _ in range(max_attempts):
        candidate = Point(random.uniform(min_x, max_x), random.uniform(min_y, max_y))
        if polygon.contains(candidate):
            points.append(candidate)
            if len(points) == n:
                break
    return points

In [None]:
def obtain_metadata(location, radius, timeout=30):
    """Request the Street View metadata for a location and return the parsed JSON.

    Parameters
    ----------
    location : value sent as the ``location`` query parameter.
    radius : value sent as the ``radius`` query parameter.
    timeout : seconds to wait for the server before ``requests`` raises a
        timeout error; without it a stalled connection would block forever.

    Uses the notebook-level ``api_key`` and ``meta_base``.
    """
    meta_params = {
        'key': api_key,
        'location': location,
        'radius': radius}

    # obtain the metadata of the request (this is free)
    meta_response = requests.get(meta_base, params=meta_params, timeout=timeout)
    return meta_response.json()

In [None]:
def get_gsv_availability(row, radius, n):
    """Collect Street View metadata around ``n`` random points inside ``row.geometry``.

    Every sampled point is queried with ``obtain_metadata``; values found for a
    later point overwrite those of an earlier one.

    Parameters
    ----------
    row : object with a ``geometry`` attribute accepted by ``points_in_polygon``
    radius : search radius forwarded to ``obtain_metadata``
    n : int
        number of random points to query

    Returns
    -------
    tuple
        ``(suitability, status, year, month, pano_id, lat, lng)``.
        ``suitability`` is always ``False`` because this variant performs no
        suitability check; ``status`` is ``True`` once any response reported
        ``'OK'``; ``year`` and ``month`` stay ``None`` and ``pano_id``,
        ``lat`` and ``lng`` stay ``False`` when nothing was found.
    """
    # never changed below: it is returned only to keep the tuple aligned with
    # the gsv_* columns (the former `if suitability: break` could never fire)
    suitability = False
    status = False
    year = None
    month = None
    pano_id = False
    lat = False
    lng = False

    for point in points_in_polygon(row.geometry, n):
        # the location string is 'y,x' (latitude,longitude for geographic coordinates)
        meta = obtain_metadata('{},{}'.format(point.y, point.x), radius)

        # check status
        if meta['status'] != 'OK':
            continue
        status = True

        # find date and year
        if 'date' not in meta:
            continue
        # named date_parts so the imported datetime.date is not shadowed
        date_parts = meta['date'].split('-')
        year = date_parts[0]
        if len(date_parts) > 1:
            month = date_parts[1]

            # just store all this time
            # NOTE(review): panorama details are only stored when the date has a
            # month part, so a year-only date updates `year` but leaves the other
            # fields from an earlier point - confirm this is intended
            pano_id = meta['pano_id']
            lat = meta['location']['lat']
            lng = meta['location']['lng']

    return suitability, status, year, month, pano_id, lat, lng

In [None]:
if place_name == 'Barcelona':
    # column names for the tuple returned by get_gsv_availability, in the same order
    gsv_cols = ['gsv_suitability', 'gsv_status', 'gsv_year', 'gsv_month', 'gsv_pano_id', 'gsv_lat', 'gsv_lng']
    
    # GSV radius 50m default, we go with less, e.g., 15, 29 or 43
    gsv_radius = 15
    # look for GSV imagery around n sample points within each place
    n = 10

In [None]:
if place_name == 'Barcelona':

    # tally the sampled playspaces per ndvi quantile, plus the size of the whole sample
    playspace_rows = sampled[sampled.category=='playspace']
    playspace_counts = [
        len(playspace_rows[playspace_rows.ndvi_quantile==quantile_label])
        for quantile_label in ('lower', 'second', 'third', 'upper')]
    print('play/recreation grounds, sampled places:\n{} low + {} second + {} third + {} upper quantile ndvi\nTOTAL:{}'.format(
        *playspace_counts, len(sampled)))

In [None]:
# For Barcelona, our sample does not include enough playspaces.
# We manually add alternatives for the excluded playspace panoramas:
# we go through the playspaces whose current imagery was flagged unsuitable
# and dates from 2021 or 2022, and look for usable panoramas.
# NOTE(review): this comment originally asked for 14 more playspaces, while the
# cells below add 3 + 2 + 2 + 3 = 10 - confirm the intended number

if place_name == 'Barcelona':

    # both columns are compared against strings here
    flagged_unsuitable = alternative_candidates.gsv_suitability=='False'
    from_2021_or_2022 = alternative_candidates.gsv_year.isin(['2021', '2022'])
    alternative_candidates = alternative_candidates[flagged_unsuitable & from_2021_or_2022]

    # randomize order
    alternative_candidates = alternative_candidates.sample(frac=1, random_state=random_state)

    # add their gsv metadata and iframes
    gsv_lookup = alternative_candidates.to_crs(gsv_crs).apply(
        lambda row: get_gsv_availability(row, gsv_radius, n), axis=1, result_type="expand")
    alternative_candidates[gsv_cols] = gsv_lookup
    alternative_candidates['gsv_iframe_html'] = alternative_candidates.apply(
        compose_iframe_string, axis=1, result_type="expand")

In [None]:
# and check one by one if alternative GSV from earlier year summer season is available

if place_name == 'Barcelona':

    # we need THREE more for the LOWER ndvi quantile
    alternative_candidates_lower = alternative_candidates[alternative_candidates.ndvi_quantile=='lower']
    print(alternative_candidates_lower.index)

    # each pick is a copy of the candidate row with the manually found panorama filled in

    # index 40 -> success
    lower_1 = alternative_candidates_lower[alternative_candidates_lower.index==40].assign(
        gsv_year='2021', gsv_month='07', gsv_pano_id='1sEoaS1PBxkSQWW-jF9Noyug',
        gsv_lat='41.40317637487394', gsv_lng='2.170437365902761')
    # index 33 -> success
    lower_2 = alternative_candidates_lower[alternative_candidates_lower.index==33].assign(
        gsv_year='2019', gsv_month='05', gsv_pano_id='1squYpZ01-dcATT5xKJ6EO6g',
        gsv_lat='41.4142310498055', gsv_lng='2.105479602696434')
    # index 53 -> no GSV found
    # index 75 -> success
    lower_3 = alternative_candidates_lower[alternative_candidates_lower.index==75].assign(
        gsv_year='2019', gsv_month='06', gsv_pano_id='1s083YDKSCgNbG_WG8pwFZ5w',
        gsv_lat='41.41853160256081', gsv_lng='2.174154451476914')
    # THREE ALTERNATIVE FOUND

    # generate new iframes correspondingly
    manual_lower = pd.concat([lower_1, lower_2, lower_3])
    manual_lower['gsv_iframe_html'] = manual_lower.apply(compose_iframe_string, axis=1, result_type="expand")

    sampled = pd.concat([sampled, manual_lower])

In [None]:
# and check one by one if alternative GSV from earlier year summer season is available

if place_name == 'Barcelona':

    # we need TWO more for the SECOND ndvi quantile
    alternative_candidates_second = alternative_candidates[alternative_candidates.ndvi_quantile=='second']
    print(alternative_candidates_second.index)

    # each pick is a copy of the candidate row with the manually found panorama filled in

    # index 62 -> major construction work
    # index 65 -> obstructed, wall
    # index 87 -> success
    second_1 = alternative_candidates_second[alternative_candidates_second.index==87].assign(
        gsv_year='2018', gsv_month='05', gsv_pano_id='1sw6yCwvSJLbaXMeoquzdbPg',
        gsv_lat='41.36145686202342', gsv_lng='2.134887110925353')
    # index 70 -> no GSV found
    # index 29 -> success
    second_2 = alternative_candidates_second[alternative_candidates_second.index==29].assign(
        gsv_year='2019', gsv_month='08', gsv_pano_id='1sjscrWNNaZKnZ0p6E4wxyrQ',
        gsv_lat='41.41549059579653', gsv_lng='2.135170371713212')
    # TWO ALTERNATIVE FOUND

    # generate new iframes correspondingly
    manual_second = pd.concat([second_1, second_2])
    manual_second['gsv_iframe_html'] = manual_second.apply(compose_iframe_string, axis=1, result_type="expand")

    sampled = pd.concat([sampled, manual_second])

In [None]:
# and check one by one if alternative GSV from earlier year summer season is available

if place_name == 'Barcelona':

    # we need TWO more for the THIRD ndvi quantile
    alternative_candidates_third = alternative_candidates[alternative_candidates.ndvi_quantile=='third']
    print(alternative_candidates_third.index)

    # each pick is a copy of the candidate row with the manually found panorama filled in

    # index 82 -> success
    third_1 = alternative_candidates_third[alternative_candidates_third.index==82].assign(
        gsv_year='2019', gsv_month='05', gsv_pano_id='1sqcYSoxGnrlGxjCHPCCqY_g',
        gsv_lat='41.44482927642666', gsv_lng='2.178932791211979')
    # index 34 -> obstructed, wall
    # index 93 -> success
    third_2 = alternative_candidates_third[alternative_candidates_third.index==93].assign(
        gsv_year='2019', gsv_month='06', gsv_pano_id='1sZXmDjFlE2OEY5mAbxXijyA',
        gsv_lat='41.41490898416049', gsv_lng='2.202985605009758')
    # TWO ALTERNATIVE FOUND

    # generate new iframes correspondingly
    manual_third = pd.concat([third_1, third_2])
    manual_third['gsv_iframe_html'] = manual_third.apply(compose_iframe_string, axis=1, result_type="expand")

    sampled = pd.concat([sampled, manual_third])

In [None]:
# and check one by one if alternative GSV from earlier year summer season is available

if place_name == 'Barcelona':

    # we need THREE more for the UPPER ndvi quantile
    alternative_candidates_upper = alternative_candidates[alternative_candidates.ndvi_quantile=='upper']
    print(alternative_candidates_upper.index)

    # each pick is a copy of the candidate row with the manually found panorama filled in

    # index 74 -> success
    upper_1 = alternative_candidates_upper[alternative_candidates_upper.index==74].assign(
        gsv_year='2019', gsv_month='05', gsv_pano_id='1sGwIFhZIEfrZEUGxMcU95rA',
        gsv_lat='41.4304813430121', gsv_lng='2.164266744491995')
    # index 48 -> obstructed, wall
    # index 69 -> success
    upper_2 = alternative_candidates_upper[alternative_candidates_upper.index==69].assign(
        gsv_year='2019', gsv_month='05', gsv_pano_id='1s_ip2cXRhCjK8V_gSqh9ATg',
        gsv_lat='41.41488422719582', gsv_lng='2.21378095264952')
    # index 51 -> success
    upper_3 = alternative_candidates_upper[alternative_candidates_upper.index==51].assign(
        gsv_year='2019', gsv_month='05', gsv_pano_id='1srNQe8lkn3Y3Kg1xeL7EVxw',
        gsv_lat='41.38495650485547', gsv_lng='2.102653666794794')
    # THREE ALTERNATIVE FOUND

    # generate new iframes correspondingly
    manual_upper = pd.concat([upper_1, upper_2, upper_3])
    manual_upper['gsv_iframe_html'] = manual_upper.apply(compose_iframe_string, axis=1, result_type="expand")

    sampled = pd.concat([sampled, manual_upper])

In [None]:
if place_name == 'Barcelona':

    # tally the sampled playspaces per ndvi quantile after the manual additions
    playspace_rows = sampled[sampled.category=='playspace']
    playspace_counts = [
        len(playspace_rows[playspace_rows.ndvi_quantile==quantile_label])
        for quantile_label in ('lower', 'second', 'third', 'upper')]
    print('play/recreation grounds, sampled places:\n{} low + {} second + {} third + {} upper quantile ndvi\n'.format(
        *playspace_counts))

In [None]:
# set print_iframes to True to list every sampled place for a manual check:
# category, ndvi quantile, index label and pano id, followed by its iframe html
print_iframes = False
if print_iframes:
    for i in sampled.index:
        print(sampled.category[i], sampled.ndvi_quantile[i], i, sampled.gsv_pano_id[i])
        print(sampled.gsv_iframe_html[i])

In [None]:
# keep the playspace sample under its own name; `sampled` is reused for the streets below
sampled_playspaces = sampled.copy()

#### Street - low, medium, high NDVI

In [None]:
# work on a copy so that candidate_streets itself is not modified while sampling
candidates = candidate_streets.copy()

In [None]:
# set ndvi quantiles
ndvi_quantiles = candidates['ndvi_mean'].quantile([0, 0.25, 0.50, 0.75, 1])

# the three inner thresholds split the candidates into four half-open bands
first_quartile, median_ndvi, third_quartile = (ndvi_quantiles[q] for q in (0.25, 0.5, 0.75))
ndvi_values = candidates.ndvi_mean
quantile_bands = [
    ('lower', ndvi_values < first_quartile),
    ('second', (ndvi_values >= first_quartile) & (ndvi_values < median_ndvi)),
    ('third', (ndvi_values >= median_ndvi) & (ndvi_values < third_quartile)),
    ('upper', ndvi_values >= third_quartile)]

# rows that fall into no band (e.g. missing ndvi_mean) are left untouched
for quantile_label, in_band in quantile_bands:
    candidates.loc[in_band, 'ndvi_quantile'] = quantile_label

In [None]:
# sample places: n_sample streets from each ndvi quantile, lowest quantile first
print_iframes = False
sampled = pd.concat([
    take_sample(candidates, quantile_label, n_sample, print_iframes)
    for quantile_label in ('lower', 'second', 'third', 'upper')])

# omit sampled places from remaining candidates
# (keep=False drops every row whose pano id occurs more than once in the combined frame)
combined = pd.concat([candidates, sampled])
candidates = combined.drop_duplicates(subset=['gsv_pano_id'], keep=False)

In [None]:
# after manual check, we exclude the following panoramas
if place_name == 'Barcelona':
    pano_ids_to_exclude = [
        'r5CU6FVQCAk7MWsoCxR79w',
        'PlTz3UlqD2yjaFcxg4pn_w',
        'pt2JtNOqNWcGPqt6-Zq9fQ',
        'xIoUdbVCSwtHh0ilG5ZOkQ',
        'CAoSLEFGMVFpcE84eENGWDZBWXhud0pyUHloZlBPUXAyTkdUcF9RcUt4T1QybWhH',
        'hJqCu6adnCt48V8Ug7ueZw',
        'CAoSLEFGMVFpcE5QSEZXYnFBSDZYRWJXRlZodklxelQ2UVRpUTlGcXg2cTJxdFRr']
    
elif place_name == 'Goteborg':
    pano_ids_to_exclude = [
        '5PCDWBtaiQorATgouKTUsg',
        'H6ZlRfQFuSZ1DOzxlcglrQ',
        '8iqwXyFukYwATzdqem0H8A',
        'L-hsoR0HlMBiEuji3lWsqg',
        'CAoSLEFGMVFpcE1KS3pmeHpMQmJtWGpKa2ZORkZjcG4ybUtLaHFfMzQtdXY1c2xH',
        'pTsRsnXv4GD9z7oS1aO8yQ',
        'CAoSLEFGMVFpcFBXRExqeDBCUlFlaEUwUmJIRUVxb3c0Mm9GVjdwa2RiZmZGME9W']

elif place_name == 'Rotterdam':
    pano_ids_to_exclude = [
        'lIdRsIpDNnMOnEllA2Vaag']

else:
    # no manual check recorded for this city: exclude nothing, instead of failing
    # on an undefined name or silently reusing a list left over from an earlier cell
    pano_ids_to_exclude = []
    
# replace the excluded panoramas and keep the updated candidate pool
print_iframes = False
sampled, candidates = exclude_and_replace(sampled, pano_ids_to_exclude, candidates, print_iframes)

In [None]:
# after a second manual check, we exclude the following panoramas
if place_name == 'Barcelona':
    pano_ids_to_exclude = [
        'zYkvFyKoEVy2YXx0mDfbnA']
    
elif place_name == 'Goteborg':
    pano_ids_to_exclude = [
        'U6Ko9BSaPeTFtQimtX3SPA']

elif place_name == 'Rotterdam':
    pano_ids_to_exclude = []

else:
    # no manual check recorded for this city: exclude nothing, instead of
    # silently reusing the list of a previous exclusion round
    pano_ids_to_exclude = []
    
# replace the excluded panoramas and keep the updated candidate pool
print_iframes = False
sampled, candidates = exclude_and_replace(sampled, pano_ids_to_exclude, candidates, print_iframes)

In [None]:
# after a third manual check, we exclude the following panoramas
if place_name == 'Barcelona':
    pano_ids_to_exclude = [
        'YungAd1uF7VznCVySuU-lw']
    
elif place_name == 'Goteborg':
    pano_ids_to_exclude = [
        ]

elif place_name == 'Rotterdam':
    pano_ids_to_exclude = []

else:
    # no manual check recorded for this city: exclude nothing, instead of
    # silently reusing the list of a previous exclusion round
    pano_ids_to_exclude = []
    
# replace the excluded panoramas and keep the updated candidate pool
print_iframes = False
sampled, candidates = exclude_and_replace(sampled, pano_ids_to_exclude, candidates, print_iframes)

In [None]:
# set print_iframes to True to list every sampled street for a manual check:
# category, ndvi quantile, index label and pano id, followed by its iframe html
print_iframes = False
if print_iframes:
    for i in sampled.index:
        print(sampled.category[i], sampled.ndvi_quantile[i], i, sampled.gsv_pano_id[i])
        print(sampled.gsv_iframe_html[i])

In [None]:
# keep the street sample under its own name
sampled_streets = sampled.copy()

In [None]:
# store the candidate pool as returned by the exclusion rounds above
candidate_streets = candidates.copy()

#### Tunnels and highways, for reference

In [None]:
# manually chosen panoramas that are added to the sample for reference
if place_name == 'Rotterdam':
    pano_ids_for_reference = [
        '29edRoSEI7Yd7Zepsw6WNg',
        'lQ4vNiyAUaBECFKuOO57WA',
        'lIdRsIpDNnMOnEllA2Vaag']
elif place_name == 'Goteborg':
    pano_ids_for_reference = [
        'oU6BbhSf9b6d6dLZ3-VquA',
        'WyxryYTz8hSOkq0sp-UqZQ',
        'Xuo-8CxiNo8mhcD6dZk-oA']

elif place_name == 'Barcelona':
    pano_ids_for_reference = [
        'IBQwXcItjXij5rm3lrSkwQ',
        'H4-QZDuDYPN8iKFWBB1DCA',
        'tAhWS_V4o4BqZB_N6s3bJg']

else:
    # no reference panoramas chosen for this city: use an empty list so the
    # name is always defined when it is looked up in the next cell
    pano_ids_for_reference = []

In [None]:
# pick the reference panoramas out of all candidates by their pano id
for_reference = candidate_all[candidate_all.gsv_pano_id.isin(pano_ids_for_reference)].copy()

In [None]:
# build the iframe string of every reference place with compose_iframe_string
for_reference['gsv_iframe_html'] = for_reference.apply(lambda row: compose_iframe_string(row), axis=1, result_type="expand")

In [None]:
# reference places get their own category and no ndvi quantile
for_reference['category'] = 'for_reference'
for_reference['ndvi_quantile'] = None

In [None]:
# set print_iframes to True to list every reference place for a manual check:
# category, ndvi quantile, index label and pano id, followed by its iframe html
print_iframes = False
if print_iframes:
    for i in for_reference.index:
        print(for_reference.category[i], for_reference.ndvi_quantile[i], i, for_reference.gsv_pano_id[i])
        print(for_reference.gsv_iframe_html[i])

## Check for duplicates and overall count

In [None]:
# stack the samples of all categories (plus the reference places) into one frame
samples_by_category = [
    sampled_pocket_greenspaces,
    sampled_regular_greenspaces,
    sampled_squares,
    sampled_playspaces,
    sampled_streets,
    for_reference,
]
sampled = pd.concat(samples_by_category)

In [None]:
# print pano_ids that appear more than once
# (each repeated occurrence after the first is printed; a set keeps the
# membership test constant-time instead of scanning a growing list)
appearing_once = set()
for pano_id in sampled.gsv_pano_id:
    if pano_id in appearing_once:
        print(pano_id)
    else:
        appearing_once.add(pano_id)

In [None]:
if place_name == 'Barcelona':
    # no duplicates in Rotterdam and Goteborg
    print_iframes = False

    # for Barcelona pano_id CAoSLEFGMVFpcE0zQXNHZ3FDdy1DWnNXWi1LTFlWWlBvRnhPU3VwblJ4VkNQOW82 appears three times
    # replace two and keep one
    # see sampled[sampled.gsv_pano_id=='CAoSLEFGMVFpcE0zQXNHZ3FDdy1DWnNXWi1LTFlWWlBvRnhPU3VwblJ4VkNQOW82']
    # 1: regular_greenspace
    # the updated pool is stored back into candidate_regular_greenspaces (as done
    # for the squares below); it used to be assigned to `candidates`, so the second
    # regular-greenspace replacement further down drew from the stale pool
    # NOTE(review): this can change which replacement the second call draws - re-check the final sample
    sampled_regular_greenspaces, candidate_regular_greenspaces = exclude_and_replace(
        sampled_regular_greenspaces, ['CAoSLEFGMVFpcE0zQXNHZ3FDdy1DWnNXWi1LTFlWWlBvRnhPU3VwblJ4VkNQOW82'], candidate_regular_greenspaces, print_iframes=print_iframes)
    # 2: square
    sampled_squares, candidate_squares = exclude_and_replace(
        sampled_squares, ['CAoSLEFGMVFpcE0zQXNHZ3FDdy1DWnNXWi1LTFlWWlBvRnhPU3VwblJ4VkNQOW82'], candidate_squares, print_iframes=print_iframes)

    # for Barcelona, pano_id CAoSLEFGMVFpcE43UHRiQUExQXMzRzVoNG1XbzRueUdJVVprWmM0aGN0OTlXeU9q appears two times
    # replace one and keep one
    # see sampled[sampled.gsv_pano_id=='CAoSLEFGMVFpcE43UHRiQUExQXMzRzVoNG1XbzRueUdJVVprWmM0aGN0OTlXeU9q']
    sampled_regular_greenspaces, candidate_regular_greenspaces = exclude_and_replace(
        sampled_regular_greenspaces, ['CAoSLEFGMVFpcE43UHRiQUExQXMzRzVoNG1XbzRueUdJVVprWmM0aGN0OTlXeU9q'], candidate_regular_greenspaces, print_iframes=print_iframes)
    

In [None]:
if place_name == 'Barcelona':
    # one of these does not pass our manual check, sample again
    # (print_iframes is True here and is passed on to exclude_and_replace)
    print_iframes = True
    sampled_squares, candidate_squares = exclude_and_replace(
        sampled_squares, ['M7GXUvAVpKJt4V7KsEqZwg'], candidate_squares, print_iframes=print_iframes)

In [None]:
# rebuild the combined frame now that the duplicates have been replaced
samples_by_category = [
    sampled_pocket_greenspaces,
    sampled_regular_greenspaces,
    sampled_squares,
    sampled_playspaces,
    sampled_streets,
    for_reference,
]
sampled = pd.concat(samples_by_category)

In [None]:
# print pano_ids that appear more than once
# (each repeated occurrence after the first is printed; a set keeps the
# membership test constant-time instead of scanning a growing list)
appearing_once = set()
for pano_id in sampled.gsv_pano_id:
    if pano_id in appearing_once:
        print(pano_id)
    else:
        appearing_once.add(pano_id)

## Eventually, we decided to go with 7 places each, not 8

In [None]:
# number of places drawn per ndvi quantile in the cells below
n_sample = 7

In [None]:
# draw n_sample pocket greenspaces from each ndvi quantile, lowest quantile first
sampled_pocket_greenspaces = pd.concat([
    sampled_pocket_greenspaces[sampled_pocket_greenspaces.ndvi_quantile==quantile_label].sample(
        n=n_sample, random_state=random_state)
    for quantile_label in ('lower', 'second', 'third', 'upper')
])

In [None]:
# draw n_sample regular greenspaces from each ndvi quantile, lowest quantile first
sampled_regular_greenspaces = pd.concat([
    sampled_regular_greenspaces[sampled_regular_greenspaces.ndvi_quantile==quantile_label].sample(
        n=n_sample, random_state=random_state)
    for quantile_label in ('lower', 'second', 'third', 'upper')
])

In [None]:
# draw n_sample squares from each ndvi quantile, lowest quantile first
sampled_squares = pd.concat([
    sampled_squares[sampled_squares.ndvi_quantile==quantile_label].sample(
        n=n_sample, random_state=random_state)
    for quantile_label in ('lower', 'second', 'third', 'upper')
])

In [None]:
# draw n_sample playspaces from each ndvi quantile, lowest quantile first
sampled_playspaces = pd.concat([
    sampled_playspaces[sampled_playspaces.ndvi_quantile==quantile_label].sample(
        n=n_sample, random_state=random_state)
    for quantile_label in ('lower', 'second', 'third', 'upper')
])

In [None]:
# draw n_sample streets from each ndvi quantile, lowest quantile first
sampled_streets = pd.concat([
    sampled_streets[sampled_streets.ndvi_quantile==quantile_label].sample(
        n=n_sample, random_state=random_state)
    for quantile_label in ('lower', 'second', 'third', 'upper')
])

In [None]:
# stack the reduced samples of all categories (plus the reference places) ...
samples_by_category = [
    sampled_pocket_greenspaces,
    sampled_regular_greenspaces,
    sampled_squares,
    sampled_playspaces,
    sampled_streets,
    for_reference,
]

# ... and move the old index labels into a column, numbering the rows from 0
sampled = pd.concat(samples_by_category).reset_index()

In [None]:
# report how many places ended up in each category and ndvi quantile
quantile_order = ('lower', 'second', 'third', 'upper')
reported_categories = [
    ('pocket greenspaces', 'pocket_greenspace'),
    ('regular greenspaces', 'regular_greenspace'),
    ('squares', 'square'),
    ('play/recreation grounds', 'playspace'),
    ('streets', 'street')]

for heading, category_name in reported_categories:
    in_category = sampled[sampled.category==category_name]
    counts = [
        len(in_category[in_category.ndvi_quantile==quantile_label])
        for quantile_label in quantile_order]
    print('{}, sampled places:\n{} low + {} second + {} third + {} upper quantile ndvi\n'.format(
        heading, *counts))

# the 'parkings' line counts the rows of category 'for_reference'
print('parkings, sampled places:\n{}\n'.format(
    len(sampled[sampled.category=='for_reference'])))
print('TOTAL SAMPLED: {}'.format(len(sampled)))

In [None]:
# set print_iframes to True to list every place of the final sample:
# category, ndvi quantile, index label and pano id, followed by its iframe html
print_iframes = False
if print_iframes:
    for i in sampled.index:
        print(sampled.category[i], sampled.ndvi_quantile[i], i, sampled.gsv_pano_id[i])
        print(sampled.gsv_iframe_html[i])

## Plot

In [None]:
fig, axs = plt.subplots(nrows=1, ncols=1, figsize=(12,12))

# colour per ndvi quantile; the second and third quantile share the middle tone
quantile_colours = [
    ('lower', '#b2e2e2'),
    ('second', '#66c2a4'),
    ('third', '#66c2a4'),
    ('upper', '#2ca25f')]

# categories drawn as a marker at their centroid: (category, marker, markersize)
centroid_markers = [
    ('pocket_greenspace', 's', 20),
    ('regular_greenspace', 's', 40),
    ('square', 'o', 40),
    ('playspace', '*', 40)]

for category_name, symbol, symbol_size in centroid_markers:
    for quantile_label, colour in quantile_colours:
        subset = sampled[(sampled.category==category_name) & (sampled.ndvi_quantile==quantile_label)]
        subset.centroid.plot(ax=axs, color=colour, marker=symbol, markersize=symbol_size)

# streets are drawn with their own geometry instead of a centroid marker
for quantile_label, colour in quantile_colours:
    street_subset = sampled[(sampled.category=='street') & (sampled.ndvi_quantile==quantile_label)]
    street_subset.plot(ax=axs, color=colour, linewidth=3)

# reference places are marked with a 'P'
sampled[sampled.category=='for_reference'].centroid.plot(ax=axs, color='#2ca25f', marker='$P$', markersize=40)

# city outline and a faint basemap underneath
place.plot(ax=axs, facecolor='none', edgecolor='#dd1c77', linestyle='-', linewidth=5)
cx.add_basemap(ax=axs, crs=place.crs, source=cx.providers.OpenStreetMap.Mapnik, alpha=0.2, zorder=0)

# zoom to the extent of the sample plus a margin (in units of the layer's crs)
bbox = sampled.total_bounds
margin = 500
xlim = [bbox[0]-margin, bbox[2]+margin]
ylim = [bbox[1]-margin, bbox[3]+margin]
axs.set_xlim(xlim)
axs.set_ylim(ylim)
plt.axis('off')

plt.show()

## Randomize order of sampled places

In [None]:
# randomize order: frac=1 returns all rows shuffled, random_state keeps the order reproducible
sampled_randomorder = sampled.sample(frac=1, random_state=random_state)

## Export output of sampled locations with iframes

In [None]:
# target folder for the sampled output: <export_folder>/<export_sampled_sub_folder>
export_sub_folder = os.path.join(export_folder, export_sampled_sub_folder)

In [None]:
# create the export folder including any missing parent folders; the path is
# nested, and os.mkdir fails when an intermediate folder does not exist yet
os.makedirs(export_sub_folder, exist_ok=True)

In [None]:
# write the iframe strings of the shuffled sample back to back into one text file
output_file = os.path.join(export_sub_folder, 'city_perc_locations_{}.txt'.format(place_name))
with open(output_file, "w") as iframe_file:
    iframe_file.writelines(sampled_randomorder['gsv_iframe_html'])

In [None]:
# export the shuffled sample as GeoJSON; every column except the geometry
# is converted to str before writing
output_file = os.path.join(export_sub_folder, 'sampled.geojson')
sampled_export = sampled_randomorder.apply(lambda c: c.astype(str) if c.name != "geometry" else c, axis=0)
sampled_export.to_file(output_file, driver='GeoJSON')