# Step 7. Quantitative analysis of results

In [1]:
import os
import datetime

import numpy as np
import scipy
import fiona
import statistics
import math
import itertools

import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString, shape, mapping, Point, Polygon, MultiPolygon
from shapely.ops import cascaded_union, transform
import pyproj

import matplotlib.pyplot as plt
from matplotlib import colors, cm, style
import matplotlib.patches as mpatches
# from descartes import PolygonPatch
from matplotlib.colors import LinearSegmentedColormap
from matplotlib_venn import venn2, venn3, venn2_circles, venn2_unweighted

import osmnx as ox
import networkx as nx

import rasterio
from rasterio import MemoryFile
from rasterio.plot import show
from rasterio.mask import mask
import json

import contextily as cx
import folium
from folium.features import DivIcon

import random

In [2]:
from getpass import getpass

import requests
from requests import Request, Session

import hashlib
import hmac
import base64
import urllib.parse as urlparse

from datetime import date
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

In [3]:
from scipy import stats
import seaborn as sns
from tabulate import tabulate

## Define city and other settings

In [4]:
place_names = ['Barcelona', 'Rotterdam', 'Goteborg']

In [5]:
radius = 300

In [6]:
print_and_plot = False

In [7]:
print_qualitative_locations = False

In [8]:
print_qualitative_perceptions = False

In [9]:
print_qualitative_html = False

In [10]:
confidential_folder = os.path.expanduser('~/confidential_folder')
confidential_folder = os.path.expanduser('/Users/roosteeuwen/surfdrive/User studies/Crowd-sourcing/City perception')

In [11]:
# set random seed for generating random numbers
# and for sampling rows from geodataframes
random_state = 42
random.seed(random_state)

## Read and preprocess data

In [12]:
activity_perceptions = gpd.GeoDataFrame()
greenness_perceptions = gpd.GeoDataFrame()
activity_perception_points = gpd.GeoDataFrame()
greenness_perception_points = gpd.GeoDataFrame()

In [13]:
for place_name in place_names:

    activity_perceptions_place = gpd.read_file(os.path.join(confidential_folder, 'preprocessed', 'radius_{}'.format(radius), 'activity_perceptions_{}.geojson'.format(place_name)))
    greenness_perceptions_place = gpd.read_file(os.path.join(confidential_folder, 'preprocessed', 'radius_{}'.format(radius), 'greenness_perceptions_{}.geojson'.format(place_name)))
    activity_perception_points_place = gpd.read_file(os.path.join(confidential_folder, 'preprocessed', 'radius_{}'.format(radius), 'activity_perception_points_{}.geojson'.format(place_name)))
    greenness_perception_points_place = gpd.read_file(os.path.join(confidential_folder, 'preprocessed', 'radius_{}'.format(radius), 'greenness_perception_points_{}.geojson'.format(place_name)))

    activity_perceptions_place['place_name'] = place_name
    greenness_perceptions_place['place_name'] = place_name
    activity_perception_points_place['place_name'] = place_name
    greenness_perception_points_place['place_name'] = place_name

    activity_perceptions = pd.concat([activity_perceptions, activity_perceptions_place])#.reset_index(drop=True)
    greenness_perceptions = pd.concat([greenness_perceptions, greenness_perceptions_place])#.reset_index(drop=True)
    activity_perception_points = pd.concat([activity_perception_points, activity_perception_points_place])#.reset_index(drop=True)
    greenness_perception_points = pd.concat([greenness_perception_points, greenness_perception_points_place])#.reset_index(drop=True)

In [14]:
activity_perceptions = activity_perceptions[activity_perceptions.place_category!='for_reference']
greenness_perceptions = greenness_perceptions[greenness_perceptions.place_category!='for_reference']
activity_perception_points = activity_perception_points[activity_perception_points.place_category!='for_reference']
greenness_perception_points = greenness_perception_points[greenness_perception_points.place_category!='for_reference']

In [15]:
num_fields = ['clicks', 'physical_rating_num', 'social_rating_num', 'relax_rating_num', 'commute_rating_num', 'children_rating_num']
for nf in num_fields:
    activity_perceptions[nf] = activity_perceptions[nf].astype(float)
    activity_perceptions[nf] = activity_perceptions[nf].astype(int)
# activity_perceptions['ndvi_median'] = activity_perceptions['ndvi_median'].astype(float)
# activity_perceptions['ndvi_max'] = activity_perceptions['ndvi_max'].astype(float)
activity_perceptions.replace({
    'near_regular_greenspace': {'True': True, 'False': False}, 
    'near_pocket_greenspace': {'True': True, 'False': False}, 
    'near_square': {'True': True, 'False': False}, 
    'near_playspace': {'True': True, 'False': False}, 
    'near_street': {'True': True, 'False': False}}, 
    inplace=True)

In [16]:
num_fields = ['clicks', 'greenness_rating_num']
for nf in num_fields:
    greenness_perceptions[nf] = greenness_perceptions[nf].astype(float)
    greenness_perceptions[nf] = greenness_perceptions[nf].astype(int)
# greenness_perceptions['ndvi_median'] = greenness_perceptions['ndvi_median'].astype(float)
# greenness_perceptions['ndvi_max'] = greenness_perceptions['ndvi_max'].astype(float)
greenness_perceptions.replace({
    'near_regular_greenspace': {'True': True, 'False': False}, 
    'near_pocket_greenspace': {'True': True, 'False': False}, 
    'near_square': {'True': True, 'False': False}, 
    'near_playspace': {'True': True, 'False': False}, 
    'near_street': {'True': True, 'False': False}}, 
    inplace=True)

In [17]:
num_fields = ['clicks', 'physical_rating_num', 'social_rating_num', 'relax_rating_num', 'commute_rating_num', 'children_rating_num']
for nf in num_fields:
    activity_perception_points[nf] = activity_perception_points[nf].astype(float)
    activity_perception_points[nf] = activity_perception_points[nf].astype(int)
activity_perception_points['ndvi_median'] = activity_perception_points['ndvi_median'].astype(float)
activity_perception_points['ndvi_max'] = activity_perception_points['ndvi_max'].astype(float)
activity_perception_points.replace({
    'near_regular_greenspace': {'True': True, 'False': False}, 
    'near_pocket_greenspace': {'True': True, 'False': False}, 
    'near_square': {'True': True, 'False': False}, 
    'near_playspace': {'True': True, 'False': False}, 
    'near_street': {'True': True, 'False': False}}, 
    inplace=True)

In [18]:
num_fields = ['clicks', 'greenness_rating_num']
for nf in num_fields:
    greenness_perception_points[nf] = greenness_perception_points[nf].astype(float)
    greenness_perception_points[nf] = greenness_perception_points[nf].astype(int)
greenness_perception_points['ndvi_median'] = greenness_perception_points['ndvi_median'].astype(float)
greenness_perception_points['ndvi_max'] = greenness_perception_points['ndvi_max'].astype(float)
greenness_perception_points.replace({
    'near_regular_greenspace': {'True': True, 'False': False}, 
    'near_pocket_greenspace': {'True': True, 'False': False}, 
    'near_square': {'True': True, 'False': False}, 
    'near_playspace': {'True': True, 'False': False}, 
    'near_street': {'True': True, 'False': False}}, 
    inplace=True)

## Aggregate per place

In [19]:
def perceptions_per_place(gdf, rating_cols):
    
    gdf = gdf.copy()
    
    cols = [
        'gsv_pano_id', 'iframe', 'ndvi_median', 'place_name',
        'near_regular_greenspace', 'near_pocket_greenspace', 'near_square', 'near_playspace', 'near_street', 
        'geometry']
    cols = cols + rating_cols
    if 'ndvi_max' in gdf.columns.to_list():
        cols = cols + ['ndvi_max']
    
    for rating_col in rating_cols:
        gdf[rating_col] = gdf[rating_col].astype(float)
        
    gdf['near_regular_greenspace'] = gdf['near_regular_greenspace'].astype(float)
    gdf['near_pocket_greenspace'] = gdf['near_pocket_greenspace'].astype(bool)
    gdf['near_square'] = gdf['near_square'].astype(bool)
    gdf['near_playspace'] = gdf['near_playspace'].astype(bool)
    gdf['near_street'] = gdf['near_street'].astype(bool)
            
    gdf_perplace = gdf[cols].groupby(['gsv_pano_id']).median()
        
    gdf_perplace['near_regular_greenspace'] = gdf_perplace['near_regular_greenspace'].astype(bool)
    gdf_perplace['near_pocket_greenspace'] = gdf_perplace['near_pocket_greenspace'].astype(bool)
    gdf_perplace['near_square'] = gdf_perplace['near_square'].astype(bool)
    gdf_perplace['near_playspace'] = gdf_perplace['near_playspace'].astype(bool)
    gdf_perplace['near_street'] = gdf_perplace['near_street'].astype(bool)
        
    gdf_perplace = gdf_perplace.merge(gdf[['gsv_pano_id', 'place_name', 'geometry']], how='left', left_on='gsv_pano_id', right_on='gsv_pano_id')
    gdf_perplace.drop_duplicates(subset=['gsv_pano_id'], keep='first', inplace=True)
        
    return gdf_perplace

In [20]:
activity_perceptions_perplace = perceptions_per_place(activity_perceptions, ['physical_rating_num', 'social_rating_num', 'relax_rating_num', 'commute_rating_num', 'children_rating_num'])

In [21]:
greenness_perceptions_perplace = perceptions_per_place(greenness_perceptions, ['greenness_rating_num'])

In [22]:
activity_perception_points_perplace = perceptions_per_place(activity_perception_points, ['physical_rating_num', 'social_rating_num', 'relax_rating_num', 'commute_rating_num', 'children_rating_num'])

In [23]:
greenness_perception_points_perplace = perceptions_per_place(greenness_perception_points, ['greenness_rating_num'])

In [24]:
# places with 5 or more ratings
gsv_pano_ids_5plusratings = pd.Series(greenness_perceptions.gsv_pano_id.value_counts()>=5)
greenness_perception_points_perplace_5plusratings = greenness_perception_points_perplace[greenness_perception_points_perplace.gsv_pano_id.isin(gsv_pano_ids_5plusratings[gsv_pano_ids_5plusratings].index)]

In [25]:
if print_and_plot:
    gdf = greenness_perception_points_perplace
    print('n={}'.format(len(gdf)))
    print('n greenness perceptions={}'.format(len(greenness_perception_points)))
    print('n activity perceptions={}'.format(len(activity_perception_points)))
    print('ndvi median {}; min {}; max {}'.format(round(gdf.ndvi_median.median(), 3), round(gdf.ndvi_median.min(), 3), round(gdf.ndvi_median.max(), 3)))
    print('ndvi max {}; min {}; max {}'.format(round(gdf.ndvi_max.median(), 3), round(gdf.ndvi_max.min(), 3), round(gdf.ndvi_max.max(), 3)))
    print('regular gs, n={}'.format(len(gdf[gdf.near_regular_greenspace])))
    print('pocket gs, n={}'.format(len(gdf[gdf.near_pocket_greenspace])))
    print('square, n={}'.format(len(gdf[gdf.near_square])))
    print('playspace, n={}'.format(len(gdf[gdf.near_playspace])))
    print('street, n={}'.format(len(gdf[gdf.near_street])))

In [26]:
if print_and_plot:
    for place_name in place_names:
        gdf = greenness_perception_points_perplace[greenness_perception_points_perplace.place_name==place_name]
        gdf_green_perc = greenness_perception_points[greenness_perception_points.place_name==place_name]
        gdf_act_perc = activity_perception_points[activity_perception_points.place_name==place_name]
        print(place_name)
        print('n={}'.format(len(gdf)))
        print('n greenness perceptions={}'.format(len(gdf_green_perc)))
        print('n activity perceptions={}'.format(len(gdf_act_perc)))
        print('ndvi median {}; min {}; max {}'.format(round(gdf.ndvi_median.median(), 3), round(gdf.ndvi_median.min(), 3), round(gdf.ndvi_median.max(), 3)))
        print('ndvi max {}; min {}; max {}'.format(round(gdf.ndvi_max.median(), 3), round(gdf.ndvi_max.min(), 3), round(gdf.ndvi_max.max(), 3)))
        print('regular gs, n={}'.format(len(gdf[gdf.near_regular_greenspace])))
        print('pocket gs, n={}'.format(len(gdf[gdf.near_pocket_greenspace])))
        print('square, n={}'.format(len(gdf[gdf.near_square])))
        print('playspace, n={}'.format(len(gdf[gdf.near_playspace])))
        print('street, n={}'.format(len(gdf[gdf.near_street])))
        print('')

# Plot

In [27]:
mycmap = LinearSegmentedColormap.from_list('mycmap', [(0, '#d01c8b'), (0.5, '#fdb863'), (1, '#4dac26')])

In [28]:
if print_and_plot:
    fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(30,13))
    fig.suptitle('Sampled locations and their median greenness rating', fontsize=40, y=1)

    greenness_perception_points_perplace[greenness_perception_points_perplace.place_name=='Barcelona'].set_geometry(greenness_perception_points_perplace.geometry.centroid).plot(
        ax=axs[0], column='greenness_rating_num', cmap=mycmap, categorical=True, markersize=100)
    cx.add_basemap(ax=axs[0], crs='epsg:25830', source=cx.providers.OpenStreetMap.Mapnik, alpha=0.5, zorder=0)
    axs[0].set_title('Barcelona', fontsize=25)

    greenness_perception_points_perplace[greenness_perception_points_perplace.place_name=='Rotterdam'].set_geometry(greenness_perception_points_perplace.geometry.centroid).plot(
        ax=axs[1], column='greenness_rating_num', cmap=mycmap, categorical=True, markersize=100)
    cx.add_basemap(ax=axs[1], crs='epsg:28992', source=cx.providers.OpenStreetMap.Mapnik, alpha=0.5, zorder=0)
    axs[1].set_title('Rotterdam', fontsize=25)

    greenness_perception_points_perplace[greenness_perception_points_perplace.place_name=='Goteborg'].set_geometry(greenness_perception_points_perplace.geometry.centroid).plot(
        ax=axs[2], column='greenness_rating_num', cmap=mycmap, categorical=True, markersize=100)
    cx.add_basemap(ax=axs[2], crs='epsg:3006', source=cx.providers.OpenStreetMap.Mapnik, alpha=0.5, zorder=0)
    axs[2].set_title('Gothenburg', fontsize=25)

    # bbox = place.total_bounds
    # margin = 1000
    # xlim = ([bbox[0]-margin, bbox[2]+margin])
    # ylim = ([bbox[1]-margin, bbox[3]+margin])
    # axs.set_xlim(xlim)
    # axs.set_ylim(ylim)

    notatall = mpatches.Patch(color=mycmap(0.0), label='Not at all (0)')
    alittle = mpatches.Patch(color=mycmap(0.25), label='A little (1)')
    neutral = mpatches.Patch(color=mycmap(0.5), label='Neutral (2)')
    fairly = mpatches.Patch(color=mycmap(0.75), label='Fairly (3)')
    very = mpatches.Patch(color=mycmap(1.0), label='Very (4)')

    for ax in axs:
        ax.axis('off')
        ax.legend(handles=[very, fairly, neutral, alittle, notatall], fontsize=15)
        ax.set_anchor('N')

    plt.show()

## H1: Correlation NDVI and perceived greenness

In [29]:
if print_and_plot:  
    
    fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(12,4), sharex=True, sharey=True)
    
    
    x = greenness_perception_points.greenness_rating_num
    y = greenness_perception_points.ndvi_median
    sns.regplot(ax=axs[0], x=x, y=y, scatter_kws={'alpha':0.02})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[0].set_title('All perceptions\nGreenness rating & median NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(greenness_perception_points)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[0].set_title('All perceptions\nGreenness rating & median NDVI at point\nInsignificant')
        
        
    x = greenness_perception_points_perplace.greenness_rating_num
    y = greenness_perception_points_perplace.ndvi_median
    sns.regplot(ax=axs[1], x=x, y=y, scatter_kws={'alpha':0.1})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[1].set_title('Median perception per place\nGreenness rating & median NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(greenness_perception_points_perplace)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[1].set_title('Median perception per place\nGreenness rating & median NDVI at point\nInsignificant')
        
    
    x = greenness_perception_points_perplace_5plusratings.greenness_rating_num
    y = greenness_perception_points_perplace_5plusratings.ndvi_median
    sns.regplot(ax=axs[2], x=x, y=y, scatter_kws={'alpha':0.1})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[2].set_title('Median perception per place (rated >4x)\nGreenness rating & median NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(greenness_perception_points_perplace_5plusratings)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[2].set_title('Median perception per place (rated >4x)\nGreenness rating & median NDVI at point\nInsignificant')    
        
    plt.show()

In [30]:
# per city
if print_and_plot:
    for place_name in place_names:
        x = greenness_perception_points_perplace[greenness_perception_points_perplace.place_name==place_name].greenness_rating_num
        y = greenness_perception_points_perplace[greenness_perception_points_perplace.place_name==place_name].ndvi_median
        correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
        print('{} - correlation greenness and NDVI (median per place): {} (pvalue: {})'.format(place_name, round(correlation, 3), round(pvalue, 3)))

In [31]:
if print_and_plot:  
    
    fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(12,4), sharex=True, sharey=True)
    
    
    x = greenness_perception_points.greenness_rating_num
    y = greenness_perception_points.ndvi_max
    sns.regplot(ax=axs[0], x=x, y=y, scatter_kws={'alpha':0.02})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[0].set_title('All perceptions\nGreenness rating & max NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(greenness_perception_points)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[0].set_title('All perceptions\nGreenness rating & max NDVI at point\nInsignificant')
        
        
    x = greenness_perception_points_perplace.greenness_rating_num
    y = greenness_perception_points_perplace.ndvi_max
    sns.regplot(ax=axs[1], x=x, y=y, scatter_kws={'alpha':0.1})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[1].set_title('Median perception per place\nGreenness rating & max NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(greenness_perception_points_perplace)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[1].set_title('Median perception per place\nGreenness rating & max NDVI at point\nInsignificant')
        
    
    x = greenness_perception_points_perplace_5plusratings.greenness_rating_num
    y = greenness_perception_points_perplace_5plusratings.ndvi_max
    sns.regplot(ax=axs[2], x=x, y=y, scatter_kws={'alpha':0.1})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[2].set_title('Median perception per place (rated >4x)\nGreenness rating & max NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(greenness_perception_points_perplace_5plusratings)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[2].set_title('Median perception per place (rated >4x)\nGreenness rating & max NDVI at point\nInsignificant')    
        
    plt.show()

In [32]:
# per city
if print_and_plot:
    for place_name in place_names:
        x = greenness_perception_points_perplace[greenness_perception_points_perplace.place_name==place_name].greenness_rating_num
        y = greenness_perception_points_perplace[greenness_perception_points_perplace.place_name==place_name].ndvi_max
        correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
        print('{} - correlation greenness and NDVI (max per place): {} (pvalue: {})'.format(place_name, round(correlation, 3), round(pvalue, 3)))

### Cases for further qualitative analysis

In [33]:
ndvi_quantiles = greenness_perception_points_perplace.ndvi_median.quantile([0.0, 0.25, 0.5, 0.75, 1.0])
if print_and_plot:
    print('Overall NDVI quantiles')
    print(ndvi_quantiles)

In [34]:
# NDVI values normally distributed?
if print_and_plot:
    for place_name in place_names:
        print('Kolmogorov-Smirnov test, NDVI, all data: ', stats.kstest(greenness_perception_points_perplace.ndvi_median, 'norm'))
        print('Kolmogorov-Smirnov test, NDVI, {}: '.format(place_name), stats.kstest(greenness_perception_points_perplace[greenness_perception_points_perplace.place_name==place_name].ndvi_median, 'norm'))
    # -> no, differing from normal distribution

In [35]:
# Are NDVI distributions comparable for Rotterdam and Barcelona?
if print_and_plot:
    if 'Barcelona' in place_names and 'Rotterdam' in place_names:
        statistic, pvalue = stats.mannwhitneyu(
            greenness_perception_points_perplace[greenness_perception_points_perplace.place_name=='Barcelona'].ndvi_median,
            greenness_perception_points_perplace[greenness_perception_points_perplace.place_name=='Rotterdam'].ndvi_median)
        print('Barcelona vs. Rotterdam: {} (pvalue {})'.format(round(statistic, 3), round(pvalue, 3)))
        # -> no, they differ from each other
    if 'Barcelona' in place_names and 'Goteborg' in place_names:
        statistic, pvalue = stats.mannwhitneyu(
            greenness_perception_points_perplace[greenness_perception_points_perplace.place_name=='Barcelona'].ndvi_median,
            greenness_perception_points_perplace[greenness_perception_points_perplace.place_name=='Goteborg'].ndvi_median)
        print('Barcelona vs. Goteborg: {} (pvalue {})'.format(round(statistic, 3), round(pvalue, 3)))
        # -> yes, they are comparable
    if 'Goteborg' in place_names and 'Rotterdam' in place_names:
        statistic, pvalue = stats.mannwhitneyu(
            greenness_perception_points_perplace[greenness_perception_points_perplace.place_name=='Goteborg'].ndvi_median,
            greenness_perception_points_perplace[greenness_perception_points_perplace.place_name=='Rotterdam'].ndvi_median)
        print('Goteborg vs. Rotterdam: {} (pvalue {})'.format(round(statistic, 3), round(pvalue, 3)))
        # -> no, they differ from each other

In [36]:
# identify interesting cases per city separately, as NDVI quantiles differ

#### Barcelona

In [37]:
place_name = 'Barcelona'
ndvi_quantiles_bcn = greenness_perception_points_perplace[greenness_perception_points_perplace.place_name==place_name].ndvi_median.quantile([0.0, 0.25, 0.5, 0.75, 1.0])

In [38]:
if print_and_plot:
    print('{} NDVI quantiles'.format(place_name))
    print(ndvi_quantiles_bcn)

In [39]:
# perceived as green (3-4), but lower quartile NDVI
h1_perceived_greener_than_ndvi_ids_bcn = greenness_perception_points_perplace[
    (greenness_perception_points_perplace.place_name==place_name) &
    (greenness_perception_points_perplace.greenness_rating_num>=3.0) & 
    (greenness_perception_points_perplace.ndvi_median<ndvi_quantiles_bcn[0.25])].gsv_pano_id

if print_and_plot:
    if print_qualitative_locations:
    
        cases = h1_perceived_greener_than_ndvi_ids_bcn
        print('{} places in {} perceived as green, while lower NDVI quartile:\n'.format(len(cases), place_name))

        for gsv_pano_id in cases:
            # print the pano id, iframe, and ratings and reasons people gave
            data = greenness_perception_points[greenness_perception_points.gsv_pano_id==gsv_pano_id].reset_index(drop=True)
            if print_qualitative_html:
                print(data.iframe.iloc[0])
            print(gsv_pano_id)
            if print_qualitative_html:
                    print('<br />')
            if print_qualitative_perceptions:
                for i in data.index:
                    print('{} ({}) = {}'.format(
                        data.greenness_rating[i],
                        data.greenness_rating_num[i],
                        data.greenness_reason[i].replace('\n', '')))
                    if print_qualitative_html:
                        print('<br />')
            if print_qualitative_html:
                print('<br />-----<br />')
            else:
                print('')

In [40]:
# perceived as not-green (0-1), but upper quartile NDVI
h1_perceived_lessgreen_than_ndvi_ids_bcn = greenness_perception_points_perplace[
    (greenness_perception_points_perplace.place_name==place_name) &
    (greenness_perception_points_perplace.greenness_rating_num<=1.0) & 
    (greenness_perception_points_perplace.ndvi_median>ndvi_quantiles_bcn[0.75])].gsv_pano_id

if print_and_plot:
    if print_qualitative_locations:
        cases = h1_perceived_lessgreen_than_ndvi_ids_bcn
        print('{} places in {} perceived as not-green, while upper NDVI quartile:\n'.format(len(cases), place_name))

        for gsv_pano_id in cases:
            # print the pano id, iframe, and ratings and reasons people gave
            data = greenness_perception_points[greenness_perception_points.gsv_pano_id==gsv_pano_id].reset_index(drop=True)
            if print_qualitative_html:
                print(data.iframe.iloc[0])
            print(gsv_pano_id)
            if print_qualitative_html:
                    print('<br />')
            if print_qualitative_perceptions:
                for i in data.index:
                    print('{} ({}) = {}'.format(
                        data.greenness_rating[i],
                        data.greenness_rating_num[i],
                        data.greenness_reason[i].replace('\n', '')))
                    if print_qualitative_html:
                        print('<br />')
            if print_qualitative_html:
                print('<br />-----<br />')
            else:
                print('')

#### Rotterdam

In [41]:
place_name = 'Rotterdam'
ndvi_quantiles_rtd = greenness_perception_points_perplace[greenness_perception_points_perplace.place_name==place_name].ndvi_median.quantile([0.0, 0.25, 0.5, 0.75, 1.0])

In [42]:
if print_and_plot:
    print('{} NDVI quantiles'.format(place_name))
    print(ndvi_quantiles_rtd)

In [43]:
# perceived as green (3-4), but lower quartile NDVI
h1_perceived_greener_than_ndvi_ids_rtd = greenness_perception_points_perplace[
    (greenness_perception_points_perplace.place_name==place_name) &
    (greenness_perception_points_perplace.greenness_rating_num>=3.0) & 
    (greenness_perception_points_perplace.ndvi_median<ndvi_quantiles_rtd[0.25])].gsv_pano_id

if print_and_plot:
    if print_qualitative_locations:
        
        cases = h1_perceived_greener_than_ndvi_ids_rtd
        print('{} places in {} perceived as green, while lower NDVI quartile:\n'.format(len(cases), place_name))

        for gsv_pano_id in cases:
            # print the pano id, iframe, and ratings and reasons people gave
            data = greenness_perception_points[greenness_perception_points.gsv_pano_id==gsv_pano_id].reset_index(drop=True)
            if print_qualitative_html:
                print(data.iframe.iloc[0])
            print(gsv_pano_id)
            if print_qualitative_html:
                print('<br />')
            if print_qualitative_perceptions:
                for i in data.index:
                    print('{} ({}) = {}'.format(
                        data.greenness_rating[i],
                        data.greenness_rating_num[i],
                        data.greenness_reason[i].replace('\n', '')))
                    if print_qualitative_html:
                        print('<br />')
            if print_qualitative_html:
                print('<br />-----<br />')
            else:
                print('')

In [44]:
# perceived as not-green (0-1), but upper quartile NDVI
h1_perceived_lessgreen_than_ndvi_ids_rtd = greenness_perception_points_perplace[
    (greenness_perception_points_perplace.place_name==place_name) &
    (greenness_perception_points_perplace.greenness_rating_num<=1.0) & 
    (greenness_perception_points_perplace.ndvi_median>ndvi_quantiles_rtd[0.75])].gsv_pano_id

if print_and_plot:
    if print_qualitative_locations:
        
        cases = h1_perceived_lessgreen_than_ndvi_ids_rtd
        print('{} places in {} perceived as not-green, while upper NDVI quartile:\n'.format(len(cases), place_name))

        for gsv_pano_id in cases:
            # print the pano id, iframe, and ratings and reasons people gave
            data = greenness_perception_points[greenness_perception_points.gsv_pano_id==gsv_pano_id].reset_index(drop=True)
            if print_qualitative_html:
                print(data.iframe.iloc[0])
            print(gsv_pano_id)
            if print_qualitative_html:
                print('<br />')
            if print_qualitative_perceptions:    
                for i in data.index:
                    print('{} ({}) = {}'.format(
                        data.greenness_rating[i],
                        data.greenness_rating_num[i],
                        data.greenness_reason[i].replace('\n', '')))
                    if print_qualitative_html:
                        print('<br />')
            if print_qualitative_html:
                print('<br />-----<br />')
            else:
                print('')

#### Goteborg

In [45]:
place_name = 'Goteborg'
ndvi_quantiles_got = greenness_perception_points_perplace[greenness_perception_points_perplace.place_name==place_name].ndvi_median.quantile([0.0, 0.25, 0.5, 0.75, 1.0])

In [46]:
if print_and_plot:
    print('{} NDVI quantiles'.format(place_name))
    print(ndvi_quantiles_got)

In [47]:
# perceived as green (3-4), but lower quartile NDVI
h1_perceived_greener_than_ndvi_ids_got = greenness_perception_points_perplace[
    (greenness_perception_points_perplace.place_name==place_name) &
    (greenness_perception_points_perplace.greenness_rating_num>=3.0) & 
    (greenness_perception_points_perplace.ndvi_median<ndvi_quantiles_got[0.25])].gsv_pano_id

if print_and_plot:
    if print_qualitative_locations:
        
        cases = h1_perceived_greener_than_ndvi_ids_got
        print('{} places in {} perceived as green, while lower NDVI quartile:\n'.format(len(cases), place_name))

        for gsv_pano_id in cases:
            # print the pano id, iframe, and ratings and reasons people gave
            data = greenness_perception_points[greenness_perception_points.gsv_pano_id==gsv_pano_id].reset_index(drop=True)
            if print_qualitative_html:
                print(data.iframe.iloc[0])
            print(gsv_pano_id)
            if print_qualitative_html:
                print('<br />')
            if print_qualitative_perceptions:    
                for i in data.index:
                    print('{} ({}) = {}'.format(
                        data.greenness_rating[i],
                        data.greenness_rating_num[i],
                        data.greenness_reason[i].replace('\n', '')))
                    if print_qualitative_html:
                        print('<br />')
            if print_qualitative_html:
                print('<br />-----<br />')
            else:
                print('')

In [48]:
# perceived as not-green (0-1), but upper quartile NDVI
h1_perceived_lessgreen_than_ndvi_ids_got = greenness_perception_points_perplace[
    (greenness_perception_points_perplace.place_name==place_name) &
    (greenness_perception_points_perplace.greenness_rating_num<=1.0) & 
    (greenness_perception_points_perplace.ndvi_median>ndvi_quantiles_got[0.75])].gsv_pano_id

if print_and_plot:
    if print_qualitative_locations:
        
        cases = h1_perceived_lessgreen_than_ndvi_ids_got
        print('{} places in {} perceived as not-green, while upper NDVI quartile:\n'.format(len(cases), place_name))

        for gsv_pano_id in cases:
            # print the pano id, iframe, and ratings and reasons people gave
            data = greenness_perception_points[greenness_perception_points.gsv_pano_id==gsv_pano_id].reset_index(drop=True)
            if print_qualitative_html:
                print(data.iframe.iloc[0])
            print(gsv_pano_id)
            if print_qualitative_html:
                print('<br />')
            if print_qualitative_perceptions:
                for i in data.index:
                    print('{} ({}) = {}'.format(
                        data.greenness_rating[i],
                        data.greenness_rating_num[i],
                        data.greenness_reason[i].replace('\n', '')))
                    if print_qualitative_html:
                        print('<br />')
            if print_qualitative_html:
                print('<br />-----<br />')
            else:
                print('')

## H2: Perceived greenness per OSM category

In [49]:
# Are there significant differences in greenspace perceptions between Regular Greenspaces and other place-categories? For which pairs?

In [50]:
if print_and_plot:
    gdf = greenness_perception_points_perplace
    col = 'greenness_rating_num'

    reg_gs = gdf[gdf.near_regular_greenspace][col]
    poc_gs = gdf[gdf.near_pocket_greenspace][col]
    square = gdf[gdf.near_square][col]
    playsp = gdf[gdf.near_playspace][col]
    street = gdf[gdf.near_street][col]
    print(stats.kruskal(reg_gs, poc_gs, square, playsp, street, nan_policy='omit'))

    headers = ['', 'near_regular_greenspace']
    rows = []
    cat_cols = ['near_pocket_greenspace', 'near_square', 'near_playspace', 'near_street']
    col_a = 'near_regular_greenspace'
    for col_b in cat_cols:
        x = gdf[gdf[col_a]][col]
        y = gdf[gdf[col_b]][col]
        statistic, pvalue = stats.mannwhitneyu(x, y)
        statistic = str(round(statistic, 3))
        if pvalue <= 0.05:
            statistic = str(statistic)+'*'
            if pvalue <= 0.01:
                statistic = statistic+'*'
        print(statistic, pvalue)
        row = [col_b, statistic + ' (p: {})'.format(pvalue)]
        rows.append(row)
    print(tabulate(rows, headers=headers))

In [51]:
# How are these perceptions distributed per place-category, and what are the medians?

In [52]:
if print_and_plot:
    fig, axs = plt.subplots(nrows=1, ncols=5, figsize=(20,4))
    
    
    # GROUPED BY PLACE
    gdf = greenness_perception_points_perplace
    bins = [-0.25, 0.25, 0.75, 1.25, 1.75, 2.25, 2.75, 3.25, 3.75, 4.25]
    bins = [-0.5, 0.5, 1.5, 2.5, 3.5, 4.5]
    
    col = 'near_regular_greenspace'
    data = gdf[gdf[col]]
    sns.histplot(ax=axs[0], data=data, x='greenness_rating_num', bins=bins, element='step')
    axs[0].set_title('Median perception per place\nRegular greenspaces\nN: {}; Median: {}'.format(len(data), str(data.greenness_rating_num.median())))
    
    col = 'near_pocket_greenspace'
    data = gdf[gdf[col]]
    sns.histplot(ax=axs[1], data=data, x='greenness_rating_num', bins=bins, element='step')
    axs[1].set_title('Median perception per place\nPocket greenspaces\nN: {}; Median: {}'.format(len(data), str(data.greenness_rating_num.median())))

    col = 'near_square'
    data = gdf[gdf[col]]
    sns.histplot(ax=axs[2], data=data, x='greenness_rating_num', bins=bins, element='step')
    axs[2].set_title('Median perception per place\nSquares\nN: {}; Median: {}'.format(len(data), str(data.greenness_rating_num.median())))

    col = 'near_playspace'
    data = gdf[gdf[col]]
    sns.histplot(ax=axs[3], data=data, x='greenness_rating_num', bins=bins, element='step')
    axs[3].set_title('Median perception per place\nPlayspaces\nN: {}; Median: {}'.format(len(data), str(data.greenness_rating_num.median())))

    col = 'near_street'
    data = gdf[gdf[col]]
    sns.histplot(ax=axs[4], data=data, x='greenness_rating_num', bins=bins, element='step')
    axs[4].set_title('Median perception per place\nStreets\nN: {}; Median: {}'.format(len(data), str(data.greenness_rating_num.median())))
    
    
    for ax in axs:
        ax.set_xticks([0.0, 1.0, 2.0, 3.0, 4.0])
        ax.tick_params(axis='x', rotation=90)
    plt.subplots_adjust(wspace=0.4, hspace=0.4)

    plt.show()

In [53]:
# what percentage of places is perceived as greenspace, per place-type?

In [54]:
def print_perc_greenspace(gdf, category, place_names=None, exclusive=False):
    
    if exclusive:
        gdf_perceivedasgreen = gdf[(gdf[category]) & ~(gdf.near_regular_greenspace) & (gdf.greenness_rating_num>=3)]
    else:
        gdf_perceivedasgreen = gdf[(gdf[category]) & (gdf.greenness_rating_num>=3)]
    
    print('{}% of type {} perceived as greenspace'.format(round(100*len(gdf_perceivedasgreen)/len(gdf[gdf[category]]), 3), category))
    
    for place_name in place_names:
        gdf_place = gdf[gdf.place_name==place_name]
        gdf_place_perceivedasgreen = gdf_place[(gdf_place[category]) & (gdf_place.greenness_rating_num>=3)]
        print('\t(For {}: {}% of type {} perceived as greenspace)'.format(place_name, round(100*len(gdf_place_perceivedasgreen)/len(gdf_place[gdf_place[category]]), 2), category))

In [55]:
category = 'near_regular_greenspace'

if print_and_plot:
    print_perc_greenspace(greenness_perception_points_perplace, category, place_names=place_names)

In [56]:
category = 'near_pocket_greenspace'

if print_and_plot:
    print_perc_greenspace(greenness_perception_points_perplace, category, place_names=place_names)
    
    print('\nWhen excluding overlap with OSM regular greenspaces:')
    print_perc_greenspace(greenness_perception_points_perplace, category, place_names=place_names, exclusive=True)

In [57]:
category = 'near_square'

if print_and_plot:
    print_perc_greenspace(greenness_perception_points_perplace, category, place_names=place_names)
    
    print('\nWhen excluding overlap with OSM regular greenspaces:')
    print_perc_greenspace(greenness_perception_points_perplace, category, place_names=place_names, exclusive=True)

In [58]:
category = 'near_playspace'

if print_and_plot:
    print_perc_greenspace(greenness_perception_points_perplace, category, place_names=place_names)
    
    print('\nWhen excluding overlap with OSM regular greenspaces:')
    print_perc_greenspace(greenness_perception_points_perplace, category, place_names=place_names, exclusive=True)

In [59]:
category = 'near_street'

if print_and_plot:
    print_perc_greenspace(greenness_perception_points_perplace, category, place_names=place_names)
    
    print('\nWhen excluding overlap with OSM regular greenspaces:')
    print_perc_greenspace(greenness_perception_points_perplace, category, place_names=place_names, exclusive=True)

In [60]:
# what percentage of places is regular-size greenspace?

In [61]:
gdf_greenspace = greenness_perception_points_perplace[greenness_perception_points_perplace['near_regular_greenspace']]
print('{}% of spaces is regular-size greenspace'.format(round(100*len(gdf_greenspace)/len(greenness_perception_points_perplace), 3)))

83.777% of spaces is regular-size greenspace


In [62]:
gdf_greenspace = greenness_perception_points_perplace[(greenness_perception_points_perplace['near_regular_greenspace']) | (greenness_perception_points_perplace['near_pocket_greenspace'])]
print('{}% of spaces is any-size greenspace'.format(round(100*len(gdf_greenspace)/len(greenness_perception_points_perplace), 3)))

95.884% of spaces is any-size greenspace


In [63]:
# what percentage of regular-size greenspaces is perceived as green by people?

In [64]:
gdf_greenspace = greenness_perception_points_perplace[greenness_perception_points_perplace['near_regular_greenspace']]
gdf_greenspace_perceivedasgreen = gdf_greenspace[gdf_greenspace.greenness_rating_num>=3]

print('{}% of regular-size greenspaces perceived as green'.format(round(100*len(gdf_greenspace_perceivedasgreen)/len(gdf_greenspace), 3)))

47.11% of regular-size greenspaces perceived as green


In [65]:
# what percentage of any-size greenspaces in OSM is perceived as green by people?

In [66]:
gdf_greenspace = greenness_perception_points_perplace[(greenness_perception_points_perplace['near_regular_greenspace']) | (greenness_perception_points_perplace['near_pocket_greenspace'])]
gdf_greenspace_perceivedasgreen = gdf_greenspace[gdf_greenspace.greenness_rating_num>=3]

print('{}% of any-size greenspaces perceived as green'.format(round(100*len(gdf_greenspace_perceivedasgreen)/len(gdf_greenspace), 3)))

44.697% of any-size greenspaces perceived as green


In [67]:
# what percentage of places not tagged regular-size greenspace in OSM is still perceived as green by people?

In [68]:
gdf_notgreenspace = greenness_perception_points_perplace[~greenness_perception_points_perplace['near_regular_greenspace']]
gdf_notgreenspace_perceivedasgreen = gdf_notgreenspace[gdf_notgreenspace.greenness_rating_num>=3]

print('{}% of not-OSM-regular-size-greenspaces still perceived as green'.format(round(100*len(gdf_notgreenspace_perceivedasgreen)/len(gdf_notgreenspace), 3)))

25.373% of not-OSM-regular-size-greenspaces still perceived as green


In [69]:
# what percentage of places not tagged any-size greenspace in OSM is still perceived as green by people?

In [70]:
gdf_notgreenspace = greenness_perception_points_perplace[~greenness_perception_points_perplace['near_regular_greenspace']]
gdf_notgreenspace = gdf_notgreenspace[~gdf_notgreenspace['near_pocket_greenspace']]
gdf_notgreenspace_perceivedasgreen = gdf_notgreenspace[gdf_notgreenspace.greenness_rating_num>=3]
    
print('{}% of not-OSM-any-size-greenspaces still perceived as green'.format(round(100*len(gdf_notgreenspace_perceivedasgreen)/len(gdf_notgreenspace), 3)))

17.647% of not-OSM-any-size-greenspaces still perceived as green


### Cases for further qualitative analysis

#### Barcelona

In [71]:
place_name = 'Barcelona'

In [72]:
# perceived as green (3-4), but not tagged as regular greenspace in OSM
h1_perceived_greener_than_osm_ids_bcn = greenness_perception_points_perplace[
    (greenness_perception_points_perplace.place_name==place_name) &
    (greenness_perception_points_perplace.greenness_rating_num>=3.0) & 
    (greenness_perception_points_perplace.near_regular_greenspace==False)].gsv_pano_id

if print_and_plot:
    if print_qualitative_locations:
        
        cases = h1_perceived_greener_than_osm_ids_bcn
        print('{} places in {} perceived as green, while not tagged as regular greenspace:\n'.format(len(cases), place_name))

        for gsv_pano_id in cases:
            # print the pano id, iframe, and ratings and reasons people gave
            data = greenness_perception_points[greenness_perception_points.gsv_pano_id==gsv_pano_id].reset_index(drop=True)
            if print_qualitative_html:
                print(data.iframe.iloc[0])
            print(gsv_pano_id)
            if print_qualitative_html:
                    print('<br />')
            near_categories = []
            if data.near_regular_greenspace.iloc[0]:
                near_categories.append('regular greenspace')
            if data.near_pocket_greenspace.iloc[0]:
                near_categories.append('pocket greenspace')
            if data.near_square.iloc[0]:
                near_categories.append('square')
            if data.near_playspace.iloc[0]:
                near_categories.append('playspace')
            if data.near_street.iloc[0]:
                near_categories.append('street')
            print('OSM categories:', ' & '.join(near_categories))
            if print_qualitative_html:
                print('<br />')
            if print_qualitative_perceptions:
                for i in data.index:
                    print('{} ({}) = {}'.format(
                        data.greenness_rating[i],
                        data.greenness_rating_num[i],
                        data.greenness_reason[i].replace('\n', '')))
                    if print_qualitative_html:
                        print('<br />')
            if print_qualitative_html:
                print('<br />-----<br />')
            else:
                print('')

In [73]:
# perceived as not-green (0-1), but tagged as regular greenspace in OSM
h1_perceived_lessgreen_than_osm_ids_bcn = greenness_perception_points_perplace[
    (greenness_perception_points_perplace.place_name==place_name) &
    (greenness_perception_points_perplace.greenness_rating_num<=1.0) & 
    (greenness_perception_points_perplace.near_regular_greenspace==True)].gsv_pano_id

if print_and_plot:
    if print_qualitative_locations:
        
        cases = h1_perceived_lessgreen_than_osm_ids_bcn
        print('{} places in {} perceived as not-green, tagged as regular greenspace:\n'.format(len(cases), place_name))

        for gsv_pano_id in cases:
            # print the pano id, iframe, and ratings and reasons people gave
            data = greenness_perception_points[greenness_perception_points.gsv_pano_id==gsv_pano_id].reset_index(drop=True)
            if print_qualitative_html:
                print(data.iframe.iloc[0])
            print(gsv_pano_id)
            if print_qualitative_html:
                    print('<br />')
            near_categories = []
            if data.near_regular_greenspace.iloc[0]:
                near_categories.append('regular greenspace')
            if data.near_pocket_greenspace.iloc[0]:
                near_categories.append('pocket greenspace')
            if data.near_square.iloc[0]:
                near_categories.append('square')
            if data.near_playspace.iloc[0]:
                near_categories.append('playspace')
            if data.near_street.iloc[0]:
                near_categories.append('street')
            print('OSM categories:', ' & '.join(near_categories))
            if print_qualitative_html:
                print('<br />')
            if print_qualitative_perceptions:
                for i in data.index:
                    print('{} ({}) = {}'.format(
                        data.greenness_rating[i],
                        data.greenness_rating_num[i],
                        data.greenness_reason[i].replace('\n', '')))
                    if print_qualitative_html:
                        print('<br />')
                if print_qualitative_html:
                    print('<br />-----<br />')
            else:
                print('')

#### Rotterdam

In [74]:
place_name = 'Rotterdam'

In [75]:
# perceived as green (3-4), but not tagged as regular greenspace in OSM
h1_perceived_greener_than_osm_ids_rtd = greenness_perception_points_perplace[
    (greenness_perception_points_perplace.place_name==place_name) &
    (greenness_perception_points_perplace.greenness_rating_num>=3.0) & 
    (greenness_perception_points_perplace.near_regular_greenspace==False)].gsv_pano_id

if print_and_plot:
    if print_qualitative_locations:
        
        cases = h1_perceived_greener_than_osm_ids_rtd
        print('{} places in {} perceived as green, while not tagged as regular greenspace:\n'.format(len(cases), place_name))

        for gsv_pano_id in cases:
            # print the pano id, iframe, and ratings and reasons people gave
            data = greenness_perception_points[greenness_perception_points.gsv_pano_id==gsv_pano_id].reset_index(drop=True)
            if print_qualitative_html:
                print(data.iframe.iloc[0])
            print(gsv_pano_id)
            if print_qualitative_html:
                    print('<br />')
            near_categories = []
            if data.near_regular_greenspace.iloc[0]:
                near_categories.append('regular greenspace')
            if data.near_pocket_greenspace.iloc[0]:
                near_categories.append('pocket greenspace')
            if data.near_square.iloc[0]:
                near_categories.append('square')
            if data.near_playspace.iloc[0]:
                near_categories.append('playspace')
            if data.near_street.iloc[0]:
                near_categories.append('street')
            print('OSM categories:', ' & '.join(near_categories))
            if print_qualitative_html:
                print('<br />')
            if print_qualitative_perceptions:
                for i in data.index:
                    print('{} ({}) = {}'.format(
                        data.greenness_rating[i],
                        data.greenness_rating_num[i],
                        data.greenness_reason[i].replace('\n', '')))
                    if print_qualitative_html:
                        print('<br />')
                if print_qualitative_html:
                    print('<br />-----<br />')
            else:
                print('')

In [76]:
# perceived as not-green (0-1), but tagged as regular greenspace in OSM
h1_perceived_lessgreen_than_osm_ids_rtd = greenness_perception_points_perplace[
    (greenness_perception_points_perplace.place_name==place_name) &
    (greenness_perception_points_perplace.greenness_rating_num<=1.0) & 
    (greenness_perception_points_perplace.near_regular_greenspace==True)].gsv_pano_id

if print_and_plot:
    if print_qualitative_locations:
        
        cases = h1_perceived_lessgreen_than_osm_ids_rtd
        print('{} places in {} perceived as not-green, tagged as regular greenspace:\n'.format(len(cases), place_name))

        for gsv_pano_id in cases:
            # print the pano id, iframe, and ratings and reasons people gave
            data = greenness_perception_points[greenness_perception_points.gsv_pano_id==gsv_pano_id].reset_index(drop=True)
            if print_qualitative_html:
                print(data.iframe.iloc[0])
            print(gsv_pano_id)
            if print_qualitative_html:
                    print('<br />')
            near_categories = []
            if data.near_regular_greenspace.iloc[0]:
                near_categories.append('regular greenspace')
            if data.near_pocket_greenspace.iloc[0]:
                near_categories.append('pocket greenspace')
            if data.near_square.iloc[0]:
                near_categories.append('square')
            if data.near_playspace.iloc[0]:
                near_categories.append('playspace')
            if data.near_street.iloc[0]:
                near_categories.append('street')
            print('OSM categories:', ' & '.join(near_categories))
            if print_qualitative_html:
                print('<br />')
            if print_qualitative_perceptions:
                for i in data.index:
                    print('{} ({}) = {}'.format(
                        data.greenness_rating[i],
                        data.greenness_rating_num[i],
                        data.greenness_reason[i].replace('\n', '')))
                    if print_qualitative_html:
                        print('<br />')
                if print_qualitative_html:
                    print('<br />-----<br />')
            else:
                print('')

#### Goteborg

In [77]:
place_name = 'Goteborg'

In [78]:
# perceived as green (3-4), but not tagged as regular greenspace in OSM
h1_perceived_greener_than_osm_ids_got = greenness_perception_points_perplace[
    (greenness_perception_points_perplace.place_name==place_name) &
    (greenness_perception_points_perplace.greenness_rating_num>=3.0) & 
    (greenness_perception_points_perplace.near_regular_greenspace==False)].gsv_pano_id

if print_and_plot:
    if print_qualitative_locations:
        
        cases = h1_perceived_greener_than_osm_ids_got
        print('{} places in {} perceived as green, while not tagged as regular greenspace:\n'.format(len(cases), place_name))

        for gsv_pano_id in cases:
            # print the pano id, iframe, and ratings and reasons people gave
            data = greenness_perception_points[greenness_perception_points.gsv_pano_id==gsv_pano_id].reset_index(drop=True)
            if print_qualitative_html:
                print(data.iframe.iloc[0])
            print(gsv_pano_id)
            if print_qualitative_html:
                    print('<br />')
            near_categories = []
            if data.near_regular_greenspace.iloc[0]:
                near_categories.append('regular greenspace')
            if data.near_pocket_greenspace.iloc[0]:
                near_categories.append('pocket greenspace')
            if data.near_square.iloc[0]:
                near_categories.append('square')
            if data.near_playspace.iloc[0]:
                near_categories.append('playspace')
            if data.near_street.iloc[0]:
                near_categories.append('street')
            print('OSM categories:', ' & '.join(near_categories))
            if print_qualitative_html:
                print('<br />')
            if print_qualitative_perceptions:
                for i in data.index:
                    print('{} ({}) = {}'.format(
                        data.greenness_rating[i],
                        data.greenness_rating_num[i],
                        data.greenness_reason[i].replace('\n', '')))
                    if print_qualitative_html:
                        print('<br />')
            if print_qualitative_html:
                print('<br />-----<br />')
            else:
                print('')

In [79]:
# perceived as not-green (0-1), but tagged as regular greenspace in OSM
h1_perceived_lessgreen_than_osm_ids_bcn = greenness_perception_points_perplace[
    (greenness_perception_points_perplace.place_name==place_name) &
    (greenness_perception_points_perplace.greenness_rating_num<=1.0) & 
    (greenness_perception_points_perplace.near_regular_greenspace==True)].gsv_pano_id

if print_and_plot:
    if print_qualitative_locations:
        
        cases = h1_perceived_lessgreen_than_osm_ids_bcn
        print('{} places in {} perceived as not-green, tagged as regular greenspace:\n'.format(len(cases), place_name))

        for gsv_pano_id in cases:
            # print the pano id, iframe, and ratings and reasons people gave
            data = greenness_perception_points[greenness_perception_points.gsv_pano_id==gsv_pano_id].reset_index(drop=True)
            if print_qualitative_html:
                print(data.iframe.iloc[0])
            print(gsv_pano_id)
            if print_qualitative_html:
                    print('<br />')
            near_categories = []
            if data.near_regular_greenspace.iloc[0]:
                near_categories.append('regular greenspace')
            if data.near_pocket_greenspace.iloc[0]:
                near_categories.append('pocket greenspace')
            if data.near_square.iloc[0]:
                near_categories.append('square')
            if data.near_playspace.iloc[0]:
                near_categories.append('playspace')
            if data.near_street.iloc[0]:
                near_categories.append('street')
            print('OSM categories:', ' & '.join(near_categories))
            if print_qualitative_html:
                print('<br />')
            if print_qualitative_perceptions:
                for i in data.index:
                    print('{} ({}) = {}'.format(
                        data.greenness_rating[i],
                        data.greenness_rating_num[i],
                        data.greenness_reason[i].replace('\n', '')))
                    if print_qualitative_html:
                        print('<br />')
            if print_qualitative_html:
                print('<br />-----<br />')
            else:
                print('')

## H4: Suitability for activities

In [80]:
activity_perception_points['merge_on'] = activity_perception_points['gsv_pano_id'] + '_' + activity_perception_points['prolific_id']
greenness_perception_points['merge_on'] = greenness_perception_points['gsv_pano_id'] + '_' + greenness_perception_points['prolific_id']

perception_points = activity_perception_points.merge(greenness_perception_points[['greenness_rating_num', 'merge_on']], on='merge_on', how='inner')

activity_perception_points.drop(columns=['merge_on'], inplace=True)
greenness_perception_points.drop(columns=['merge_on'], inplace=True)
perception_points.drop(columns=['merge_on'], inplace=True)

In [81]:
activity_perception_points_perplace['merge_on'] = activity_perception_points_perplace['gsv_pano_id']
greenness_perception_points_perplace['merge_on'] = greenness_perception_points_perplace['gsv_pano_id']

perception_points_perplace = activity_perception_points_perplace.merge(greenness_perception_points_perplace[['greenness_rating_num', 'merge_on']], on='merge_on', how='inner')

activity_perception_points_perplace.drop(columns=['merge_on'], inplace=True)
greenness_perception_points_perplace.drop(columns=['merge_on'], inplace=True)
perception_points_perplace.drop(columns=['merge_on'], inplace=True)

In [82]:
# Are there significant differences in suitability OF PERCEIVED GREENSPACES for use between activity types? For which pairs?

In [83]:
perception_points_perceivedasgreen = perception_points[perception_points.greenness_rating_num>=3.0]

In [84]:
perception_points_perplace_perceivedasgreen = perception_points_perplace[perception_points_perplace.greenness_rating_num>=3.0]

In [85]:
if print_and_plot:
    
    gdf = perception_points_perplace_perceivedasgreen
    print('N:', len(gdf))

    phys = gdf.physical_rating_num
    soc = gdf.social_rating_num
    rel = gdf.relax_rating_num
    comm = gdf.commute_rating_num
    chil = gdf.children_rating_num

    print(stats.kruskal(phys, soc, rel, comm, chil, nan_policy='omit'))

    headers = ['']
    rows = []
    act_cols = ['physical_rating_num', 'social_rating_num', 'relax_rating_num', 'commute_rating_num', 'children_rating_num']

    for col_a in act_cols:
        row = [col_a]
        for col_b in act_cols:
            if col_a == col_b:
                row.append('X')
                break
            else:
                x = gdf[col_a]
                y = gdf[col_b]
                statistic, pvalue = stats.mannwhitneyu(x, y)
                statistic = str(round(statistic, 3))

                if pvalue <= 0.05:
                    statistic = str(statistic)+'*'
                    if pvalue <= 0.01:
                        statistic = statistic+'*'
                row.append(statistic + ' (p: {})'.format(str(round(pvalue, 3))))
        headers.append(col_a)
        rows.append(row)

    print(tabulate(rows, headers=headers))

In [86]:
if print_and_plot:
    
    for place_name in place_names:
        print(place_name)
    
        gdf = perception_points_perplace_perceivedasgreen[perception_points_perplace_perceivedasgreen.place_name==place_name]
        print('N:', len(gdf))

        phys = gdf.physical_rating_num
        soc = gdf.social_rating_num
        rel = gdf.relax_rating_num
        comm = gdf.commute_rating_num
        chil = gdf.children_rating_num

        print(stats.kruskal(phys, soc, rel, comm, chil, nan_policy='omit'))

        headers = ['']
        rows = []
        act_cols = ['physical_rating_num', 'social_rating_num', 'relax_rating_num', 'commute_rating_num', 'children_rating_num']

        for col_a in act_cols:
            row = [col_a]
            for col_b in act_cols:
                if col_a == col_b:
                    row.append('X')
                    break
                else:
                    x = gdf[col_a]
                    y = gdf[col_b]
                    statistic, pvalue = stats.mannwhitneyu(x, y)
                    statistic = str(round(statistic, 3))

                    if pvalue <= 0.05:
                        statistic = str(statistic)+'*'
                        if pvalue <= 0.01:
                            statistic = statistic+'*'
                    row.append(statistic + ' (p: {})'.format(str(round(pvalue, 3))))
            headers.append(col_a)
            rows.append(row)

        print(tabulate(rows, headers=headers), '\n\n')

In [87]:
# Does suitability for use per activity type correlate with perceived greenness? 

In [88]:
if print_and_plot:  
    
    fig, axs = plt.subplots(nrows=1, ncols=5, figsize=(20,3))
    
    
    # MEDIAN PERCEPTION PER PLACE
    gdf = perception_points_perplace
    alpha = 0.1
    
    x = gdf.greenness_rating_num
    y = gdf.physical_rating_num
    sns.regplot(ax=axs[0], x=x, y=y, scatter_kws={'alpha':alpha})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[0].set_title('Median perception per place\nGreenness & physical activity rating\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[0].set_title('Median perception per place\nGreenness & physical activity rating\nInsignificant')
        
    x = gdf.greenness_rating_num
    y = gdf.social_rating_num
    sns.regplot(ax=axs[1], x=x, y=y, scatter_kws={'alpha':alpha})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[1].set_title('Median perception per place\nGreenness & social activity rating\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[1].set_title('Median perception per place\nGreenness & social activity rating\nInsignificant')
        
    x = gdf.greenness_rating_num
    y = gdf.relax_rating_num
    sns.regplot(ax=axs[2], x=x, y=y, scatter_kws={'alpha':alpha})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[2].set_title('Median perception per place\nGreenness & relaxation rating\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[2].set_title('Median perception per place\nGreenness & relaxation rating\nInsignificant')
        
    x = gdf.greenness_rating_num
    y = gdf.commute_rating_num
    sns.regplot(ax=axs[3], x=x, y=y, scatter_kws={'alpha':alpha})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[3].set_title('Median perception per place\nGreenness & commuting rating\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[3].set_title('Median perception per place\nGreenness & commuting rating\nInsignificant')
        
    x = gdf.greenness_rating_num
    y = gdf.children_rating_num
    sns.regplot(ax=axs[4], x=x, y=y, scatter_kws={'alpha':alpha})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[4].set_title('Median perception per place\nGreenness & children\'s activity rating\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[4].set_title('Median perception per place\nGreenness & children\'s  activity rating\nInsignificant')

        
    plt.subplots_adjust(wspace=0.4, hspace=0.8)
    
    plt.show()

In [89]:
if print_and_plot:  
    
    for place_name in place_names:
    
        fig, axs = plt.subplots(nrows=1, ncols=5, figsize=(20,3))
        fig.suptitle(place_name, fontsize=20, y=1.25)


        # MEDIAN PERCEPTION PER PLACE
        gdf = perception_points_perplace[perception_points_perplace.place_name==place_name]
        alpha = 0.1

        x = gdf.greenness_rating_num
        y = gdf.physical_rating_num
        sns.regplot(ax=axs[0], x=x, y=y, scatter_kws={'alpha':alpha})
        correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
        if pvalue <= 0.05:
            axs[0].set_title('Median perception per place\nGreenness & physical activity rating\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
        else:
            axs[0].set_title('Median perception per place\nGreenness & physical activity rating\nInsignificant')

        x = gdf.greenness_rating_num
        y = gdf.social_rating_num
        sns.regplot(ax=axs[1], x=x, y=y, scatter_kws={'alpha':alpha})
        correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
        if pvalue <= 0.05:
            axs[1].set_title('Median perception per place\nGreenness & social activity rating\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
        else:
            axs[1].set_title('Median perception per place\nGreenness & social activity rating\nInsignificant')

        x = gdf.greenness_rating_num
        y = gdf.relax_rating_num
        sns.regplot(ax=axs[2], x=x, y=y, scatter_kws={'alpha':alpha})
        correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
        if pvalue <= 0.05:
            axs[2].set_title('Median perception per place\nGreenness & relaxation rating\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
        else:
            axs[2].set_title('Median perception per place\nGreenness & relaxation rating\nInsignificant')

        x = gdf.greenness_rating_num
        y = gdf.commute_rating_num
        sns.regplot(ax=axs[3], x=x, y=y, scatter_kws={'alpha':alpha})
        correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
        if pvalue <= 0.05:
            axs[3].set_title('Median perception per place\nGreenness & commuting rating\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
        else:
            axs[3].set_title('Median perception per place\nGreenness & commuting rating\nInsignificant')

        x = gdf.greenness_rating_num
        y = gdf.children_rating_num
        sns.regplot(ax=axs[4], x=x, y=y, scatter_kws={'alpha':alpha})
        correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
        if pvalue <= 0.05:
            axs[4].set_title('Median perception per place\nGreenness & children\'s activity rating\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
        else:
            axs[4].set_title('Median perception per place\nGreenness & children\'s  activity rating\nInsignificant')


        plt.subplots_adjust(wspace=0.4, hspace=0.8)

        plt.show()

In [90]:
# how many playspaces are perceived suitable for children?
if print_and_plot:
    perception_points_perplace_playspaces = perception_points_perplace[perception_points_perplace['near_playspace']]
    print('{}% of playspaces perceived suitable for children'.format(
        round(100*len(perception_points_perplace_playspaces[perception_points_perplace_playspaces.children_rating_num>=3.0])/len(perception_points_perplace_playspaces), 3)))

In [91]:
# how many greenspaces are perceived suitable for activities, per activity-category?

In [92]:
suitable_greenspaces = len(perception_points_perplace_perceivedasgreen[
    (perception_points_perplace_perceivedasgreen.physical_rating_num>=3.0) |
    (perception_points_perplace_perceivedasgreen.social_rating_num>=3.0) |
    (perception_points_perplace_perceivedasgreen.relax_rating_num>=3.0) |
    (perception_points_perplace_perceivedasgreen.commute_rating_num>=3.0) |
    (perception_points_perplace_perceivedasgreen.children_rating_num>=3.0)])
suitable_general = len(perception_points_perplace[
    (perception_points_perplace.physical_rating_num>=3.0) |
    (perception_points_perplace.social_rating_num>=3.0) |
    (perception_points_perplace.relax_rating_num>=3.0) |
    (perception_points_perplace.commute_rating_num>=3.0) |
    (perception_points_perplace.children_rating_num>=3.0)])
print('{} greenspaces perceived suitable for at least one activity-type\n(opposed to {} spaces in general)'.format(suitable_greenspaces, suitable_general))

138 greenspaces perceived suitable for at least one activity-type
(opposed to 261 spaces in general)


In [93]:
cols = ['physical_rating_num', 'social_rating_num', 'relax_rating_num', 'commute_rating_num', 'children_rating_num']

In [94]:
for activity in cols:
    suitable_greenspaces = len(perception_points_perplace_perceivedasgreen[(perception_points_perplace_perceivedasgreen[activity]>=3.0)])
    suitable_general = len(perception_points_perplace[perception_points_perplace[activity]>=3.0])
    print('{} greenspaces perceived suitable for {} (opposed to {} spaces in general)'.format(suitable_greenspaces, activity, suitable_general))


66 greenspaces perceived suitable for physical_rating_num (opposed to 84 spaces in general)
30 greenspaces perceived suitable for social_rating_num (opposed to 55 spaces in general)
42 greenspaces perceived suitable for relax_rating_num (opposed to 50 spaces in general)
86 greenspaces perceived suitable for commute_rating_num (opposed to 171 spaces in general)
54 greenspaces perceived suitable for children_rating_num (opposed to 84 spaces in general)


In [95]:
for pair in list(itertools.combinations(cols, 2)):
    overlap = len(perception_points_perplace_perceivedasgreen[
        (perception_points_perplace_perceivedasgreen[pair[0]]>=3.0) & 
        (perception_points_perplace_perceivedasgreen[pair[1]]>=3.0)])
    print('{} greenspaces perceived suitable for both {} and {}'.format(overlap, pair[0], pair[1]))

18 greenspaces perceived suitable for both physical_rating_num and social_rating_num
24 greenspaces perceived suitable for both physical_rating_num and relax_rating_num
44 greenspaces perceived suitable for both physical_rating_num and commute_rating_num
24 greenspaces perceived suitable for both physical_rating_num and children_rating_num
21 greenspaces perceived suitable for both social_rating_num and relax_rating_num
18 greenspaces perceived suitable for both social_rating_num and commute_rating_num
17 greenspaces perceived suitable for both social_rating_num and children_rating_num
20 greenspaces perceived suitable for both relax_rating_num and commute_rating_num
25 greenspaces perceived suitable for both relax_rating_num and children_rating_num
21 greenspaces perceived suitable for both commute_rating_num and children_rating_num


In [96]:
for pair in list(itertools.combinations(cols, 3)):
    overlap = len(perception_points_perplace_perceivedasgreen[
        (perception_points_perplace_perceivedasgreen[pair[0]]>=3.0) & 
        (perception_points_perplace_perceivedasgreen[pair[1]]>=3.0) &
        (perception_points_perplace_perceivedasgreen[pair[2]]>=3.0)])
    print('{} greenspaces perceived suitable for {}, {} and {}'.format(overlap, pair[0], pair[1], pair[2]))

12 greenspaces perceived suitable for physical_rating_num, social_rating_num and relax_rating_num
12 greenspaces perceived suitable for physical_rating_num, social_rating_num and commute_rating_num
11 greenspaces perceived suitable for physical_rating_num, social_rating_num and children_rating_num
14 greenspaces perceived suitable for physical_rating_num, relax_rating_num and commute_rating_num
14 greenspaces perceived suitable for physical_rating_num, relax_rating_num and children_rating_num
15 greenspaces perceived suitable for physical_rating_num, commute_rating_num and children_rating_num
12 greenspaces perceived suitable for social_rating_num, relax_rating_num and commute_rating_num
14 greenspaces perceived suitable for social_rating_num, relax_rating_num and children_rating_num
11 greenspaces perceived suitable for social_rating_num, commute_rating_num and children_rating_num
13 greenspaces perceived suitable for relax_rating_num, commute_rating_num and children_rating_num


In [97]:
for pair in list(itertools.combinations(cols, 4)):
    overlap = len(perception_points_perplace_perceivedasgreen[
        (perception_points_perplace_perceivedasgreen[pair[0]]>=3.0) & 
        (perception_points_perplace_perceivedasgreen[pair[1]]>=3.0) &
        (perception_points_perplace_perceivedasgreen[pair[2]]>=3.0) &
        (perception_points_perplace_perceivedasgreen[pair[3]]>=3.0)])
    print('{} greenspaces perceived suitable for {}, {}, {} and {}'.format(overlap, pair[0], pair[1], pair[2], pair[3]))

8 greenspaces perceived suitable for physical_rating_num, social_rating_num, relax_rating_num and commute_rating_num
8 greenspaces perceived suitable for physical_rating_num, social_rating_num, relax_rating_num and children_rating_num
8 greenspaces perceived suitable for physical_rating_num, social_rating_num, commute_rating_num and children_rating_num
9 greenspaces perceived suitable for physical_rating_num, relax_rating_num, commute_rating_num and children_rating_num
9 greenspaces perceived suitable for social_rating_num, relax_rating_num, commute_rating_num and children_rating_num


In [98]:
for pair in list(itertools.combinations(cols, 5)):
    overlap = len(perception_points_perplace_perceivedasgreen[
        (perception_points_perplace_perceivedasgreen[pair[0]]>=3.0) & 
        (perception_points_perplace_perceivedasgreen[pair[1]]>=3.0) &
        (perception_points_perplace_perceivedasgreen[pair[2]]>=3.0) &
        (perception_points_perplace_perceivedasgreen[pair[3]]>=3.0) &
        (perception_points_perplace_perceivedasgreen[pair[4]]>=3.0)
    ])
    print('{} greenspaces perceived suitable for all activity-types'.format(overlap))

6 greenspaces perceived suitable for all activity-types


In [99]:
# if print_and_plot:  
    
#     act_color = 'purple'
    
#     fig, axs = plt.subplots(nrows=1, ncols=5, figsize=(20,3))
#     fig.suptitle('Perceived usability of greenspaces', fontsize=20, y=1.25)

#     venn2(subsets = (
#         len(perception_points_perplace_perceivedasgreen),
#         len(perception_points_perplace_perceivedasphysical),
#         len(perception_points_perplace_perceivedasgreenandphysical)),
#         ax=axs[0], set_labels = ('', 'for physical activity'), set_colors=('green', act_color), alpha = 0.5)
#     # axs[0].set_title('')
    
#     venn2(subsets = (
#         len(perception_points_perplace_perceivedasgreen),
#         len(perception_points_perplace_perceivedassocial),
#         len(perception_points_perplace_perceivedasgreenandsocial)),
#         ax=axs[1], set_labels = ('', 'for social activity'), set_colors=('green', act_color), alpha = 0.5)
    
#     venn2(subsets = (
#         len(perception_points_perplace_perceivedasgreen),
#         len(perception_points_perplace_perceivedasrelax),
#         len(perception_points_perplace_perceivedasgreenandrelax)),
#         ax=axs[2], set_labels = ('', 'for relaxation'), set_colors=('green', act_color), alpha = 0.5)
    
#     venn2(subsets = (
#         len(perception_points_perplace_perceivedasgreen),  
#         len(perception_points_perplace_perceivedascommute),
#         len(perception_points_perplace_perceivedasgreenandcommute)),
#         ax=axs[3], set_labels = ('', 'for commuting'), set_colors=('green', act_color), alpha = 0.5)
    
#     venn2(subsets = (
#         len(perception_points_perplace_perceivedasgreen),
#         len(perception_points_perplace_perceivedaschildren),
#         len(perception_points_perplace_perceivedasgreenandchildren)),
#         ax=axs[4], set_labels = ('', 'for children\'s activity'), set_colors=('green', act_color), alpha = 0.5)


#     plt.show()

## H3: Combining OSM and NDVI

In [100]:
def get_osm_ndvi_subset(osm_category, ndvi_quantile, exclusive=False):
    if osm_category != 'all':
        if exclusive:
            gdf = perception_points_perplace[(perception_points_perplace[osm_category]) & ~(perception_points_perplace['near_regular_greenspace'])]
        else:
            gdf = perception_points_perplace[perception_points_perplace[osm_category]]
    elif osm_category == 'all':
        if exclusive:
            gdf = perception_points_perplace[~(perception_points_perplace['near_regular_greenspace'])]
        else:
            gdf = perception_points_perplace
       
    place_name = 'Barcelona'
    ndvi_quantiles = ndvi_quantiles_bcn
    if ndvi_quantile == 'lower':
        gdf_bcn = gdf[(gdf.place_name==place_name) & (gdf.ndvi_median<ndvi_quantiles[0.25])].copy()
    elif ndvi_quantile == 'second':
        gdf_bcn = gdf[(gdf.place_name==place_name) & (gdf.ndvi_median>=ndvi_quantiles[0.25]) & (gdf.ndvi_median<ndvi_quantiles[0.5])].copy()
    elif ndvi_quantile == 'third':
        gdf_bcn = gdf[(gdf.place_name==place_name) & (gdf.ndvi_median>=ndvi_quantiles[0.5]) & (gdf.ndvi_median<ndvi_quantiles[0.75])].copy()
    elif ndvi_quantile == 'upper':
        gdf_bcn = gdf[(gdf.place_name==place_name) & (gdf.ndvi_median>=ndvi_quantiles[0.75])].copy()
        
    place_name = 'Rotterdam'
    ndvi_quantiles = ndvi_quantiles_rtd
    if ndvi_quantile == 'lower':
        gdf_rtd = gdf[(gdf.place_name==place_name) & (gdf.ndvi_median<ndvi_quantiles[0.25])].copy()
    elif ndvi_quantile == 'second':
        gdf_rtd = gdf[(gdf.place_name==place_name) & (gdf.ndvi_median>=ndvi_quantiles[0.25]) & (gdf.ndvi_median<ndvi_quantiles[0.5])].copy()
    elif ndvi_quantile == 'third':
        gdf_rtd = gdf[(gdf.place_name==place_name) & (gdf.ndvi_median>=ndvi_quantiles[0.5]) & (gdf.ndvi_median<ndvi_quantiles[0.75])].copy()
    elif ndvi_quantile == 'upper':
        gdf_rtd = gdf[(gdf.place_name==place_name) & (gdf.ndvi_median>=ndvi_quantiles[0.75])].copy()
        
    place_name = 'Goteborg'
    ndvi_quantiles = ndvi_quantiles_got
    if ndvi_quantile == 'lower':
        gdf_got = gdf[(gdf.place_name==place_name) & (gdf.ndvi_median<ndvi_quantiles[0.25])].copy()
    elif ndvi_quantile == 'second':
        gdf_got = gdf[(gdf.place_name==place_name) & (gdf.ndvi_median>=ndvi_quantiles[0.25]) & (gdf.ndvi_median<ndvi_quantiles[0.5])].copy()
    elif ndvi_quantile == 'third':
        gdf_got = gdf[(gdf.place_name==place_name) & (gdf.ndvi_median>=ndvi_quantiles[0.5]) & (gdf.ndvi_median<ndvi_quantiles[0.75])].copy()
    elif ndvi_quantile == 'upper':
        gdf_got = gdf[(gdf.place_name==place_name) & (gdf.ndvi_median>=ndvi_quantiles[0.75])].copy()
        
    return pd.concat([gdf_bcn, gdf_rtd, gdf_got])

In [101]:
def get_osm_ndvi_subset_stats(osm_category, ndvi_quantile, exclusive=False):
    
    subset = get_osm_ndvi_subset(osm_category, ndvi_quantile, exclusive=exclusive)
    
    return {
        'median': subset.greenness_rating_num.median(),
        'perc': round(100*len(subset[subset.greenness_rating_num>=3.0])/len(subset), 3),
        'n_green': len(subset[subset.greenness_rating_num>=3.0]),
        'n_total': len(subset),
        'ndvi_min': round(subset.ndvi_median.min(), 3),
        'ndvi_max': round(subset.ndvi_median.max(), 3)}

In [102]:
def print_osm_ndvi_stats(exclusive=False):
    
    headers = [
        'osm_category', 
        'lower ndvi\nBcn: {}-{}\nRtd: {}-{}\nGot: {}-{}'.format(round(ndvi_quantiles_bcn[0], 3), round(ndvi_quantiles_bcn[0.25], 3), round(ndvi_quantiles_rtd[0], 3), round(ndvi_quantiles_rtd[0.25], 3), round(ndvi_quantiles_got[0], 3), round(ndvi_quantiles_got[0.25], 3)), 
        'second ndvi\nBcn: {}-{}\nRtd: {}-{}\nGot: {}-{}'.format(round(ndvi_quantiles_bcn[0.25], 3), round(ndvi_quantiles_bcn[0.5], 3), round(ndvi_quantiles_rtd[0.25], 3), round(ndvi_quantiles_rtd[0.5], 3), round(ndvi_quantiles_got[0.25], 3), round(ndvi_quantiles_got[0.5], 3)), 
        'third ndvi\nBcn: {}-{}\nRtd: {}-{}\nGot: {}-{}'.format(round(ndvi_quantiles_bcn[0.5], 3), round(ndvi_quantiles_bcn[0.75], 3), round(ndvi_quantiles_rtd[0.5], 3), round(ndvi_quantiles_rtd[0.75], 3), round(ndvi_quantiles_got[0.5], 3), round(ndvi_quantiles_got[0.75], 3)), 
        'upper ndvi\nBcn: {}-{}\nRtd: {}-{}\nGot: {}-{}'.format(round(ndvi_quantiles_bcn[0.75], 3), round(ndvi_quantiles_bcn[1], 3), round(ndvi_quantiles_rtd[0.75], 3), round(ndvi_quantiles_rtd[1], 3), round(ndvi_quantiles_got[0.75], 3), round(ndvi_quantiles_got[1], 3)), ]
    rows = []
    osm_categories = ['near_regular_greenspace', 'near_pocket_greenspace', 'near_square', 'near_playspace', 'near_street'] #, 'all']
    for osm_category in osm_categories:
        if exclusive and osm_category=='near_regular_greenspace':
            stats_lower = get_osm_ndvi_subset_stats(osm_category, 'lower')
            stats_second = get_osm_ndvi_subset_stats(osm_category, 'second')
            stats_third = get_osm_ndvi_subset_stats(osm_category, 'third')
            stats_upper = get_osm_ndvi_subset_stats(osm_category, 'upper')
        else:
            stats_lower = get_osm_ndvi_subset_stats(osm_category, 'lower', exclusive=exclusive)
            stats_second = get_osm_ndvi_subset_stats(osm_category, 'second', exclusive=exclusive)
            stats_third = get_osm_ndvi_subset_stats(osm_category, 'third', exclusive=exclusive)
            stats_upper = get_osm_ndvi_subset_stats(osm_category, 'upper', exclusive=exclusive)
        rows.append([osm_category, 'median: '+str(stats_lower['median']), 'median: '+str(stats_second['median']), 'median: '+str(stats_third['median']), 'median: '+str(stats_upper['median'])])
        rows.append(['', str(stats_lower['perc'])+'% green', str(stats_second['perc'])+'% green', str(stats_third['perc'])+'% green', str(stats_upper['perc'])+'% green'])
        rows.append(['', 'n_green='+str(stats_lower['n_green']), 'n_green='+str(stats_second['n_green']), 'n_green='+str(stats_third['n_green']), 'n_green='+str(stats_upper['n_green'])])
        rows.append(['', 'n_total='+str(stats_lower['n_total']), 'n_total='+str(stats_second['n_total']), 'n_total='+str(stats_third['n_total']), 'n_total='+str(stats_upper['n_total'])])
        # rows.append(['', 'ndvi {}-{}'.format(stats_lower['ndvi_min'], stats_lower['ndvi_max']), 'ndvi {}-{}'.format(stats_second['ndvi_min'], stats_second['ndvi_max']), 'ndvi {}-{}'.format(stats_third['ndvi_min'], stats_third['ndvi_max']), 'ndvi {}-{}'.format(stats_upper['ndvi_min'], stats_upper['ndvi_max'])])
        rows.append([''])
        
    print(tabulate(rows, headers=headers))

In [103]:
if print_and_plot:
    print_osm_ndvi_stats(exclusive=True)

In [104]:
# what are percentages of true/false positives/negatives

In [105]:
print(len(perception_points_perplace[
    (perception_points_perplace.near_regular_greenspace) |
    (perception_points_perplace.near_pocket_greenspace) |
    (perception_points_perplace.near_square) |
    (perception_points_perplace.near_playspace) |
    (perception_points_perplace.near_street)]))

413


In [106]:
# case OSM - when using only OSM regular-greenspaces to represent greenspaces

In [107]:
OSM_truepos = 0
OSM_totalpos = 0
gdf = gpd.GeoDataFrame()

gdf = pd.concat([gdf, get_osm_ndvi_subset('near_regular_greenspace', 'lower')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_regular_greenspace', 'second')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_regular_greenspace', 'third')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_regular_greenspace', 'upper')])

gdf.drop_duplicates(inplace=True)
OSM_totalpos = len(gdf)
OSM_truepos = len(gdf[gdf.greenness_rating_num>=3.0])

print('OSM case:\n{} true positives\n(and {} false positives)'.format(OSM_truepos, OSM_totalpos-OSM_truepos))

OSM case:
163 true positives
(and 183 false positives)


In [108]:
OSM_trueneg = 0
OSM_totalneg = 0
gdf = gpd.GeoDataFrame()

gdf = pd.concat([gdf, get_osm_ndvi_subset('near_pocket_greenspace', 'lower', exclusive=True)])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_pocket_greenspace', 'second', exclusive=True)])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_pocket_greenspace', 'third', exclusive=True)])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_pocket_greenspace', 'upper', exclusive=True)])

gdf = pd.concat([gdf, get_osm_ndvi_subset('near_square', 'lower', exclusive=True)])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_square', 'second', exclusive=True)])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_square', 'third', exclusive=True)])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_square', 'upper', exclusive=True)])

gdf = pd.concat([gdf, get_osm_ndvi_subset('near_playspace', 'lower', exclusive=True)])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_playspace', 'second', exclusive=True)])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_playspace', 'third', exclusive=True)])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_playspace', 'upper', exclusive=True)])

gdf = pd.concat([gdf, get_osm_ndvi_subset('near_street', 'lower', exclusive=True)])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_street', 'second', exclusive=True)])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_street', 'third', exclusive=True)])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_street', 'upper', exclusive=True)])

gdf.drop_duplicates(inplace=True)
OSM_totalneg = len(gdf)
OSM_trueneg = len(gdf[gdf.greenness_rating_num<3.0])

print('OSM case:\n{} true negatives\n(and {} false negatives)'.format(OSM_trueneg, OSM_totalneg-OSM_trueneg))

OSM case:
50 true negatives
(and 17 false negatives)


In [109]:
# case NDVI - when using only NDVI upper half to represent greenspaces

In [110]:
NDVI_truepos = 0
NDVI_totalpos = 0
gdf = gpd.GeoDataFrame()

gdf = pd.concat([gdf, get_osm_ndvi_subset('near_regular_greenspace', 'third')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_pocket_greenspace', 'third')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_square', 'third')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_playspace', 'third')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_street', 'third')])

gdf = pd.concat([gdf, get_osm_ndvi_subset('near_regular_greenspace', 'upper')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_pocket_greenspace', 'upper')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_square', 'upper')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_playspace', 'upper')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_street', 'upper')])

gdf.drop_duplicates(inplace=True)
NDVI_totalpos = len(gdf)
NDVI_truepos = len(gdf[gdf.greenness_rating_num>=3.0])

print('NDVI case:\n{} true positives\n(and {} false positives)'.format(NDVI_truepos, NDVI_totalpos-NDVI_truepos))

NDVI case:
132 true positives
(and 78 false positives)


In [111]:
NDVI_trueneg = 0
NDVI_totalneg = 0
gdf = gpd.GeoDataFrame()

gdf = pd.concat([gdf, get_osm_ndvi_subset('near_regular_greenspace', 'lower')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_pocket_greenspace', 'lower')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_square', 'lower')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_playspace', 'lower')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_street', 'lower')])

gdf = pd.concat([gdf, get_osm_ndvi_subset('near_regular_greenspace', 'second')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_pocket_greenspace', 'second')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_square', 'second')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_playspace', 'second')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_street', 'second')])

gdf.drop_duplicates(inplace=True)
NDVI_totalneg = len(gdf)
NDVI_trueneg = len(gdf[gdf.greenness_rating_num<3.0])

print('NDVI case:\n{} true negatives\n(and {} false negatives)'.format(NDVI_trueneg, NDVI_totalneg-NDVI_trueneg))

NDVI case:
155 true negatives
(and 48 false negatives)


In [112]:
# case combo - when combining the two to represent greenspaces
# reg-size greenspace exluding those in lower NDVI quantile
# also other types of spaces in upper NDVI quantile

In [113]:
COMBO_truepos = 0
COMBO_totalpos = 0
gdf = gpd.GeoDataFrame()

gdf = pd.concat([gdf, get_osm_ndvi_subset('near_regular_greenspace', 'second')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_regular_greenspace', 'third')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_regular_greenspace', 'upper')])

gdf = pd.concat([gdf, get_osm_ndvi_subset('near_pocket_greenspace', 'upper')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_square', 'upper')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_playspace', 'upper')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_street', 'upper')])

gdf.drop_duplicates(inplace=True)
COMBO_totalpos = len(gdf)
COMBO_truepos = len(gdf[gdf.greenness_rating_num>=3.0])

print('COMBO case:\n{} true positives\n(and {} false positives)'.format(COMBO_truepos, COMBO_totalpos-COMBO_truepos))

COMBO case:
154 true positives
(and 129 false positives)


In [114]:
COMBO_trueneg = 0
COMBO_totalneg = 0
gdf = gpd.GeoDataFrame()

gdf = pd.concat([gdf, get_osm_ndvi_subset('near_regular_greenspace', 'lower')])

gdf = pd.concat([gdf, get_osm_ndvi_subset('near_pocket_greenspace', 'lower')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_square', 'lower')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_playspace', 'lower')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_street', 'lower')])

gdf = pd.concat([gdf, get_osm_ndvi_subset('near_pocket_greenspace', 'second')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_square', 'second')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_playspace', 'second')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_street', 'second')])

gdf = pd.concat([gdf, get_osm_ndvi_subset('near_pocket_greenspace', 'third')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_square', 'third')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_playspace', 'third')])
gdf = pd.concat([gdf, get_osm_ndvi_subset('near_street', 'third')])

gdf.drop_duplicates(inplace=True)
COMBO_totalneg = len(gdf)
COMBO_trueneg = len(gdf[gdf.greenness_rating_num<3.0])

print('COMBO case:\n{} true negatives\n(and {} false negatives)'.format(COMBO_trueneg, COMBO_totalneg-COMBO_trueneg))

COMBO case:
205 true negatives
(and 103 false negatives)


In [115]:
print('OSM case:\n{} true positives\n(and {} false positives)'.format(OSM_truepos, OSM_totalpos-OSM_truepos))
print('{} true negatives\n(and {} false negatives)'.format(OSM_trueneg, OSM_totalneg-OSM_trueneg))

OSM case:
163 true positives
(and 183 false positives)
50 true negatives
(and 17 false negatives)


In [116]:
print('NDVI case:\n{} true positives\n(and {} false positives)'.format(NDVI_truepos, NDVI_totalpos-NDVI_truepos))
print('{} true negatives\n(and {} false negatives)'.format(NDVI_trueneg, NDVI_totalneg-NDVI_trueneg))

NDVI case:
132 true positives
(and 78 false positives)
155 true negatives
(and 48 false negatives)


In [117]:
print('COMBO case:\n{} true positives\n(and {} false positives)'.format(COMBO_truepos, COMBO_totalpos-COMBO_truepos))
print('{} true negatives\n(and {} false negatives)'.format(COMBO_trueneg, COMBO_totalneg-COMBO_trueneg))

COMBO case:
154 true positives
(and 129 false positives)
205 true negatives
(and 103 false negatives)


In [118]:
if print_and_plot:
    
    fig, axs = plt.subplots(nrows=1, ncols=5, figsize=(20,4))
    fig.suptitle(place_name, y=1.2, fontsize=20)    
    
    gdf = perception_points_perplace[(perception_points_perplace['near_regular_greenspace'])]
    x = gdf.greenness_rating_num
    y = gdf.ndvi_median
    sns.regplot(ax=axs[0], x=x, y=y, scatter_kws={'alpha':0.1})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[0].set_title('Regular greenspace\nGreenness rating & NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[0].set_title('Regular greenspace\nGreenness rating & NDVI at point\nInsignificant')
        
        
    gdf = perception_points_perplace[(perception_points_perplace['near_pocket_greenspace']) & ~(perception_points_perplace['near_regular_greenspace'])]
    x = gdf.greenness_rating_num
    y = gdf.ndvi_median
    sns.regplot(ax=axs[1], x=x, y=y, scatter_kws={'alpha':0.1})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[1].set_title('Pocket greenspace\nGreenness rating & NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[1].set_title('Pocket greenspace\nGreenness rating & NDVI at point\nInsignificant')
        
        
    gdf = perception_points_perplace[(perception_points_perplace['near_square']) & ~(perception_points_perplace['near_regular_greenspace'])]
    x = gdf.greenness_rating_num
    y = gdf.ndvi_median
    sns.regplot(ax=axs[2], x=x, y=y, scatter_kws={'alpha':0.1})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[2].set_title('Square\nGreenness rating & NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[2].set_title('Square\nGreenness rating & NDVI at point\nInsignificant')
    
    
    gdf = perception_points_perplace[(perception_points_perplace['near_playspace']) & ~(perception_points_perplace['near_regular_greenspace'])]
    x = gdf.greenness_rating_num
    y = gdf.ndvi_median
    sns.regplot(ax=axs[3], x=x, y=y, scatter_kws={'alpha':0.1})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[3].set_title('Playspace\nGreenness rating & NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[3].set_title('Playspace\nGreenness rating & NDVI at point\nInsignificant')
        
        
    gdf = perception_points_perplace[(perception_points_perplace['near_street']) & ~(perception_points_perplace['near_regular_greenspace'])]
    x = gdf.greenness_rating_num
    y = gdf.ndvi_median
    sns.regplot(ax=axs[4], x=x, y=y, scatter_kws={'alpha':0.1})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[4].set_title('Street\nGreenness rating & NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[4].set_title('Street\nGreenness rating & NDVI at point\nInsignificant')
    
    
    plt.show()

In [119]:
if print_and_plot:
    
    place_name = 'Barcelona'
    
    fig, axs = plt.subplots(nrows=1, ncols=5, figsize=(20,4))
    fig.suptitle(place_name, y=1.2, fontsize=20)    
    
    gdf = perception_points_perplace[(perception_points_perplace['near_regular_greenspace']) & (perception_points_perplace.place_name==place_name)]
    x = gdf.greenness_rating_num
    y = gdf.ndvi_median
    sns.regplot(ax=axs[0], x=x, y=y, scatter_kws={'alpha':0.1})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[0].set_title('Regular greenspace\nGreenness rating & NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[0].set_title('Regular greenspace\nGreenness rating & NDVI at point\nInsignificant')
        
        
    gdf = perception_points_perplace[(perception_points_perplace['near_pocket_greenspace']) & ~(perception_points_perplace['near_regular_greenspace']) & (perception_points_perplace.place_name==place_name)]
    x = gdf.greenness_rating_num
    y = gdf.ndvi_median
    sns.regplot(ax=axs[1], x=x, y=y, scatter_kws={'alpha':0.1})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[1].set_title('Pocket greenspace\nGreenness rating & NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[1].set_title('Pocket greenspace\nGreenness rating & NDVI at point\nInsignificant')
        
        
    gdf = perception_points_perplace[(perception_points_perplace['near_square']) & ~(perception_points_perplace['near_regular_greenspace']) & (perception_points_perplace.place_name==place_name)]
    x = gdf.greenness_rating_num
    y = gdf.ndvi_median
    sns.regplot(ax=axs[2], x=x, y=y, scatter_kws={'alpha':0.1})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[2].set_title('Square\nGreenness rating & NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[2].set_title('Square\nGreenness rating & NDVI at point\nInsignificant')
    
    
    gdf = perception_points_perplace[(perception_points_perplace['near_playspace']) & ~(perception_points_perplace['near_regular_greenspace']) & (perception_points_perplace.place_name==place_name)]
    x = gdf.greenness_rating_num
    y = gdf.ndvi_median
    sns.regplot(ax=axs[3], x=x, y=y, scatter_kws={'alpha':0.1})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[3].set_title('Playspace\nGreenness rating & NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[3].set_title('Playspace\nGreenness rating & NDVI at point\nInsignificant')
        
        
    gdf = perception_points_perplace[(perception_points_perplace['near_street']) & ~(perception_points_perplace['near_regular_greenspace']) & (perception_points_perplace.place_name==place_name)]
    x = gdf.greenness_rating_num
    y = gdf.ndvi_median
    sns.regplot(ax=axs[4], x=x, y=y, scatter_kws={'alpha':0.1})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[4].set_title('Street\nGreenness rating & NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[4].set_title('Street\nGreenness rating & NDVI at point\nInsignificant')
    
    
    plt.show()

In [120]:
if print_and_plot:
    
    place_name = 'Rotterdam'
    
    fig, axs = plt.subplots(nrows=1, ncols=5, figsize=(20,4))
    fig.suptitle(place_name, y=1.2, fontsize=20)    
    
    gdf = perception_points_perplace[(perception_points_perplace['near_regular_greenspace']) & (perception_points_perplace.place_name==place_name)]
    x = gdf.greenness_rating_num
    y = gdf.ndvi_median
    sns.regplot(ax=axs[0], x=x, y=y, scatter_kws={'alpha':0.1})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[0].set_title('Regular greenspace\nGreenness rating & NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[0].set_title('Regular greenspace\nGreenness rating & NDVI at point\nInsignificant')
        
        
    gdf = perception_points_perplace[(perception_points_perplace['near_pocket_greenspace']) & ~(perception_points_perplace['near_regular_greenspace']) & (perception_points_perplace.place_name==place_name)]
    x = gdf.greenness_rating_num
    y = gdf.ndvi_median
    sns.regplot(ax=axs[1], x=x, y=y, scatter_kws={'alpha':0.1})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[1].set_title('Pocket greenspace\nGreenness rating & NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[1].set_title('Pocket greenspace\nGreenness rating & NDVI at point\nInsignificant')
        
        
    gdf = perception_points_perplace[(perception_points_perplace['near_square']) & ~(perception_points_perplace['near_regular_greenspace']) & (perception_points_perplace.place_name==place_name)]
    x = gdf.greenness_rating_num
    y = gdf.ndvi_median
    sns.regplot(ax=axs[2], x=x, y=y, scatter_kws={'alpha':0.1})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[2].set_title('Square\nGreenness rating & NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[2].set_title('Square\nGreenness rating & NDVI at point\nInsignificant')
    
    
    gdf = perception_points_perplace[(perception_points_perplace['near_playspace']) & ~(perception_points_perplace['near_regular_greenspace']) & (perception_points_perplace.place_name==place_name)]
    x = gdf.greenness_rating_num
    y = gdf.ndvi_median
    sns.regplot(ax=axs[3], x=x, y=y, scatter_kws={'alpha':0.1})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[3].set_title('Playspace\nGreenness rating & NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[3].set_title('Playspace\nGreenness rating & NDVI at point\nInsignificant')
        
        
    gdf = perception_points_perplace[(perception_points_perplace['near_street']) & ~(perception_points_perplace['near_regular_greenspace']) & (perception_points_perplace.place_name==place_name)]
    x = gdf.greenness_rating_num
    y = gdf.ndvi_median
    sns.regplot(ax=axs[4], x=x, y=y, scatter_kws={'alpha':0.1})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[4].set_title('Street\nGreenness rating & NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[4].set_title('Street\nGreenness rating & NDVI at point\nInsignificant')
    
    
    plt.show()

In [121]:
if print_and_plot:
    
    place_name = 'Goteborg'
    
    fig, axs = plt.subplots(nrows=1, ncols=5, figsize=(20,4))
    fig.suptitle(place_name, y=1.2, fontsize=20)
    
    
    gdf = perception_points_perplace[(perception_points_perplace['near_regular_greenspace']) & (perception_points_perplace.place_name==place_name)]
    x = gdf.greenness_rating_num
    y = gdf.ndvi_median
    sns.regplot(ax=axs[0], x=x, y=y, scatter_kws={'alpha':0.1})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[0].set_title('Regular greenspace\nGreenness rating & NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[0].set_title('Regular greenspace\nGreenness rating & NDVI at point\nInsignificant')
        
        
    gdf = perception_points_perplace[(perception_points_perplace['near_pocket_greenspace']) & ~(perception_points_perplace['near_regular_greenspace']) & (perception_points_perplace.place_name==place_name)]
    x = gdf.greenness_rating_num
    y = gdf.ndvi_median
    sns.regplot(ax=axs[1], x=x, y=y, scatter_kws={'alpha':0.1})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[1].set_title('Pocket greenspace\nGreenness rating & NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[1].set_title('Pocket greenspace\nGreenness rating & NDVI at point\nInsignificant')
        
        
    gdf = perception_points_perplace[(perception_points_perplace['near_square']) & ~(perception_points_perplace['near_regular_greenspace']) & (perception_points_perplace.place_name==place_name)]
    x = gdf.greenness_rating_num
    y = gdf.ndvi_median
    sns.regplot(ax=axs[2], x=x, y=y, scatter_kws={'alpha':0.1})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[2].set_title('Square\nGreenness rating & NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[2].set_title('Square\nGreenness rating & NDVI at point\nInsignificant')
    
    
    gdf = perception_points_perplace[(perception_points_perplace['near_playspace']) & ~(perception_points_perplace['near_regular_greenspace']) & (perception_points_perplace.place_name==place_name)]
    x = gdf.greenness_rating_num
    y = gdf.ndvi_median
    sns.regplot(ax=axs[3], x=x, y=y, scatter_kws={'alpha':0.1})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[3].set_title('Playspace\nGreenness rating & NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[3].set_title('Playspace\nGreenness rating & NDVI at point\nInsignificant')
        
        
    gdf = perception_points_perplace[(perception_points_perplace['near_street']) & ~(perception_points_perplace['near_regular_greenspace']) & (perception_points_perplace.place_name==place_name)]
    x = gdf.greenness_rating_num
    y = gdf.ndvi_median
    sns.regplot(ax=axs[4], x=x, y=y, scatter_kws={'alpha':0.1})
    correlation, pvalue = stats.spearmanr(x, y, nan_policy='omit')
    if pvalue <= 0.05:
        axs[4].set_title('Street\nGreenness rating & NDVI at point\nN: {}; Corr: {}; p-value: {}'.format(int(len(gdf)), round(correlation, 3), round(pvalue, 3)))
    else:
        axs[4].set_title('Street\nGreenness rating & NDVI at point\nInsignificant')
    
    
    plt.show()

## Pseudonmized data

### Pseudonimized ids for reporting

In [122]:
# generate pseudo ids for greenness data
pseudo_ids = greenness_perception_points[['prolific_id']].drop_duplicates().groupby('prolific_id').sample(frac=1).reset_index(drop=True)
pseudo_ids['pseudo_participant_id'] = pseudo_ids.index + 1

greenness_perception_points_pseudonimized = greenness_perception_points.merge(pseudo_ids, on='prolific_id')
greenness_perception_points_pseudonimized.drop(columns=['prolific_id'], inplace=True)

In [123]:
# check if any prolific ids are in activity data that were not also in greenness data
# if that is not the case, so we can use the same mapping for the activity data
len(activity_perception_points[~activity_perception_points.prolific_id.isin(pseudo_ids.prolific_id.to_list())])

0

In [124]:
# 0, so we use the same pseudo ids for activity data
activity_perception_points_pseudonimized = activity_perception_points.merge(pseudo_ids, on='prolific_id')
activity_perception_points_pseudonimized.drop(columns=['prolific_id'], inplace=True)

### Output data with pseudonimized ids

In [125]:
# turn into geodataframe
gsv_crs = 'EPSG:4326'

greenness_perception_points_pseudonimized = gpd.GeoDataFrame(
    greenness_perception_points_pseudonimized,
    geometry=gpd.points_from_xy(greenness_perception_points_pseudonimized.gsv_lng, greenness_perception_points_pseudonimized.gsv_lat),
    crs=gsv_crs)

activity_perception_points_pseudonimized = gpd.GeoDataFrame(
    activity_perception_points_pseudonimized,
    geometry=gpd.points_from_xy(activity_perception_points_pseudonimized.gsv_lng, activity_perception_points_pseudonimized.gsv_lat),
    crs=gsv_crs)

In [126]:
# organize columns
new_greenness_cols = [
    'place_name', 'gsv_pano_id', 'pseudo_participant_id',
    'greenness_rating', 'greenness_rating_num', 'greenness_reason',
    'near_regular_greenspace', 'near_pocket_greenspace', 'near_square', 'near_playspace', 'near_street',
    'ndvi_mean', 'ndvi_median', 'ndvi_max',
    'iframe', 'geometry']

greenness_perception_points_pseudonimized = greenness_perception_points_pseudonimized[new_greenness_cols]

In [127]:
# organize columns
new_activity_cols = [
    'place_name', 'gsv_pano_id', 'pseudo_participant_id',
    'physical_rating', 'physical_rating_num', 'physical_reason',
    'social_rating', 'social_rating_num', 'social_reason',
    'relax_rating', 'relax_rating_num', 'relax_reason',
    'commute_rating', 'commute_rating_num', 'commute_reason',
    'children_rating', 'children_rating_num', 'children_reason',
    'near_regular_greenspace', 'near_pocket_greenspace', 'near_square', 'near_playspace', 'near_street',
    'ndvi_mean', 'ndvi_median', 'ndvi_max',
    'iframe', 'geometry']

activity_perception_points_pseudonimized = activity_perception_points_pseudonimized[new_activity_cols]

### write output data

In [128]:
export_sub_folder = os.path.join(confidential_folder, 'pseudonimized', 'radius_{}'.format(radius))

In [129]:
if not os.path.exists(export_sub_folder):
    os.mkdir(export_sub_folder)

In [130]:
output_file = os.path.join(export_sub_folder, 'greenness_perceptions.geojson')
greenness_perception_points_pseudonimized.to_file(output_file, driver='GeoJSON')

  pd.Int64Index,


In [131]:
output_file = os.path.join(export_sub_folder, 'activity_perceptions.geojson')
activity_perception_points_pseudonimized.to_file(output_file, driver='GeoJSON')

  pd.Int64Index,
