# Step 5. Preprocess collected perceptions and combine with location-data

In [None]:
import os
import datetime

import numpy as np
import scipy
import fiona
import statistics
import math

import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString, shape, mapping, Point, Polygon, MultiPolygon
from shapely.ops import cascaded_union, transform
import pyproj

import matplotlib.pyplot as plt
from matplotlib import colors, cm, style
import matplotlib.patches as mpatches
# from descartes import PolygonPatch

import osmnx as ox
import networkx as nx

import rasterio
from rasterio import MemoryFile
from rasterio.plot import show
from rasterio.mask import mask
import json

import contextily as cx
import folium
from folium.features import DivIcon

import random

In [None]:
from getpass import getpass

import requests
from requests import Request, Session

import hashlib
import hmac
import base64
import urllib.parse as urlparse

from datetime import date
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

## Define city and other settings

#### Place |Country    |CRS
Rotterdam    (The Netherlands)    EPSG:28992

Barcelona     (Spain)              EPSG:25830

Goteborg        (Sweden)             EPSG:3006

In [None]:
# Switch for the optional diagnostic output: every `if print_and_plot:` cell
# below only prints when this is True.
print_and_plot = False

In [None]:
# City to process and the CRS that is assigned to its OSM layers when they are
# loaded (see the Place / Country / CRS overview above).
place_name = 'Goteborg'
local_crs = 'EPSG:3006'

In [None]:
# Further CRS identifiers; neither is referenced in the visible part of this
# notebook. NOTE(review): judging by the names these belong to the OSM and
# Google Street View data -- confirm against the cells that use them.
osm_crs = 'EPSG:3857'
gsv_crs = 'EPSG:4326'

In [None]:
# Folder layout of the per-city data files read below.
export_folder = 'data'

# Enriched OSM layers: city name cut at the first comma and stripped of spaces.
enriched_city_folder = '{}_15Mar2023'.format(place_name.split(',')[0].replace(' ', ''))
export_osm_sub_folder = os.path.join('OSM', enriched_city_folder, 'enriched')

# Sampled locations.
# NOTE(review): this folder name uses place_name as-is, whereas the enriched
# folder above normalises it; both agree only while place_name contains no
# comma or space.
sampled_city_folder = '{}_15Mar2023'.format(place_name)
sampled_folder = os.path.join(export_folder, 'OSM', sampled_city_folder, 'sampled')

In [None]:
# Folder in the user's home directory that holds the survey and Prolific
# exports read by the city-specific cells below.
confidential_folder = os.path.expanduser('~/confidential_folder')

In [None]:
# set random seed for generating random numbers
# and for sampling rows from geodataframes
# (only Python's built-in `random` module is seeded in this cell)
random_state = 42
random.seed(random_state)

In [None]:
# Radius setting; not referenced in the visible part of this notebook.
# NOTE(review): unit is not stated here -- presumably metres in local_crs, confirm.
radius = 300

## Read data

In [None]:
# Load the sampled locations for this city.
sampled = gpd.read_file(os.path.join(sampled_folder, 'sampled.geojson'))

In [None]:
# Tag every sampled location with the city it belongs to.
sampled['city'] = place_name

In [None]:
# Use the same column name ('iframe') as the perception tables, which are
# merged with `sampled` on that column further below.
sampled = sampled.rename(columns={'gsv_iframe_html': 'iframe'})

In [None]:
# Open the NDVI raster of this city and read all of its bands into memory.
# The dataset handle is left open on purpose: it stays available as `geotiff`.
ndvi_filename = 'NDVI cloudless {} 10mres.tif'.format(place_name)
geotiff_file = os.path.join('data', 'NDVI', ndvi_filename)
geotiff = rasterio.open(geotiff_file)
geotiff_data = geotiff.read()

In [None]:
# Load the enriched regular greenspaces of this city.
regular_greenspaces = gpd.read_file(
    os.path.join('data', export_osm_sub_folder, 'regular_greenspaces_enriched.geojson'))
# Label the layer as local_crs WITHOUT reprojecting its coordinates.
# set_crs(..., allow_override=True) is the supported way to do this; assigning
# to `.crs` directly is deprecated when the layer already carries a CRS.
regular_greenspaces = regular_greenspaces.set_crs(local_crs, allow_override=True)

In [None]:
# Load the enriched pocket greenspaces of this city.
pocket_greenspaces = gpd.read_file(
    os.path.join('data', export_osm_sub_folder, 'pocket_greenspaces_enriched.geojson'))
# Label the layer as local_crs WITHOUT reprojecting its coordinates.
# set_crs(..., allow_override=True) is the supported way to do this; assigning
# to `.crs` directly is deprecated when the layer already carries a CRS.
pocket_greenspaces = pocket_greenspaces.set_crs(local_crs, allow_override=True)

In [None]:
# Load the enriched squares of this city.
squares = gpd.read_file(
    os.path.join('data', export_osm_sub_folder, 'squares_enriched.geojson'))
# Label the layer as local_crs WITHOUT reprojecting its coordinates.
# set_crs(..., allow_override=True) is the supported way to do this; assigning
# to `.crs` directly is deprecated when the layer already carries a CRS.
squares = squares.set_crs(local_crs, allow_override=True)

In [None]:
# Load the enriched playspaces of this city.
playspaces = gpd.read_file(
    os.path.join('data', export_osm_sub_folder, 'playspaces_enriched.geojson'))
# Label the layer as local_crs WITHOUT reprojecting its coordinates.
# set_crs(..., allow_override=True) is the supported way to do this; assigning
# to `.crs` directly is deprecated when the layer already carries a CRS.
playspaces = playspaces.set_crs(local_crs, allow_override=True)

In [None]:
# Load the enriched streets of this city.
streets = gpd.read_file(
    os.path.join('data', export_osm_sub_folder, 'streets_enriched.geojson'))
# Label the layer as local_crs WITHOUT reprojecting its coordinates.
# set_crs(..., allow_override=True) is the supported way to do this; assigning
# to `.crs` directly is deprecated when the layer already carries a CRS.
streets = streets.set_crs(local_crs, allow_override=True)

In [None]:
# Empty accumulators: the city-specific cells below append one survey export
# (perceptions) and one Prolific export (demographics) per age group.
perceptions = gpd.GeoDataFrame()
demographics = gpd.GeoDataFrame()

In [None]:
if place_name == 'Barcelona':
    # One entry per age group:
    # (sub folder, survey export (.xlsx), Prolific export (.csv))
    age_group_exports = [
        # age group 18-29
        ('Real Prolific Barcelona 18-29 Mar30',
         'City Perception Real Barcelona 18-29_March 30, 2023_10.18.xlsx',
         'prolific_export_6424260a4fa27f2c7e8eb53a.csv'),
        # age group 30-39
        ('Real Prolific Barcelona 30-39 Mar30',
         'City Perception Real Barcelona 30-39_March 30, 2023_08.54_corrected.xlsx',
         'prolific_export_6424251d38a1ff04d1864fd0.csv'),
        # age group 40-49
        ('Real Prolific Barcelona 40-49 Mar30',
         'City Perception Real Barcelona 40-49_March 30, 2023_07.16.xlsx',
         'prolific_export_642424a6dcf6595d50daf9c7.csv'),
        # age group 50-59
        ('Real Prolific Barcelona 50-59 Mar29',
         'City Perception Real Barcelona 50-59_March 29, 2023_09.22.xlsx',
         'prolific_export_642407af18c73da6c5d47c8d.csv'),
        # age group 60+
        ('Real Prolific Barcelona 60+ Mar28',
         'City Perception Real Barcelona_March 28, 2023_05.51_corrected.xlsx',
         'prolific_export_641c6b00b70ba558d0f17239.csv'),
    ]

    for sub_folder, perceptions_filename, demographics_filename in age_group_exports:
        group_folder = os.path.join(confidential_folder, sub_folder)

        # skiprows=[1] drops the second row of the sheet
        # (presumably a descriptive header row -- TODO confirm)
        group_perceptions = pd.read_excel(
            os.path.join(group_folder, perceptions_filename), skiprows=[1])
        perceptions = pd.concat([perceptions, group_perceptions])

        group_demographics = pd.read_csv(
            os.path.join(group_folder, demographics_filename))
        demographics = pd.concat([demographics, group_demographics])

In [None]:
if place_name == 'Rotterdam':
    # One entry per age group:
    # (sub folder, survey export (.xlsx), Prolific export (.csv))
    age_group_exports = [
        # age group 18-29
        ('Real Prolific Rotterdam 18-29 May2',
         'City Perception Real Rotterdam 18-29_May 2, 2023_03.50.xlsx',
         'prolific_export_644912432207fd15335e6ed3.csv'),
        # age group 30-39
        ('Real Prolific Rotterdam 30-39 May2',
         'City Perception Real Rotterdam 30-39_May 2, 2023_03.48.xlsx',
         'prolific_export_6449124b1677afb325bb50bd.csv'),
        # age group 40-49
        ('Real Prolific Rotterdam 40-49 May2',
         'City Perception Real Rotterdam 40-49_May 2, 2023_03.33.xlsx',
         'prolific_export_64491257bd526b836ed93760.csv'),
        # age group 50-59
        ('Real Prolific Rotterdam 50-59 May2',
         'City Perception Real Rotterdam 50-59_May 2, 2023_03.31_corrected.xlsx',
         'prolific_export_644912646c7244a4d6fdd8c0.csv'),
        # age group 60+
        ('Real Prolific Rotterdam 60+ May1',
         'City Perception Real Rotterdam 60+_May 1, 2023_09.45.xlsx',
         'prolific_export_6449126c15f75e2bf1615d0e.csv'),
    ]

    for sub_folder, perceptions_filename, demographics_filename in age_group_exports:
        group_folder = os.path.join(confidential_folder, sub_folder)

        # skiprows=[1] drops the second row of the sheet
        # (presumably a descriptive header row -- TODO confirm)
        group_perceptions = pd.read_excel(
            os.path.join(group_folder, perceptions_filename), skiprows=[1])
        perceptions = pd.concat([perceptions, group_perceptions])

        group_demographics = pd.read_csv(
            os.path.join(group_folder, demographics_filename))
        demographics = pd.concat([demographics, group_demographics])

In [None]:
if place_name == 'Goteborg':
    # One entry per age group:
    # (sub folder, survey export (.xlsx), Prolific export (.csv))
    age_group_exports = [
        # age group 18-29
        ('Real Prolific Goteborg 18-29 May12',
         'City Perception Real Goteborg 18-29_May 12, 2023_03.14.xlsx',
         'prolific_export_6449325f80adcf408dc72404.csv'),
        # age group 30-39
        ('Real Prolific Goteborg 30-39 May12',
         'City Perception Real Goteborg 30-39_May 12, 2023_05.51.xlsx',
         'prolific_export_644932589dd4737ea883c164.csv'),
        # age group 40-49
        ('Real Prolific Goteborg 40-49 May12',
         'City Perception Real Goteborg 40-49_May 12, 2023_05.50.xlsx',
         'prolific_export_6449324dd679d296fc3f742c.csv'),
        # age group 50-59
        ('Real Prolific Goteborg 50-59 May12',
         'City Perception Real Goteborg 50-59_May 12, 2023_05.09_corrected.xlsx',
         'prolific_export_64493244eb25800c5761f47a.csv'),
        # age group 60+
        ('Real Prolific Goteborg 60+ May12',
         'City Perception Real Goteborg 60+_May 12, 2023_05.08.xlsx',
         'prolific_export_644932346413a288af1d965f.csv'),
    ]

    for sub_folder, perceptions_filename, demographics_filename in age_group_exports:
        group_folder = os.path.join(confidential_folder, sub_folder)

        # skiprows=[1] drops the second row of the sheet
        # (presumably a descriptive header row -- TODO confirm)
        group_perceptions = pd.read_excel(
            os.path.join(group_folder, perceptions_filename), skiprows=[1])
        perceptions = pd.concat([perceptions, group_perceptions])

        group_demographics = pd.read_csv(
            os.path.join(group_folder, demographics_filename))
        demographics = pd.concat([demographics, group_demographics])

In [None]:
# The per-age-group exports were stacked with their own row labels; give both
# tables a fresh 0..n-1 index so that row labels are unique.
perceptions = perceptions.reset_index(drop=True)
demographics = demographics.reset_index(drop=True)

In [None]:
# Diagnostic: number of survey entries before any filtering.
if print_and_plot:
    print(len(perceptions))

## Filter relevant, combine all, and match with Prolific demographics

In [None]:
# Exact value of the 'consent' column that counts as full consent: the filter
# below keeps only entries whose 'consent' cell equals this string verbatim
# (all statements, comma-joined without a space after the separator).
consent_yes = 'I have read and understood the study information, or it has been read to me. I have been able to ask questions about the study and my questions have been answered to my satisfaction.,I consent voluntarily to be a participant in this study and understand that I can refuse to answer questions and I can withdraw from the study at any time, without having to give a reason.,I understand that taking part in the study involves answering questions about my general living environment preferences, as well as about panoramic street-level images of urban environments. And that anonymised information I provide will be used for potential reports and publications.,I understand that personal information collected about me that can identify me, such as my name or email, will not be shared beyond the study team.,I understand that anonymised and aggregated data may be archived to be used for future research and learning. I understand that access to this archive repository is open.'

In [None]:
# filter out: preview entries, uncompleted entries
# double check if all entries provided consent
has_ip_status = perceptions['Status'] == 'IP Address'
is_complete = (perceptions['Progress'] == 100.0) & (perceptions['Finished'] == True)
is_not_preview = perceptions['DistributionChannel'] != 'preview'
gave_consent = perceptions['consent'] == consent_yes

perceptions_filtered = perceptions[
    has_ip_status & is_complete & is_not_preview & gave_consent]

In [None]:
# Diagnostic: entries left after the completion / preview / consent filter,
# relative to the unfiltered total.
if print_and_plot:
    print('{}/{} completed prolific entries that provided consent remain after filtering'.format(len(perceptions_filtered), len(perceptions)))

In [None]:
# filter out: entries with low recaptcha score
# (entries without any score are kept here; they are listed for a manual
# check further below)
recaptcha_score = perceptions_filtered['Q_RecaptchaScore']
passed_or_unscored = (recaptcha_score >= 0.5) | recaptcha_score.isna()
perceptions_filtered = perceptions_filtered[passed_or_unscored]

In [None]:
# Diagnostic: entries left after the recaptcha filter, relative to the
# unfiltered total.
if print_and_plot:
    print('{}/{} entries with valid recaptcha remain after filtering'.format(len(perceptions_filtered), len(perceptions)))

In [None]:
# Diagnostic: list the entries that failed or lack a recaptcha score.
# Both listings are taken from the UNFILTERED `perceptions` table, so they can
# include entries that were already removed by the previous filter.
if print_and_plot:
    print('The following entries did not pass the recaptcha:')
    print(perceptions[perceptions['Q_RecaptchaScore']<0.5][['prolific_id', 'Q_RecaptchaScore']])
    
    print('\nThe following entries did not have a recaptcha score, check them manually:')
    print(perceptions[perceptions['Q_RecaptchaScore'].isna()][['prolific_id', 'Q_RecaptchaScore']])

In [None]:
# after the manual check, we exclude the following indices
# (row labels of the stacked `perceptions` table after its index reset)
if place_name == 'Rotterdam':
    to_exclude = [129]
    keep_row = ~perceptions_filtered.index.isin(to_exclude)
    perceptions_filtered = perceptions_filtered[keep_row]

    if print_and_plot:
        n_remaining = len(perceptions_filtered)
        n_total = len(perceptions)
        print('{}/{} entries remain after manual check'.format(n_remaining, n_total))

In [None]:
# filter out: entries without sensible prolific id
# (an id counts as sensible when it is exactly 24 characters long)
has_sensible_id = perceptions_filtered['prolific_id'].str.len() == 24
perceptions_filtered = perceptions_filtered[has_sensible_id]

In [None]:
# Diagnostic: entries left after the Prolific-id filter, relative to the
# unfiltered total.
if print_and_plot:
    print('{}/{} entries with sensible prolific id remain after filtering'.format(len(perceptions_filtered), len(perceptions)))

In [None]:
# Diagnostic: list the non-empty Prolific ids (from the unfiltered table) that
# are not 24 characters long.
if print_and_plot:
    print('The following entries do not have a valid Prolific ID:')
    print(perceptions[(perceptions['prolific_id'].str.len()!=24) & (perceptions['prolific_id'].notna())].prolific_id)

In [None]:
# filter out: entries that did not pass the control question
# the control item is column 'act_loc4_rating#1_6' (renamed further below to
# 'control_just_select_never'); only entries that answered 'Never' are kept
perceptions_filtered = perceptions_filtered[
    (perceptions_filtered['act_loc4_rating#1_6']=='Never')
]

In [None]:
# Diagnostic: entries left after the control-question filter, relative to the
# unfiltered total.
if print_and_plot:
    print('{}/{} entries that passed the control question remain after filtering'.format(len(perceptions_filtered), len(perceptions)))

In [None]:
# Diagnostic: Prolific ids (from the unfiltered table) whose control answer
# was anything other than 'Never'.
if print_and_plot:
    print('The following Prolific IDs did NOT pass the control question:')
    print(perceptions[(perceptions['act_loc4_rating#1_6']!='Never') & (perceptions.prolific_id.notna())].prolific_id)

In [None]:
# From here on `perceptions` refers to the filtered entries only; the
# unfiltered table is no longer reachable under this name.
perceptions = perceptions_filtered

In [None]:
# Diagnostic: number of entries that survived all filters.
if print_and_plot:
    print(len(perceptions))

In [None]:
# Tag every Prolific export column with a '_Prolific' suffix so that the
# demographic columns stay distinguishable from the survey columns.
demographics = demographics.add_suffix('_Prolific')

# Left merge: every filtered survey entry is kept, also when no Prolific
# record matches its id.
# `None` is the documented way to leave the left-hand column names without a
# suffix. The previous `False` is not a documented value and could end up
# embedded in a column name if a name ever overlapped.
perceptions = perceptions.merge(
    demographics,
    how='left',
    left_on='prolific_id',
    right_on='Participant id_Prolific',
    suffixes=(None, '_Prolific'))

## Clean up attributes

In [None]:
# provide more sensible names to some columns
# The raw export repeats the same column pattern for each of the five survey
# locations, so the mapping is generated instead of spelled out.
activity_kinds = ['physical', 'social', 'relax', 'commute', 'children']

sensible_names = {'pre_gender_4_TEXT': 'pre_gender_selfdescription'}

for loc in range(1, 6):
    # activity question: the k-th activity has a '#1_k' column (renamed to
    # '<activity>_rating') and a '#2_k_1' column (renamed to '<activity>_reason')
    for position, kind in enumerate(activity_kinds, start=1):
        sensible_names['act_loc{}_rating#1_{}'.format(loc, position)] = \
            'act_loc{}_{}_rating'.format(loc, kind)
        sensible_names['act_loc{}_rating#2_{}_1'.format(loc, position)] = \
            'act_loc{}_{}_reason'.format(loc, kind)
    sensible_names['loc_act_{}'.format(loc)] = 'act_loc{}_iframe'.format(loc)
    sensible_names['click_act_{}'.format(loc)] = 'act_loc{}_clicks'.format(loc)

    # greenness question
    sensible_names['green_loc{}_rating_1'.format(loc)] = 'green_loc{}_rating'.format(loc)
    sensible_names['loc_gr_{}'.format(loc)] = 'green_loc{}_iframe'.format(loc)
    sensible_names['click_gr_{}'.format(loc)] = 'green_loc{}_clicks'.format(loc)

# the control item only exists as a sixth row of the location-4 activity question
sensible_names['act_loc4_rating#1_6'] = 'control_just_select_never'

# post-survey questions: the importance items follow the same activity order
for position, kind in enumerate(activity_kinds, start=1):
    sensible_names['post_importance_live_{}'.format(position)] = \
        'post_importance_live_{}'.format(kind)
    sensible_names['post_importance_gree_{}'.format(position)] = \
        'post_importance_green_{}'.format(kind)

sensible_names.update({
    'post_profession_1': 'post_profession_built',
    'post_profession_2': 'post_profession_health',
    'survey_questions_1': 'survey_questions_clear',
    'survey_questions_2': 'survey_questions_panned',
    'survey_questions_3': 'survey_questions_walked',
    'survey_questions_4': 'survey_questions_knewplaces',
})

perceptions = perceptions.rename(columns=sensible_names)

### Perceptions per location

#### for activities

In [None]:
# Activity answers for survey location 1, renamed to location-independent
# column names so that the five per-location tables can be stacked.
loc1_prefix = 'act_loc1_'
loc1_columns = [
    'loc_ifr_1', 'act_loc1_clicks',
    'act_loc1_physical_rating', 'act_loc1_physical_reason',
    'act_loc1_social_rating', 'act_loc1_social_reason',
    'act_loc1_relax_rating', 'act_loc1_relax_reason',
    'act_loc1_commute_rating', 'act_loc1_commute_reason',
    'act_loc1_children_rating', 'act_loc1_children_reason',
]

# 'loc_ifr_1' becomes 'iframe'; every other column just loses its prefix
loc1_renames = {'loc_ifr_1': 'iframe'}
for column in loc1_columns[1:]:
    loc1_renames[column] = column[len(loc1_prefix):]

activity_perceptions_1 = perceptions[['prolific_id'] + loc1_columns].rename(columns=loc1_renames)

In [None]:
# Activity answers for survey location 2, renamed to location-independent
# column names so that the five per-location tables can be stacked.
loc2_prefix = 'act_loc2_'
loc2_columns = [
    'loc_ifr_2', 'act_loc2_clicks',
    'act_loc2_physical_rating', 'act_loc2_physical_reason',
    'act_loc2_social_rating', 'act_loc2_social_reason',
    'act_loc2_relax_rating', 'act_loc2_relax_reason',
    'act_loc2_commute_rating', 'act_loc2_children_rating',
    'act_loc2_commute_reason', 'act_loc2_children_reason',
]

# 'loc_ifr_2' becomes 'iframe'; every other column just loses its prefix
loc2_renames = {'loc_ifr_2': 'iframe'}
for column in loc2_columns[1:]:
    loc2_renames[column] = column[len(loc2_prefix):]

activity_perceptions_2 = perceptions[['prolific_id'] + loc2_columns].rename(columns=loc2_renames)

In [None]:
# Activity answers for survey location 3, renamed to location-independent
# column names so that the five per-location tables can be stacked.
loc3_prefix = 'act_loc3_'
loc3_columns = [
    'loc_ifr_3', 'act_loc3_clicks',
    'act_loc3_physical_rating', 'act_loc3_physical_reason',
    'act_loc3_social_rating', 'act_loc3_social_reason',
    'act_loc3_relax_rating', 'act_loc3_relax_reason',
    'act_loc3_commute_rating', 'act_loc3_children_rating',
    'act_loc3_commute_reason', 'act_loc3_children_reason',
]

# 'loc_ifr_3' becomes 'iframe'; every other column just loses its prefix
loc3_renames = {'loc_ifr_3': 'iframe'}
for column in loc3_columns[1:]:
    loc3_renames[column] = column[len(loc3_prefix):]

activity_perceptions_3 = perceptions[['prolific_id'] + loc3_columns].rename(columns=loc3_renames)

In [None]:
# Activity answers for survey location 4, renamed to location-independent
# column names so that the five per-location tables can be stacked.
# (The control item of location 4 is deliberately not part of this table.)
loc4_prefix = 'act_loc4_'
loc4_columns = [
    'loc_ifr_4', 'act_loc4_clicks',
    'act_loc4_physical_rating', 'act_loc4_physical_reason',
    'act_loc4_social_rating', 'act_loc4_social_reason',
    'act_loc4_relax_rating', 'act_loc4_relax_reason',
    'act_loc4_commute_rating', 'act_loc4_children_rating',
    'act_loc4_commute_reason', 'act_loc4_children_reason',
]

# 'loc_ifr_4' becomes 'iframe'; every other column just loses its prefix
loc4_renames = {'loc_ifr_4': 'iframe'}
for column in loc4_columns[1:]:
    loc4_renames[column] = column[len(loc4_prefix):]

activity_perceptions_4 = perceptions[['prolific_id'] + loc4_columns].rename(columns=loc4_renames)

In [None]:
# Activity answers for survey location 5, renamed to location-independent
# column names so that the five per-location tables can be stacked.
loc5_prefix = 'act_loc5_'
loc5_columns = [
    'loc_ifr_5', 'act_loc5_clicks',
    'act_loc5_physical_rating', 'act_loc5_physical_reason',
    'act_loc5_social_rating', 'act_loc5_social_reason',
    'act_loc5_relax_rating', 'act_loc5_relax_reason',
    'act_loc5_commute_rating', 'act_loc5_children_rating',
    'act_loc5_commute_reason', 'act_loc5_children_reason',
]

# 'loc_ifr_5' becomes 'iframe'; every other column just loses its prefix
loc5_renames = {'loc_ifr_5': 'iframe'}
for column in loc5_columns[1:]:
    loc5_renames[column] = column[len(loc5_prefix):]

activity_perceptions_5 = perceptions[['prolific_id'] + loc5_columns].rename(columns=loc5_renames)

In [None]:
# Stack the five per-location tables into one long table (one row per
# respondent and location) with a fresh 0..n-1 index.
per_location_activity_tables = [
    activity_perceptions_1,
    activity_perceptions_2,
    activity_perceptions_3,
    activity_perceptions_4,
    activity_perceptions_5,
]
activity_perceptions = pd.concat(per_location_activity_tables, ignore_index=True)

In [None]:
# Answer labels in increasing order; the position in the list is the numeric score.
activity_levels = ['Never', 'Rarely', 'Sometimes', 'Often', 'Always']
activity_mapping = {label: score for score, label in enumerate(activity_levels)}

# Add a numeric '<activity>_rating_num' column next to each textual rating.
# Labels that are not in the mapping (including missing answers) become NaN.
for activity in ['physical', 'social', 'relax', 'commute', 'children']:
    rating_column = '{}_rating'.format(activity)
    activity_perceptions[rating_column + '_num'] = activity_perceptions[rating_column].map(activity_mapping)

In [None]:
# Diagnostic: number of activity rows before dropping rows without an iframe.
if print_and_plot:
    print(len(activity_perceptions))

In [None]:
# Keep only the rows that refer to a location, i.e. that have an iframe value.
activity_has_iframe = activity_perceptions['iframe'].notna()
activity_perceptions = activity_perceptions[activity_has_iframe]

In [None]:
# Diagnostic: number of activity rows after dropping rows without an iframe.
if print_and_plot:
    print(len(activity_perceptions))

#### for greenness

In [None]:
# Greenness answers for survey location 1 under location-independent column names.
loc1_green_renames = {
    'loc_ifr_1': 'iframe',
    'green_loc1_clicks': 'clicks',
    'green_loc1_rating': 'rating',
    'green_loc1_reason': 'reason',
}
greenness_perceptions_1 = perceptions[
    ['prolific_id'] + list(loc1_green_renames)
].rename(columns=loc1_green_renames)

In [None]:
# Greenness answers for survey location 2 under location-independent column names.
loc2_green_renames = {
    'loc_ifr_2': 'iframe',
    'green_loc2_clicks': 'clicks',
    'green_loc2_rating': 'rating',
    'green_loc2_reason': 'reason',
}
greenness_perceptions_2 = perceptions[
    ['prolific_id'] + list(loc2_green_renames)
].rename(columns=loc2_green_renames)

In [None]:
# Greenness answers for survey location 3 under location-independent column names.
loc3_green_renames = {
    'loc_ifr_3': 'iframe',
    'green_loc3_clicks': 'clicks',
    'green_loc3_rating': 'rating',
    'green_loc3_reason': 'reason',
}
greenness_perceptions_3 = perceptions[
    ['prolific_id'] + list(loc3_green_renames)
].rename(columns=loc3_green_renames)

In [None]:
# Greenness answers for survey location 4 under location-independent column names.
loc4_green_renames = {
    'loc_ifr_4': 'iframe',
    'green_loc4_clicks': 'clicks',
    'green_loc4_rating': 'rating',
    'green_loc4_reason': 'reason',
}
greenness_perceptions_4 = perceptions[
    ['prolific_id'] + list(loc4_green_renames)
].rename(columns=loc4_green_renames)

In [None]:
# Greenness answers for survey location 5 under location-independent column names.
loc5_green_renames = {
    'loc_ifr_5': 'iframe',
    'green_loc5_clicks': 'clicks',
    'green_loc5_rating': 'rating',
    'green_loc5_reason': 'reason',
}
greenness_perceptions_5 = perceptions[
    ['prolific_id'] + list(loc5_green_renames)
].rename(columns=loc5_green_renames)

In [None]:
# Stack the five per-location tables into one long table (one row per
# respondent and location) with a fresh 0..n-1 index.
per_location_greenness_tables = [
    greenness_perceptions_1,
    greenness_perceptions_2,
    greenness_perceptions_3,
    greenness_perceptions_4,
    greenness_perceptions_5,
]
greenness_perceptions = pd.concat(per_location_greenness_tables, ignore_index=True)

In [None]:
# Prefix the generic answer columns so they cannot be confused with the
# activity '<activity>_rating' / '<activity>_reason' columns.
greenness_column_names = {'rating': 'greenness_rating', 'reason': 'greenness_reason'}
greenness_perceptions = greenness_perceptions.rename(columns=greenness_column_names)

In [None]:
# Answer labels in increasing order; the position in the list is the numeric score.
greenness_levels = ['Not at all', 'A little', 'Neutral', 'Fairly', 'Very']
greenness_mapping = {label: score for score, label in enumerate(greenness_levels)}

# Labels that are not in the mapping (including missing answers) become NaN.
greenness_rating_labels = greenness_perceptions['greenness_rating']
greenness_perceptions['greenness_rating_num'] = greenness_rating_labels.map(greenness_mapping)

In [None]:
# Diagnostic: number of greenness rows before dropping rows without an iframe.
if print_and_plot:
    print(len(greenness_perceptions))

In [None]:
# Keep only the rows that refer to a location, i.e. that have an iframe value.
greenness_has_iframe = greenness_perceptions['iframe'].notna()
greenness_perceptions = greenness_perceptions[greenness_has_iframe]

In [None]:
# Diagnostic: number of greenness rows after dropping rows without an iframe.
if print_and_plot:
    print(len(greenness_perceptions))

### filter out perceptions without any clicks (i.e., no panning around)

In [None]:
# First question (activity perceptions): drop the rows that were answered
# without a single interaction with (click in) the panorama.
activity_without_clicks = activity_perceptions['clicks'] == 0
activities_noclicks_indices = activity_perceptions[activity_without_clicks].index

if print_and_plot:
    n_dropped = len(activities_noclicks_indices)
    n_total = len(activity_perceptions)
    print('filtering out {}/{} ({}%) of activity perceptions with unsatisfying panorama interactions'.format(
        n_dropped,
        n_total,
        round(100*n_dropped/n_total, 3)))

# The row labels collected above stay available for the greenness filter.
activity_is_dropped = activity_perceptions.index.isin(activities_noclicks_indices)
activity_perceptions = activity_perceptions[~activity_is_dropped]

In [None]:
# Second question (greenness perceptions): drop a row only when the panorama
# was clicked neither during this question nor during the preceding activity
# question on the same panorama.
# This relies on both long tables sharing row labels: each was stacked from
# the same five per-location slices and then reduced to rows with an iframe.
greenness_without_clicks = greenness_perceptions['clicks'] == 0
greenness_noclicks_indices = greenness_perceptions[greenness_without_clicks].index
both_noclicks_indices = activities_noclicks_indices.intersection(greenness_noclicks_indices)

if print_and_plot:
    n_dropped = len(both_noclicks_indices)
    n_total = len(greenness_perceptions)
    print('filtering out {}/{} ({}%) of greenness perceptions with unsatisfying panorama interactions'.format(
        n_dropped,
        n_total,
        round(100*n_dropped/n_total, 3)))

greenness_is_dropped = greenness_perceptions.index.isin(both_noclicks_indices)
greenness_perceptions = greenness_perceptions[~greenness_is_dropped]

## Combine with location-data

#### Merge perceptions with sampled places

In [None]:
# Normalise the iframe strings on both sides so they can serve as merge key:
# line breaks are removed from the survey side ...
activity_perceptions['iframe'] = activity_perceptions['iframe'].str.replace('\n','')
greenness_perceptions['iframe'] = greenness_perceptions['iframe'].str.replace('\n','')
# ... and a comma followed by a line break from the sampled locations.
# NOTE(review): the pattern here is ',\n' rather than '\n' -- confirm that this
# difference matches how the two sources format the iframe HTML.
sampled['iframe'] = sampled['iframe'].str.replace(',\n','')

In [None]:
# Columns of `sampled` that are attached to the perception tables in the
# merges below ('iframe' is the merge key).
sampled_cols = ['iframe', 'category', 'ndvi_median', 'geometry', 'gsv_pano_id', 'gsv_lat', 'gsv_lng']

In [None]:
# Attach the sampled-location attributes to every activity perception (left
# merge: perceptions without a matching location keep empty attributes), then
# turn the result into a GeoDataFrame on the location geometry.
activity_with_location = pd.merge(
    activity_perceptions, sampled[sampled_cols], how='left', on='iframe')
activity_perceptions = gpd.GeoDataFrame(activity_with_location, geometry='geometry')

if print_and_plot:
    print(len(activity_perceptions))

In [None]:
# Attach the sampled-location attributes to every greenness perception (left
# merge: perceptions without a matching location keep empty attributes), then
# turn the result into a GeoDataFrame on the location geometry.
greenness_with_location = pd.merge(
    greenness_perceptions, sampled[sampled_cols], how='left', on='iframe')
greenness_perceptions = gpd.GeoDataFrame(greenness_with_location, geometry='geometry')

if print_and_plot:
    print(len(greenness_perceptions))

In [None]:
# check for panoramas that were not rated

In [None]:
# Diagnostic: pano ids of sampled locations that do not occur in the
# remaining greenness perceptions.
if print_and_plot:
    print(sampled[~sampled.gsv_pano_id.isin(greenness_perceptions.gsv_pano_id)]['gsv_pano_id'])

#### OSM categories in surroundings of sampled places

In [None]:
# Free the column name 'category': the spatial joins below bring in a
# 'category' column from each OSM layer.
place_category_name = {'category': 'place_category'}
activity_perceptions = activity_perceptions.rename(columns=place_category_name)
greenness_perceptions = greenness_perceptions.rename(columns=place_category_name)

In [None]:
# Flag for every greenness perception whether its geometry is matched by at
# least one feature of each OSM category (left spatial join with geopandas'
# default predicate). One loop replaces five copy-pasted cells.
osm_layers = {
    'near_regular_greenspace': regular_greenspaces,
    'near_pocket_greenspace': pocket_greenspaces,
    'near_square': squares,
    'near_playspace': playspaces,
    'near_street': streets,
}

for flag_column, osm_layer in osm_layers.items():
    joined = gpd.sjoin(greenness_perceptions, osm_layer[['geometry', 'category']], how='left')
    # the right-hand 'category' is only filled for rows that found a match
    joined[flag_column] = joined['category'].notnull()
    # a row matching several features is repeated by the join: keep one copy
    joined = joined[~joined.index.duplicated(keep='first')]
    # drop the helper columns so the next join starts from a clean frame
    greenness_perceptions = joined.drop(columns=['category', 'index_right'])

if print_and_plot:
    print(len(greenness_perceptions))

In [None]:
# Flag for every activity perception whether its geometry is matched by at
# least one feature of each OSM category (left spatial join with geopandas'
# default predicate). One loop replaces five copy-pasted cells.
osm_layers = {
    'near_regular_greenspace': regular_greenspaces,
    'near_pocket_greenspace': pocket_greenspaces,
    'near_square': squares,
    'near_playspace': playspaces,
    'near_street': streets,
}

for flag_column, osm_layer in osm_layers.items():
    joined = gpd.sjoin(activity_perceptions, osm_layer[['geometry', 'category']], how='left')
    # the right-hand 'category' is only filled for rows that found a match
    joined[flag_column] = joined['category'].notnull()
    # a row matching several features is repeated by the join: keep one copy
    joined = joined[~joined.index.duplicated(keep='first')]
    # drop the helper columns so the next join starts from a clean frame
    activity_perceptions = joined.drop(columns=['category', 'index_right'])

if print_and_plot:
    print(len(activity_perceptions))

#### Median NDVI in the direct surroundings of the GSV locations of sampled places

In [None]:
# GSV camera positions (lng/lat in gsv_crs), re-projected to local_crs and
# buffered by `radius`, replace the place geometry in a copy of the perceptions
gsv_positions = gpd.points_from_xy(
    activity_perceptions.gsv_lng,
    activity_perceptions.gsv_lat,
    crs=gsv_crs)

activity_perception_points = activity_perceptions.copy()
activity_perception_points['geometry'] = gsv_positions.to_crs(local_crs)
activity_perception_points['geometry'] = activity_perception_points.geometry.buffer(radius)

# consecutive integer index; the place-level ndvi_median column is removed
activity_perception_points = (
    activity_perception_points
    .reset_index(drop=True)
    .drop(columns=['ndvi_median'])
)

In [None]:
# GSV camera positions (lng/lat in gsv_crs), re-projected to local_crs and
# buffered by `radius`, replace the place geometry in a copy of the perceptions
gsv_positions = gpd.points_from_xy(
    greenness_perceptions.gsv_lng,
    greenness_perceptions.gsv_lat,
    crs=gsv_crs)

greenness_perception_points = greenness_perceptions.copy()
greenness_perception_points['geometry'] = gsv_positions.to_crs(local_crs)
greenness_perception_points['geometry'] = greenness_perception_points.geometry.buffer(radius)

# consecutive integer index; the place-level ndvi_median column is removed
greenness_perception_points = (
    greenness_perception_points
    .reset_index(drop=True)
    .drop(columns=['ndvi_median'])
)

In [None]:
def get_ndvi_values(gdf, geotiff):
    """Add NDVI summary statistics of the raster cells under each geometry.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Geometries to summarise. Rows are addressed through the feature ids
        of the GeoJSON serialisation, so the index labels must be
        convertible to ``int``.
    geotiff : rasterio dataset
        Opened raster that is masked with every geometry; ``gdf`` is
        re-projected to ``geotiff.crs`` for the masking.

    Returns
    -------
    geopandas.GeoDataFrame
        Copy of ``gdf`` in its original CRS with the columns ``ndvi_mean``,
        ``ndvi_median`` and ``ndvi_max`` (rounded to 3 decimals). Rows whose
        geometry covers no valid cell get ``None`` in all three columns.
    """
    gdf_mask = gdf.to_crs(geotiff.crs).copy()

    # fill value for cells outside the geometry; filtered out again below
    nodata = 255

    # Serialise the frame once. Geometry and id of a feature do not depend on
    # the statistic columns written in the loop, so re-encoding the whole
    # frame for every row (as was done before) only cost quadratic time.
    features = json.loads(gdf_mask.to_json())['features']

    for feature in features:
        coords = [feature['geometry']]
        index = int(feature['id'])

        data, _ = mask(dataset=geotiff, shapes=coords, filled=True, crop=True, nodata=nodata)

        # exclude all nodata values and values below 0 (water)
        data = data[data != nodata]
        data = data[data >= 0]

        if len(data) == 0:
            gdf_mask.loc[index, 'ndvi_mean'] = None
            gdf_mask.loc[index, 'ndvi_median'] = None
            gdf_mask.loc[index, 'ndvi_max'] = None
        else:
            gdf_mask.loc[index, 'ndvi_mean'] = round(np.mean(data), 3)
            gdf_mask.loc[index, 'ndvi_median'] = round(np.median(data), 3)
            gdf_mask.loc[index, 'ndvi_max'] = round(np.max(data), 3)

    # hand the result back in the CRS the caller passed in
    gdf_mask = gdf_mask.to_crs(gdf.crs).copy()
    return gdf_mask

In [None]:
# add ndvi_mean / ndvi_median / ndvi_max of the raster cells under each buffer
greenness_perception_points = get_ndvi_values(greenness_perception_points, geotiff)

In [None]:
# add ndvi_mean / ndvi_median / ndvi_max of the raster cells under each buffer
activity_perception_points = get_ndvi_values(activity_perception_points, geotiff)

In [None]:
if print_and_plot:
    # number of greenness perceptions after the NDVI step
    print(greenness_perception_points.shape[0])

In [None]:
if print_and_plot:
    # number of activity perceptions after the NDVI step
    print(activity_perception_points.shape[0])

#### OSM categories in the direct surroundings of the GSV locations of sampled places

In [None]:
# make sure no 'category' column is left that could clash with the joined one
for point_frame in (activity_perception_points, greenness_perception_points):
    point_frame.rename(columns={'category': 'place_category'}, inplace=True)

In [None]:
# Flag for every buffered GSV location (greenness perceptions) whether it is
# matched by at least one feature of each OSM category (left spatial join with
# geopandas' default predicate). One loop replaces five copy-pasted cells.
osm_layers = {
    'near_regular_greenspace': regular_greenspaces,
    'near_pocket_greenspace': pocket_greenspaces,
    'near_square': squares,
    'near_playspace': playspaces,
    'near_street': streets,
}

for flag_column, osm_layer in osm_layers.items():
    joined = gpd.sjoin(greenness_perception_points, osm_layer[['geometry', 'category']], how='left')
    # the right-hand 'category' is only filled for rows that found a match
    joined[flag_column] = joined['category'].notnull()
    # a row matching several features is repeated by the join: keep one copy
    joined = joined[~joined.index.duplicated(keep='first')]
    # drop the helper columns so the next join starts from a clean frame
    greenness_perception_points = joined.drop(columns=['category', 'index_right'])

if print_and_plot:
    print(len(greenness_perception_points))

In [None]:
# Flag for every buffered GSV location (activity perceptions) whether it is
# matched by at least one feature of each OSM category (left spatial join with
# geopandas' default predicate). One loop replaces five copy-pasted cells.
osm_layers = {
    'near_regular_greenspace': regular_greenspaces,
    'near_pocket_greenspace': pocket_greenspaces,
    'near_square': squares,
    'near_playspace': playspaces,
    'near_street': streets,
}

for flag_column, osm_layer in osm_layers.items():
    joined = gpd.sjoin(activity_perception_points, osm_layer[['geometry', 'category']], how='left')
    # the right-hand 'category' is only filled for rows that found a match
    joined[flag_column] = joined['category'].notnull()
    # a row matching several features is repeated by the join: keep one copy
    joined = joined[~joined.index.duplicated(keep='first')]
    # drop the helper columns so the next join starts from a clean frame
    activity_perception_points = joined.drop(columns=['category', 'index_right'])

if print_and_plot:
    print(len(activity_perception_points))

## People with extreme greenness ratings

In [None]:
# median greenness rating of every panorama (place) over all its ratings
ratings_by_place = greenness_perceptions[['gsv_pano_id', 'greenness_rating_num']].groupby(['gsv_pano_id'])
greenness_perceptions_perplace = ratings_by_place.median().rename(
    columns={'greenness_rating_num': 'greenness_rating_num_medianperplace'})

In [None]:
# attach the median rating of the rated place to every single rating
rating_columns = ['prolific_id', 'gsv_pano_id', 'greenness_rating_num']
greenness_perceptions_potentialoutliers = greenness_perceptions[rating_columns].merge(
    greenness_perceptions_perplace[['greenness_rating_num_medianperplace']],
    on='gsv_pano_id',
    how='left')

# absolute and squared deviation of the rating from that median
place_median = greenness_perceptions_potentialoutliers['greenness_rating_num_medianperplace']
deviation = greenness_perceptions_potentialoutliers['greenness_rating_num'] - place_median
greenness_perceptions_potentialoutliers['greenness_rating_num_absdifference'] = deviation.abs()
greenness_perceptions_potentialoutliers['greenness_rating_num_squareddifference'] = deviation**2

# the same deviations for the mirrored rating (4 - rating)
greenness_perceptions_potentialoutliers['greenness_rating_num_inverse'] = 4 - greenness_perceptions_potentialoutliers['greenness_rating_num']
inverse_deviation = greenness_perceptions_potentialoutliers['greenness_rating_num_inverse'] - place_median
greenness_perceptions_potentialoutliers['greenness_rating_num_absdifference_inverse'] = inverse_deviation.abs()
greenness_perceptions_potentialoutliers['greenness_rating_num_squareddifference_inverse'] = inverse_deviation**2

In [None]:
# total deviation per participant, for the regular and the mirrored ratings
deviation_columns = [
    'greenness_rating_num_squareddifference',
    'greenness_rating_num_absdifference',
    'greenness_rating_num_squareddifference_inverse',
    'greenness_rating_num_absdifference_inverse',
]
participants_potentialoutliers = (
    greenness_perceptions_potentialoutliers[['prolific_id'] + deviation_columns]
    .groupby(['prolific_id'])
    .sum()
)

In [None]:
if print_and_plot:
    # distribution of the summed absolute deviations per participant
    participants_potentialoutliers['greenness_rating_num_absdifference'].hist()

In [None]:
if print_and_plot:
    # distribution of the summed squared deviations per participant
    participants_potentialoutliers['greenness_rating_num_squareddifference'].hist()

In [None]:
if print_and_plot:
    # distribution of the summed absolute deviations of the mirrored ratings
    participants_potentialoutliers['greenness_rating_num_absdifference_inverse'].hist()

In [None]:
if print_and_plot:
    # distribution of the summed squared deviations of the mirrored ratings
    participants_potentialoutliers['greenness_rating_num_squareddifference_inverse'].hist()

In [None]:
# positive values: the mirrored ratings lie closer to the place medians than
# the ratings as given
abs_regular = participants_potentialoutliers['greenness_rating_num_absdifference']
abs_inverse = participants_potentialoutliers['greenness_rating_num_absdifference_inverse']
participants_potentialoutliers['greenness_rating_num_absdifference_regularminusinverse'] = abs_regular.sub(abs_inverse)

squared_regular = participants_potentialoutliers['greenness_rating_num_squareddifference']
squared_inverse = participants_potentialoutliers['greenness_rating_num_squareddifference_inverse']
participants_potentialoutliers['greenness_rating_num_squareddifference_regularminusinverse'] = squared_regular.sub(squared_inverse)

In [None]:
if print_and_plot:
    # distribution of regular-minus-mirrored absolute deviations
    participants_potentialoutliers['greenness_rating_num_absdifference_regularminusinverse'].hist()

In [None]:
if print_and_plot:
    # distribution of regular-minus-mirrored squared deviations
    participants_potentialoutliers['greenness_rating_num_squareddifference_regularminusinverse'].hist()

In [None]:
if print_and_plot:
    # list rating and stated reason for every participant whose mirrored
    # ratings fit the place medians better than the ratings as given
    fits_better_mirrored = (
        participants_potentialoutliers.greenness_rating_num_squareddifference_regularminusinverse > 0
    ).to_numpy()
    for prolific_id in participants_potentialoutliers.index[fits_better_mirrored]:
        print(prolific_id)
        own_rows = (greenness_perceptions.prolific_id == prolific_id).to_numpy()
        for row_label in greenness_perceptions.index[own_rows]:
            print(
                greenness_perceptions.at[row_label, 'greenness_rating_num'],
                greenness_perceptions.at[row_label, 'greenness_reason'])
        print('')

In [None]:
# participants who seem to have misunderstood the scale: large deviation from
# the median and the stated reasons suggest opposite ratings
outlier_prolific_ids = ['prolific_id_a', 'prolific_id_b']

In [None]:
# greenness_perceptions.head(10)[['prolific_id', 'greenness_rating_num', 'greenness_rating']]

In [None]:
# greenness_perceptions[greenness_perceptions.prolific_id=='60328ae847873028ddf4e8ef'][['prolific_id', 'greenness_rating_num', 'greenness_rating']]

In [None]:
# greenness_perceptions[greenness_perceptions.prolific_id=='6384f88517c736ff7718bd5f'][['prolific_id', 'greenness_rating_num', 'greenness_rating']]

In [None]:
# invert ratings by these people

# label that belongs to every numeric rating; it does not depend on the
# participant, so it is defined once instead of once per participant
greenness_mapping = {
    4: 'Very',
    3: 'Fairly',
    2: 'Neutral',
    1: 'A little',
    0: 'Not at all'}

for prolific_id in outlier_prolific_ids:
    # as before, only presence in greenness_perceptions decides whether to flip
    if not greenness_perceptions.prolific_id.eq(prolific_id).any():
        continue

    if print_and_plot:
        print('Flipping greenness ratings for', prolific_id)

    # the place-based and the GSV-point-based frame get exactly the same flip
    for frame in (greenness_perceptions, greenness_perception_points):
        own_rows = frame.prolific_id.eq(prolific_id).to_numpy()
        for row in frame.index[own_rows]:
            # direct scalar access instead of a boolean scan of the whole frame per row
            new_greenness_rating_num = 4 - frame.at[row, 'greenness_rating_num']
            frame.at[row, 'greenness_rating_num'] = new_greenness_rating_num
            frame.at[row, 'greenness_rating'] = greenness_mapping[new_greenness_rating_num]

In [None]:
# greenness_perceptions.head(10)[['prolific_id', 'greenness_rating_num', 'greenness_rating']]

In [None]:
# greenness_perceptions[greenness_perceptions.prolific_id=='60328ae847873028ddf4e8ef'][['prolific_id', 'greenness_rating_num', 'greenness_rating']]

In [None]:
# greenness_perceptions[greenness_perceptions.prolific_id=='6384f88517c736ff7718bd5f'][['prolific_id', 'greenness_rating_num', 'greenness_rating']]

## Export output pre-processed perceptions for analysis

In [None]:
# output folder of the pre-processed perceptions, one sub-folder per buffer radius
export_sub_folder = os.path.join(confidential_folder, 'preprocessed', 'radius_{}'.format(radius))

# makedirs also creates a missing 'preprocessed' parent folder (os.mkdir would
# raise FileNotFoundError there) and does nothing if the folder already exists
os.makedirs(export_sub_folder, exist_ok=True)

In [None]:
# all perceptions as one CSV table
csv_name = 'perceptions_{}.csv'.format(place_name)
output_file = os.path.join(export_sub_folder, csv_name)
perceptions.to_csv(path_or_buf=output_file)

In [None]:
geojson_name = 'activity_perceptions_{}.geojson'.format(place_name)
output_file = os.path.join(export_sub_folder, geojson_name)
# every attribute column is written as text, the geometry column stays as it is
activity_perceptions_output = activity_perceptions.apply(
    lambda column: column if column.name == "geometry" else column.astype(str),
    axis=0)
activity_perceptions_output.to_file(output_file, driver='GeoJSON')

In [None]:
geojson_name = 'activity_perception_points_{}.geojson'.format(place_name)
output_file = os.path.join(export_sub_folder, geojson_name)
# every attribute column is written as text, the geometry column stays as it is
activity_perception_points_output = activity_perception_points.apply(
    lambda column: column if column.name == "geometry" else column.astype(str),
    axis=0)
activity_perception_points_output.to_file(output_file, driver='GeoJSON')

In [None]:
geojson_name = 'greenness_perceptions_{}.geojson'.format(place_name)
output_file = os.path.join(export_sub_folder, geojson_name)
# every attribute column is written as text, the geometry column stays as it is
greenness_perceptions_output = greenness_perceptions.apply(
    lambda column: column if column.name == "geometry" else column.astype(str),
    axis=0)
greenness_perceptions_output.to_file(output_file, driver='GeoJSON')

In [None]:
geojson_name = 'greenness_perception_points_{}.geojson'.format(place_name)
output_file = os.path.join(export_sub_folder, geojson_name)
# every attribute column is written as text, the geometry column stays as it is
greenness_perception_points_output = greenness_perception_points.apply(
    lambda column: column if column.name == "geometry" else column.astype(str),
    axis=0)
greenness_perception_points_output.to_file(output_file, driver='GeoJSON')