-
Notifications
You must be signed in to change notification settings - Fork 2
/
2 main processing.sps
249 lines (213 loc) · 6.77 KB
/
2 main processing.sps
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
* Encoding: windows-1252.
* 1. define folder and files.
DEFINE basefolder () 'C:\github\osmbe\traffic-sign-project\' !ENDDEFINE.
* add the location and name of the new traffic sign data.
DEFINE newshape () 'raw-output\20220731_road_signs.csv' !ENDDEFINE.
* when was the previous dataset downloaded?
* date in format DD.MM.YYYY.
DEFINE olddate () '30.05.2022' !ENDDEFINE.
DEFINE newdate () '31.07.2022' !ENDDEFINE.
* 2. load and limit source datasets.
PRESERVE.
SET DECIMAL DOT.
GET DATA /TYPE=TXT
/FILE=basefolder + newshape
/DELCASE=LINE
/DELIMITERS=","
/QUALIFIER='"'
/ARRANGEMENT=DELIMITED
/FIRSTCASE=2
/DATATYPEMIN PERCENTAGE=51.0
/VARIABLES=
FID a50
UUID a50
externe_id_bord A50
locatie_x A15
locatie_y A15
bordcode a60
opstelhoogte f10.0
breedte f10.0
hoogte f10.0
folietype a10
leverancier a50
vorm a10
fabricage_type a50
fabricage_jaar f4.0
fabricage_maand f2.0
besteknummer a50
opmerkingen a50
beheerder a50
datum_plaatsing A10
parameters a250
bevestigingsProfielen f2.0
beugels f2.0
type_bevestiging a50
id_opstelling f10.0
id_aanzicht f10.0
geometry a100
/MAP.
RESTORE.
CACHE.
EXECUTE.
DATASET NAME data WINDOW=FRONT.
* expect about 1.5 million cases.
* silly cleaning of CSV to fit SPSS datatypes.
compute locatie_x=replace(locatie_x,".",",").
compute locatie_y=replace(locatie_y,".",",").
compute datum_plaatsing=replace(datum_plaatsing,"/",".").
alter type locatie_x locatie_y (F12.2).
alter type datum_plaatsing (EDATE10).
match files
/file=*
/keep=FID bordcode parameters datum_plaatsing id_aanzicht id_opstelling locatie_x locatie_y.
string versiondate (a10).
compute versiondate=newdate.
alter type versiondate (EDATE10).
string olddatevar (a10).
compute olddatevar=olddate.
alter type olddatevar (EDATE10).
if datum_plaatsing>versiondate fromthefuture=1.
if datum_plaatsing>olddatevar newsign=1.
* 4. Remove traffic signs of the future (bad date entry, one supposes).
* Keep only traffic signs created since the last run.
FILTER OFF.
USE ALL.
SELECT IF (missing(fromthefuture) & newsign=1).
EXECUTE.
* 5. Keep only interesting traffic signs.
* 5A. Load traffic sign info: from seperateley prepared file.
PRESERVE.
SET DECIMAL COMMA.
GET DATA /TYPE=TXT
/FILE=basefolder + "find-interesting-signs\road_signs_cleaned.csv"
/ENCODING='Locale'
/DELCASE=LINE
/DELIMITERS=";"
/ARRANGEMENT=DELIMITED
/FIRSTCASE=2
/DATATYPEMIN PERCENTAGE=95.0
/VARIABLES=
bordcode A60
name AUTO
help AUTO
wiki AUTO
key AUTO
value AUTO
confusion_possible AUTO
opinion AUTO
/MAP.
RESTORE.
CACHE.
EXECUTE.
DATASET NAME signmetadata WINDOW=FRONT.
DATASET ACTIVATE data.
* Z signs are not in our external data, so let's just give them an extra flag "this is a zonal thing".
if char.index(bordcode,"Z")=1 zone=1.
if char.index(bordcode,"Z")=1 bordcode=char.substr(bordcode,2,59).
* remove trailing /.
compute bordcode=replace(bordcode,"/","").
EXECUTE.
sort cases bordcode (a).
* added signs background info.
MATCH FILES /FILE=*
/TABLE='signmetadata'
/BY bordcode.
EXECUTE.
dataset close signmetadata.
* sign are interesting if the "opinion" about it is "1" (whcih means "we think it's worth mapping their effects").
* but signs are clustered into an "aanzicht" (one pole, one direction) and need to be read together.
* so we only throw signs away if none of the signs in the "aanzicht" are interesting
* for example, G-signs have interesting text only; this will be merged with main sign info if that sign is interesting.
AGGREGATE
/OUTFILE=* MODE=ADDVARIABLES
/BREAK=id_aanzicht
/opinion_max=MAX(opinion)
/n_aanzicht=N.
FILTER OFF.
USE ALL.
SELECT IF (opinion_max > 0).
EXECUTE.
sort cases id_aanzicht (a).
* the "parameters" contains a description of text on the sign, but contains a lot of metatext that is not interesting.
* that is cleaned up here.
compute parameters=replace(parameters,"Vrije ingave,","").
compute parameters=replace(parameters,"neen,","").
compute parameters=replace(parameters,"Neen,","").
compute parameters=replace(parameters,"Tekst,","").
compute parameters=replace(parameters,"ja,","").
compute parameters=replace(parameters,"met opschrift,","").
* duplicate feature analysis creates variables which will help with filtering:
- primaryfirst: the first sign in the aanzicht
- primarylast: the last one
- the sequence number within the aanzicht (0 if there's only one)
- indupgrp: 0 if single sign, 1 if more signs in the aanzicht.
* Identify Duplicate Cases.
SORT CASES BY id_aanzicht(A) FID(A).
MATCH FILES
/FILE=*
/BY id_aanzicht
/FIRST=PrimaryFirst
/LAST=PrimaryLast.
DO IF (PrimaryFirst).
COMPUTE MatchSequence=1-PrimaryLast.
ELSE.
COMPUTE MatchSequence=MatchSequence+1.
END IF.
LEAVE MatchSequence.
FORMATS MatchSequence (f7).
COMPUTE InDupGrp=MatchSequence>0.
EXECUTE.
* make zonal signs readable.
if zone=1 bordcode=concat(ltrim(rtrim(bordcode))," (zone)").
* now we will join all the bordcode info into a single variable, with the relevant result being written into the last sign of the aanzicht.
string bordcode_merge (a100).
* give all their own bordcode.
compute bordcode_merge=bordcode.
* then add the bordcode of the signs "above" in the aanzicht.
if InDupGrp=1 & primaryfirst=0 bordcode_merge=concat(ltrim(rtrim(lag(bordcode_merge)))," | ",ltrim(rtrim(bordcode))).
EXECUTE.
* do the same with the parameters.
string parameters_merge (a100).
compute parameters_merge=parameters.
if InDupGrp=1 & primaryfirst=0 & lag(parameters_merge)~="" parameters_merge=concat(ltrim(rtrim(lag(parameters_merge)))," | ",ltrim(rtrim(parameters))).
compute parameters_merge=RTRIM(rtrim(parameters_merge),"|").
compute parameters_merge=RTRIM(rtrim(parameters_merge),"|").
EXECUTE.
* and now do the same with the name.
string name_merge (a100).
compute name_merge=name.
if InDupGrp=1 & primaryfirst=0 & lag(name_merge)~="" name_merge=concat(ltrim(rtrim(lag(name_merge)))," | ",ltrim(rtrim(name))).
compute name_merge=RTRIM(rtrim(name_merge),"|").
EXECUTE.
* keep only the row with the merged info for everything in the aanzicht.
FILTER OFF.
USE ALL.
SELECT IF (PrimaryLast = 1).
EXECUTE.
* set names for maproulette use.
* maproulette likes the name "id" for an external identifier.
rename variables FID=id.
compute id=replace(id,"Verkeersborden.Vlaanderen_Borden.","").
EXECUTE.
rename variables bordcode_merge=traffic_sign_code.
rename variables parameters_merge=extra_text.
rename variables datum_plaatsing=date_installed.
rename variables name_merge=traffic_sign_description.
match files
/file=*
/keep=id
traffic_sign_code
extra_text
traffic_sign_description
date_installed
locatie_x
locatie_y.
EXECUTE.
SAVE TRANSLATE OUTFILE=basefolder + 'maproulette-readables\road_signs_selection_lambert72.csv'
/TYPE=CSV
/ENCODING='Locale'
/MAP
/REPLACE
/FIELDNAMES
/CELLS=VALUES.
* open in QGIS as Lambert 72, EPSG 31370), save as GeoJSON in ESPG:4326