Skip to content

Commit 5872f3d

Browse files
authored
feat(country_codes): add dictionaries for country codes (#26)
1 parent 3031972 commit 5872f3d

File tree

9 files changed

+286
-62
lines changed

9 files changed

+286
-62
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ resources/whosonfirst/dictionaries/*/*.txt
66

77
# whitelist certain files
88
!resources/whosonfirst/dictionaries/country/wof:country.txt
9-
!resources/whosonfirst/dictionaries/country/wof:shortcode.txt
9+
!resources/whosonfirst/dictionaries/country/wof:country_alpha3.txt
1010
!resources/whosonfirst/dictionaries/country/name:eng_x_preferred.txt
1111
!resources/whosonfirst/dictionaries/dependency/wof:shortcode.txt
1212
!resources/whosonfirst/dictionaries/dependency/name:eng_x_preferred.txt

classification/CountryClassification.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ const Classification = require('./Classification')
22

33
class CountryClassification extends Classification {
44
constructor (confidence, meta) {
5-
super(confidence, meta)
5+
super(0.9, meta)
66
this.public = true
77
this.label = 'country'
88
}

classification/CountryClassification.test.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ module.exports.tests.constructor = (test) => {
77
let c = new Classification()
88
t.true(c.public)
99
t.equals(c.label, 'country')
10-
t.equals(c.confidence, 1.0)
10+
t.equals(c.confidence, 0.9)
1111
t.deepEqual(c.meta, {})
1212
t.end()
1313
})

classifier/WhosOnFirstClassifier.js

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ const placetypes = {
2424
// classifications: [AreaClassification, DependencyClassification]
2525
// },
2626
'country': {
27-
files: ['name:eng_x_preferred.txt'],
27+
files: ['name:eng_x_preferred.txt', 'wof:country.txt', 'wof:country_alpha3.txt'],
2828
classifications: [AreaClassification, CountryClassification]
2929
}
3030
}
@@ -104,7 +104,16 @@ class WhosOnFirstClassifier extends PhraseClassifier {
104104

105105
Object.keys(placetypes).forEach(placetype => {
106106
if (this.tokens[placetype].has(span.norm)) {
107-
placetypes[placetype].classifications.forEach(Class => span.classify(new Class(1.0)))
107+
// do not classify tokens if they already have a 'StopWordClassification'
108+
if (
109+
span.classifications.hasOwnProperty('StopWordClassification') || (
110+
span.graph.length('child') > 0 &&
111+
span.graph.findOne('child').classifications.hasOwnProperty('StopWordClassification')
112+
)
113+
) { return }
114+
115+
// classify tokens
116+
placetypes[placetype].classifications.forEach(Class => span.classify(new Class()))
108117
}
109118
})
110119
}

resources/pelias/dictionaries/whosonfirst/locality/name:eng_x_preferred.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ sf
1010
!temple
1111
!airport
1212
!deli
13+
!us
1314
# remove any localities which share a name with a US state
1415
!alabama
1516
!alaska
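resources/whosonfirst/dictionaries/country/wof:country_alpha3.txt (file name missing from the extracted page; presumably this path, per the .gitignore whitelist entry for wof:country_alpha3.txt — confirm against the commit)

Lines changed: 216 additions & 0 deletions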
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
mac
2+
bhr
3+
btn
4+
lso
5+
pan
6+
mmr
7+
sxm
8+
vat
9+
gha
10+
irq
11+
idn
12+
omn
13+
bfa
14+
jam
15+
esh
16+
mdg
17+
tza
18+
afg
19+
kor
20+
guf
21+
bwa
22+
lao
23+
zwe
24+
cmr
25+
ben
26+
lbr
27+
sau
28+
eth
29+
com
30+
ecu
31+
vut
32+
ner
33+
tto
34+
ago
35+
bdi
36+
gnq
37+
tha
38+
abw
39+
qat
40+
rwa
41+
mdv
42+
kaz
43+
lka
44+
isr
45+
ven
46+
dji
47+
hnd
48+
tcd
49+
ken
50+
plw
51+
grd
52+
bhs
53+
and
54+
png
55+
reu
56+
pry
57+
mus
58+
khm
59+
irn
60+
sen
61+
lca
62+
spm
63+
mkd
64+
som
65+
mwi
66+
gtm
67+
caf
68+
tur
69+
blr
70+
guy
71+
kwt
72+
twn
73+
alb
74+
gab
75+
syr
76+
glp
77+
jor
78+
jpn
79+
fsm
80+
hti
81+
cyp
82+
mng
83+
cuw
84+
sur
85+
civ
86+
dza
87+
ton
88+
imn
89+
npl
90+
sle
91+
ind
92+
bgd
93+
hkg
94+
cri
95+
brb
96+
cck
97+
yem
98+
dma
99+
arg
100+
phl
101+
ury
102+
tjk
103+
col
104+
per
105+
myt
106+
atg
107+
mtq
108+
lbn
109+
nam
110+
cog
111+
slv
112+
ggy
113+
kna
114+
mli
115+
zmb
116+
vct
117+
blz
118+
are
119+
egy
120+
tls
121+
slb
122+
jey
123+
nic
124+
gmb
125+
sgp
126+
tuv
127+
bih
128+
bol
129+
uga
130+
lby
131+
swz
132+
prk
133+
cod
134+
uzb
135+
tgo
136+
ssd
137+
pak
138+
syc
139+
mhl
140+
mne
141+
tkm
142+
cub
143+
mrt
144+
wsm
145+
rus
146+
gin
147+
mar
148+
chn
149+
tun
150+
kir
151+
dom
152+
ata
153+
aze
154+
cpv
155+
ant
156+
moz
157+
nga
158+
mys
159+
nru
160+
brn
161+
sdn
162+
fji
163+
gnb
164+
kgz
165+
vnm
166+
stp
167+
arm
168+
eri
169+
aut
170+
aus
171+
bel
172+
bgr
173+
bra
174+
can
175+
che
176+
chl
177+
cze
178+
deu
179+
dnk
180+
esp
181+
est
182+
fin
183+
fra
184+
gbr
185+
geo
186+
grc
187+
grl
188+
hrv
189+
hun
190+
irl
191+
isl
192+
ita
193+
xkx
194+
lie
195+
ltu
196+
lux
197+
lva
198+
mco
199+
mda
200+
mex
201+
mlt
202+
nld
203+
nor
204+
nzl
205+
pol
206+
prt
207+
pse
208+
rou
209+
srb
210+
smr
211+
svk
212+
svn
213+
swe
214+
usa
215+
ukr
216+
zaf

test/addressit.usa.test.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ const testcase = (test, common) => {
9494
{ place: 'Mt Tabor Park' },
9595
{ housenumber: '6220' }, { street: 'SE Salmon St' },
9696
{ locality: 'Portland' }, { region: 'OR' },
97-
{ postcode: '97215' }
97+
{ postcode: '97215' }, { country: 'USA' }
9898
], true)
9999

100100
// assert('Mt Tabor Park', [], true)

test/functional.test.js

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,8 @@ const testcase = (test, common) => {
8383

8484
// do not parse 'aus' as a locality if it follows a region
8585
assert('new south wales aus', [
86-
[{ region: 'new south wales' }],
87-
[{ locality: 'south wales' }]]
88-
)
86+
{ region: 'new south wales' }, { country: 'aus' }
87+
], true)
8988
}
9089

9190
module.exports.all = (tape, common) => {

0 commit comments

Comments
 (0)