-
Notifications
You must be signed in to change notification settings - Fork 10
/
locator.py
1485 lines (1275 loc) · 52.3 KB
/
locator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
'''
The locator module provides detailed city
information, including the region and country of a city, from a
location string.
Examples for location strings are:
Amsterdam, Netherlands
Vienna, Austria
Vienna, IL
Paris - Texas
Paris TX
The locator will look up the cities and try to disambiguate the result based on the country or region information found.
The results in string representation are:
Amsterdam (NH(North Holland) - NL(Netherlands))
Vienna (9(Vienna) - AT(Austria))
Vienna (IL(Illinois) - US(United States))
Paris (TX(Texas) - US(United States))
Paris (TX(Texas) - US(United States))
Each city returned has a city.region and city.country attribute with the details of the city.
Created on 2020-09-18
@author: wf
'''
import os
import glob
import urllib
import re
import csv
import sys
import gzip
import shutil
import json
from pathlib import Path
from lodstorage.entity import EntityManager
from lodstorage.storageconfig import StorageConfig, StoreMode
from sklearn.neighbors import BallTree
from geograpy.wikidata import Wikidata
from lodstorage.sql import SQLDB
from geograpy.utils import remove_non_ascii
from geograpy import wikidata
from argparse import ArgumentParser
from argparse import RawDescriptionHelpFormatter
from lodstorage.jsonable import JSONAble
from math import radians, cos, sin, asin, sqrt
from geograpy.utils import Profiler, Download
class LocationManager(EntityManager):
    '''
    a list of locations

    On top of the EntityManager list handling this class keeps a dict of
    locations by wikidataid and a BallTree that is used for distance queries.
    '''

    def __init__(self, name:str, entityName:str, entityPluralName:str, listName:str=None, tableName:str=None,clazz=None, primaryKey:str=None, config:StorageConfig=None, handleInvalidListTypes=True, filterInvalidListTypes=False, debug=False):
        '''
        construct me

        Args:
            name(string): name of this LocationManager
            entityName(string): entityType to be managed e.g. Country
            entityPluralName(string): plural of the the entityType e.g. Countries
            listName(str): the name of the list to hold
            tableName(str): the name of the table to use
            clazz: the class of the managed entities (passed on to EntityManager)
            primaryKey(str): the primary key (passed on to EntityManager)
            config(StorageConfig): the configuration to be used if None a default configuration will be used
            handleInvalidListTypes(bool): True if invalidListTypes should be converted or filtered
            filterInvalidListTypes(bool): True if invalidListTypes should be deleted
            debug(boolean): override debug setting when default of config is used via config=None
        '''
        if config is None:
            config=LocationContext.getDefaultConfig()
        super().__init__(name=name,
                         entityName=entityName,
                         entityPluralName=entityPluralName,
                         listName=listName,
                         clazz=clazz,
                         tableName=tableName,
                         primaryKey=primaryKey,
                         config=config,
                         handleInvalidListTypes=handleInvalidListTypes,
                         filterInvalidListTypes=filterInvalidListTypes,
                         debug=debug)
        # cache for getBallTuple - balltree is None as long as nothing is cached
        self.balltree = None
        self.ballTuple = None
        self._ballTupleSize = -1
        self.locationByWikidataID={}
        if config is not None and config.mode==StoreMode.SQL:
            self.sqldb=self.getSQLDB(config.cacheFile)

    def getBallTuple(self, cache:bool=True):
        '''
        get the BallTuple=BallTree,validList of this location list

        Args:
            cache(bool): if True calculate and use a cached version otherwise recalculate on
                every call of this function

        Returns:
            BallTree,list: a sklearn.neighbors.BallTree for the locations that have
                coordinates and the list of exactly these locations (same order)
        '''
        locations = self.getList()
        # the cached tree is only valid as long as the list did not grow or shrink
        cachedSize = getattr(self, "_ballTupleSize", -1)
        needsBuild = self.balltree is None or cachedSize != len(locations)
        if needsBuild or not cache:
            coordinatesrad = []
            validList = []
            for location in locations:
                # NOTE(review): the truthiness test also skips coordinates that are exactly 0
                if location.lat and location.lon:
                    coordinatesrad.append((radians(location.lat), radians(location.lon)))
                    validList.append(location)
            balltree = BallTree(coordinatesrad, metric='haversine')
            # remember the tree - the original never did so the cache was never hit
            self.balltree = balltree
            self.ballTuple = balltree, validList
            self._ballTupleSize = len(locations)
        return self.ballTuple

    def fromCache(self,force=False,getListOfDicts=None,sampleRecordCount=-1):
        '''
        get me from the cache and rebuild the wikidataid lookup

        Args:
            force: passed on to EntityManager.fromCache
            getListOfDicts: passed on to EntityManager.fromCache
            sampleRecordCount: passed on to EntityManager.fromCache
        '''
        super().fromCache(force, getListOfDicts, sampleRecordCount)
        # the list has been replaced - a cached BallTree is not valid any more
        self.balltree = None
        self.locationByWikidataID={}
        for entry in self.getList():
            self.locationByWikidataID[entry.wikidataid]=entry

    def getLocationByID(self, wikidataID:str):
        '''
        Returns the location object that corresponds to the given location

        Args:
            wikidataID: wikidataid of the location that should be returned

        Returns:
            Location object or None if the wikidataid is not known
        '''
        location=self.locationByWikidataID.get(wikidataID)
        return location

    def add(self,location):
        '''
        add the given location to me

        Args:
            location(object): the location to be added and put in my hash map
        '''
        self.getList().append(location)
        # a new entry invalidates the cached BallTree
        self.balltree = None
        if hasattr(location,"wikidataid"):
            self.locationByWikidataID[location.wikidataid]=location

    @staticmethod
    def getBackupDirectory():
        '''
        get the default backup directory

        Returns:
            str: the path of the .geograpy3 directory in the home directory of the user
        '''
        home = str(Path.home())
        path = f"{home}/.geograpy3"
        return path

    @staticmethod
    def downloadBackupFile(url:str, fileName:str, targetDirectory:str=None, force:bool=False):
        '''
        Downloads from the given url the gzip-file and extracts it to the given fileName.

        Args:
            url: url linking to a downloadable gzip file
            fileName: Name of the file that should be extracted from gzip file
            targetDirectory(str): download the file this directory
            force (bool): True if the download should be forced

        Returns:
            Name of the extracted file with path to the backup directory

        Raises:
            FileNotFoundError: if the extracted file does not exist after unzipping
        '''
        # "import urllib" alone does not guarantee that the request submodule is loaded
        import urllib.request
        if targetDirectory is None:
            backupDirectory=LocationManager.getBackupDirectory()
        else:
            backupDirectory=targetDirectory
        extractTo = f"{backupDirectory}/{fileName}"
        # we might want to check whether a new version is available
        if Download.needsDownload(extractTo,force=force):
            os.makedirs(backupDirectory, exist_ok=True)
            zipped = f"{extractTo}.gz"
            print(f"Downloading {zipped} from {url} ... this might take a few seconds")
            urllib.request.urlretrieve(url, zipped)
            print(f"unzipping {extractTo} from {zipped}")
            with gzip.open(zipped, 'rb') as gzipped:
                with open(extractTo, 'wb') as unzipped:
                    shutil.copyfileobj(gzipped, unzipped)
            if not os.path.isfile(extractTo):
                # raising a plain string is a TypeError - raise a proper exception
                raise FileNotFoundError(f"could not extract {fileName} from {zipped}")
        return extractTo

    @classmethod
    def downloadBackupFileFromGitHub(cls,fileName:str, targetDirectory:str=None):
        '''
        download the given fileName from the github data directory

        Args:
            fileName(str): the filename to download
            targetDirectory(str): download the file this directory

        Return:
            str: the local file
        '''
        # Data is downloaded from the github wiki - to modify the data clone the wiki
        # as documented in https://github.com/somnathrakshit/geograpy3/wiki
        # git clone https://github.com/somnathrakshit/geograpy3.wiki.git
        url = f"https://raw.githubusercontent.com/wiki/somnathrakshit/geograpy3/data/{fileName}.gz"
        backupFile = LocationManager.downloadBackupFile(url, fileName, targetDirectory)
        return backupFile

    def getByName(self, *names:str):
        '''
        Get locations matching given names

        Args:
            *names(str): the names (labels) of the locations to look for

        Returns:
            list: the locations whose label matches one of the given names
        '''
        # one "?" placeholder per name - the values are bound as parameters
        query = f"SELECT * FROM {self.clazz.__name__}Lookup WHERE label IN ({','.join('?'*len(names))})"
        locationRecords = self.sqldb.query(query, params=tuple(names))
        locations=self._locationsFromLookup(*locationRecords)
        return locations

    def getLocationsByWikidataId(self, *wikidataId:str):
        '''
        Returns Location objects for the given wikidataids

        Args:
            *wikidataId(str): wikidataIds of the locations that should be returned

        Returns:
            Location objects matching the given wikidataids or
            None if no ids are given or no record matches
        '''
        wikidataIds=set(wikidataId)
        if not wikidataIds:
            return None
        query=f"SELECT * FROM {self.clazz.__name__}Lookup WHERE wikidataid IN ({','.join('?'*len(wikidataIds))})"
        locationRecords=self.sqldb.query(query, params=tuple(wikidataIds))
        if locationRecords:
            locations=self._locationsFromLookup(*locationRecords)
            return locations
        if self.debug:
            print("No Records matching the given wikidataIds found.")
        return None

    def _locationsFromLookup(self, *locationRecords:dict):
        '''
        Convert given lookup records to the corresponding location objects

        Args:
            *locationRecords: lookup records of locations

        Returns:
            List of Location objects based on the given records
        '''
        if self.clazz is City:
            locations=[City.fromCityLookup(record) for record in locationRecords]
        elif self.clazz is Region:
            locations = [Region.fromRegionLookup(record) for record in locationRecords]
        elif self.clazz is Country:
            locations = [Country.fromCountryLookup(record) for record in locationRecords]
        else:
            locations=[self.clazz.fromRecord(lr) for lr in locationRecords]
        return locations

    def getLocationByIsoCode(self, isoCode:str):
        '''
        Get possible locations matching the given isoCode

        Args:
            isoCode: isoCode of possible Locations

        Returns:
            List of wikidata ids of locations matching the given isoCode -
            always empty if I am neither a RegionManager nor a CountryManager
        '''
        if isinstance(self, RegionManager):
            # region codes are matched both as "<anything>-<isoCode>" and as the plain code
            query = f"SELECT wikidataid FROM {self.tableName} WHERE iso LIKE (?) OR iso LIKE (?)"
            params = (f"%-{isoCode}", isoCode,)
        elif isinstance(self, CountryManager):
            query = f"SELECT wikidataid FROM {self.tableName} WHERE iso LIKE (?)"
            params = (isoCode,)
        else:
            return []
        qres = self.sqldb.query(query, params)
        locationIds = [record['wikidataid'] for record in qres if 'wikidataid' in record]
        return locationIds
class CountryManager(LocationManager):
    '''
    a list of countries
    '''

    def __init__(self, name:str="CountryManager", config:StorageConfig=None, debug=False):
        '''
        construct me

        Args:
            name(str): the name of this manager
            config(StorageConfig): the storage configuration to use
            debug(bool): if True show debug information
        '''
        super().__init__(name=name,
                         entityName="country",
                         entityPluralName="countries",
                         clazz=Country,
                         primaryKey="wikidataid",
                         tableName="countries",
                         config=config,
                         debug=debug
                         )
        self.wd=Wikidata()
        self.getListOfDicts=self.wd.getCountries

    @classmethod
    def fromErdem(cls):
        '''
        get country list provided by Erdem Ozkol https://github.com/erdem

        Returns:
            CountryManager: a manager named "countries_erdem" holding one Country
                (name, iso, lat, lon) per entry of the downloaded json list
        '''
        # "import urllib" alone does not guarantee that the request submodule is loaded
        import urllib.request
        countryManager = CountryManager(name="countries_erdem")
        countryJsonUrl = "https://gist.githubusercontent.com/erdem/8c7d26765831d0f9a8c62f02782ae00d/raw/248037cd701af0a4957cce340dabb0fd04e38f4c/countries.json"
        with urllib.request.urlopen(countryJsonUrl) as url:
            jsonCountryList = json.loads(url.read().decode())
        for jsonCountry in jsonCountryList:
            country = Country()
            country.name = jsonCountry['name']
            country.iso = jsonCountry['country_code']
            # latlng is a [lat, lon] pair
            country.lat = jsonCountry['latlng'][0]
            country.lon = jsonCountry['latlng'][1]
            countryManager.add(country)
        return countryManager
class RegionManager(LocationManager):
    '''
    manages the list of regions
    '''

    def __init__(self, name:str="RegionManager", config:StorageConfig=None,debug=False):
        '''
        construct me

        Args:
            name(str): the name of this manager
            config(StorageConfig): the storage configuration to use
            debug(bool): if True show debug information
        '''
        # the fixed settings that make a LocationManager a manager of regions
        regionSettings = dict(
            entityName="region",
            entityPluralName="regions",
            clazz=Region,
            primaryKey="regionId",
            tableName="regions",
        )
        super().__init__(name=name, config=config, debug=debug, **regionSettings)
        wikidataAccess = Wikidata()
        self.wd = wikidataAccess
        # the regions are supplied by the wikidata access
        self.getListOfDicts = wikidataAccess.getRegions
class CityManager(LocationManager):
    '''
    a list of cities
    '''

    def __init__(self, name:str="CityManager",config:StorageConfig=None,debug=False):
        '''
        construct me

        Args:
            name(str): the name of this manager
            config(StorageConfig): the storage configuration to use
            debug(bool): if True show debug information
        '''
        super().__init__(name=name,
                         entityName="city",
                         entityPluralName="cities",
                         clazz=City,
                         primaryKey=None,
                         tableName="cities",
                         config=config,
                         debug=debug
                         )
        self.wd=Wikidata()
        self.getListOfDicts=self.wd.getCities

    @classmethod
    def getJsonFiles(cls,config:StorageConfig) -> list:
        '''
        get the list of the json files that have my data

        The files are looked up in the "regions" directory below the cache path of
        the given config (the directory is created if it does not exist) and are
        sorted by the first number found in their file name.

        Args:
            config(StorageConfig): the storage configuration supplying the cache path

        Return:
            list: a list of json file names
        '''
        jsondir=f"{config.getCachePath()}/regions"
        os.makedirs(jsondir, exist_ok=True)

        def fileNumber(path:str) -> int:
            # only the file name counts: the directory part may contain digits
            # itself (the default cacheDirName is "geograpy3") which would
            # otherwise give every file the same sort key
            numbers=re.findall(r'\d+', os.path.basename(path))
            # files without any number are sorted first instead of failing
            return int(numbers[0]) if numbers else -1

        jsonFiles = sorted(glob.glob(f"{jsondir}/*.json"), key=fileNumber)
        return jsonFiles
class Earth:
    '''
    constants describing the earth
    '''
    # mean radius of the earth in km
    radius = 6371.0
class Location(JSONAble):
    '''
    Represents a Location
    '''

    def __init__(self, **kwargs):
        '''
        construct me

        Args:
            **kwargs: every keyword argument is set as an attribute of this location
        '''
        for key, value in kwargs.items():
            setattr(self, key, value)

    @classmethod
    def getSamples(cls):
        '''
        get sample records for this class

        Returns:
            list: a list of sample dicts
        '''
        samplesLOD = [{
            "name": "Los Angeles",
            "wikidataid": "Q65",
            "lat": 34.05223,
            "lon":-118.24368,
            "partOf": "US/CA",
            "level": 5,
            "locationKind": "City",
            "comment": None,
            "population": 3976322
        }]
        return samplesLOD

    @staticmethod
    def haversine(lon1, lat1, lon2, lat2):
        """
        Calculate the great circle distance between two points
        on the earth (specified in decimal degrees)

        Args:
            lon1: longitude of the first point
            lat1: latitude of the first point
            lon2: longitude of the second point
            lat2: latitude of the second point

        Returns:
            float: the distance in km
        """
        # convert decimal degrees to radians
        lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
        # haversine formula
        dlon = lon2 - lon1
        dlat = lat2 - lat1
        a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
        # rounding may push the root slightly above 1 which is outside the domain of asin
        c = 2 * asin(min(1.0, sqrt(a)))
        return c * Earth.radius

    def getNClosestLocations(self, lookupLocationManager, n:int):
        """
        Gives a list of up to n locations which have the shortest distance to
        me as calculated from the locations of the given lookupLocationManager

        Args:
            lookupLocationManager(LocationManager): a LocationManager object to use for lookup
            n(int): the maximum number of closest locations to return

        Returns:
            list: a list of result Location/distance tuples
        """
        balltree, lookupListOfLocations = lookupLocationManager.getBallTuple()
        # check for n+1 entries since we might have my own record in the lookup list
        # which we'll ignore later - but never ask for more points than there are
        k = min(n + 1, len(lookupListOfLocations))
        distances, indices = balltree.query([[radians(self.lat), radians(self.lon)]], k=k, return_distance=True)
        resultLocations = self.balltreeQueryResultToLocationManager(distances[0], indices[0], lookupListOfLocations)
        # if my own record was not part of the lookup list there is one result too many
        return resultLocations[:n]

    def getLocationsWithinRadius(self, lookupLocationManager, radiusKm:float):
        """
        Gives the locations of the given lookupLocationManager that are
        within the given radius around me

        Args:
            lookupLocationManager(LocationManager): a LocationManager object to use for lookup
            radiusKm(float): the radius in which to check (in km)

        Returns:
            list: a list of result Location/distance tuples
        """
        balltree, lookupListOfLocations = lookupLocationManager.getBallTuple()
        # the radius is divided by the earth radius since the tree works on radians
        indices, distances = balltree.query_radius([[radians(self.lat), radians(self.lon)]], r=radiusKm / Earth.radius,
                                                   return_distance=True)
        locationList = self.balltreeQueryResultToLocationManager(distances[0], indices[0], lookupListOfLocations)
        return locationList

    def balltreeQueryResultToLocationManager(self, distances, indices, lookupListOfLocations):
        '''
        convert the given ballTree Query Result to a list of Location/distance tuples

        Args:
            distances(list): array of distances
            indices(list): array of indices
            lookupListOfLocations(list): a list of valid locations to use for lookup

        Return:
            list: a list of result Location/distance tuples sorted by distance (km)
        '''
        locationListWithDistance = []
        for i, locationIndex in enumerate(indices):
            distance = distances[i] * Earth.radius
            location = lookupListOfLocations[locationIndex]
            # do not add myself or any other equivalent location
            if not distance < 0.0001:
                locationListWithDistance.append((location, distance))
        # sort by distance (Ball tree only does this for one of the queries ...)
        locationListWithDistance = sorted(locationListWithDistance, key=lambda lwd: lwd[1])
        return locationListWithDistance

    def distance(self, other) -> float:
        '''
        calculate the distance to another Location

        Args:
            other(Location): the other location

        Returns:
            the haversine distance in km
        '''
        # see https://stackoverflow.com/questions/4913349/haversine-formula-in-python-bearing-and-distance-between-two-gps-points
        distance = Location.haversine(self.lon, self.lat, other.lon, other.lat)
        return distance

    def isKnownAs(self, name) -> bool:
        '''
        Checks if this location is known under the given name

        Args:
            name(str): name the location should be checked against

        Returns:
            True if the given name is either the name of the location or present in the labels of the location
        '''
        isKnown = False
        if hasattr(self, 'labels'):
            if name in self.labels:
                isKnown = True
        if hasattr(self, 'name'):
            if name == self.name:
                isKnown = True
        return isKnown

    @staticmethod
    def partialDict(record, clazz, keys=None):
        '''
        get the part of the given record that has one of the given keys

        Args:
            record(dict): the record to filter
            clazz: the class whose first sample supplies the keys if no keys are given
            keys: the keys to keep

        Returns:
            dict: the entries of record whose key is in keys
        '''
        if keys is None:
            keys = clazz.getSamples()[0].keys()
        pDict = {k: v for k, v in record.items() if k in keys}
        return pDict

    @staticmethod
    def mappedDict(record, keyMapList: list):
        '''
        get the part of the given record selected by the given key mapping with renamed keys

        Args:
            record(dict): the record to map
            keyMapList(list): a list of (key in record, key in result) tuples

        Returns:
            dict: the mapped entries of the record
        '''
        keyMap = dict(keyMapList)
        pDict = {keyMap[k]: v for k, v in record.items() if k in keyMap}
        return pDict

    @classmethod
    def fromRecord(cls,regionRecord: dict):
        '''
        create a location from a dict record

        Args:
            regionRecord(dict): the records as returned from a Query

        Returns:
            an instance of the class this method is called on filled via fromDict
        '''
        location=cls()
        location.fromDict(regionRecord)
        return location
class City(Location):
    '''
    a single city as an object
    '''

    def __init__(self, **kwargs):
        '''
        construct me

        Args:
            **kwargs: the attributes of the city - level defaults to 5 and
                locationKind to "City" if not given
        '''
        # initialize the property fields first: resetting them after the
        # keyword arguments have been applied would discard a country or
        # region that was handed in as a keyword argument
        self._country = None
        self._region = None
        super(City, self).__init__(**kwargs)
        if not hasattr(self, 'level'):
            setattr(self, 'level', 5)
        if not hasattr(self, 'locationKind'):
            setattr(self, 'locationKind', "City")

    @classmethod
    def getSamples(cls):
        '''
        get sample records for this class

        Returns:
            list: a list of sample dicts
        '''
        samplesLOD = [{
            "name": "Los Angeles",
            "wikidataid": "Q65",
            "lat": 34.05223,
            "lon":-118.24368,
            "geoNameId": "5368361",
            "gndId": "4036361-2",
            "partOf": "US/CA",
            "level": 5,
            "locationKind": "City",
            "pop": "3976322",
            "regionId": "Q99",
            "countryId": "Q30"
        }]
        return samplesLOD

    def __str__(self):
        '''
        Returns:
            str: my name (or "?") followed by my region and country
        '''
        name=self.name if hasattr(self,"name") else "?"
        text = f"{name} ({self.region} - {self.country})"
        return text

    @staticmethod
    def fromCityLookup(cityLookupRecord:dict):
        '''
        create a city from a cityLookupRecord and setting City, Region and Country while at it

        Args:
            cityLookupRecord(dict): a map derived from the CityLookup view

        Returns:
            City: the city with its region and country set
        '''
        # we create city, region and country from scratch without true
        # object relational mapping and lookup from the locationContext
        # this is only useful for small result sets that need no further interlinking
        city=City()
        # first take all params
        cityRecord=City.partialDict(cityLookupRecord,City)
        city.fromDict(cityRecord)
        regionRecord=City.mappedDict(cityLookupRecord,
            [("regionId","wikidataid"),("regionName","name"),("regionIso","iso"),("regionPop","pop"),("regionLat","lat"),("regionLon","lon")])
        city.region=Region.fromRecord(regionRecord)
        countryRecord=City.mappedDict(cityLookupRecord,
            [("countryId","wikidataid"),("countryName","name"),("countryIso","iso"),("countryLat","lat"),("countryLon","lon")])
        city.country=Country()
        city.country.fromDict(countryRecord)
        city.region.country=city.country
        return city

    def setValue(self, name, record):
        '''
        set a field value with the given name to
        the given record dicts corresponding entry or none

        Args:
            name(string): the name of the field
            record(dict): the dict to get the value from
        '''
        if name in record:
            value = record[name]
        else:
            value = None
        setattr(self, name, value)

    @property
    def country(self):
        '''
        my country
        '''
        return self._country

    @country.setter
    def country(self, country):
        self._country = country

    @property
    def region(self):
        '''
        my region
        '''
        return self._region

    @region.setter
    def region(self, region):
        self._region = region
class Region(Location):
    '''
    a Region (Subdivision)
    '''

    def __init__(self, **kwargs):
        '''
        construct me

        Args:
            **kwargs: the attributes of the region - level defaults to 4 and
                locationKind to "Region" if not given
        '''
        # initialize the property field first: resetting it after the keyword
        # arguments have been applied would discard a country that was
        # handed in as a keyword argument
        self._country = None
        super(Region, self).__init__(**kwargs)
        if not hasattr(self, 'level'):
            setattr(self, 'level', 4)
        if not hasattr(self, 'locationKind'):
            setattr(self,'locationKind', "Region")

    @classmethod
    def getSamples(cls):
        '''
        get sample records for this class

        Returns:
            list: a list of sample dicts
        '''
        samplesLOD = [{
            "name": "California",
            "wikidataid": "Q99",
            "lat": 37.0,
            "lon":-120.0,
            "partOf": "US",
            "level": 4,
            "locationKind": "Region",
            "comment": None,
            "labels": ["CA", "California"],
            "iso": "US-CA",
            "country_wikidataid": "Q30"
        }]
        return samplesLOD

    def __str__(self):
        '''
        Returns:
            str: my iso code followed by my name in parentheses
        '''
        text = f"{self.iso}({self.name})"
        return text

    @property
    def country(self):
        '''
        my country
        '''
        return self._country

    @country.setter
    def country(self, country):
        self._country = country

    @staticmethod
    def fromRegionLookup(regionLookupRecord: dict):
        '''
        create a region from a regionLookupRecord and setting Region and Country while at it

        Args:
            regionLookupRecord(dict): a lookup record for a region that also
                carries the country columns (countryId, countryName, ...)

        Returns:
            Region: the region with its country set
        '''
        # we create region and country from scratch without true
        # object relational mapping and lookup from the locationContext
        # this is only useful for small result sets that need no further interlinking
        region = Region()
        # first take all params
        regionRecord = Location.partialDict(regionLookupRecord, Region)
        region.fromDict(regionRecord)
        countryRecord = Location.mappedDict(regionLookupRecord,
                                            [("countryId", "wikidataid"), ("countryName", "name"), ("countryIso", "iso"),
                                             ("countryLat", "lat"), ("countryLon", "lon")])
        region.country = Country()
        region.country.fromDict(countryRecord)
        return region
class Country(Location):
    '''
    a country
    '''

    def __init__(self, lookupSource='sqlDB', **kwargs):
        '''
        construct me

        Args:
            lookupSource(str): not used by this constructor
            **kwargs: the attributes of the country - level defaults to 3 and
                locationKind to "Country" if not given
        '''
        super(Country, self).__init__(**kwargs)
        if not hasattr(self, 'level'):
            setattr(self, 'level', 3)
        if not hasattr(self, 'locationKind'):
            setattr(self, 'locationKind', "Country")

    @classmethod
    def getSamples(cls):
        '''
        get sample records for this class

        Returns:
            list: a list of sample dicts
        '''
        samplesLOD = [
            {
                'wikidataid': 'Q38',
                'name': 'Italy',
                'iso': 'IT',
                'pop': 60317000.0,
                'lat': 42.5,
                'lon': 12.5,
            },
            {
                "name": "United States of America",
                "wikidataid": "Q30",
                "lat": 39.82818,
                "lon":-98.5795,
                "partOf": "North America",
                "level": 3,
                "locationKind": "Country",
                "comment": None,
                "labels":["USA", "US", "United States of America"],
                "iso":"US"
            }, {
            }]
        return samplesLOD

    def __str__(self):
        '''
        Returns:
            str: my iso code followed by my name in parentheses
        '''
        text = f"{self.iso}({self.name})"
        return text

    @staticmethod
    def fromCountryLookup(countryLookupRecord: dict):
        '''
        create a country from a countryLookupRecord

        Args:
            countryLookupRecord(dict): a lookup record for a country

        Returns:
            Country: the country filled with the country fields of the record
        '''
        # we create the country from scratch without true
        # object relational mapping and lookup from the locationContext
        # this is only useful for small result sets that need no further interlinking
        country = Country()
        # filter by the keys of the Country samples: the Region sample keys used
        # before dropped the "pop" field. The keys of all samples are combined
        # since the first sample alone lacks fields such as labels and partOf
        countryKeys = set()
        for sample in Country.getSamples():
            countryKeys.update(sample.keys())
        countryRecord = Location.partialDict(countryLookupRecord, Country, keys=countryKeys)
        country.fromDict(countryRecord)
        return country
class LocationContext(object):
    '''
    Holds LocationManagers of all hierarchy levels and provides methods to traverse through the levels
    '''
    db_filename="locations.db"

    def __init__(self, countryManager:CountryManager, regionManager:RegionManager, cityManager:CityManager, config:StorageConfig, debug:bool=False):
        '''
        construct me

        Args:
            countryManager(CountryManager): the country manager to be used
            regionManager(RegionManager): the region manager to be used
            cityManager(CityManager): the city manager to be used
            config(StorageConfig): the storage configuration handed to my Locator
            debug(bool): if True show debug information
        '''
        self.countryManager = countryManager
        self.regionManager = regionManager
        self.cityManager = cityManager
        # interlinkLocations reads self.debug - it has to exist
        self.debug = debug
        self.locator=Locator(storageConfig=config)

    def interlinkLocations(self,warnOnDuplicates:bool=True,profile=True):
        '''
        Interlinks locations by adding the hierarchy references to the locations

        Args:
            warnOnDuplicates(bool): if there are duplicates warn
            profile(bool): passed on to the Profiler that times the interlinking
        '''
        profiler=Profiler("interlinking Locations", profile=profile)
        duplicates=[]
        self._countryLookup, _dup = self.countryManager.getLookup("wikidataid")
        duplicates.extend(_dup)
        self._regionLookup, _dup = self.regionManager.getLookup("wikidataid")
        duplicates.extend(_dup)
        self._cityLookup, _dup = self.cityManager.getLookup("wikidataid")
        duplicates.extend(_dup)
        if len(duplicates)>0 and warnOnDuplicates:
            print(f"There are {len(duplicates)} duplicate wikidataids in the country,region and city managers used")
            if getattr(self, "debug", False):
                print(duplicates)
        # interlink region with country
        for region in self.regions:
            # a location without the id attribute simply stays unlinked
            country = self._countryLookup.get(getattr(region, 'countryId', None))
            if country is not None and isinstance(country, Country):
                region.country = country
        # interlink city with region and country
        for city in self.cities:
            country = self._countryLookup.get(getattr(city, 'countryId', None))
            if country is not None and isinstance(country, Country):
                city.country = country
            region = self._regionLookup.get(getattr(city, 'regionId', None))
            if region is not None and isinstance(region, Region):
                city.region = region
        _elapsed=profiler.time()

    def load(self,forceUpdate:bool=False,warnOnDuplicates:bool=False):
        '''
        load my data

        Args:
            forceUpdate(bool): passed as force to the fromCache call of each manager
            warnOnDuplicates(bool): if there are duplicates warn
        '''
        for manager in self.countryManager,self.regionManager,self.cityManager:
            manager.fromCache(force=forceUpdate)
        self.interlinkLocations(warnOnDuplicates=warnOnDuplicates)

    @classmethod
    def fromCache(cls, config:StorageConfig=None):
        '''
        create a LocationContext with a city, region and country manager
        that use the given configuration

        Args:
            config(StorageConfig): the configuration to use - if None the default configuration is used

        Returns:
            LocationContext: the location context
        '''
        if config is None:
            config = cls.getDefaultConfig()
        cityManager = CityManager("cities", config=config)
        regionManager = RegionManager("regions", config=config)
        countryManager = CountryManager("countries", config=config)
        locationContext = LocationContext(countryManager, regionManager, cityManager, config)
        return locationContext

    @staticmethod
    def getDefaultConfig() -> StorageConfig:
        '''
        Returns default StorageConfig
        '''
        config = StorageConfig(cacheDirName="geograpy3")
        cachedir = config.getCachePath()
        config.cacheFile = f"{cachedir}/{LocationContext.db_filename}"
        return config

    @property
    def countries(self) -> list:
        '''
        the list of my country manager
        '''
        return self.countryManager.getList()

    @property
    def regions(self) -> list:
        '''
        the list of my region manager
        '''
        return self.regionManager.getList()

    @property
    def cities(self) -> list:
        '''
        the list of my city manager
        '''
        return self.cityManager.getList()

    @staticmethod
    def _attrValues(locations, attr:str) -> list:
        '''
        get the values of the given attribute from all given locations that have it
        '''
        return [getattr(location, attr) for location in locations if hasattr(location, attr)]

    @staticmethod
    def _population(location) -> int:
        '''
        get the population of the given location as an int - 0 if it is missing or not a number
        '''
        pop = getattr(location, 'pop', None)
        if pop is None:
            return 0
        try:
            return int(pop)
        except (TypeError, ValueError):
            pass
        try:
            # values such as "60317000.0" are not accepted by int() directly
            return int(float(pop))
        except (TypeError, ValueError):
            return 0

    def locateLocation(self, *locations, verbose:bool=False):
        '''
        Get possible locations for the given location names.
        Current prioritization of the results is city(ordered by population)→region→country
        ToDo: Extend the ranking of the results e.g. matching of multiple location parts increase ranking

        Args:
            *locations: location strings - each is split at "," and " ", None entries are ignored
            verbose(bool): If True combinations of locations names are used to improve the search results. (Increases lookup time)

        Returns:
            list: the matching cities, regions and countries in this order
        '''
        locationParts = []
        for location in locations:
            if location is not None:
                locationParts.extend(location.split(','))
        # Split locationParts even further
        lp=[]
        for locationPart in locationParts:
            parts=locationPart.split(' ')
            lp.extend(parts)
            # Splitting by space breaks the look up for cities such as 'Los Angeles'
            # so pairs of adjacent words are added on request
            if verbose:
                numberParts=len(parts)
                if numberParts>1:
                    lp.extend([f"{parts[i]} {parts[i+1]}" for i in range(numberParts-1)])
        locationParts.extend(lp)
        locationParts=list(set(locationParts)) # remove duplicates
        if not locationParts:
            # nothing to look for - no need to query the managers
            return []
        cities=self.cityManager.getByName(*locationParts)
        regions = self.regionManager.getByName(*locationParts)
        countries = self.countryManager.getByName(*locationParts)
        # remove locations already identified by location in lower hierarchy
        excludeRegionIds=LocationContext._attrValues(cities, 'regionId')
        regions=[region for region in regions if hasattr(region, 'wikidataid') and region.wikidataid not in excludeRegionIds]
        excludeCountryIds=[*LocationContext._attrValues(cities, "countryId"), *LocationContext._attrValues(regions, "countryId")]
        countries=[country for country in countries if hasattr(country, 'wikidataid') and country.wikidataid not in excludeCountryIds]
        # build final result in the order city→region→country
        cities.sort(key=LocationContext._population, reverse=True)
        res = [*cities, *regions, *countries]
        return res
class Locator(object):
'''
location handling
'''
# singleton instance
locator = None
def __init__(self, db_file=None, correctMisspelling=False, storageConfig:StorageConfig=None, debug=False):
    '''
    Constructor

    Args:
        db_file(str): the path to the database file - if None the cache file
            of the storage configuration is used
        correctMisspelling(bool): if True correct typical misspellings
        storageConfig(StorageConfig): the storage Configuration to use - if None
            the default configuration of the LocationContext is used
        debug(bool): if True show debug information
    '''
    self.debug = debug
    self.correctMisspelling = correctMisspelling
    self.storageConfig = LocationContext.getDefaultConfig() if storageConfig is None else storageConfig
    if db_file is not None:
        self.db_file = db_file
    else:
        # no explicit file: take path and file from the storage configuration
        self.db_path = self.storageConfig.getCachePath()
        self.db_file = self.storageConfig.cacheFile
    self.view = "CityLookup"
    self.sqlDB = SQLDB(self.db_file, errorDebug=True)
    self.getAliases()
    self.dbVersion = "2021-08-18 16:15:00"
@staticmethod
def resetInstance():
Locator.locator = None
@staticmethod
def getInstance(correctMisspelling=False, debug=False):
'''
get the singleton instance of the Locator. If parameters are changed on further calls
the initial parameters will still be in effect since the original instance will be returned!
Args:
correctMispelling(bool): if True correct typical misspellings
debug(bool): if True show debug information
'''
if Locator.locator is None:
Locator.locator = Locator(correctMisspelling=correctMisspelling, debug=debug)
return Locator.locator
def normalizePlaces(self,places:list):
    '''
    normalize the given places by stripping surrounding whitespace
    and replacing a place by its alias if there is one

    Args:
        places(list): a list of places

    Return:
        list: stripped and aliased list of places
    '''
    aliases = self.aliases
    stripped = (place.strip() for place in places)
    # an alias wins over the stripped original
    return [aliases[place] if place in aliases else place for place in stripped]
def locateCity(self, places:list):
'''
locate a city, region country combination based on the given wordtoken information
Args:
places(list): a list of places derived by splitting a locality e.g. "San Francisco, CA"
leads to "San Francisco", "CA"
Returns:
City: a city with country and region details