Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

initial commit

  • Loading branch information...
commit e41aec229efafb333c97e56901ada9ed22aed427 0 parents
@ramusus ramusus authored
27 LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) 2008, Justin Bronn
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of geonames nor the names of its contributors may be used
+ to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
8 README
@@ -0,0 +1,8 @@
+This is an experimental application for using Geonames data within
+GeoDjango. To get started, download the necessary data (see
+`DOWNLOAD` in the `data` subdirectory), add `geonames` to your
+`INSTALLED_APPS`, and run the following management commands.
+These commands will take a while to run.
+
+ $ ./manage.py compress_geonames
+ $ ./manage.py load_geonames
0  __init__.py
No changes.
7 admin.py
@@ -0,0 +1,7 @@
+from django.contrib.gis import admin
+from models import Geoname
+
class GeonameAdmin(admin.OSMGeoAdmin):
    """Admin options for `Geoname` records; entries are searchable by name."""

    search_fields = ('name',)
+
+admin.site.register(Geoname, GeonameAdmin)
12 data/DOWNLOAD
@@ -0,0 +1,12 @@
+Geonames data may be downloaded from:
+
+ http://download.geonames.org/export/dump/
+
+The management commands are written assuming that the following data
+files are downloaded:
+
+ allCountries.zip
+ alternateNames.zip
+
+Other data files, as they become supported, should also be placed in
+this directory.
2  data/create_alternate_indexes.sql
@@ -0,0 +1,2 @@
-- Secondary indexes on the alternate-names table. The load_geonames
-- command runs this script after the bulk COPY has finished.
CREATE INDEX "geonames_alternate_geoname_id"
    ON "geonames_alternate" ("geoname_id");
CREATE INDEX "geonames_alternate_variant"
    ON "geonames_alternate" ("variant");
12 data/create_geoname_indexes.sql
@@ -0,0 +1,12 @@
-- Indexes on the main geonames table. The load_geonames command runs this
-- script after the bulk COPY has finished.

-- Spatial index. No operator class is named so that the default GiST
-- operator class for the geometry type is used; the explicit
-- GIST_GEOMETRY_OPS class is not available in newer PostGIS releases.
CREATE INDEX "geonames_geoname_point_id" ON "geonames_geoname" USING GIST ( "point" );

-- Plain b-tree indexes on the commonly filtered columns.
CREATE INDEX "geonames_geoname_name" ON "geonames_geoname" ("name");
CREATE INDEX "geonames_geoname_fclass" ON "geonames_geoname" ("fclass");
CREATE INDEX "geonames_geoname_fcode" ON "geonames_geoname" ("fcode");
CREATE INDEX "geonames_geoname_country" ON "geonames_geoname" ("country");
CREATE INDEX "geonames_geoname_admin1" ON "geonames_geoname" ("admin1");
CREATE INDEX "geonames_geoname_admin2" ON "geonames_geoname" ("admin2");
CREATE INDEX "geonames_geoname_admin3" ON "geonames_geoname" ("admin3");
CREATE INDEX "geonames_geoname_admin4" ON "geonames_geoname" ("admin4");
CREATE INDEX "geonames_geoname_population" ON "geonames_geoname" ("population");
CREATE INDEX "geonames_geoname_elevation" ON "geonames_geoname" ("elevation");
CREATE INDEX "geonames_geoname_topo" ON "geonames_geoname" ("topo");
2  data/drop_alternate_indexes.sql
@@ -0,0 +1,2 @@
-- Drop the alternate-names indexes before the bulk COPY so rows are inserted
-- without per-row index maintenance. IF EXISTS keeps the script re-runnable
-- when a previous load stopped before the indexes were re-created.
DROP INDEX IF EXISTS "geonames_alternate_geoname_id";
DROP INDEX IF EXISTS "geonames_alternate_variant";
12 data/drop_geoname_indexes.sql
@@ -0,0 +1,12 @@
-- Drop the geonames indexes before the bulk COPY so rows are inserted
-- without per-row index maintenance. IF EXISTS keeps the script re-runnable
-- when a previous load stopped before the indexes were re-created.
DROP INDEX IF EXISTS "geonames_geoname_point_id";
DROP INDEX IF EXISTS "geonames_geoname_name";
DROP INDEX IF EXISTS "geonames_geoname_fclass";
DROP INDEX IF EXISTS "geonames_geoname_fcode";
DROP INDEX IF EXISTS "geonames_geoname_country";
DROP INDEX IF EXISTS "geonames_geoname_admin1";
DROP INDEX IF EXISTS "geonames_geoname_admin2";
DROP INDEX IF EXISTS "geonames_geoname_admin3";
DROP INDEX IF EXISTS "geonames_geoname_admin4";
DROP INDEX IF EXISTS "geonames_geoname_population";
DROP INDEX IF EXISTS "geonames_geoname_elevation";
DROP INDEX IF EXISTS "geonames_geoname_topo";
56 load.py
@@ -0,0 +1,56 @@
+import bz2, gzip, os, zipfile
+from datetime import datetime
+
+from django.db import transaction
+
+from models import Admin1Code, Admin2Code, TimeZone, Geoname, Alternate
+
+GEONAMES_DATA = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data'))
+
def txt_lengths(txt_file):
    """Print the widest stripped value found in each tab-separated column.

    `txt_file` is a file name relative to the `GEONAMES_DATA` directory.
    One line is printed per column, in column order, formatted as
    ``<column index>: <maximum length>``.
    """
    lengths = {}
    # The context manager closes the file even if a line fails to parse.
    with open(os.path.join(GEONAMES_DATA, txt_file)) as fh:
        for line in fh:
            for i, col in enumerate(line.split('\t')):
                # Only the running maximum is kept; storing every length
                # would grow with the size of the file.
                lengths[i] = max(len(col.strip()), lengths.get(i, 0))

    for col in sorted(lengths):
        print('%d: %d' % (col, lengths[col]))
+
def clean(sarr):
    """Return the items of `sarr` stripped of surrounding whitespace, as text.

    Byte strings are decoded as UTF-8. Items that are already text are only
    stripped, because decoding them again would fail on non-ASCII content.
    """
    return [s.strip().decode('utf8') if isinstance(s, bytes) else s.strip()
            for s in sarr]
+
def _iter_rows(txt_file, fields, skip_header=False):
    """Yield one dict per line of a tab-separated file in `GEONAMES_DATA`.

    Each dict maps the names in `fields` to the cleaned column values of the
    line, pairing them positionally. When `skip_header` is true the first
    line of the file is discarded.
    """
    with open(os.path.join(GEONAMES_DATA, txt_file)) as fh:
        if skip_header:
            # The default of None means an empty file simply yields no rows.
            next(fh, None)
        for line in fh:
            yield dict(zip(fields, clean(line.split('\t'))))


@transaction.commit_on_success
def run():
    """Create the Admin1Code, Admin2Code and TimeZone rows from the text files.

    Reads `admin1Codes.txt`, `admin2Codes.txt` and `timeZones.txt` from the
    `GEONAMES_DATA` directory and creates one model instance per line. The
    whole load is wrapped by `transaction.commit_on_success`.
    """
    # Loading the Admin1Code models
    for kwargs in _iter_rows('admin1Codes.txt', ('code', 'name')):
        Admin1Code.objects.create(**kwargs)

    # Loading the Admin2Code models
    admin2_fields = ('code', 'name', 'ascii', 'geonameid')
    for kwargs in _iter_rows('admin2Codes.txt', admin2_fields):
        # The model only stores `code` and `name`; drop the extra columns.
        for key in ('ascii', 'geonameid'):
            kwargs.pop(key)
        Admin2Code.objects.create(**kwargs)

    # Loading the TimeZone models (the file starts with a header line).
    tz_fields = ('tzid', 'gmt_offset', 'dst_offset')
    for kwargs in _iter_rows('timeZones.txt', tz_fields, skip_header=True):
        TimeZone.objects.create(**kwargs)
0  management/__init__.py
No changes.
0  management/commands/__init__.py
No changes.
126 management/commands/compress_geonames.py
@@ -0,0 +1,126 @@
+import datetime, gzip, os, sys, zipfile
+from optparse import make_option
+
+from django.core.management.base import NoArgsCommand
+
+from geonames import models
+GEONAMES_DATA = os.path.abspath(os.path.join(os.path.dirname(models.__file__), 'data'))
+
class Command(NoArgsCommand):
    """Convert the downloaded Geonames archives into gzipped tab-separated files.

    Reads `allCountries.zip` and `alternateNames.zip` from `GEONAMES_DATA` and
    writes `allCountries.gz` and `alternateNames.gz` into the same directory.
    The output column order is what the `load_geonames` command lists in its
    `COPY` statements.
    """

    option_list = NoArgsCommand.option_list + (
        make_option('-t', '--time', action='store_true', dest='time', default=False,
                    help='Print the total time in running this command'),
        make_option('-l', '--lengths', action='store_true', dest='lengths', default=False,
                    help='Print the lengths for each of the fields.'),
        make_option('--no-countries', action='store_true', dest='no_countries', default=False,
                    help='Do not perform compression on allCountries.zip'),
        make_option('--no-alternates', action='store_true', dest='no_alternates', default=False,
                    help='Do not perform compression on alternateNames.zip'),
    )

    # ANSI escape sequences: erase the current terminal line, then move the
    # cursor back to the first column so the progress message is redrawn
    # in place.
    clear_line = chr(27) + '[2K' + chr(27) + '[G'

    def _read_lines(self, zip_name, member):
        """Return the lines of `member` stored in the `zip_name` archive."""
        zf = zipfile.ZipFile(os.path.join(GEONAMES_DATA, zip_name))
        try:
            return zf.read(member).split('\n')
        finally:
            zf.close()

    def _progress(self, label, i, num_lines):
        """Redraw the single-line progress message for the file `label`."""
        sys.stdout.write(self.clear_line)
        sys.stdout.write('Compressing %s: %.2f%% (%d/%d)' % (label, (100. * i) / num_lines, i, num_lines))
        sys.stdout.flush()

    def _invalid_row(self, i, row):
        """Report a row that does not have the expected fields on stderr."""
        sys.stderr.write('Invalid row (line %d):\n' % i)
        sys.stderr.write('%s\n' % str(row))

    def _print_lengths(self, len_fields, lengths):
        """Print the maximum observed length of each field in `len_fields`."""
        for fld in len_fields:
            sys.stdout.write('%s:\t%d\n' % (fld, lengths[fld]))

    def allCountries(self, **options):
        """Write `allCountries.gz` from the `allCountries.txt` archive member.

        Each output row drops the `latitude`, `longitude` and `asciiname`
        columns and appends the location as an EWKT point. Rows that do not
        contain every input field are reported on stderr and skipped. With
        the `lengths` option, the maximum length of each text field is
        printed at the end.
        """
        in_fields = ['geonameid', 'name', 'asciiname', 'alternates', 'latitude', 'longitude',
                     'fclass', 'fcode', 'country_code', 'cc2',
                     'admin1', 'admin2', 'admin3', 'admin4',
                     'population', 'elevation', 'topo', 'timezone', 'mod_date']
        out_fields = [f for f in in_fields if f not in ('latitude', 'longitude', 'asciiname')]
        len_fields = ['name', 'asciiname', 'alternates', 'fclass', 'fcode', 'country_code',
                      'cc2', 'admin1', 'admin2', 'admin3', 'admin4', 'timezone']
        lengths = dict((f, 0) for f in len_fields)

        contents = self._read_lines('allCountries.zip', 'allCountries.txt')
        num_lines = len(contents)

        gzf = gzip.GzipFile(os.path.join(GEONAMES_DATA, 'allCountries.gz'), 'w')
        try:
            for i, line in enumerate(contents):
                if line:
                    row = dict(zip(in_fields, [v.strip() for v in line.split('\t')]))
                    if len(row) != len(in_fields):
                        # zip() stops at the shorter sequence, so a short line
                        # leaves trailing fields out of the row.
                        self._invalid_row(i, row)
                    else:
                        if options['lengths']:
                            for k in len_fields:
                                lengths[k] = max(len(row[k]), lengths[k])

                        # Setting integers to 0 so they won't have to be NULL.
                        for key in ('population', 'elevation', 'topo'):
                            if not row[key]:
                                row[key] = '0'

                        # Getting the EWKT for the point -- has to be EWKT or else
                        # the insertion of the point will raise a constraint error
                        # for a non-matching ID.
                        wkt = 'SRID=4326;POINT(%s %s)' % (row['longitude'], row['latitude'])

                        new_line = '\t'.join([row[k] for k in out_fields])
                        new_line += '\t%s\n' % wkt
                        gzf.write(new_line)

                if i % 10000 == 0:
                    self._progress('allCountries.txt', i, num_lines)
        finally:
            # Close the output even when a row fails, so the gzip trailer
            # is written and the handle is released.
            gzf.close()

        sys.stdout.write('\n')

        if options['lengths']:
            self._print_lengths(len_fields, lengths)

    def alternateNames(self, **options):
        """Write `alternateNames.gz` from the `alternateNames.txt` archive member.

        The `preferred` and `short` flags are normalised to '1' (non-empty)
        or '0' (empty or absent). Rows missing any of the other fields are
        reported on stderr and skipped. With the `lengths` option, the
        maximum length of `isolanguage` and `variant` is printed at the end.
        """
        in_fields = ['alternateid', 'geoname_id', 'isolanguage', 'variant', 'preferred', 'short']
        bool_fields = ['preferred', 'short']
        len_fields = ['isolanguage', 'variant']
        out_fields = in_fields
        required_fields = [f for f in in_fields if f not in bool_fields]
        lengths = dict((f, 0) for f in len_fields)

        contents = self._read_lines('alternateNames.zip', 'alternateNames.txt')
        num_lines = len(contents)

        gzf = gzip.GzipFile(os.path.join(GEONAMES_DATA, 'alternateNames.gz'), 'w')
        try:
            for i, line in enumerate(contents):
                if line:
                    row = dict(zip(in_fields, [v.strip() for v in line.split('\t')]))
                    if [f for f in required_fields if f not in row]:
                        self._invalid_row(i, row)
                    else:
                        for bool_field in bool_fields:
                            # A flag column that is missing altogether is
                            # treated the same as an empty one.
                            if row.get(bool_field):
                                row[bool_field] = '1'
                            else:
                                row[bool_field] = '0'
                        if options['lengths']:
                            for k in len_fields:
                                lengths[k] = max(len(row[k]), lengths[k])
                        new_line = '\t'.join([row[k] for k in out_fields])
                        new_line += '\n'
                        gzf.write(new_line)

                if i % 10000 == 0:
                    self._progress('alternateNames.txt', i, num_lines)
        finally:
            gzf.close()

        sys.stdout.write('\n')

        if options['lengths']:
            self._print_lengths(len_fields, lengths)

    def handle_noargs(self, **options):
        """Run the enabled compression steps, optionally timing the whole run."""
        if options['time']:
            start_time = datetime.datetime.now()

        if not options['no_countries']:
            self.allCountries(**options)

        if not options['no_alternates']:
            self.alternateNames(**options)

        if options['time']:
            sys.stdout.write('\nCompleted in %s\n' % (datetime.datetime.now() - start_time))
87 management/commands/load_geonames.py
@@ -0,0 +1,87 @@
+import datetime, os, sys
+from optparse import make_option
+
+from django.db import connection, models
+from django.core.management import call_command, sql, color
+from django.core.management.base import NoArgsCommand
+from django.conf import settings
+from django.contrib.gis.db.backend.postgis.creation import get_cmd_options
+
+from geonames import models as m
+Alternate = m.Alternate
+Geoname = m.Geoname
+GEONAMES_DATA = os.path.abspath(os.path.join(os.path.dirname(m.__file__), 'data'))
+
class Command(NoArgsCommand):
    """Bulk-load the files written by `compress_geonames` into PostgreSQL.

    For each table the indexes are dropped, the gzipped data is piped into
    `psql` with a `COPY ... FROM STDIN` statement, and the indexes are
    re-created. This bypasses the ORM, and reading from a gzipped file
    reduces disk I/O.
    """

    option_list = NoArgsCommand.option_list + (
        make_option('-t', '--time', action='store_true', dest='time', default=False,
                    help='Print the total time in running this command'),
        make_option('--no-alternates', action='store_true', dest='no_alternates', default=False,
                    help='Disable loading of the Geonames alternate names data.'),
        make_option('--no-geonames', action='store_true', dest='no_geonames', default=False,
                    help='Disable loading of the Geonames data.'),
    )

    def _shell(self, cmd):
        """Print `cmd`, run it through the shell and warn on a non-zero exit."""
        print(cmd)
        status = os.system(cmd)
        if status != 0:
            # Only a warning: a failing DROP INDEX must not prevent the
            # COPY that follows it.
            sys.stderr.write('Command exited with status %s: %s\n' % (status, cmd))
        return status

    def _load_table(self, db_opts, copy_sql, gz_name, drop_sql, create_sql):
        """Drop indexes, COPY the gzipped file `gz_name`, then rebuild indexes.

        `gz_name`, `drop_sql` and `create_sql` are file names inside the
        `GEONAMES_DATA` directory; `db_opts` is the option string handed
        to `psql`.
        """
        # File paths are double-quoted so a data directory containing spaces
        # survives the shell.
        fromfile_cmd = 'psql %(db_opts)s -f "%(sql_file)s"'
        # `gzip -dc` is used for decompression because `gzcat` is not
        # installed on every platform.
        copy_cmd = 'gzip -dc "%(gz_file)s" | psql %(db_opts)s -c "%(copy_sql)s"'

        self._shell(fromfile_cmd % {'db_opts': db_opts,
                                    'sql_file': os.path.join(GEONAMES_DATA, drop_sql)})
        self._shell(copy_cmd % {'gz_file': os.path.join(GEONAMES_DATA, gz_name),
                                'db_opts': db_opts,
                                'copy_sql': copy_sql})
        self._shell(fromfile_cmd % {'db_opts': db_opts,
                                    'sql_file': os.path.join(GEONAMES_DATA, create_sql)})

    def handle_noargs(self, **options):
        """Sync the database tables, then load the enabled data sets."""
        if options['time']:
            start_time = datetime.datetime.now()

        # Making sure the db tables exist.
        call_command('syncdb')

        db_opts = get_cmd_options(settings.DATABASE_NAME)

        ### COPY'ing into the Geonames table ###
        if not options['no_geonames']:
            copy_sql = "COPY %s (geonameid,name,alternates,fclass,fcode,country,cc2,admin1,admin2,admin3,admin4,population,elevation,topo,timezone,moddate,point) FROM STDIN;" % Geoname._meta.db_table
            self._load_table(db_opts, copy_sql, 'allCountries.gz',
                             'drop_geoname_indexes.sql', 'create_geoname_indexes.sql')
            print('Finished PostgreSQL `COPY` from Geonames all countries data file.')

        ### COPY'ing into the Geonames alternate table ###
        if not options['no_alternates']:
            copy_sql = "COPY %s (alternateid,geoname_id,isolanguage,variant,preferred,short) FROM STDIN;" % Alternate._meta.db_table
            self._load_table(db_opts, copy_sql, 'alternateNames.gz',
                             'drop_alternate_indexes.sql', 'create_alternate_indexes.sql')
            print('Finished PostgreSQL `COPY` from Geonames alternate names data file.')

        # Done
        if options['time']:
            print('\nCompleted in %s' % (datetime.datetime.now() - start_time))
72 models.py
@@ -0,0 +1,72 @@
+from django.contrib.gis.db import models
+
class BigIntegerField(models.PositiveIntegerField):
    """Positive integer field whose database column type is `bigint`."""

    def db_type(self, connection=None):
        # `connection` is accepted and ignored: some Django versions call
        # db_type() without arguments and others pass the connection.
        return 'bigint'
+
+### Geonames.org Models ###
+
class Admin1Code(models.Model):
    """A code/name pair loaded from `admin1Codes.txt` by `load.run`."""

    code = models.CharField(max_length=6)
    name = models.CharField(max_length=58)

    objects = models.GeoManager()

    def __unicode__(self):
        # Displayed as "<code>: <name>".
        return u': '.join([self.code, self.name])
+
class Admin2Code(models.Model):
    """A code/name pair loaded from `admin2Codes.txt` by `load.run`."""

    code = models.CharField(max_length=32)
    name = models.CharField(max_length=46)

    objects = models.GeoManager()

    def __unicode__(self):
        # Displayed as "<code>: <name>".
        return u': '.join([self.code, self.name])
+
class TimeZone(models.Model):
    """A time zone identifier with its GMT and DST offsets.

    Rows are loaded from `timeZones.txt` by `load.run`.
    """

    tzid = models.CharField(max_length=30)
    gmt_offset = models.FloatField()
    dst_offset = models.FloatField()

    objects = models.GeoManager()

    def __unicode__(self):
        return self.tzid
+
class Geoname(models.Model):
    """One Geonames record, including its location as a point geometry.

    The table is filled by the `load_geonames` command with a PostgreSQL
    `COPY` of the file written by `compress_geonames`, which stores '0'
    in `population`, `elevation` and `topo` when the source value is empty.
    """

    # primary_key=True already implies a unique column.
    geonameid = models.PositiveIntegerField(primary_key=True)
    name = models.CharField(max_length=154, db_index=True)
    alternates = models.TextField(blank=True)
    fclass = models.CharField(max_length=1, db_index=True)
    fcode = models.CharField(max_length=5, db_index=True)
    country = models.CharField(max_length=2, blank=True, db_index=True)
    cc2 = models.CharField('Alternate Country Code', max_length=32, blank=True)
    admin1 = models.CharField(max_length=6, blank=True, db_index=True)
    admin2 = models.CharField(max_length=63, blank=True, db_index=True)
    admin3 = models.CharField(max_length=10, blank=True, db_index=True)
    admin4 = models.CharField(max_length=8, blank=True, db_index=True)
    population = BigIntegerField(db_index=True)
    elevation = models.IntegerField(db_index=True)
    topo = models.IntegerField(db_index=True)
    timezone = models.CharField(max_length=30, blank=True)
    moddate = models.DateField('Date of Last Modification')
    point = models.PointField(null=True)

    objects = models.GeoManager()

    def __unicode__(self):
        return self.name
+
class Alternate(models.Model):
    """An alternate name (`variant`) attached to a `Geoname` record.

    The table is filled by the `load_geonames` command with a PostgreSQL
    `COPY` of the file written by `compress_geonames`.
    """

    # primary_key=True already implies a unique column.
    alternateid = models.PositiveIntegerField(primary_key=True)
    geoname = models.ForeignKey(Geoname)
    isolanguage = models.CharField(max_length=7)
    variant = models.CharField(max_length=222, db_index=True)
    preferred = models.BooleanField()
    short = models.BooleanField()

    objects = models.GeoManager()

    def __unicode__(self):
        # Shows the name of the related Geoname, not the variant itself.
        return self.geoname.name
1  views.py
@@ -0,0 +1 @@
+# Create your views here.
Please sign in to comment.
Something went wrong with that request. Please try again.