ramusus · pix0r · May 6, 2011 · May 6, 2011 · May 6, 2011 · May 6, 2011
diff --git a/.gitignore b/.gitignore
@@ -1 +1,5 @@
+build/
+dist/
 geonames/data
+*.pyc
+*.egg-info
diff --git a/DOWNLOAD b/DOWNLOAD
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -0,0 +1,2 @@
+include README.md
+recursive-include geonames/sql *.sql
diff --git a/README b/README
diff --git a/README.md b/README.md
@@ -0,0 +1,44 @@
+
+django-geonames
+===============
+
+This is an experimental application for using Geonames data within
+GeoDjango.
+
+Furthermore this is a fork of
+[ramusus django-geonames](https://github.com/lazerscience/django-geonames),
+containing some (osx specific) fixes:
+
+* setup.py includes geonames/sql/* as package_data
+* `load_geonames` checks whether it is running on OS X <= 10.6.7,
+  because `zcat` has a bug there, so `gzcat` is used instead.
+  Hopefully this gets fixed in 10.7.
+
+I also encountered a weird error: `syncdb` wouldn't create the tables
+specified by `geonames/models.py`. I didn't find the reason why this
+happens. However, here's a workaround in case you encounter this too:
+
+* add `geonames` to your `INSTALLED_APPS` (as you would anyway)
+* now do `python manage.py sqlall geonames` and copy the sql from the output
+* run `python manage.py dbshell`, paste the copied sql and execute it
+* now that we have our tables created, you can follow the installation
+  instructions
+
+Installation
+============
+
+Note that running all this can take some serious time. The database is
+pretty huge (~200 MB WITHOUT alternateNames, which itself weighs in
+at ~94 MB). So you might want to run `load_geonames` overnight: it
+took my MacBook 18h28min to complete with a default 5200 rpm drive.
+You might find it a hell of a lot faster with a 7200 rpm drive or an
+SSD.
+
+1. Add `geonames` to your `INSTALLED_APPS`
+2. `python manage.py syncdb` so it creates the tables
+3. `python manage.py download_geonames` will download all the data
+    you might need (and more)
+4. `python manage.py compress_geonames` -- This will gzip the downloaded
+    data, to minimize disk I/O during sql read.
+5. `python manage.py load_geonames` -- Load the data.
+6. Hours later: have fun ;-)
diff --git a/geonames/admin.py b/geonames/admin.py
@@ -1,7 +1,44 @@
 from django.contrib.gis import admin
-from models import Geoname
 
-class GeonameAdmin(admin.OSMGeoAdmin):
+from geonames.models import Geoname, Alternate
+
+from django.utils.translation import ugettext_lazy as _
+from django.contrib.admin import SimpleListFilter
+
+
+class CityListFilter(SimpleListFilter):
+    """Admin sidebar filter that narrows geonames by their ``fcode``."""
+    # Heading shown in the right-hand admin sidebar above the options.
+    title = _('is topographically')
+    # Name of the URL query parameter that carries the selection.
+    parameter_name = 'topo'
+
+    def lookups(self, request, model_admin):
+        """Return the (value, label) pairs offered in the sidebar."""
+        return (
+            ('city', _('city or town')),
+            ('country', _('country')),
+            ('continent', _('continent')),
+        )
+
+    def queryset(self, request, queryset):
+        """Restrict ``queryset`` to the fcode of the selected option."""
+        fcodes = {'city': 'PPL', 'country': 'PCLI', 'continent': 'CONT'}
+        fcode = fcodes.get(self.value())
+        # No (or an unrecognised) selection leaves the queryset untouched.
+        if fcode is None:
+            return queryset
+        return queryset.filter(fcode=fcode)
+
+
+class AlternateInline(admin.TabularInline):
+    model = Alternate  # listed in GeonameAdmin.inlines
+
+
+class GeonameAdmin(admin.GeoModelAdmin):
     search_fields = ('name',)
-
+    list_display = ('name', 'country', 'timezone')  # changelist columns
+    list_filter = (CityListFilter, 'country', 'timezone')  # sidebar filters
+    inlines = (AlternateInline,)  # Alternate rows edited on the same page
+
 admin.site.register(Geoname, GeonameAdmin)
diff --git a/geonames/load.py b/geonames/load.py
@@ -1,11 +1,12 @@
-import bz2, gzip, os, zipfile
-from datetime import datetime
+import os
 
 from django.db import transaction
 
-from models import Admin1Code, Admin2Code, TimeZone, Geoname, Alternate
+from geonames.models import Admin1Code, Admin2Code, TimeZone
+
+GEONAMES_DATA = os.path.abspath(
+    os.path.join(os.path.dirname(__file__), 'data'))  # 'data' dir beside this module
 
-GEONAMES_DATA = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data'))
 
 def txt_lengths(txt_file):
     fh = open(os.path.join(GEONAMES_DATA, txt_file))
@@ -18,15 +19,17 @@ def txt_lengths(txt_file):
                 lengths[i] = [n]
             else:
                 lengths[i].append(n)
-    
+
     cols = lengths.keys()
     cols.sort()
     for col in cols:
         print '%d: %d' % (col, max(lengths[col]))
 
+
 def clean(sarr):
     return [s.strip().decode('utf8') for s in sarr]
 
+
 @transaction.commit_on_success
 def run():
     # Loading the Admin1Code models
@@ -35,16 +38,17 @@ def run():
     for line in fh:
         splits = line.split('\t')
         kwargs = dict(zip(fields, clean(splits)))
-        admin1 = Admin1Code.objects.create(**kwargs)
+        Admin1Code.objects.create(**kwargs)
 
     # Loading the Admin2Code models
     fh = open(os.path.join(GEONAMES_DATA, 'admin2Codes.txt'))
     fields = ('code', 'name', 'ascii', 'geonameid')
     for line in fh:
         splits = line.split('\t')
         kwargs = dict(zip(fields, clean(splits)))
-        for key in ('ascii', 'geonameid'): kwargs.pop(key)
-        admin2 = Admin2Code.objects.create(**kwargs)
+        for key in ('ascii', 'geonameid'):
+            kwargs.pop(key)
+        Admin2Code.objects.create(**kwargs)
 
     # Loading the TimeZone models.
     fh = open(os.path.join(GEONAMES_DATA, 'timeZones.txt'))
@@ -53,4 +57,4 @@ def run():
     for line in fh:
         splits = line.split('\t')
         kwargs = dict(zip(fields, clean(splits)))
-        tz = TimeZone.objects.create(**kwargs)    
+        TimeZone.objects.create(**kwargs)
diff --git a/geonames/management/commands/compress_geonames.py b/geonames/management/commands/compress_geonames.py
@@ -1,13 +1,25 @@
-import datetime, gzip, os, sys, zipfile
+import datetime
+import gzip
+import os
+import sys
+import zipfile
 from optparse import make_option
 
 from django.core.management.base import NoArgsCommand
+from django.conf import settings
 
 from geonames import models
-GEONAMES_DATA = os.path.abspath(os.path.join(os.path.dirname(models.__file__), 'data'))
+
+GEONAMES_DATA = getattr(settings,
+        'GEONAMES_DATA',  # optional override from the Django settings
+        os.path.abspath(os.path.join(os.path.dirname(models.__file__), 'data'))
+        )
+GEONAMES_DATA_PC = getattr(settings,
+        'GEONAMES_DATA_PC',  # optional override for the postal-code data dir
+        os.path.join(GEONAMES_DATA, 'pc'),  # default: 'pc' inside GEONAMES_DATA
+        )
 
 class Command(NoArgsCommand):
-
     option_list = NoArgsCommand.option_list + (
         make_option('-t', '--time', action='store_true', dest='time', default=False,
                     help='Print the total time in running this command'),
@@ -17,43 +29,47 @@ class Command(NoArgsCommand):
                     help='Do not perform compression on allCountries.zip'),
         make_option('--no-alternates', action='store_true', dest='no_alternates', default=False,
                     help='Do not perform compression on alternateNames.zip'),
-                    )
-
+        make_option('--no-postalcodes', action='store_true', dest='no_postalcodes', default=False,
+                    help='Do not perform compression on postalcodes allCountries.zip'),
+    )
     clear_line = chr(27) + '[2K' + chr(27) +'[G'
 
     def allCountries(self, **options):
         zf = zipfile.ZipFile(os.path.join(GEONAMES_DATA, 'allCountries.zip'))
         gzf = gzip.GzipFile(os.path.join(GEONAMES_DATA, 'allCountries.gz'), 'w')
 
         in_fields = ['geonameid', 'name', 'asciiname', 'alternates', 'latitude', 'longitude',
-                     'fclass', 'fcode', 'country_code', 'cc2', 
+                     'fclass', 'fcode', 'country_code', 'cc2',
                      'admin1', 'admin2', 'admin3', 'admin4',
                      'population', 'elevation', 'topo', 'timezone', 'mod_date']
         out_fields = [f for f in in_fields if not f in ('latitude', 'longitude', 'asciiname')]
         len_fields = ['name', 'asciiname', 'alternates', 'fclass', 'fcode', 'country_code',
                       'cc2', 'admin1', 'admin2', 'admin3', 'admin4', 'timezone']
-        if options['lengths']: lengths = dict([(f, 0) for f in len_fields])
+        if options['lengths']:
+            lengths = dict([(f, 0) for f in len_fields])
 
         contents = zf.read('allCountries.txt').split('\n')
         num_lines = len(contents)
         for i, line in enumerate(contents):
             if line:
                 row = dict(zip(in_fields, map(str.strip, line.split('\t'))))
                 if options['lengths']:
-                    for k in len_fields: lengths[k] = max(len(row[k]), lengths[k])
-
+                    for k in len_fields:
+                        lengths[k] = max(len(row[k]), lengths[k])
+
                 # fixing trailing slash problem in geonames data
                 try:
                     if row['name'][-1:] == "\\":
                         row['name'] = row['name'][0:-1]
                 except:
                     pass
-                    
+
                 try:
                     # Setting integers to 0 so they won't have to be NULL.
                     for key in ('population', 'elevation', 'topo'):
-                        if not row[key]: row[key] = '0'  
-
+                        if not row[key]:
+                            row[key] = '0'
+
                     # Getting the EWKT for the point -- has to be EWKT or else
                     # the insertion of the point will raise a constraint error for
                     # for a non-matching ID.
@@ -68,7 +84,8 @@ def allCountries(self, **options):
 
             if i % 10000 == 0:
                 sys.stdout.write(self.clear_line)
-                sys.stdout.write('Compressing allCountries.txt: %.2f%% (%d/%d)' % ( (100. * i) / num_lines, i, num_lines))
+                sys.stdout.write('Compressing allCountries.txt: %.2f%% (%d/%d)' %
+                                 ((100. * i) / num_lines, i, num_lines))
                 sys.stdout.flush()
 
         gzf.close()
@@ -87,7 +104,8 @@ def alternateNames(self, **options):
         bool_fields = ['preferred', 'short']
         len_fields = ['isolanguage', 'variant']
         out_fields = in_fields
-        if options['lengths']: lengths = dict([(f, 0) for f in len_fields])
+        if options['lengths']:
+            lengths = dict([(f, 0) for f in len_fields])
 
         contents = zf.read('alternateNames.txt').split('\n')
         num_lines = len(contents)
@@ -100,14 +118,16 @@ def alternateNames(self, **options):
                     else:
                         row[bool_field] = '0'
                 if options['lengths']:
-                    for k in len_fields: lengths[k] = max(len(row[k]), lengths[k])
+                    for k in len_fields:
+                        lengths[k] = max(len(row[k]), lengths[k])
                 new_line = '\t'.join([row[k] for k in out_fields])
                 new_line += '\n'
                 gzf.write(new_line)
 
                 if i % 10000 == 0:
                     sys.stdout.write(self.clear_line)
-                    sys.stdout.write('Compressing alternateNames.txt: %.2f%% (%d/%d)' % ( (100. * i) / num_lines, i, num_lines))
+                    sys.stdout.write('Compressing alternateNames.txt: %.2f%% (%d/%d)' %
+                                     ((100. * i) / num_lines, i, num_lines))
                     sys.stdout.flush()
 
         gzf.close()
@@ -118,6 +138,48 @@ def alternateNames(self, **options):
             for fld in len_fields:
                 sys.stdout.write('%s:\t%d\n' % (fld, lengths[fld]))
 
+    def postalCodes(self, **options):
+        """Rewrite the postal-code allCountries.zip as a gzipped TSV.
+
+        Rows without latitude or longitude are dropped and an empty
+        accuracy becomes '0'; options['lengths'] prints column widths.
+        """
+        in_fields = ['countrycode', 'postalcode', 'placename', 'admin1name',
+                     'admin1code', 'admin2name', 'admin2code', 'admin3name',
+                     'admin3code', 'latitude', 'longitude', 'accuracy']
+        len_fields = in_fields[:9]  # every column before the coordinates
+        lengths = dict([(f, 0) for f in len_fields])
+        zf = zipfile.ZipFile(os.path.join(GEONAMES_DATA_PC, 'allCountries.zip'))
+        try:
+            contents = zf.read('allCountries.txt').split('\n')
+        finally:
+            zf.close()  # release the archive once its text is in memory
+        num_lines = len(contents)
+        gzf = gzip.GzipFile(os.path.join(GEONAMES_DATA_PC, 'allCountries.gz'), 'w')
+        try:
+            for i, line in enumerate(contents):
+                if not line:
+                    continue
+                row = dict(zip(in_fields, map(str.strip, line.split('\t'))))
+                if options['lengths']:
+                    for k in len_fields:
+                        lengths[k] = max(len(row[k]), lengths[k])
+                if row['latitude'] == '' or row['longitude'] == '':
+                    continue  # a row without coordinates is not written out
+                if row['accuracy'] == '':
+                    row['accuracy'] = '0'
+                gzf.write('\t'.join([row[k] for k in in_fields]) + '\n')
+                if i % 10000 == 0:
+                    sys.stdout.write(self.clear_line)
+                    sys.stdout.write('Compressing allCountries.txt: %.2f%% (%d/%d)' %
+                                     ((100. * i) / num_lines, i, num_lines))
+                    sys.stdout.flush()
+        finally:
+            gzf.close()  # always finish the gzip stream, even if a row fails
+        sys.stdout.write('\n')
+        if options['lengths']:
+            for fld in len_fields:
+                sys.stdout.write('%s:\t%d\n' % (fld, lengths[fld]))
 
     def handle_noargs(self, **options):
         if options['time']:
@@ -129,5 +191,8 @@ def handle_noargs(self, **options):
         if not options['no_alternates']:
             self.alternateNames(**options)
 
-        if options['time']: 
+        if not options['no_postalcodes']:
+            self.postalCodes(**options)
+
+        if options['time']:
             sys.stdout.write('\nCompleted in %s\n' % (datetime.datetime.now() - start_time))