Skip to content

Latest commit

 

History

History
1746 lines (1594 loc) · 58.2 KB

TODO.org

File metadata and controls

1746 lines (1594 loc) · 58.2 KB

Annotated function

See toString() for IFn.
(use 'debug.core)
(import 'clojure.lang.AFunction)

;; Print any AFunction through the :doc entry of its metadata instead of
;; the default object representation.
(defmethod print-method clojure.lang.AFunction
  [function out]
  (-> function
      meta
      :doc
      (print-simple out)))

;; The docstring belongs between the name and the parameter vector.  Written
;; as ^{:doc ...} in front of [], the metadata only annotates the parameter
;; vector and never becomes the :doc of #'foo.
(defn foo
  "docstring"
  []
  2)

;; Show the plain function next to a copy that carries :doc metadata, to
;; compare how each one prints (`debug' comes from debug.core).
(debug foo
       (with-meta foo {:doc "docstring"}))
(use 'debug.core)

(import 'clojure.lang.IFn)

(defn name-fn
  "Wrap the one-argument function `fn' in an IFn whose toString returns
  `name'."
  [name fn]
  (reify IFn
    ;; Only the single-argument arity is provided; every other `invoke'
    ;; overload declared by IFn (there are 22) is left unimplemented.
    (invoke [_ argument] (fn argument))
    (toString [_] name)))

(defmacro source-fn
  "Define `name' as a one-argument IFn that applies `function'; its
  toString is the unevaluated source form of `function'."
  [name function]
  `(def ~name
     (reify IFn
       (invoke [self# argument#] (~function argument#))
       (toString [self#] (str '~function)))))

;; Two ways of building a named one-argument callable: from an explicit
;; label (name-fn) and from the function's own source form (source-fn).
(def plusish (name-fn "(fn [x] (+ x x))"
                      (fn [x] (+ x x))))

(source-fn plusishish (fn [y] (+ y y)))

;; Exercise an inline reify (applied to 2, then as a bare value) and the two
;; vars defined above, each both as a value and applied to 2.  `debug'
;; comes from debug.core.
(debug
 ((reify clojure.lang.IFn
    (toString [this] "harro")
    (invoke [this a]
      ((fn [x] (+ x x))
       a)))
  2)
 (reify clojure.lang.IFn
   (toString [this] "harro")
   (invoke [this a]
     ((fn [x] (+ x x))
      a)))
 plusish
 (plusish 2)
 plusishish
 (plusishish 2)
 )

CANCELED Switch to Clojure-friendly geotools.

  • CLOSING NOTE [2011-12-01 Thu 11:51]
    Should either implement ISeq for features or move to this.

https://github.com/iwillig/geoscript-clj

Tool to convert shapefiles to maps.

Need to have an internal map if we’re going to bucket collisions.

See Hickey’s addURL:

/**
 * Adds a URL to the current thread's context class loader.
 *
 * @param url either a String, which is parsed into a URL, or a URL
 *            instance (anything else fails the cast to URL)
 * @throws MalformedURLException declared for the String-to-URL conversion
 * @throws IllegalAccessError if the context class loader is not a
 *         DynamicClassLoader
 */
static public void addURL(Object url) throws MalformedURLException{
    URL u = (url instanceof String) ? (new URL((String) url)) : (URL) url;
    ClassLoader ccl = Thread.currentThread().getContextClassLoader();
    // Only a DynamicClassLoader is accepted as the target; any other
    // loader type is rejected below.
    if(ccl instanceof DynamicClassLoader)
        ((DynamicClassLoader)ccl).addURL(u);
    else
        throw new IllegalAccessError("Context classloader is not a DynamicClassLoader");
}

It doesn’t work anymore because the ContextClassLoader is no longer a DynamicClassLoader (though it is a URLClassLoader).

(use 'add-classpath.core)

(add-classpath "lib/gt-shapefile-8.0-M3.jar")
(add-classpath "lib/jts-1.12.jar")
(add-classpath "lib/tools.cli-0.2.2-SNAPSHOT.jar")
;; (add-classpath "lib/cadr-1.0.0-SNAPSHOT-standalone.jar")
(add-classpath "lib/lambda-1.0.1-20111019.122151-1.jar")

(use 'debug.core)
(use 'clojure.tools.cli)
;; (use 'cadr.core)
(use 'lambda.core)

(import 'java.net.URLClassLoader)
(import 'java.net.URL)
(import 'java.io.File)

(import 'clojure.lang.AFunction)
(import 'clojure.lang.IFn)
(import 'org.geotools.data.shapefile.ShapefileDataStore)
(import 'org.geotools.data.collection.CollectionFeatureSource)
(import 'com.vividsolutions.jts.io.WKBWriter)

;;; Print functions via the :doc entry of their metadata.
;;;
;;; This doesn't work, since it doesn't redefine it in clojure.core or
;;; tools.cli. Can we intern it elsewhere?
;;; <http://stackoverflow.com/a/4599444>
(defmethod print-method AFunction
  [f writer]
  (print-simple (:doc (meta f)) writer))

;; The docstring must come before the parameter vector; placed after it,
;; the string is just a body expression whose value is discarded.
(defmacro def-monadic
  "Implements a monadic function whose toString is the source of the
function itself.

`name' is the var to define; `function' is a form evaluating to a
one-argument function.  Only the single-argument `invoke' is provided."
  [name function]
  `(def ~name
     (reify IFn
       (toString [this] (str (quote ~function)))
       (invoke [this arg1#] (~function arg1#)))))

;; Read one form from `string' and evaluate it.
;; NOTE(review): whatever text reaches this function is executed as code.
(def eval-string
  (λ [string]
     (eval (read-string string))))

;; Default name extractor: the feature's "NAME10" attribute.
(def-monadic default-feature-name
  (λ [feature]
     (.getAttribute feature "NAME10")))

;; Default geometry extractor: the feature's default geometry.
(def-monadic default-feature-geometry
  (λ [feature]
     (.getDefaultGeometry feature)))

;; Default filter: accept every feature.
(def-monadic default-feature-filter
  (constantly true))

;;; Open the shapefile at path `shapefile' as a ShapefileDataStore and
;;; return the result of calling .features on its feature collection.
;;;
;;; Really should just make an ISeq out of features, so that we can
;;; reduce on it; etc.
(def features
  (λ [shapefile]
     (-> (File. shapefile)
         .toURI
         .toURL
         (ShapefileDataStore.)
         .getFeatureSource
         .getFeatures
         .features)))

;; Left fold over a feature iterator: starts from `val', combines with
;; (f accumulator feature) for each feature, and returns the final
;; accumulator.  The iterator is closed on the way out (with-open).
(def reduce-features
  (λ [f val features]
     (with-open [iterator features]
       (loop [accumulator val]
         (if-not (.hasNext iterator)
           accumulator
           (recur (f accumulator (.next iterator))))))))

;; Call `f' on every feature of the iterator for its side effects and
;; return nil.  The iterator is closed on the way out (with-open).
(def do-features
  (λ [f features]
     (with-open [iterator features]
       (while (.hasNext iterator)
         (f (.next iterator))))))

;; Hex-encode the WKB form of a geometry.  One WKBWriter is created up
;; front and shared by every call.
(let [wkb-writer (WKBWriter.)]
  (def geometry->hex
    (λ [geometry]
       (->> geometry
            (.write wkb-writer)
            WKBWriter/toHex))))

;; Print one line: `name', a tab, then the elements of `hexen' joined
;; with "|".
(def print-geometries
  (λ [name hexen]
     (let [joined (apply str (interpose "|" hexen))]
       (printf "%s\t%s\n" name joined))))

;; Print one line: `name', a tab, then `hex'.
(def print-geometry
  (λ [name hex]
     (print (format "%s\t%s\n" name hex))))

;; For every file in `files', print one name/hex line per feature that
;; passes `feature-filter'.  The name comes from `feature-name' and the
;; geometry from `feature-geometry'; duplicates are not merged.
(def print-geometry-map
  (λ [feature-name feature-geometry feature-filter files]
     (doseq [file files]
       (do-features
        (λ [feature]
           (when (feature-filter feature)
             (print-geometry
              (feature-name feature)
              (geometry->hex (feature-geometry feature)))))
        (features file)))))

;; Like print-geometry-map, but first collects the hex geometries of all
;; accepted features into a map keyed by feature name (across every file
;; in `files'), then prints one line per name with that name's geometries
;; joined by "|".
;;
;; feature-name     - feature -> key used for bucketing
;; feature-geometry - feature -> geometry to encode
;; feature-filter   - feature -> truthy to keep the feature
;; files            - shapefile paths
(def print-bucketed-geometry-map
  (λ [feature-name feature-geometry feature-filter files]
     (let [bucket-file
           (λ [name->geometries file]
              (reduce-features
               (λ [name->geometries feature]
                  (if (feature-filter feature)
                    ;; conj on a missing (nil) entry starts a list and
                    ;; prepends afterwards, so the newest geometry comes
                    ;; first within a bucket.
                    (update-in name->geometries
                               [(feature-name feature)]
                               conj
                               (geometry->hex (feature-geometry feature)))
                    name->geometries))
               name->geometries
               (features file)))]
       (doseq [[name geometries] (reduce bucket-file {} files)]
         ;; Reuse the shared line formatter instead of repeating its
         ;; printf here.
         (print-geometries name geometries)))))

;; Script entry point: parse the command line, then print the geometry map
;; for the remaining (shapefile) arguments, or the usage text when there
;; are none.
;;
;; -n, -g and -f take Clojure source text that eval-string turns into a
;; value; when an option is omitted the matching default-feature-*
;; callable is used.  -b is a flag defaulting to true and selects the
;; bucketed output.
(let [[{:keys [feature-name
               feature-geometry
               feature-filter
               bucket-duplicates]}
       files
       usage]
      (cli *command-line-args*
           ["-n" "--name" "Extract a name from a feature"
            :name :feature-name
            :parse-fn eval-string
            ;; Plain default, like the other options; the placeholder
            ;; {:doc "harro"} metadata that used to wrap it was debugging
            ;; residue.
            :default default-feature-name]
           ["-g" "--geometry" "Extract a geometry from a feature"
            :name :feature-geometry
            :parse-fn eval-string
            :default default-feature-geometry]
           ["-f" "--filter" "Filter features"
            :name :feature-filter
            :parse-fn eval-string
            :default default-feature-filter]
           ["-b" "--bucket" "Bucket duplicates"
            :name :bucket-duplicates
            :flag true
            :default true])]
  (cond
    (empty? files)
    (println usage)

    bucket-duplicates
    (print-bucketed-geometry-map feature-name
                                 feature-geometry
                                 feature-filter
                                 files)

    :else
    (print-geometry-map feature-name
                        feature-geometry
                        feature-filter
                        files)))

The tool takes parameters for name extraction, geometry extraction and filtering; each is a Clojure function applied to the feature.

shp2map in: https://github.com/Factual/data-projects/tree/master/projects; link the executable .jar in wiki? maven? proof of concept: zip codes.

Test against Tiger data.

FIPS codes for states.
addClassPath("lib/gt-shapefile-8.0-M3.jar");
addClassPath("lib/jts-1.12.jar");
addClassPath("lib/guava-10.0.1.jar");

import java.net.URL;
import java.util.HashMap;
import java.nio.file.Files;
import java.nio.file.FileSystems;

import org.geotools.data.shapefile.ShapefileDataStore;
import com.vividsolutions.jts.io.WKBWriter;
import com.vividsolutions.jts.io.WKTReader;
import com.google.common.collect.ImmutableMap;

// for (feature: new ShapefileDataStore(new URL("file:shp/tl_2010_01_state10.shp")).getFeatureSource().getFeatures().toArray()) {
//     print(feature.getDefaultGeometry().getClass());
//     print(feature.getType().getAttributeDescriptors());
//     print(feature.getType().getTypes());
//     print(feature.getAttribute("NAME10"));
// }

// Lookup table from two-digit FIPS code (as a string) to two-letter
// abbreviation.  Built once with Guava's ImmutableMap builder.
FIPStoState = new ImmutableMap.Builder()
    .put("01", "AL")
    .put("02", "AK")
    .put("04", "AZ")
    .put("05", "AR")
    .put("06", "CA")
    .put("08", "CO")
    .put("09", "CT")
    .put("10", "DE")
    .put("11", "DC")
    .put("12", "FL")
    .put("13", "GA")
    .put("15", "HI")
    .put("16", "ID")
    .put("17", "IL")
    .put("18", "IN")
    .put("19", "IA")
    .put("20", "KS")
    .put("21", "KY")
    .put("22", "LA")
    .put("23", "ME")
    .put("24", "MD")
    .put("25", "MA")
    .put("26", "MI")
    .put("27", "MN")
    .put("28", "MS")
    .put("29", "MO")
    .put("30", "MT")
    .put("31", "NE")
    .put("32", "NV")
    .put("33", "NH")
    .put("34", "NJ")
    .put("35", "NM")
    .put("36", "NY")
    .put("37", "NC")
    .put("38", "ND")
    .put("39", "OH")
    .put("40", "OK")
    .put("41", "OR")
    .put("42", "PA")
    .put("44", "RI")
    .put("45", "SC")
    .put("46", "SD")
    .put("47", "TN")
    .put("48", "TX")
    .put("49", "UT")
    .put("50", "VT")
    .put("51", "VA")
    .put("53", "WA")
    .put("54", "WV")
    .put("55", "WI")
    .put("56", "WY")
    .put("60", "AS")
    .put("64", "FM")
    .put("66", "GU")
    .put("68", "MH")
    .put("69", "MP")
    .put("70", "PW")
    .put("72", "PR")
    .put("74", "UM")
    .put("78", "VI")
    .build();

// reader = new WKTReader();
// One WKBWriter shared by every iteration of the loop below.
writer = new WKBWriter();

// Print "<NAME10>\t<hex WKB>" for each feature of the places shapefile.
// toArray() is called on the feature collection before iterating.
for (feature: new ShapefileDataStore(new URL("file:places/tl_2010_06_place10.shp")).getFeatureSource().getFeatures().toArray()) {
    // print(feature.getDefaultGeometry().getClass());
    // print(feature.getType().getAttributeDescriptors());
    // print(feature.getType().getTypes());
    // print(feature);
    // print(FIPStoState.get(feature.getAttribute("STATEFP10")));
    hex = WKBWriter.toHex(writer.write(feature.getDefaultGeometry()));
    print(feature.getAttribute("NAME10") + "\t" + hex);
    // break;
}

// shapefiles =
//     Files.newDirectoryStream(FileSystems.getDefault().getPath("places"),
//                              "*.shp").iterator();

// for (file: shapefiles) {
//     print(file);
// }

// // for (file: Files.newDirectoryStream(FileSystems.getDefault().getPath("places"), "*.shp")) {
// //     print(file);
// // }

Create tool: here’s a shapefile, here’s a predicate; out: mapfile; value of attribute x = y (regex).

shp2pgsql: creates table, shape file, gives SQL statement;

US: state, city, zip; UK: city, zip(?).

E.g. lower the score based on non-pip; sanity check.

Test point against a list of envelope-geometries.

source("envelope-geometries.bsh");

import com.vividsolutions.jts.geom.Point;
import com.vividsolutions.jts.geom.Coordinate;
import com.vividsolutions.jts.geom.GeometryFactory;
import com.vividsolutions.jts.geom.impl.CoordinateArraySequence;

geometryFactory = new GeometryFactory();

// Build a Point from a latitude/longitude pair using the shared
// geometryFactory.  Longitude is passed as the first Coordinate argument
// and latitude as the second.
point(latitude, longitude) {
    Coordinate[] coordinates = new Coordinate[] {
        new Coordinate(longitude, latitude)
    };
    CoordinateArraySequence sequence =
        new CoordinateArraySequence(coordinates);
    return new Point(sequence, geometryFactory);
}

// True when some entry of envelopeGeometries covers the given coordinate.
// Each entry's envelope is tested first; its full geometry is consulted
// only when the envelope already covers the point.
covers(envelopeGeometries, latitude, longitude) {
    point = point(latitude, longitude);
    for (candidate: envelopeGeometries) {
        if (!candidate.envelope.covers(point)) {
            continue;
        }
        if (candidate.geometry.covers(point)) {
            return true;
        }
    }
    return false;
}

print(covers(envelopeGeometries, 38.6815049, -75.9773377));

Produce envelope-geometries given pipe-delimited hexes.

  • CLOSING NOTE [2011-11-21 Mon 11:10]
    Testing lists of envelope-geometries is defective under beanshell, though.
addClassPath("lib/guava-10.0.1.jar");
addClassPath("lib/jts-1.12.jar");
addClassPath("lib/junit-4.10.jar");
addClassPath("lib/junit.jar");

import java.util.Arrays;
import java.util.LinkedList;
import javax.xml.bind.DatatypeConverter;

import com.google.common.collect.Lists;
import com.google.common.base.Function;
import com.vividsolutions.jts.io.WKTReader;
import com.vividsolutions.jts.io.WKBReader;
import org.junit.runner.JUnitCore;
import org.junit.runner.Request;
// import org.junit.Assert;
import junit.framework.Assert;
import junit.framework.TestCase;
import junit.framework.Test;

// Scripted-object constructor: pairs a geometry with its envelope and
// returns `this', so callers read the parts as .envelope and .geometry.
envelopeGeometry(envelope, geometry) {
    // Self-assignments of the two parameters, kept as written.
    envelope = envelope;
    geometry = geometry;

    // Equal when both the envelopes and the geometries are equal.
    public equals(envelopeGeometry) {
        return this.envelope.equals(envelopeGeometry.envelope) &&
            this.geometry.equals(envelopeGeometry.geometry);
    }

    // Sum of the two parts' hash codes, in line with equals() above.
    public hashCode() {
        return this.envelope.hashCode() +
            this.geometry.hashCode();
    }

    return this;
}

// Turn a "|"-delimited string of hex-encoded WKB geometries into a list
// of envelopeGeometry objects, one per hex chunk.
//
// hexen   - the pipe-delimited hex string
// returns - the list produced by Lists.transform over the split chunks
//           (NOTE(review): Guava documents this as a lazily transformed
//           view, so each access may re-parse its element -- confirm
//           whether callers need a materialised copy)
public toEnvelopeGeometries(hexen) {
    reader = new WKBReader();

    // Return the transformed list explicitly rather than leaving it as a
    // bare expression statement at the end of the method.
    return Lists.transform(Arrays.asList(hexen.split("\\|")),
                           new Function() {
                               apply(hex) {
                                   geometry = reader.read
                                       (DatatypeConverter.parseHexBinary(hex));
                                   return envelopeGeometry(geometry.getEnvelope(),
                                                           geometry);
                               }
                           });
}

// Envelope-geometries parsed from two hex-encoded geometries joined by "|".
envelopeGeometries = toEnvelopeGeometries("0101000020E610000077A96CB38CFE52C029C8748D3B574340|0101000020E6100000F78B7ED9E21655C0433058BB375B4040");

// Comparison list built by hand from two WKT points (presumably the same
// two geometries as the hex strings above -- TODO confirm).
testEnvelopeGeometries = new LinkedList() {
        {
            reader = new WKTReader();
            geometry = reader.read("POINT (-75.9773377 38.6815049)");
            add(envelopeGeometry(geometry.getEnvelope(),
                                 geometry));
            geometry = reader.read("POINT (-84.3575958 32.7126383)");
            add(envelopeGeometry(geometry.getEnvelope(),
                                 geometry));
        }
    };

// Should be true, but isn't; has to do with defective list-equality?
// NOTE(review): possibly the list's element comparison never reaches the
// scripted equals() of envelopeGeometry -- unverified.
print(envelopeGeometries.equals(testEnvelopeGeometries));

// Scratch experiments with running JUnit from the script.

// Scripted class with a single test method that always fails.
public class news extends Object {
    testHarro() {
        Assert.assertTrue(false);
    }
}

// Run only news.testHarro and print the reported failures.
print(new JUnitCore().run(Request.method(news.class, "testHarro")).getFailures());
// Run an anonymous TestCase whose run() is overridden with an empty body,
// and print whether the result counts as successful.
print(new JUnitCore().run(new TestCase() {
        run() {
        }

        testHarro() {
            print("uoetneuonth");
        }
    }).wasSuccessful());

Test countries.

  • CLOSING NOTE [2011-11-21 Mon 08:32]
    17 misses, 759 hits: 97.81%
(use
 debug
 postgresql
 srfi-69
 )

;; Call PROCEDURE with a connection opened from CONNECTION-SPEC and
;; disconnect afterwards.
;;
;; Three arities:
;;   (connection-spec procedure)
;;     -- type parsers default to (default-type-parsers)
;;   (connection-spec type-parsers procedure)
;;     -- type unparsers default to (default-type-unparsers)
;;   (connection-spec type-parsers type-unparsers procedure)
;; The shorter forms fill in the default and re-enter the longer one.
(define call-with-postgresql-connection
  (case-lambda
   ((connection-spec procedure)
    (call-with-postgresql-connection
     connection-spec
     (default-type-parsers)
     procedure))
   ((connection-spec type-parsers procedure)
    (call-with-postgresql-connection
     connection-spec
     type-parsers
     (default-type-unparsers)
     procedure))
   ((connection-spec type-parsers type-unparsers procedure)
    (let ((connection #f))
      ;; dynamic-wind: connect in the before thunk, run PROCEDURE in the
      ;; body thunk, disconnect in the after thunk.
      (dynamic-wind
          (lambda () (set! connection
                           (connect connection-spec
                                    type-parsers
                                    type-unparsers)))
          (lambda () (procedure connection))
          (lambda () (disconnect connection)))))))

;; Collect every geometry of the `country' table into a hash table keyed
;; by upper-cased country code, then write country-geometries.poi with one
;; line per country: the code, a tab, and that country's geometries
;; joined by "|".
(let ((country->geometry (make-hash-table)))
  (call-with-postgresql-connection
   '((host . "bm02")
     (user . "postgres")
     (password . "postgres")
     (dbname . "gazetteer_world"))
   (lambda (connection)
     ;; Prepend each row's geometry to the list stored under its country
     ;; code, starting from the empty list for a new code.
     (row-for-each* (lambda (country-code geometry)
                      (hash-table-update!/default
                       country->geometry
                       country-code
                       (lambda (geometries)
                         (cons geometry geometries))
                       '()))
                   (query connection "SELECT UPPER(country_code), geometry FROM country;"))))
  (with-output-to-file
      "country-geometries.poi"
    (lambda ()
      (hash-table-walk
       country->geometry
       (lambda (country geometries)
         (format #t "~a\t~a~%" country (string-join geometries "|")))))))
// Count how many sample addresses fall inside the US country geometries.
source("parse-geometries.bsh");

// Geometries stored under the key "US" in the country map file.
US = parseGeometries("country-geometries.poi").get("US");
addresses = parseAddresses("us_address_sample_2.tab");

hits = misses = 0;

for (address: addresses) {
    if (covers(US, address.point)) {
        hits++;
    } else {
        misses++;
    }
}

// Output format: "<hits>, <misses>".
print(hits + ", " + misses);

There are ad-hoc country and us_state tables, by the way; we might have to pull from there. That doesn’t explain the bad performance of city-data, though; or was it state that netted us 20% performance? No, it was cities.

Parse city, state, country files, check against Factual data.

  • CLOSING NOTE [2011-11-21 Mon 08:33]
    376 nulls, 94 hits, 306 misses
addClassPath("lib/guava-10.0.1.jar");
addClassPath("lib/jts-1.12.jar");

import java.io.File;
import java.util.List;
import java.util.Map;
import java.util.HashMap;
import java.util.Arrays;
import java.util.concurrent.Callable;
import javax.xml.bind.DatatypeConverter;

import com.google.common.base.Charsets;
import com.google.common.base.Stopwatch;
import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.google.common.io.Files;
import com.google.common.io.LineProcessor;
import com.google.common.collect.Lists;
import com.google.common.collect.Iterables;
import com.google.common.collect.ImmutableList;
import com.vividsolutions.jts.io.WKBReader;
import com.vividsolutions.jts.geom.Point;
import com.vividsolutions.jts.geom.Polygon;
import com.vividsolutions.jts.geom.Coordinate;
import com.vividsolutions.jts.geom.GeometryFactory;
import com.vividsolutions.jts.geom.impl.CoordinateArraySequence;

// Invoke callable.call(), print the stopwatch afterwards, and return
// whatever the call produced.
time(callable) {
    watch = new Stopwatch();
    watch.start();
    value = callable.call();
    print("Time elapsed: " + watch.toString());
    return value;
}

// Can we support some kind of fuzzy get; or at least normalize by
// eliminating spaces? See
// <http://ieeexplore.ieee.org/Xplore/login.jsp?url=http%3A%2F%2Fieeexplore.ieee.org%2Fiel5%2F5550961%2F5565579%2F05565628.pdf%3Farnumber%3D5565628&authDecision=-203>.
// Canonicalise a lookup key by upper-casing it.  Locale.ROOT keeps the
// result independent of the JVM's default locale; with the no-argument
// toUpperCase() a Turkish default locale, for example, maps "i" to a
// dotted capital I, so the same name could normalise differently from
// one machine to the next.
normalizeKey(key) {
    return key.toUpperCase(java.util.Locale.ROOT);
}

// Read a map file (one "<name>\t<hex>|<hex>|..." record per line) and
// return a HashMap from normalizeKey(name) to an ImmutableList of the
// parsed geometries that pass the filter below.
//
// geometries - path of the map file
// returns    - presumably the processor's getResult() value, i.e. the
//              nameToGeometries map (verify against Guava's
//              Files.readLines contract)
parseGeometries(geometries) {
    return Files.readLines(new File(geometries),
                           Charsets.UTF_8,
                           new LineProcessor() {
            nameToGeometries = new HashMap();
            reader = new WKBReader();

            public getResult() {
                return nameToGeometries;
            }

            public processLine(line) {
                nameGeometries = line.split("\t");
                name = nameGeometries[0];
                // NOTE(review): index 1 is read without a length check, so
                // a line without a tab has no second element to read.
                // NOTE(review): `geometries' is also the name of the
                // enclosing method's parameter.
                geometries = Arrays.asList(nameGeometries[1].split("\\|"));
                // Decode each hex chunk as WKB.
                geometries = Lists.transform
                    (geometries,
                     new Function() {
                         apply(hex) {
                             return reader.read(DatatypeConverter.parseHexBinary(hex));
                         }
                     });
                // Keep only geometries whose class is exactly Polygon.
                // NOTE(review): any other geometry class (a MultiPolygon,
                // for instance) is dropped here -- confirm this is
                // intended.
                geometries = Iterables.filter
                    (geometries,
                     new Predicate() {
                         apply(geometry) {
                             return (geometry.getClass() == Polygon.class);
                         }
                     });
                // A later line with the same normalised name replaces the
                // earlier entry (HashMap.put).
                nameToGeometries.put(normalizeKey(name),
                                     ImmutableList.copyOf(geometries));
                // Always returns true to the line reader.
                return true;
            }
        });
}

// True when at least one geometry in `geometries' covers `point'.  The
// geometry's envelope is tested first; the geometry itself is consulted
// only when its envelope already covers the point.
covers(geometries, point) {
    for (candidate: geometries) {
        if (!candidate.getEnvelope().covers(point)) {
            continue;
        }
        if (candidate.covers(point)) {
            return true;
        }
    }
    return false;
}

// Scripted-object constructor for one address record.  Every parameter is
// stored on the returned object under the same name, so callers can read
// e.g. .city or .point.
address(name, address, city, state, zipcode, latitude, longitude, point) {
    this.name = name;
    this.address = address;
    this.city = city;
    this.state = state;
    this.zipcode = zipcode;
    this.latitude = latitude;
    this.longitude = longitude;
    // `point' was the only parameter not stored alongside the others,
    // although callers read address.point.
    this.point = point;
    return this;
}

geometryFactory = new GeometryFactory();

// Build a Point from a latitude/longitude pair using the shared
// geometryFactory.  Longitude is passed as the first Coordinate argument
// and latitude as the second.
point(latitude, longitude) {
    Coordinate[] coordinates = new Coordinate[] {
        new Coordinate(longitude, latitude)
    };
    CoordinateArraySequence sequence =
        new CoordinateArraySequence(coordinates);
    return new Point(sequence, geometryFactory);
}

// Read a tab-separated address file and return a LinkedList of address
// objects, one per line that has both a latitude and a longitude.
//
// Column order: name, address, city, state, zipcode, latitude, longitude.
//
// file    - path of the tab-separated file
// returns - presumably the processor's getResult() value, i.e. the
//           `addresses' list (verify against Guava's Files.readLines
//           contract)
parseAddresses(file) {
    return Files.readLines(new File(file),
                           Charsets.UTF_8,
                           new LineProcessor() {
            addresses = new LinkedList();

            public getResult() {
                return addresses;
            }

            public processLine(line) {
                // Resize to exactly seven columns; columns missing from a
                // short line end up null.
                data = Arrays.copyOf(line.split("\t"), 7, String[].class);

                // This is superfluous; the columns are destructured
                // explicitly so that it is clear which one is which.
                name = data[0];
                address = data[1];
                city = data[2];
                state = data[3];
                zipcode = data[4];
                latitude = data[5];
                longitude = data[6];

                // Lines lacking either coordinate are skipped silently.
                // NOTE(review): a present but non-numeric coordinate is
                // not guarded against before Double.valueOf.
                if (latitude != null && longitude != null) {
                    latitude = Double.valueOf(latitude);
                    longitude = Double.valueOf(longitude);
                    point = point(latitude, longitude);

                    addresses.add(address(name,
                                          address,
                                          city,
                                          state,
                                          zipcode,
                                          latitude,
                                          longitude,
                                          point));
                }

                // Always returns true to the line reader.
                return true;
            }
        });
}
// Check each sample address against the geometries registered for its
// city name.
source("parse-geometries.bsh");

cities = parseGeometries("poi/poi.US.city.map");
addresses = parseAddresses("us_address_sample_2.tab");

// nulls: no geometries found for the city key; hits/misses: whether any
// of the city's geometries covers the address point.
hits = misses = nulls = 0;

for (address: addresses) {
    geometries = cities.get(normalizeKey(address.city));
    if (geometries == null) {
        nulls++;
    } else {
        if (covers(geometries, address.point)) {
            hits++;
        } else {
            misses++;
        }
    }
}

// Output format: "<nulls>, <hits>, <misses>".
print(nulls + ", " + hits + ", " + misses);

  

Map file for each country

(use srfi-1 debug)

;; Read every datum from countries.txt until end of file and show the
;; list of their string forms.
(debug
 (with-input-from-file
     "countries.txt"
   (lambda ()
     (unfold
      eof-object?
      ->string
      ;; The previous seed is ignored; each step just reads the next datum.
      (lambda x (read))
      (read)))))
gazetteer_world=# select distinct admin_level from place;
 admin_level 
-------------
           8
          12
         100
           2
          10
           3
          23
          11
       60177
           4
           5
           9
           7
           0
       51000
           6
(16 rows)    
gazetteer_world=# select distinct class from place;
  class   
----------
 historic
 waterway
 highway
 landuse
 boundary
 building
 natural
 amenity
 aeroway
 bridge
 railway
 tunnel
 shop
 place
 tourism
 leisure
(16 rows)
gazetteer_world=# select count(1) from placex;
  count   
----------
 85345299
(1 row)

gazetteer_world=# select count(1) from place;
  count   
----------
 86979851
(1 row)
gazetteer_world=# select distinct class, type, admin_level from place where name[1].value = 'Los Angeles';
  class  |       type       | admin_level 
---------+------------------+-------------
 place   | city             |         100
 shop    | shoes            |         100
 place   | postcode         |         100
 highway | unclassified     |         100
 place   | locality         |         100
 place   | county           |         100
 place   | suburb           |         100
 place   | hamlet           |         100
 amenity | restaurant       |         100
 amenity | place_of_worship |         100
 amenity | school           |         100
 place   | city             |           8
 amenity | pharmacy         |         100
 highway | residential      |         100
 place   | village          |         100
(15 rows)    

Hmm; no postcodes:

gazetteer_world=# select distinct class, type, admin_level from placex where name[1].value = 'Los Angeles';
  class  |       type       | admin_level 
---------+------------------+-------------
 amenity | restaurant       |         100
 amenity | place_of_worship |         100
 place   | city             |         100
 shop    | shoes            |         100
 amenity | school           |         100
 highway | unclassified     |         100
 place   | locality         |         100
 place   | city             |           8
 place   | county           |         100
 place   | suburb           |         100
 amenity | pharmacy         |         100
 place   | hamlet           |         100
 highway | residential      |         100
 place   | village          |         100
(14 rows)

See what class and type look like for admin_level <= 8.

gazetteer_world=# select distinct admin_level, class, type from placex where admin_level <= 8 order by admin_level, class, type asc;
 admin_level |  class   |          type          
-------------+----------+------------------------
           0 | amenity  | school
           0 | boundary | adminitrative
           0 | bridge   | yes
           0 | building | yes
           0 | highway  | residential
           0 | highway  | secondary
           0 | highway  | tertiary
           0 | highway  | unclassified
           0 | landuse  | recreation_ground
           0 | landuse  | reserve
           0 | landuse  | residential
           0 | leisure  | nature_reserve
           0 | leisure  | park
           0 | place    | hamlet
           0 | place    | house
           0 | place    | region
           0 | place    | state
           0 | place    | village
           0 | shop     | supermarket
           0 | waterway | river
           2 | amenity  | border_control
           2 | boundary | adminitrative
           2 | highway  | road
           2 | highway  | tertiary
           2 | landuse  | island
           2 | leisure  | nature_reserve
           2 | leisure  | slipway
           2 | natural  | coastline
           2 | place    | administrative
           2 | place    | city
           2 | place    | island
           2 | place    | postcode
           2 | place    | town
           2 | waterway | river
           2 | waterway | stream
           3 | boundary | adminitrative
           3 | highway  | primary
           3 | landuse  | reserve
           3 | natural  | coastline
           3 | place    | city
           3 | place    | postcode
           3 | place    | region
           3 | waterway | river
           3 | waterway | stream
           4 | amenity  | fuel
           4 | amenity  | parking
           4 | boundary | adminitrative
           4 | highway  | motorway
           4 | highway  | path
           4 | highway  | primary
           4 | highway  | residential
           4 | highway  | secondary
           4 | highway  | service
           4 | highway  | tertiary
           4 | highway  | track
           4 | highway  | trunk
           4 | highway  | unclassified
           4 | historic | archaeological_site
           4 | historic | yes
           4 | landuse  | farm
           4 | landuse  | landfill
           4 | landuse  | reserve
           4 | leisure  | beach
           4 | leisure  | nature_reserve
           4 | leisure  | non_public-park
           4 | leisure  | park
           4 | leisure  | park_
           4 | natural  | beach
           4 | natural  | coastline
           4 | natural  | wood
           4 | place    | borough
           4 | place    | city
           4 | place    | county
           4 | place    | island
           4 | place    | islet
           4 | place    | postcode
           4 | place    | region
           4 | place    | Reserve
           4 | place    | Rserve
           4 | place    | state
           4 | place    | town
           4 | place    | village
           4 | tourism  | attraction
           4 | tourism  | guest_house
           4 | tourism  | hotel
           4 | tourism  | museum
           4 | tourism  | picnic_site
           4 | waterway | canal
           4 | waterway | coast
           4 | waterway | river
           4 | waterway | stream
           5 | boundary | adminitrative
           5 | bridge   | yes
           5 | highway  | primary
           5 | highway  | secondary
           5 | highway  | tertiary
           5 | highway  | trunk
           5 | landuse  | commercial
           5 | landuse  | residential
           5 | place    | city
           5 | place    | state
           5 | place    | town
           5 | railway  | tram
           5 | waterway | river
           5 | waterway | stream
           6 | boundary | adminitrative
           6 | bridge   | yes
           6 | building | no
           6 | highway  | footway
           6 | highway  | path
           6 | highway  | pedestrian
           6 | highway  | primary
           6 | highway  | proposed
           6 | highway  | residential
           6 | highway  | road
           6 | highway  | secondary
           6 | highway  | service
           6 | highway  | tertiary
           6 | highway  | track
           6 | highway  | trunk
           6 | highway  | unclassified
           6 | historic | boundary
           6 | historic | heritage
           6 | landuse  | administrative
           6 | landuse  | forest
           6 | landuse  | reservoir
           6 | landuse  | residential
           6 | landuse  | retail
           6 | leisure  | golf_course
           6 | natural  | coastline
           6 | natural  | land
           6 | natural  | water
           6 | place    | administrative
           6 | place    | city
           6 | place    | county
           6 | place    | hamlet
           6 | place    | island
           6 | place    | islet
           6 | place    | locality
           6 | place    | postcode
           6 | place    | region
           6 | place    | suburb
           6 | place    | town
           6 | place    | village
           6 | railway  | abandoned
           6 | railway  | tram
           6 | tunnel   | yes
           6 | waterway | canal
           6 | waterway | drain
           6 | waterway | river
           6 | waterway | stream
           7 | boundary | adminitrative
           7 | bridge   | yes
           7 | highway  | primary
           7 | highway  | residential
           7 | highway  | secondary
           7 | highway  | tertiary
           7 | highway  | unclassified
           7 | historic | boundary_stone
           7 | landuse  | residential
           7 | natural  | coastline
           7 | place    | city
           7 | place    | postcode
           7 | place    | town
           7 | place    | village
           7 | railway  | abandoned
           7 | tourism  | hotel
           7 | tunnel   | yes
           7 | waterway | canal
           7 | waterway | drain
           7 | waterway | river
           7 | waterway | stream
           8 | aeroway  | aerodrome
           8 | boundary | adminitrative
           8 | bridge   | yes
           8 | building | yes
           8 | highway  | cycleway
           8 | highway  | footway
           8 | highway  | motorway
           8 | highway  | motorway_link
           8 | highway  | path
           8 | highway  | pedestrian
           8 | highway  | primary
           8 | highway  | primary_link
           8 | highway  | residential
           8 | highway  | road
           8 | highway  | secondary
           8 | highway  | service
           8 | highway  | tertiary
           8 | highway  | track
           8 | highway  | trunk
           8 | highway  | trunk_link
           8 | highway  | unclassified
           8 | historic | boundary_stone
           8 | landuse  | administrative
           8 | landuse  | allotments
           8 | landuse  | cemetery
           8 | landuse  | farm
           8 | landuse  | forest
           8 | landuse  | industrial
           8 | landuse  | military
           8 | landuse  | r
           8 | landuse  | reservoir
           8 | landuse  | residental
           8 | landuse  | residential
           8 | leisure  | nature_reserve
           8 | leisure  | park
           8 | natural  | coastline
           8 | natural  | fell
           8 | natural  | ridge
           8 | natural  | water
           8 | place    | borough
           8 | place    | city
           8 | place    | city and borough
           8 | place    | Freguesia de Meirinhas
           8 | place    | hamlet
           8 | place    | house
           8 | place    | houses
           8 | place    | island
           8 | place    | locality
           8 | place    | municipality
           8 | place    | postcode
           8 | place    | suburb
           8 | place    | town
           8 | place    | vilage
           8 | place    | village
           8 | railway  | abandoned
           8 | tourism  | camp_site
           8 | tunnel   | no
           8 | tunnel   | yes
           8 | waterway | canal
           8 | waterway | ditch
           8 | waterway | drain
           8 | waterway | river
           8 | waterway | stream
(235 rows)    
16
43
74
AD
AE
AF
AG
AI
AL
AM
AN
AO
AQ
AR
AS
AT
AU
AW
AX
AY
AZ
BA
BB
BD
BE
BF
BG
BH
BI
BJ
BL
BM
BN
BO
BR
BS
BT
BW
BY
BZ
CA
CD
CF
CG
CH
CI
CK
CL
CM
CN
CO
CQ
CR
CU
CV
CY
CZ
DE
DJ
DK
DM
DO
DZ
EC
EE
EG
EH
ER
ES
ET
FI
FJ
FK
FM
FO
FR
GA
GB
GD
GE
GF
GG
GH
GI
GL
GM
GN
GO
GP
GQ
GR
GS
GT
GU
GW
GY
HN
HR
HT
HU
ID
IE
IL
IM
IN
IO
IQ
IR
IS
IT
JE
JM
JO
JP
JQ
KE
KG
KH
KI
KM
KN
KP
KR
KW
KY
KZ
LA
LB
LC
LI
LK
LR
LS
LT
LU
LV
LY
MA
MC
MD
ME
MF
MG
MH
MK
ML
MM
MN
MQ
MR
MS
MT
MU
MV
MW
MX
MY
MZ
NA
NC
NE
NG
NI
NL
NO
NP
NR
NU
NZ
OM
PA
PE
PF
PG
PH
PK
PL
PM
PN
PR
PS
PT
PW
PY
QA
RE
RO
RQ
RS
RU
RW
SA
SB
SC
SD
SE
SG
SH
SI
SJ
SK
SL
SM
SN
SO
SR
ST
SV
SY
SZ
TC
TD
TF
TG
TH
TJ
TK
TL
TM
TN
TO
TR
TT
TV
TW
TZ
UA
UG
UK
US
UY
UZ
VA
VC
VE
VG
VI
VN
VQ
VU
WE
WF
WQ
WS
WZ
YE
YT
ZA
ZM
ZW

Why do I need a list of countries? Just populate as you see it, uppercased.

(use
 debug
 postgresql
 utf8
 srfi-13
 srfi-14
 srfi-69
 matchable
 utils
 vector-lib
 )

;; Call PROCEDURE with a freshly opened PostgreSQL connection.
;;
;; Accepted argument lists:
;;   (connection-spec procedure)
;;   (connection-spec type-parsers procedure)
;;   (connection-spec type-parsers type-unparsers procedure)
;;
;; Omitted TYPE-PARSERS / TYPE-UNPARSERS fall back to
;; (default-type-parsers) / (default-type-unparsers).  The connection is
;; opened and closed through dynamic-wind, so `disconnect' runs whenever
;; control leaves PROCEDURE.
(define call-with-postgresql-connection
  (let ((with-connection
         (lambda (spec parsers unparsers receiver)
           (let ((conn #f))
             (dynamic-wind
                 (lambda ()
                   (set! conn (connect spec parsers unparsers)))
                 (lambda ()
                   (receiver conn))
                 (lambda ()
                   (disconnect conn)))))))
    (case-lambda
     ((connection-spec procedure)
      (with-connection connection-spec
                       (default-type-parsers)
                       (default-type-unparsers)
                       procedure))
     ((connection-spec type-parsers procedure)
      (with-connection connection-spec
                       type-parsers
                       (default-type-unparsers)
                       procedure))
     ((connection-spec type-parsers type-unparsers procedure)
      (with-connection connection-spec
                       type-parsers
                       type-unparsers
                       procedure)))))

;; Convert a three-level nest of hash tables into nested association
;; lists.  The outer table maps a key to a table, whose values are in
;; turn tables that are flattened with `hash-table->alist'.  Entry order
;; follows `hash-table-fold' and is therefore unspecified.
(define (country-code->type->alist country-code->type)
  ;; Second level: a table whose values are leaf hash tables.
  (define (inner-table->alist table)
    (hash-table-fold
     table
     (lambda (key leaf-table entries)
       (alist-cons key (hash-table->alist leaf-table) entries))
     '()))
  (hash-table-fold
   country-code->type
   (lambda (key table entries)
     (alist-cons key (inner-table->alist table) entries))
   '()))

;; Parse KEY-VALUE with a two-field composite parser (both fields passed
;; through `identity') and return the two fields as a (key . value) pair.
;; `match' signals an error if the parser yields anything other than a
;; two-element list.
(define (key-value-parser key-value)
  (let* ((parse-fields (make-composite-parser (list identity identity)))
         (fields (parse-fields key-value)))
    (match fields
      ((key value) (cons key value)))))

;; Visit every leaf of the nested tables
;;   country-code -> type -> name -> geometries
;; calling (WALK country-code type name geometries) once per leaf.
;; Visiting order is whatever `hash-table-walk' provides.
(define (walk-geometries country-code->type walk)
  ;; Innermost level: name -> geometries.
  (define (visit-names country-code type name->geometries)
    (hash-table-walk
     name->geometries
     (lambda (name geometries)
       (walk country-code type name geometries))))
  ;; Middle level: type -> (name -> geometries).
  (define (visit-types country-code type->name)
    (hash-table-walk
     type->name
     (lambda (type name->geometries)
       (visit-names country-code type name->geometries))))
  (hash-table-walk country-code->type visit-types))

;; Export script.
;;
;; 1. Read every named place from the `placex' table into the nested
;;    hash tables  country-code -> type -> name -> list of geometries.
;; 2. Walk that structure and append one "name<TAB>geom|geom|..." line
;;    per name to poi/poi.<country>/<Type>.map.
(let ((country-code->type (make-hash-table)))
  (call-with-postgresql-connection
   '((host . "bm02")
     (user . "postgres")
     (password . "postgres")
     (dbname . "gazetteer_world"))
   ;; Parse the "keyvalue" type into (key . value) pairs.
   (alist-cons "keyvalue"
               key-value-parser
               (default-type-parsers))
   (lambda (connection)
     ;; Run the contents of places-having-names.sql; the result is
     ;; discarded.  NOTE(review): presumably this installs the
     ;; places_having_names function, which the query below does not
     ;; use -- confirm whether this call is still needed.
     (query connection (read-all "places-having-names.sql"))
     (row-for-each*
      (lambda (country-code type names geometry)
        ;; (if (zero? (random 1000)) (debug country-code type names geometry))
        ;; (debug country-code type names geometry)
        ;; (debug names (vector->list names))
        ;; NAMES arrives as a vector of (key . value) pairs: drop the
        ;; "ref" entries, keep the trimmed values, remove duplicates.
        (let ((names
               (delete-duplicates
                (map (compose string-trim-both cdr)
                     (remove (lambda (key-value)
                               (string=? "ref" (car key-value)))
                             (vector->list names))))))
          (if (pair? names)
              ;; Each update procedure mutates the table it is handed
              ;; and returns that same table, so a freshly created
              ;; default table is what ends up stored.
              (hash-table-update!/default
               country-code->type
               country-code
               (lambda (type->name)
                 (hash-table-update!/default
                  type->name
                  type
                  (lambda (name->geometries)
                    (for-each
                     (lambda (name)
                       ;; (debug name)
                       (hash-table-update!/default
                        name->geometries
                        name
                        (lambda (geometries)
                          (cons geometry geometries))
                        '()))
                     names)
                    name->geometries)
                  (make-hash-table))
                 type->name)
               (make-hash-table)))))
      (query connection "SELECT upper(country_code), type, name, geometry FROM placex WHERE country_code IS NOT NULL AND name IS NOT NULL AND type IS NOT NULL AND class = 'place';"))))
  ;; (debug country-code->type
  ;;        (country-code->type->alist country-code->type))
  ;; The next form is disabled by the `#;' datum comment.  It is an
  ;; earlier attempt that keeps one output port per (country . type);
  ;; note that it never stores the opened port back into the table, so
  ;; the default expression would open a new file on every call.
  #;
  (let ((country-code-&c.->output (make-hash-table)))
    (walk-geometries
     country-code->type
     (lambda (country-code type name geometries)
       (let ((output (hash-table-ref/default
                      country-code-&c.->output
                      (cons country-code type)
                      (open-output-file
                       (format "poi/~a.~a.map" country-code type)))))
         (debug country-code
                type
                name
                (map (cute substring <> 0 10) geometries)
                (format output
                        "~a\t~a\n"
                        name
                        (string-join geometries "|"))))))
    (hash-table-walk country-code-&c.->output
                     (lambda (country-code-&c. output)
                       (debug 'harro
                              country-code-&c.)
                       (close-output-file output))))
  (walk-geometries
   country-code->type
   (lambda (country-code type name geometries)
     ;; Log roughly one entry in a thousand as a progress sample.
     (if (zero? (random 1000)) (debug country-code type name geometries))
     ;; Any exception for a single entry is logged and the walk goes on.
     (condition-case
      ;; NOTE(review): `create-directory' is called without the
      ;; parents flag, so this assumes "poi/" already exists -- confirm.
      (let* ((directory (format "poi/poi.~a" (string-downcase country-code)))
             (file (begin
                     (create-directory directory)
                     ;; File name: TYPE title-cased, then reduced to
                     ;; its letters only.
                     (file-open (format "~a/~a.map" directory
                                        ((compose (cut string-filter char-set:letter <>)
                                                  string-titlecase) type))
                                (+ open/wronly open/append open/creat)))))
        ;; Close the descriptor even when the write raises; otherwise
        ;; the handler below would swallow the error and leak it.
        (dynamic-wind
            (lambda () #f)
            (lambda ()
              (file-write file (format "~a\t~a\n"
                                       name
                                       (string-join geometries "|"))))
            (lambda () (file-close file))))
      (exn (exn)
       (debug country-code type name exn))))))

Take a look at reverse-geocoding in Nominatim (bm02): progressively more precise. Establish whether or not adm_level is a hierarchy. Check the tiger: country, state, city. People should be able to create maps and throw them in.

Arbitrary number of maps; point in city, country; CityCodes.map.

poi.[country].[class].map; a class that initializes with a filename; pass in a point.

WKB instead of WKT.

Method: takes string, lat-long -> true, false.

-- DROP FUNCTION IF EXISTS places_having_names(integer, integer);

-- places_having_names(max_admin_level, max_limit)
--
-- Returns at most max_limit rows from placex with class = 'place',
-- admin_level below max_admin_level, and a non-NULL result from
-- get_name_by_language for the preferred name keys listed below.
-- There is no ORDER BY, so which rows fall under the limit is
-- unspecified.
CREATE OR REPLACE FUNCTION places_having_names (integer, integer)
RETURNS TABLE (
    country_code TEXT,
    type         TEXT,
    name         TEXT,
    geometry     GEOMETRY
) AS $$
DECLARE
    max_admin_level ALIAS FOR $1;
    max_limit       ALIAS FOR $2;
    -- Name keys handed to get_name_by_language.
    name_keys TEXT[] := ARRAY['place_name:en', 'name:en', 'place_name', 'name', 'int_name'];
BEGIN
    -- Columns are qualified with placex. because the output column
    -- names of RETURNS TABLE are also visible as variables here.
    RETURN QUERY
        SELECT UPPER(placex.country_code),
               placex.type,
               get_name_by_language(placex.name, name_keys),
               placex.geometry
          FROM placex
         WHERE placex.class = 'place'
           AND get_name_by_language(placex.name, name_keys) IS NOT NULL
           AND placex.admin_level < max_admin_level
         LIMIT max_limit;
END
$$ LANGUAGE plpgsql;

-- SELECT country_code, type, name, substring(asBinary(geometry) from 1 for 10)
--        FROM places_having_names(100, 10);
# Clean, build and install the project with Maven, then run the
# org.geotools.tutorial.Quickstart main class through exec:java.
# `all' names no file, so it is declared phony; the recipe line must
# begin with a tab character.
.PHONY: all
all:
	mvn clean install exec:java -Dexec.mainClass=org.geotools.tutorial.Quickstart

If it’s a gmap, populate once with a binary format as spin-up; YAML stuff at the top of the file. Add some metadata: parse to binary.

Can specify what parser to use in the YAML. Caching strategy. Two maps: cache map, raw map.

Take the name based on the country.

Enumerate and unique.

Take country city, grab data that’s geocoded; see what passes test. Grab a bunch of points from the API; test it.

CANCELED map, reduce, filter, etc. in Java

  • CLOSING NOTE [2011-11-10 Thu 08:47]
    See Guava.
import java.util.List;
import java.util.LinkedList;
import java.util.concurrent.Callable;
import java.lang.reflect.Method;

public class Map {
    // N-ary wrapper around Callable?
    // http://stackoverflow.com/questions/1659986/java-parameterized-runnable
    public <T> List<T> map(final Callable<T> map, final List<T> list) throws Exception {
        return new LinkedList<T>() {
            {
                for (T element: list) {
                    add(map.call());
                }
            }
        };
    }

    public static void main(String[] argv) {
    }
}

Guava

  • Immutable*
  • MapMaker

Notes

Fri Nov 11 08:59:56 PST 2011

  • map file;
  • two types of files: list file, map file; if list file: is point in any of the polygons in list; if map file, is point in polygon
  • productize it: here are your files
  • vs. serialize pre-indexed object
  • scarecrow map generator
  • summaries? inputs?
  • validator: inputs, summaries

Fri Nov 18 15:16:12 PST 2011

  • create bounding boxes at spin-up