Browse files

Merge branch 'master' of github.com:mstevens/Geo-Postcode

  • Loading branch information...
2 parents 62747cb + bed9d2e commit 7689f399539ec4ce74a2e4c73e5abaf85db11ab3 @mstevens committed Jan 21, 2011
Showing with 142 additions and 5 deletions.
  1. +4 −0 README
  2. +135 −0 bin/geo_postcode_import_data.pl
  3. +1 −3 lib/Geo/Postcode.pm
  4. +2 −2 lib/Geo/Postcode/Location.pm
View
4 README
@@ -13,6 +13,10 @@ to people who want to validate input, as it should be very quick and
memory efficient and doesn't load the location data or associated libraries
until you ask for it.
+For a full UK Postcode dataset please see:
+
+http://www.ordnancesurvey.co.uk/oswebsite/opendata/
+
any trouble, write to wross@cpan.org or post a bug at rt.cpan.org.
will
View
135 bin/geo_postcode_import_data.pl
@@ -0,0 +1,135 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+
+use File::Find::Rule;
+use Geo::Coordinates::OSGB qw(grid2ll set_ellipsoid);
+use Geo::HelmertTransform;
+use IO::File;
+use List::Util qw(min max);
+use Text::CSV_XS;
+use DBI;
+$|++;
+
+set_ellipsoid( 6378137.0, 6356752.3141 );    # use WGS84
+my $AIRY1830 = Geo::HelmertTransform::datum('Airy1830');
+my $WGS84    = Geo::HelmertTransform::datum('WGS84');
+
+my $csv = Text::CSV_XS->new();
+
+# Configs
+#
+# Command line: [outdatabase.db] /path/to/data_root
+# With two arguments the first is the database file and the second is the
+# data root; with a single argument it is the data root and the database
+# file falls back to the default name.
+my $data_root = $ARGV[1] || $ARGV[0];
+my $db_file   = $ARGV[1] ? $ARGV[0] : 'full_postcode_data.db';
+my $tablename = 'postcodes';
+
+die "Usage: $0 [outdatabase.db] /path/to/data_root\n"
+    unless defined $data_root;
+
+# Sanity check that $data_root really is an unpacked data directory.
+my $test_csv = "$data_root/Data/ab.csv";
+die "Unable to find: $test_csv" unless -e $test_csv;
+
+# Never overwrite or append to an existing database file.
+if ( -e $db_file ) {
+    warn "You already have: $db_file - exiting";
+    exit;
+}
+
+my $dbh = DBI->connect( "dbi:SQLite:dbname=$db_file", "", "" );
+die "SQLite connection failed\n" unless $dbh;
+
+{
+
+    # Create the table
+    my $columns = join( ", ",
+        map {"$_ varchar(255)"}
+            qw(fixed_format gride gridn latitude longitude) );
+    $dbh->do(
+        "create table $tablename (postcode varchar(12) primary key, $columns);"
+    ) or die "Unable to create table $tablename: " . $dbh->errstr . "\n";
+}
+
+# One placeholder per listed column (six of each); the bind values passed
+# to execute() below must stay in this same order.
+my $sth
+    = $dbh->prepare(
+    "INSERT INTO $tablename (postcode, fixed_format, gride, gridn, latitude, longitude) VALUES (?, ?, ?, ?, ?, ?)"
+    ) or die "Unable to prepare insert: " . $dbh->errstr . "\n";
+
+my $count = 0;
+foreach my $filename (
+    File::Find::Rule->new->file->name('*.csv')->in("$data_root/Data/") )
+{
+    my $fh = IO::File->new( $filename, 'r' )
+        or die "Unable to open $filename: $!\n";
+
+    while ( my $line = <$fh> ) {
+        $csv->parse($line)
+            or die "Unable to parse $filename line "
+            . $fh->input_line_number . ": "
+            . $csv->error_diag() . "\n";
+        my @columns = $csv->fields();
+
+        # Field positions read from each CSV row: 0 = postcode,
+        # 10 and 11 = the two grid coordinates used below.
+        my ( $postcode, $osgb1936_x, $osgb1936_y )
+            = ( $columns[0], $columns[10], $columns[11] );
+        $count++;
+        $postcode = uc $postcode;
+
+        # following commented out line is more flexible. CSV will probably
+        # only need other one
+        #if ( $postcode =~ m{^([A-Z]+)(\d{1,2}|\d[A-Z])\s*(\d)([A-Z]{2})$} ) {
+
+        my $fixed_format;
+        if ( $postcode
+            =~ m{^([A-Z]{1,2})(\d{1,2}|\d[A-Z])\s?(\d)([A-Z]{2})$} )
+        {
+            $fixed_format = sprintf( "%-4s %d%2s", $1 . $2, $3, $4 );
+        } else {
+            die "Can't format postcode '" . $postcode . "'\n";
+        }
+
+        # The primary key is stored without any spaces.
+        $postcode =~ s/ +//g;
+
+        my ( $latitude, $longitude );
+
+        # only get lat/long for postcodes with a grid reference
+        if ( $osgb1936_x && $osgb1936_y ) {
+
+            # convert UK National Grid coordinates to latitude, longitude
+            ( $latitude, $longitude ) = grid2ll( $osgb1936_x, $osgb1936_y );
+            ( $latitude, $longitude )
+                = Geo::HelmertTransform::convert_datum( $AIRY1830, $WGS84,
+                $latitude, $longitude, 0 );
+        }
+
+        # Bind order matches the column list in the INSERT statement.
+        $sth->execute( $postcode, $fixed_format, $osgb1936_x, $osgb1936_y,
+            $latitude, $longitude );
+    }
+
+    $fh->close;
+}
+
+print "Processed $count postcodes into $db_file\n";
+
+$sth->finish;
+$dbh->disconnect;
+
+__END__
+
+=head1 NAME
+
+geo_postcode_import_data.pl - Import Code-Point Open data
+
+=head1 SYNOPSIS
+
+ % geo_postcode_import_data.pl [outdatabase.db] '/path/to/Code-Point Open'
+
+=head1 DESCRIPTION
+
+This program imports Code-Point Open data - postcode data which links
+postcodes to National Grid locations.
+
+The directory 'Code-Point Open' should be the unzipped codepo_gb.zip
+which you can request from https://www.ordnancesurvey.co.uk/opendatadownload/products.html
+
+This should have the directory structure...
+
+./Data/ab.csv
+./Data/al.csv
+...
+./Data/yo.csv
+./Data/ze.csv
+./Doc/Code-Point_Open_column_headers.csv
+./Doc/Codelist.txt
+./Doc/licence.txt
+./Doc/metadata.txt
+
+
+
+
+
+
+
+
+
View
4 lib/Geo/Postcode.pm
@@ -53,9 +53,7 @@ To work with US zipcodes, you need Geo::Postalcode instead.
Any postcode, whether fully or partly specified, can be turned into a grid reference. The Post Office calls it a centroid, and it marks the approximate centre of the area covered by the code.
-Unfortunately, and inexplicably, this information is not public domain: unless you're prepared to work at a very crude level, you have to buy location data either from the Post Office or a data shop.
-
-This module comes with with a basic set of publicly-available coordinates that covers nearly all the postcode districts (ie it maps the first block of the postcode but not the second).
+You will need to download your own data set from http://www.ordnancesurvey.co.uk/oswebsite/opendata/ (Code-Point Open) if you want a full set of data. This module comes with a basic set of publicly-available coordinates that covers nearly all the postcode districts (ie it maps the first block of the postcode but not the second).
This means that the coordinates we return and the distances we calculate are a bit crude, being based at best on the postcode area. See the POD for Geo::Delivery::Location for how to override the standard data set with something more comprehensive.
View
4 lib/Geo/Postcode/Location.pm
@@ -35,14 +35,14 @@ There are at least three ways to supply your own gridref data.
=item * replace the data file
+You can get a full set of postcode data from http://www.ordnancesurvey.co.uk/oswebsite/opendata/ (Code-Point Open).
+
If you can get your data into a SQLite file, all you have to do is set either C<Geo::Postcode::Location::datafile> or $ENV{POSTCODE_DATA} to the full path to your data file:
$Geo::Postcode::Location::datafile = '/home/site/data/postcodes.db';
# or
PerlSetEnv POSTCODE_DATA /home/site/data/postcodes.db
-I've included (in ./useful) an idiot script that I use to turn .csv data into a SQLite file suitable for use with this module.
-
=item * replace the database handle
The query that we use to retrieve location information is very simple, and should work with any DBI database handle. If your application already makes available a suitable database handle, or you would like to create one externally and make sure it is reused, it should just work:

0 comments on commit 7689f39

Please sign in to comment.