Skip to content

Commit

Permalink
GeoNames authority
Browse files Browse the repository at this point in the history
  • Loading branch information
kdid committed May 18, 2020
1 parent 1252625 commit e3fffef
Show file tree
Hide file tree
Showing 34 changed files with 677 additions and 185 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,5 @@ erl_crash.dump
# Ignore package tarball (built via "mix hex.build").
authoritex-*.tar

/.elixir_ls/

1 change: 1 addition & 0 deletions config/config.exs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use Mix.Config

config :authoritex,
authorities: [
Authoritex.GeoNames,
Authoritex.Getty.AAT,
Authoritex.Getty.TGN,
Authoritex.Getty.ULAN,
Expand Down
5 changes: 4 additions & 1 deletion config/test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,7 @@ use Mix.Config

config :exvcr,
vcr_cassette_library_dir: "test/fixtures/vcr_cassettes",
custom_cassette_library_dir: "test/fixtures/custom_cassettes"
custom_cassette_library_dir: "test/fixtures/custom_cassettes",
filter_sensitive_data: [
[pattern: "username=([^&#]*)", placeholder: "<<geonames_username>>"]
]
2 changes: 2 additions & 0 deletions lib/authoritex.ex
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
defmodule Authoritex do
@moduledoc "Elixir authority lookup behavior"

@type fetch_result :: %{
id: String.t(),
label: String.t() | nil,
Expand Down
166 changes: 166 additions & 0 deletions lib/authoritex/geonames.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
defmodule Authoritex.GeoNames do
  @moduledoc """
  Authoritex implementation for the GeoNames webservice.

  Record identifiers are URIs of the form `https://sws.geonames.org/<geonameId>`.
  The GeoNames username is read at request time from the `GEONAMES_USERNAME`
  environment variable, falling back to the `:geonames_username` key of the
  `:authoritex` application environment.
  """
  @behaviour Authoritex

  @http_uri_base "https://sws.geonames.org/"
  @api_base "http://api.geonames.org/"
  @request_headers [{"User-Agent", "Authoritex"}]

  # Human-readable descriptions for the numeric codes found in the
  # `status.value` field of a GeoNames error payload.
  @error_codes %{
    "10" => "Authorization Exception",
    "11" => "record does not exist",
    "12" => "other error",
    "13" => "database timeout",
    "14" => "invalid parameter",
    "15" => "no result found",
    "16" => "duplicate exception",
    "17" => "postal code not found",
    "18" => "daily limit of credits exceeded",
    "19" => "hourly limit of credits exceeded",
    "20" => "weekly limit of credits exceeded",
    "21" => "invalid input",
    "22" => "server overloaded exception",
    "23" => "service not implemented",
    "24" => "radius too large",
    "27" => "maxRows too large"
  }

  @impl Authoritex
  def can_resolve?(@http_uri_base <> _), do: true
  def can_resolve?(_), do: false

  @impl Authoritex
  def code, do: "geonames"

  @impl Authoritex
  def description, do: "GeoNames geographical database"

  @doc """
  Fetches the GeoNames record identified by `id`.

  Returns `{:ok, map}` with `:id`, `:label`, `:hint` and `:qualified_label` keys,
  or `{:error, reason}`. `id` must start with `#{@http_uri_base}`; any other
  value raises a `MatchError`.
  """
  @impl Authoritex
  def fetch(id) do
    # Assertive match: everything after the URI base is the GeoNames id.
    @http_uri_base <> geoname_id = id

    case api_get("getJSON", geonameId: geoname_id, username: username()) do
      {:ok, %{body: response, status_code: 200}} ->
        parse_fetch_result(response)

      {:ok, %{body: response, status_code: status_code}} ->
        {:error, parse_geonames_error(response, status_code)}

      {:error, error} ->
        {:error, error}
    end
  end

  @doc """
  Searches GeoNames for `query`, requesting at most `max_results` rows.

  Returns `{:ok, results}` where each result is a map with `:id`, `:label` and
  `:hint` keys, or `{:error, reason}`.
  """
  @impl Authoritex
  def search(query, max_results \\ 30) do
    case api_get("searchJSON", q: query, username: username(), maxRows: max_results) do
      {:ok, %{body: response, status_code: 200}} ->
        parse_search_result(response)

      {:ok, %{body: response, status_code: status_code}} ->
        {:error, parse_geonames_error(response, status_code)}

      {:error, error} ->
        {:error, error}
    end
  end

  # Issues a GET against a GeoNames API endpoint with the shared request headers.
  defp api_get(endpoint, params) do
    HTTPoison.get(@api_base <> endpoint, @request_headers, params: params)
  end

  # Raw body: decode it, then dispatch on the decoded shape.
  defp parse_search_result(response) when is_binary(response) do
    case Jason.decode(response) do
      {:ok, decoded} ->
        parse_search_result(decoded)

      {:error, error} ->
        {:error, {:bad_response, error}}
    end
  end

  defp parse_search_result(%{"geonames" => results}) when is_list(results) do
    {:ok, Enum.map(results, &search_hit/1)}
  end

  # GeoNames can report an error in the body of a 200 response.
  defp parse_search_result(%{"status" => %{"message" => message, "value" => error_code}}) do
    {:error, describe_error(error_code, message)}
  end

  defp parse_search_result(other), do: {:error, {:bad_response, other}}

  # Converts one entry of the "geonames" result list into a search result map.
  defp search_hit(result) do
    %{
      id: @http_uri_base <> to_string(result["geonameId"]),
      label: result["name"],
      hint: parse_hint(result)
    }
  end

  # Raw body: decode it, then dispatch on the decoded shape.
  defp parse_fetch_result(response) when is_binary(response) do
    case Jason.decode(response) do
      {:ok, decoded} ->
        parse_fetch_result(decoded)

      {:error, error} ->
        {:error, {:bad_response, error}}
    end
  end

  # GeoNames can report an error in the body of a 200 response.
  defp parse_fetch_result(%{"status" => %{"message" => message, "value" => error_code}}) do
    {:error, describe_error(error_code, message)}
  end

  defp parse_fetch_result(%{"geonameId" => geoname_id, "name" => name} = response) do
    hint = parse_hint(response)

    {:ok,
     %{
       id: @http_uri_base <> to_string(geoname_id),
       label: name,
       hint: hint,
       # Drop nil parts so a record without a hint is labelled by its name alone.
       qualified_label: [name, hint] |> Enum.filter(& &1) |> Enum.join(", ")
     }}
  end

  # Decoded JSON of an unexpected shape: report it instead of crashing.
  defp parse_fetch_result(other), do: {:error, {:bad_response, other}}

  # Builds the error reason for a non-200 response.
  defp parse_geonames_error(response, status_code) do
    case Jason.decode(response) do
      # Code 11 ("record does not exist") is reported as the bare HTTP status code.
      {:ok, %{"status" => %{"value" => 11}}} ->
        status_code

      {:ok, %{"status" => %{"message" => message, "value" => error_code}}} ->
        "Status #{status_code}: " <> describe_error(error_code, message)

      {:ok, other} ->
        {:bad_response, other}

      {:error, error} ->
        {:bad_response, error}
    end
  end

  # Formats a GeoNames status payload as "<description> (<code>). <message>".
  defp describe_error(error_code, message) do
    code = to_string(error_code)
    "#{error_description(code)} (#{code}). #{message}"
  end

  # Derives the disambiguating hint from the record's feature code ("fcode").
  defp parse_hint(%{"fcode" => "PCLI"}), do: nil

  defp parse_hint(%{"fcode" => fcode, "countryName" => country_name})
       when fcode in ["RGN", "ADM1"],
       do: join_hint([country_name])

  defp parse_hint(%{"fcode" => _, "countryName" => country_name, "adminName1" => admin_name}),
    do: join_hint([admin_name, country_name])

  defp parse_hint(_), do: nil

  # Joins the non-blank parts with ", "; returns nil when nothing is left.
  defp join_hint(parts) do
    case parts |> Enum.reject(&(&1 in [nil, ""])) |> Enum.join(", ") do
      "" -> nil
      hint -> hint
    end
  end

  defp error_description(code), do: Map.get(@error_codes, code, "unknown error")

  # coveralls-ignore-start
  defp username do
    System.get_env("GEONAMES_USERNAME") ||
      Application.get_env(:authoritex, :geonames_username)
  end

  # coveralls-ignore-stop
end
56 changes: 56 additions & 0 deletions test/authoritex/geonames_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
defmodule Authoritex.GeoNamesTest do
  # Exercises Authoritex.GeoNames through the shared Authoritex.TestCase options
  # below, plus GeoNames-specific error and hint cases replayed from cassettes.
  alias Authoritex.GeoNames

  use Authoritex.TestCase,
    module: Authoritex.GeoNames,
    code: "geonames",
    description: "GeoNames geographical database",
    test_uris: [
      "https://sws.geonames.org/4302561"
    ],
    bad_uri: "https://sws.geonames.org/43025619",
    expected: [
      hint: "Kentucky, United States",
      id: "https://sws.geonames.org/4302561",
      label: "Nicholasville",
      qualified_label: "Nicholasville, Kentucky, United States"
    ],
    search_result_term: "Kentucky",
    search_count_term: "Kentucky"

  # Error reason expected for the responses recorded in the "geonames_500" cassette.
  @server_error {:error, "Status 500: server overloaded exception (22). Internal Server Error."}

  describe "errors" do
    test "fetch" do
      use_cassette "geonames_500", match_requests_on: [:query], custom: true do
        assert GeoNames.fetch("https://sws.geonames.org/4560349") == @server_error
      end
    end

    test "search" do
      use_cassette "geonames_500", match_requests_on: [:query], custom: true do
        assert GeoNames.search("Authority Down") == @server_error
      end
    end

    test "GeoNames invalid parameter response" do
      use_cassette "geonames_invalid_parameter", match_requests_on: [:query] do
        assert GeoNames.fetch("https://sws.geonames.org/wrong") ==
                 {:error, "invalid parameter (14). For input string: \"wrong\""}
      end
    end
  end

  describe "hints" do
    test "GeoNames custom hint for `fcode` `RGN` is `countryName`" do
      use_cassette "geonames_custom_hint", match_requests_on: [:query] do
        assert GeoNames.fetch("https://sws.geonames.org/11887750") ==
                 {:ok,
                  %{
                    hint: "United States",
                    id: "https://sws.geonames.org/11887750",
                    label: "Midwest",
                    qualified_label: "Midwest, United States"
                  }}
      end
    end
  end
end
40 changes: 40 additions & 0 deletions test/fixtures/custom_cassettes/geonames_500.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
[
{
"request": {
"body": "",
"headers": {
"User-Agent": "Authoritex"
},
"method": "get",
"options": [],
"request_body": "",
"url": "http://api.geonames.org/getJSON?geonameId=4560349&<<geonames_username>>"
},
"response": {
"status_code": 500,
"headers": {
"Content-Type": "application/json;charset=UTF-8"
},
"body": "{\"status\":{\"message\":\"Internal Server Error.\",\"value\":22}}"
}
},
{
"request": {
"body": "",
"headers": {
"User-Agent": "Authoritex"
},
"method": "get",
"options": [],
"request_body": "",
"url": "http://api.geonames.org/searchJSON?q=Authority+Down&<<geonames_username>>&maxRows=30"
},
"response": {
"status_code": 500,
"headers": {
"Content-Type": "application/json;charset=UTF-8"
},
"body": "{\"status\":{\"message\":\"Internal Server Error.\",\"value\":22}}"
}
}
]
12 changes: 6 additions & 6 deletions test/fixtures/vcr_cassettes/authoritex_fetch_failure.json

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions test/fixtures/vcr_cassettes/authoritex_fetch_success.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,26 @@
"binary": false,
"body": "<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n <madsrdf:PersonalName rdf:about=\"http://id.loc.gov/authorities/names/no2011087251\" xmlns:madsrdf=\"http://www.loc.gov/mads/rdf/v1#\">\n <rdf:type rdf:resource=\"http://www.loc.gov/mads/rdf/v1#Authority\"/>\n <madsrdf:authoritativeLabel>Valim, Jose</madsrdf:authoritativeLabel>\n <madsrdf:elementList rdf:parseType=\"Collection\">\n <madsrdf:FullNameElement>\n\t<madsrdf:elementValue>Valim, Jose</madsrdf:elementValue>\n </madsrdf:FullNameElement>\n </madsrdf:elementList>\n <madsrdf:isMemberOfMADSCollection rdf:resource=\"http://id.loc.gov/authorities/names/collection_NamesAuthorizedHeadings\"/>\n <madsrdf:isMemberOfMADSCollection rdf:resource=\"http://id.loc.gov/authorities/names/collection_LCNAF\"/>\n <madsrdf:hasExactExternalAuthority rdf:resource=\"http://viaf.org/viaf/sourceID/LC%7Cno2011087251#skos:Concept\"/>\n <madsrdf:identifiesRWO>\n <madsrdf:RWO rdf:about=\"http://id.loc.gov/rwo/agents/no2011087251\">\n\t<rdf:type rdf:resource=\"http://id.loc.gov/ontologies/bibframe/Person\"/>\n\t<rdf:type rdf:resource=\"http://xmlns.com/foaf/0.1/Person\"/>\n\t<rdfs:label xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\">Valim, Jose</rdfs:label>\n\t<bflc:contributorTo xmlns:bflc=\"http://id.loc.gov/ontologies/bflc/\">\n\t <bf:Work rdf:about=\"http://id.loc.gov/resources/works/17032688\" xmlns:bf=\"http://id.loc.gov/ontologies/bibframe/\">\n\t <bflc:aap>Valim, Jose. 
Crafting rails applications :</bflc:aap>\n\t </bf:Work>\n\t</bflc:contributorTo>\n </madsrdf:RWO>\n </madsrdf:identifiesRWO>\n <madsrdf:isMemberOfMADSScheme rdf:resource=\"http://id.loc.gov/authorities/names\"/>\n <identifiers:lccn xmlns:identifiers=\"http://id.loc.gov/vocabulary/identifiers/\">no2011087251</identifiers:lccn>\n <identifiers:local xmlns:identifiers=\"http://id.loc.gov/vocabulary/identifiers/\">(OCoLC)oca08877947</identifiers:local>\n <madsrdf:hasSource>\n <madsrdf:Source>\n\t<madsrdf:citation-source>Crafting Rails applications, c2011:</madsrdf:citation-source>\n\t<madsrdf:citation-note>t.p. (Jose Valim)</madsrdf:citation-note>\n\t<madsrdf:citation-status>found</madsrdf:citation-status>\n </madsrdf:Source>\n </madsrdf:hasSource>\n <madsrdf:adminMetadata>\n <ri:RecordInfo xmlns:ri=\"http://id.loc.gov/ontologies/RecordInfo#\">\n\t<ri:recordChangeDate rdf:datatype=\"http://www.w3.org/2001/XMLSchema#dateTime\">2011-06-06T00:00:00</ri:recordChangeDate>\n\t<ri:recordStatus rdf:datatype=\"http://www.w3.org/2001/XMLSchema#string\">new</ri:recordStatus>\n\t<ri:recordContentSource rdf:resource=\"http://id.loc.gov/vocabulary/organizations/oco\"/>\n\t<ri:languageOfCataloging rdf:resource=\"http://id.loc.gov/vocabulary/iso639-2/eng\"/>\n </ri:RecordInfo>\n </madsrdf:adminMetadata>\n <madsrdf:adminMetadata>\n <ri:RecordInfo xmlns:ri=\"http://id.loc.gov/ontologies/RecordInfo#\">\n\t<ri:recordChangeDate rdf:datatype=\"http://www.w3.org/2001/XMLSchema#dateTime\">2011-06-07T06:21:47</ri:recordChangeDate>\n\t<ri:recordStatus rdf:datatype=\"http://www.w3.org/2001/XMLSchema#string\">revised</ri:recordStatus>\n\t<ri:recordContentSource rdf:resource=\"http://id.loc.gov/vocabulary/organizations/oco\"/>\n\t<ri:languageOfCataloging rdf:resource=\"http://id.loc.gov/vocabulary/iso639-2/eng\"/>\n </ri:RecordInfo>\n </madsrdf:adminMetadata>\n <rdf:type rdf:resource=\"http://www.w3.org/2004/02/skos/core#Concept\"/>\n <skos:prefLabel 
xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\">Valim, Jose</skos:prefLabel>\n <skos:exactMatch rdf:resource=\"http://viaf.org/viaf/sourceID/LC%7Cno2011087251#skos:Concept\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\"/>\n <skos:inScheme rdf:resource=\"http://id.loc.gov/authorities/names\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\"/>\n <skos:changeNote xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\">\n <cs:ChangeSet xmlns:cs=\"http://purl.org/vocab/changeset/schema#\">\n\t<cs:subjectOfChange rdf:resource=\"http://id.loc.gov/authorities/names/no2011087251\"/>\n\t<cs:creatorName rdf:resource=\"http://id.loc.gov/vocabulary/organizations/oco\"/>\n\t<cs:createdDate rdf:datatype=\"http://www.w3.org/2001/XMLSchema#dateTime\">2011-06-06T00:00:00</cs:createdDate>\n\t<cs:changeReason rdf:datatype=\"http://www.w3.org/2001/XMLSchema#string\">new</cs:changeReason>\n </cs:ChangeSet>\n </skos:changeNote>\n <skos:changeNote xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\">\n <cs:ChangeSet xmlns:cs=\"http://purl.org/vocab/changeset/schema#\">\n\t<cs:subjectOfChange rdf:resource=\"http://id.loc.gov/authorities/names/no2011087251\"/>\n\t<cs:creatorName rdf:resource=\"http://id.loc.gov/vocabulary/organizations/oco\"/>\n\t<cs:createdDate rdf:datatype=\"http://www.w3.org/2001/XMLSchema#dateTime\">2011-06-07T06:21:47</cs:createdDate>\n\t<cs:changeReason rdf:datatype=\"http://www.w3.org/2001/XMLSchema#string\">revised</cs:changeReason>\n </cs:ChangeSet>\n </skos:changeNote>\n </madsrdf:PersonalName>\n</rdf:RDF>\n",
"headers": {
"Date": "Tue, 12 May 2020 18:07:24 GMT",
"Date": "Thu, 14 May 2020 16:06:52 GMT",
"Content-Type": "application/rdf+xml; charset=UTF-8",
"Content-Length": "4907",
"Connection": "keep-alive",
"Set-Cookie": "__cfduid=df3defcb6a0081a964fa2f4947c0ef3731589306844; expires=Thu, 11-Jun-20 18:07:24 GMT; path=/; domain=.loc.gov; HttpOnly; SameSite=Lax",
"Set-Cookie": "__cfduid=d34c647aa87ca1d940ee9ea15cfbfb90c1589472412; expires=Sat, 13-Jun-20 16:06:52 GMT; path=/; domain=.loc.gov; HttpOnly; SameSite=Lax",
"X-RWO-URI": "http://id.loc.gov/rwo/agents/no2011087251",
"ETag": "4e8d8eb3f55d0911a9162daeb11be049",
"X-URI": "http://id.loc.gov/authorities/names/no2011087251",
"cache-control": "public, max-age=2419200",
"X-Varnish": "457197331 445456501",
"Age": "341955",
"X-Varnish": "469373373 463002114",
"Age": "255660",
"Via": "1.1 varnish-v4",
"Access-Control-Allow-Origin": "*",
"Access-Control-Allow-Methods": "HEAD, POST, GET, OPTIONS",
"Access-Control-Allow-Headers": "Content-Type, Access-Control-Allow-Headers, Authorization, X-Requested-With",
"Accept-Ranges": "bytes",
"CF-Cache-Status": "DYNAMIC",
"Server": "cloudflare",
"CF-RAY": "59261443fb1371b9-ORD",
"cf-request-id": "02abaafe7c000071b9c9b23200000001"
"CF-RAY": "5935de71ea72817c-ORD",
"cf-request-id": "02b5895b2f0000817cf93a8200000001"
},
"status_code": 200,
"type": "ok"
Expand Down
38 changes: 19 additions & 19 deletions test/fixtures/vcr_cassettes/authoritex_search_results.json

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions test/fixtures/vcr_cassettes/authoritex_search_results_empty.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,23 @@
"binary": false,
"body": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<feed xmlns=\"http://www.w3.org/2005/Atom\">\n<title>Library of Congress Authorities and Vocabulary Service: Search Results</title><link href=\"http://id.loc.gov/search/?q=NO_resulteeeees scheme:http://id.loc.gov/authorities/names&amp;start=1&amp;format=atom\" rel=\"self\"/><id>info:lc/search/?q=NO_resulteeeees+scheme:http://id.loc.gov/authorities/names</id><updated>2020-05-11T15:48:28.393794-04:00</updated><opensearch:totalResults xmlns:opensearch=\"http://a9.com/-/spec/opensearch/1.1/\">0</opensearch:totalResults><opensearch:startIndex xmlns:opensearch=\"http://a9.com/-/spec/opensearch/1.1/\">1</opensearch:startIndex><opensearch:itemsPerPage xmlns:opensearch=\"http://a9.com/-/spec/opensearch/1.1/\">30</opensearch:itemsPerPage>\n</feed>\n",
"headers": {
"Date": "Tue, 12 May 2020 18:07:25 GMT",
"Date": "Thu, 14 May 2020 16:06:50 GMT",
"Content-Type": "application/atom+xml; charset=UTF-8",
"Content-Length": "773",
"Connection": "keep-alive",
"Set-Cookie": "__cfduid=df6cfe2b3a8da33c387f37ba46b963f2b1589306845; expires=Thu, 11-Jun-20 18:07:25 GMT; path=/; domain=.loc.gov; HttpOnly; SameSite=Lax",
"Set-Cookie": "__cfduid=dacc51eb29381888cc6d54b4a61ca71051589472410; expires=Sat, 13-Jun-20 16:06:50 GMT; path=/; domain=.loc.gov; HttpOnly; SameSite=Lax",
"cache-control": "public, max-age=2419200",
"X-Varnish": "465850648 465993753",
"Age": "80336",
"X-Varnish": "468543935 465993753",
"Age": "245901",
"Via": "1.1 varnish-v4",
"Access-Control-Allow-Origin": "*",
"Access-Control-Allow-Methods": "HEAD, POST, GET, OPTIONS",
"Access-Control-Allow-Headers": "Content-Type, Access-Control-Allow-Headers, Authorization, X-Requested-With",
"Accept-Ranges": "bytes",
"CF-Cache-Status": "DYNAMIC",
"Server": "cloudflare",
"CF-RAY": "592614478d9b71b9-ORD",
"cf-request-id": "02abab00b8000071b9c9b45200000001"
"CF-RAY": "5935de644895817c-ORD",
"cf-request-id": "02b58952af0000817cf9283200000001"
},
"status_code": 200,
"type": "ok"
Expand Down
28 changes: 28 additions & 0 deletions test/fixtures/vcr_cassettes/geonames_custom_hint.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
[
{
"request": {
"body": "",
"headers": {
"User-Agent": "Authoritex"
},
"method": "get",
"options": [],
"request_body": "",
"url": "http://api.geonames.org/getJSON?geonameId=11887750&<<geonames_username>>"
},
"response": {
"binary": false,
"body": "{\"timezone\":{\"gmtOffset\":-6,\"timeZoneId\":\"America/Chicago\",\"dstOffset\":-5},\"bbox\":{\"east\":-80.518693,\"south\":35.995683,\"north\":49.384358,\"west\":-104.05769800000002,\"accuracyLevel\":0},\"asciiName\":\"Midwest\",\"astergdem\":338,\"countryId\":\"6252001\",\"fcl\":\"L\",\"srtm3\":335,\"countryCode\":\"US\",\"lat\":\"42.65982\",\"fcode\":\"RGN\",\"continentCode\":\"NA\",\"lng\":\"-93.93863\",\"geonameId\":11887750,\"toponymName\":\"Midwest\",\"population\":65000000,\"wikipediaURL\":\"en.wikipedia.org/wiki/Midwestern_United_States\",\"adminName5\":\"\",\"adminName4\":\"\",\"adminName3\":\"\",\"alternateNames\":[{\"name\":\"https://en.wikipedia.org/wiki/Midwestern_United_States\",\"lang\":\"link\"},{\"isPreferredName\":true,\"name\":\"Midwestern United States\"}],\"adminName2\":\"\",\"name\":\"Midwest\",\"fclName\":\"parks,area, ...\",\"countryName\":\"United States\",\"fcodeName\":\"region\",\"adminName1\":\"\"}",
"headers": {
"Date": "Mon, 18 May 2020 14:04:37 GMT",
"Server": "Apache/2.4.6 (CentOS) mod_jk/1.2.41 OpenSSL/1.0.1e-fips PHP/5.4.16",
"Cache-Control": "no-cache",
"Access-Control-Allow-Origin": "*",
"Transfer-Encoding": "chunked",
"Content-Type": "application/json;charset=UTF-8"
},
"status_code": 200,
"type": "ok"
}
}
]

0 comments on commit e3fffef

Please sign in to comment.