Skip to content

Commit

Permalink
GeoNames authority
Browse files Browse the repository at this point in the history
  • Loading branch information
kdid committed May 15, 2020
1 parent 1252625 commit a82b5c5
Show file tree
Hide file tree
Showing 32 changed files with 601 additions and 185 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,5 @@ erl_crash.dump
# Ignore package tarball (built via "mix hex.build").
authoritex-*.tar

/.elixir_ls/

1 change: 1 addition & 0 deletions config/config.exs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use Mix.Config

config :authoritex,
authorities: [
Authoritex.GeoNames,
Authoritex.Getty.AAT,
Authoritex.Getty.TGN,
Authoritex.Getty.ULAN,
Expand Down
5 changes: 4 additions & 1 deletion config/test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,7 @@ use Mix.Config

config :exvcr,
vcr_cassette_library_dir: "test/fixtures/vcr_cassettes",
custom_cassette_library_dir: "test/fixtures/custom_cassettes"
custom_cassette_library_dir: "test/fixtures/custom_cassettes",
filter_sensitive_data: [
[pattern: "username=([^&#]*)", placeholder: "<<geonames_username>>"]
]
4 changes: 4 additions & 0 deletions lib/authoritex.ex
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
defmodule Authoritex do
@moduledoc "Elixir authority lookup behavior"

@type fetch_result :: %{
id: String.t(),
label: String.t() | nil,
Expand All @@ -12,6 +14,8 @@ defmodule Authoritex do
# Callback specs reference the named types rather than atom literals:
# `:fetch_result` in a typespec means "the atom :fetch_result", not the
# `fetch_result()` map type declared at the top of this module.
# NOTE(review): `search_result()` is assumed to be declared in the part of this
# module not shown here — confirm before merging.
@callback fetch(String.t()) :: {:ok, fetch_result()} | {:error, term()}
@callback search(String.t(), integer()) :: {:ok, list(search_result())} | {:error, term()}

@authorities [Authoritex.LCNAF, Authoritex.GeoNames]

@doc """
Returns a label given an id.
Expand Down
157 changes: 157 additions & 0 deletions lib/authoritex/geonames.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
defmodule Authoritex.GeoNames do
  @moduledoc """
  Authoritex implementation for the GeoNames webservice.

  Identifiers handled by this module are URIs that start with
  `https://sws.geonames.org/`; the remainder of the URI is sent to the API as
  the `geonameId`.

  The GeoNames account name is looked up on every request, first from the
  `GEONAMES_USERNAME` environment variable and then from the
  `:geonames_username` key of the `:authoritex` application environment.
  """
  @behaviour Authoritex

  @http_uri_base "https://sws.geonames.org/"
  @api_base "http://api.geonames.org/"
  @request_headers [{"User-Agent", "Authoritex"}]

  # Descriptions for the numeric codes GeoNames puts in `status.value`.
  @error_codes %{
    "10" => "Authorization Exception",
    "11" => "record does not exist",
    "12" => "other error",
    "13" => "database timeout",
    "14" => "invalid parameter",
    "15" => "no result found",
    "16" => "duplicate exception",
    "17" => "postal code not found",
    "18" => "daily limit of credits exceeded",
    "19" => "hourly limit of credits exceeded",
    "20" => "weekly limit of credits exceeded",
    "21" => "invalid input",
    "22" => "server overloaded exception",
    "23" => "service not implemented",
    "24" => "radius too large",
    "27" => "maxRows too large"
  }

  @impl Authoritex
  def can_resolve?(@http_uri_base <> _), do: true
  def can_resolve?(_), do: false

  @impl Authoritex
  def code, do: "geonames"

  @impl Authoritex
  def description, do: "GeoNames geographical database"

  @doc """
  Fetches the record for a GeoNames URI.

  Returns `{:ok, map}` with `:id`, `:label`, `:hint` and `:qualified_label`,
  or `{:error, reason}`. For a non-200 response whose body carries GeoNames
  status `11` the reason is the bare HTTP status code; for other GeoNames
  status bodies it is a descriptive string.

  Raises `MatchError` when `id` does not start with `#{@http_uri_base}`.
  """
  @impl Authoritex
  def fetch(id) do
    @http_uri_base <> geoname_id = id

    # Parameter order is kept stable because recorded test cassettes match on
    # the query string.
    case request("getJSON", geonameId: geoname_id, username: username()) do
      {:ok, %{body: response, status_code: 200}} ->
        parse_fetch_result(response)

      {:ok, %{body: response, status_code: status_code}} ->
        {:error, parse_geonames_error(response, status_code)}

      {:error, error} ->
        {:error, error}
    end
  end

  @doc """
  Searches GeoNames for `query`, requesting at most `max_results` rows.

  Returns `{:ok, results}` where each result is a map with `:id`, `:label`
  and `:hint`, or `{:error, reason}`.
  """
  @impl Authoritex
  def search(query, max_results \\ 30) do
    case request("searchJSON", q: query, username: username(), maxRows: max_results) do
      {:ok, %{body: response, status_code: 200}} ->
        parse_search_result(response)

      {:ok, %{body: response, status_code: status_code}} ->
        {:error, parse_geonames_error(response, status_code)}

      {:error, error} ->
        {:error, error}
    end
  end

  # Issues a GET against the named GeoNames JSON endpoint.
  defp request(endpoint, params) do
    HTTPoison.get(@api_base <> endpoint, @request_headers, params: params)
  end

  # Read at call time so the credential is not frozen into the compiled module.
  defp username do
    System.get_env("GEONAMES_USERNAME") ||
      Application.get_env(:authoritex, :geonames_username)
  end

  # Decodes a 200 search body. GeoNames can answer 200 with a `status` error
  # object instead of a `geonames` list, so that case is reported as an error
  # rather than crashing on a missing key.
  defp parse_search_result(response) do
    case Jason.decode(response) do
      {:ok, %{"geonames" => results}} when is_list(results) ->
        {:ok, Enum.map(results, &search_entry/1)}

      {:ok, %{"status" => %{"message" => _, "value" => _} = status}} ->
        {:error, status_message(status)}

      {:ok, other} ->
        {:error, {:bad_response, other}}

      {:error, error} ->
        {:error, {:bad_response, error}}
    end
  end

  # Converts one decoded search hit into the Authoritex search result shape.
  defp search_entry(result) do
    %{
      id: @http_uri_base <> to_string(result["geonameId"]),
      label: result["name"],
      hint: parse_hint(result["adminName1"], result["countryName"])
    }
  end

  # Decodes a 200 fetch body. `adminName1` and `countryName` are treated as
  # optional so that records lacking either still resolve.
  defp parse_fetch_result(response) do
    case Jason.decode(response) do
      {:ok, %{"status" => %{"message" => _, "value" => _} = status}} ->
        {:error, status_message(status)}

      {:ok, %{"geonameId" => geoname_id, "name" => name} = record} ->
        hint = parse_hint(record["adminName1"], record["countryName"])

        qualified_label =
          [name, hint]
          |> Enum.reject(&is_nil/1)
          |> Enum.join(", ")

        {:ok,
         %{
           id: @http_uri_base <> to_string(geoname_id),
           label: name,
           hint: hint,
           qualified_label: qualified_label
         }}

      {:ok, other} ->
        {:error, {:bad_response, other}}

      {:error, error} ->
        {:error, {:bad_response, error}}
    end
  end

  # Builds the error reason for a non-200 response.
  defp parse_geonames_error(response, status_code) do
    case Jason.decode(response) do
      # Status 11 is "record does not exist": surface the HTTP status code alone.
      {:ok, %{"status" => %{"value" => 11}}} ->
        status_code

      {:ok, %{"status" => %{"message" => _, "value" => _} = status}} ->
        "Status #{status_code}: #{status_message(status)}"

      # Valid JSON that is not a GeoNames status object.
      {:ok, other} ->
        {:bad_response, other}

      {:error, error} ->
        {:bad_response, error}
    end
  end

  # Formats a GeoNames status object as "<description> (<code>). <message>".
  defp status_message(%{"message" => message, "value" => error_code}) do
    "#{error_description(to_string(error_code))} (#{error_code}). #{message}"
  end

  # Joins the non-blank parts into "admin, country"; nil when both are blank.
  defp parse_hint(admin_name, country_name) do
    parts = Enum.reject([admin_name, country_name], &(&1 in [nil, ""]))

    case parts do
      [] -> nil
      _ -> Enum.join(parts, ", ")
    end
  end

  defp error_description(code), do: Map.get(@error_codes, code, "unknown error")
end
43 changes: 43 additions & 0 deletions test/authoritex/geonames_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
defmodule Authoritex.GeoNamesTest do
  alias Authoritex.GeoNames

  use Authoritex.TestCase,
    module: Authoritex.GeoNames,
    code: "geonames",
    description: "GeoNames geographical database",
    test_uris: [
      "https://sws.geonames.org/4302561"
    ],
    bad_uri: "https://sws.geonames.org/43025619",
    expected: [
      hint: "Kentucky, United States",
      id: "https://sws.geonames.org/4302561",
      label: "Nicholasville",
      qualified_label: "Nicholasville, Kentucky, United States"
    ],
    search_result_term: "Kentucky",
    search_count_term: "Kentucky"

  # Error expected from both endpoints when replaying the custom `geonames_500`
  # cassette, whose responses are HTTP 500 with GeoNames status value 22.
  @server_error {:error, "Status 500: server overloaded exception (22). Internal Server Error."}

  describe "errors" do
    test "fetch" do
      use_cassette "geonames_500", match_requests_on: [:query], custom: true do
        assert GeoNames.fetch("https://sws.geonames.org/4560349") == @server_error
      end
    end

    test "search" do
      use_cassette "geonames_500", match_requests_on: [:query], custom: true do
        assert GeoNames.search("Authority Down") == @server_error
      end
    end

    test "GeoNames invalid parameter response" do
      use_cassette "geonames_invalid_parameter", match_requests_on: [:query] do
        assert GeoNames.fetch("https://sws.geonames.org/wrong") ==
                 {:error, "invalid parameter (14). For input string: \"wrong\""}
      end
    end
  end
end
40 changes: 40 additions & 0 deletions test/fixtures/custom_cassettes/geonames_500.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
[
{
"request": {
"body": "",
"headers": {
"User-Agent": "Authoritex"
},
"method": "get",
"options": [],
"request_body": "",
"url": "http://api.geonames.org/getJSON?geonameId=4560349&<<geonames_username>>"
},
"response": {
"status_code": 500,
"headers": {
"Content-Type": "application/json;charset=UTF-8"
},
"body": "{\"status\":{\"message\":\"Internal Server Error.\",\"value\":22}}"
}
},
{
"request": {
"body": "",
"headers": {
"User-Agent": "Authoritex"
},
"method": "get",
"options": [],
"request_body": "",
"url": "http://api.geonames.org/searchJSON?q=Authority+Down&<<geonames_username>>&maxRows=30"
},
"response": {
"status_code": 500,
"headers": {
"Content-Type": "application/json;charset=UTF-8"
},
"body": "{\"status\":{\"message\":\"Internal Server Error.\",\"value\":22}}"
}
}
]
12 changes: 6 additions & 6 deletions test/fixtures/vcr_cassettes/authoritex_fetch_failure.json

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions test/fixtures/vcr_cassettes/authoritex_fetch_success.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,26 @@
"binary": false,
"body": "<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n <madsrdf:PersonalName rdf:about=\"http://id.loc.gov/authorities/names/no2011087251\" xmlns:madsrdf=\"http://www.loc.gov/mads/rdf/v1#\">\n <rdf:type rdf:resource=\"http://www.loc.gov/mads/rdf/v1#Authority\"/>\n <madsrdf:authoritativeLabel>Valim, Jose</madsrdf:authoritativeLabel>\n <madsrdf:elementList rdf:parseType=\"Collection\">\n <madsrdf:FullNameElement>\n\t<madsrdf:elementValue>Valim, Jose</madsrdf:elementValue>\n </madsrdf:FullNameElement>\n </madsrdf:elementList>\n <madsrdf:isMemberOfMADSCollection rdf:resource=\"http://id.loc.gov/authorities/names/collection_NamesAuthorizedHeadings\"/>\n <madsrdf:isMemberOfMADSCollection rdf:resource=\"http://id.loc.gov/authorities/names/collection_LCNAF\"/>\n <madsrdf:hasExactExternalAuthority rdf:resource=\"http://viaf.org/viaf/sourceID/LC%7Cno2011087251#skos:Concept\"/>\n <madsrdf:identifiesRWO>\n <madsrdf:RWO rdf:about=\"http://id.loc.gov/rwo/agents/no2011087251\">\n\t<rdf:type rdf:resource=\"http://id.loc.gov/ontologies/bibframe/Person\"/>\n\t<rdf:type rdf:resource=\"http://xmlns.com/foaf/0.1/Person\"/>\n\t<rdfs:label xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\">Valim, Jose</rdfs:label>\n\t<bflc:contributorTo xmlns:bflc=\"http://id.loc.gov/ontologies/bflc/\">\n\t <bf:Work rdf:about=\"http://id.loc.gov/resources/works/17032688\" xmlns:bf=\"http://id.loc.gov/ontologies/bibframe/\">\n\t <bflc:aap>Valim, Jose. 
Crafting rails applications :</bflc:aap>\n\t </bf:Work>\n\t</bflc:contributorTo>\n </madsrdf:RWO>\n </madsrdf:identifiesRWO>\n <madsrdf:isMemberOfMADSScheme rdf:resource=\"http://id.loc.gov/authorities/names\"/>\n <identifiers:lccn xmlns:identifiers=\"http://id.loc.gov/vocabulary/identifiers/\">no2011087251</identifiers:lccn>\n <identifiers:local xmlns:identifiers=\"http://id.loc.gov/vocabulary/identifiers/\">(OCoLC)oca08877947</identifiers:local>\n <madsrdf:hasSource>\n <madsrdf:Source>\n\t<madsrdf:citation-source>Crafting Rails applications, c2011:</madsrdf:citation-source>\n\t<madsrdf:citation-note>t.p. (Jose Valim)</madsrdf:citation-note>\n\t<madsrdf:citation-status>found</madsrdf:citation-status>\n </madsrdf:Source>\n </madsrdf:hasSource>\n <madsrdf:adminMetadata>\n <ri:RecordInfo xmlns:ri=\"http://id.loc.gov/ontologies/RecordInfo#\">\n\t<ri:recordChangeDate rdf:datatype=\"http://www.w3.org/2001/XMLSchema#dateTime\">2011-06-06T00:00:00</ri:recordChangeDate>\n\t<ri:recordStatus rdf:datatype=\"http://www.w3.org/2001/XMLSchema#string\">new</ri:recordStatus>\n\t<ri:recordContentSource rdf:resource=\"http://id.loc.gov/vocabulary/organizations/oco\"/>\n\t<ri:languageOfCataloging rdf:resource=\"http://id.loc.gov/vocabulary/iso639-2/eng\"/>\n </ri:RecordInfo>\n </madsrdf:adminMetadata>\n <madsrdf:adminMetadata>\n <ri:RecordInfo xmlns:ri=\"http://id.loc.gov/ontologies/RecordInfo#\">\n\t<ri:recordChangeDate rdf:datatype=\"http://www.w3.org/2001/XMLSchema#dateTime\">2011-06-07T06:21:47</ri:recordChangeDate>\n\t<ri:recordStatus rdf:datatype=\"http://www.w3.org/2001/XMLSchema#string\">revised</ri:recordStatus>\n\t<ri:recordContentSource rdf:resource=\"http://id.loc.gov/vocabulary/organizations/oco\"/>\n\t<ri:languageOfCataloging rdf:resource=\"http://id.loc.gov/vocabulary/iso639-2/eng\"/>\n </ri:RecordInfo>\n </madsrdf:adminMetadata>\n <rdf:type rdf:resource=\"http://www.w3.org/2004/02/skos/core#Concept\"/>\n <skos:prefLabel 
xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\">Valim, Jose</skos:prefLabel>\n <skos:exactMatch rdf:resource=\"http://viaf.org/viaf/sourceID/LC%7Cno2011087251#skos:Concept\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\"/>\n <skos:inScheme rdf:resource=\"http://id.loc.gov/authorities/names\" xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\"/>\n <skos:changeNote xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\">\n <cs:ChangeSet xmlns:cs=\"http://purl.org/vocab/changeset/schema#\">\n\t<cs:subjectOfChange rdf:resource=\"http://id.loc.gov/authorities/names/no2011087251\"/>\n\t<cs:creatorName rdf:resource=\"http://id.loc.gov/vocabulary/organizations/oco\"/>\n\t<cs:createdDate rdf:datatype=\"http://www.w3.org/2001/XMLSchema#dateTime\">2011-06-06T00:00:00</cs:createdDate>\n\t<cs:changeReason rdf:datatype=\"http://www.w3.org/2001/XMLSchema#string\">new</cs:changeReason>\n </cs:ChangeSet>\n </skos:changeNote>\n <skos:changeNote xmlns:skos=\"http://www.w3.org/2004/02/skos/core#\">\n <cs:ChangeSet xmlns:cs=\"http://purl.org/vocab/changeset/schema#\">\n\t<cs:subjectOfChange rdf:resource=\"http://id.loc.gov/authorities/names/no2011087251\"/>\n\t<cs:creatorName rdf:resource=\"http://id.loc.gov/vocabulary/organizations/oco\"/>\n\t<cs:createdDate rdf:datatype=\"http://www.w3.org/2001/XMLSchema#dateTime\">2011-06-07T06:21:47</cs:createdDate>\n\t<cs:changeReason rdf:datatype=\"http://www.w3.org/2001/XMLSchema#string\">revised</cs:changeReason>\n </cs:ChangeSet>\n </skos:changeNote>\n </madsrdf:PersonalName>\n</rdf:RDF>\n",
"headers": {
"Date": "Tue, 12 May 2020 18:07:24 GMT",
"Date": "Thu, 14 May 2020 16:06:52 GMT",
"Content-Type": "application/rdf+xml; charset=UTF-8",
"Content-Length": "4907",
"Connection": "keep-alive",
"Set-Cookie": "__cfduid=df3defcb6a0081a964fa2f4947c0ef3731589306844; expires=Thu, 11-Jun-20 18:07:24 GMT; path=/; domain=.loc.gov; HttpOnly; SameSite=Lax",
"Set-Cookie": "__cfduid=d34c647aa87ca1d940ee9ea15cfbfb90c1589472412; expires=Sat, 13-Jun-20 16:06:52 GMT; path=/; domain=.loc.gov; HttpOnly; SameSite=Lax",
"X-RWO-URI": "http://id.loc.gov/rwo/agents/no2011087251",
"ETag": "4e8d8eb3f55d0911a9162daeb11be049",
"X-URI": "http://id.loc.gov/authorities/names/no2011087251",
"cache-control": "public, max-age=2419200",
"X-Varnish": "457197331 445456501",
"Age": "341955",
"X-Varnish": "469373373 463002114",
"Age": "255660",
"Via": "1.1 varnish-v4",
"Access-Control-Allow-Origin": "*",
"Access-Control-Allow-Methods": "HEAD, POST, GET, OPTIONS",
"Access-Control-Allow-Headers": "Content-Type, Access-Control-Allow-Headers, Authorization, X-Requested-With",
"Accept-Ranges": "bytes",
"CF-Cache-Status": "DYNAMIC",
"Server": "cloudflare",
"CF-RAY": "59261443fb1371b9-ORD",
"cf-request-id": "02abaafe7c000071b9c9b23200000001"
"CF-RAY": "5935de71ea72817c-ORD",
"cf-request-id": "02b5895b2f0000817cf93a8200000001"
},
"status_code": 200,
"type": "ok"
Expand Down
38 changes: 19 additions & 19 deletions test/fixtures/vcr_cassettes/authoritex_search_results.json

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions test/fixtures/vcr_cassettes/authoritex_search_results_empty.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,23 @@
"binary": false,
"body": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<feed xmlns=\"http://www.w3.org/2005/Atom\">\n<title>Library of Congress Authorities and Vocabulary Service: Search Results</title><link href=\"http://id.loc.gov/search/?q=NO_resulteeeees scheme:http://id.loc.gov/authorities/names&amp;start=1&amp;format=atom\" rel=\"self\"/><id>info:lc/search/?q=NO_resulteeeees+scheme:http://id.loc.gov/authorities/names</id><updated>2020-05-11T15:48:28.393794-04:00</updated><opensearch:totalResults xmlns:opensearch=\"http://a9.com/-/spec/opensearch/1.1/\">0</opensearch:totalResults><opensearch:startIndex xmlns:opensearch=\"http://a9.com/-/spec/opensearch/1.1/\">1</opensearch:startIndex><opensearch:itemsPerPage xmlns:opensearch=\"http://a9.com/-/spec/opensearch/1.1/\">30</opensearch:itemsPerPage>\n</feed>\n",
"headers": {
"Date": "Tue, 12 May 2020 18:07:25 GMT",
"Date": "Thu, 14 May 2020 16:06:50 GMT",
"Content-Type": "application/atom+xml; charset=UTF-8",
"Content-Length": "773",
"Connection": "keep-alive",
"Set-Cookie": "__cfduid=df6cfe2b3a8da33c387f37ba46b963f2b1589306845; expires=Thu, 11-Jun-20 18:07:25 GMT; path=/; domain=.loc.gov; HttpOnly; SameSite=Lax",
"Set-Cookie": "__cfduid=dacc51eb29381888cc6d54b4a61ca71051589472410; expires=Sat, 13-Jun-20 16:06:50 GMT; path=/; domain=.loc.gov; HttpOnly; SameSite=Lax",
"cache-control": "public, max-age=2419200",
"X-Varnish": "465850648 465993753",
"Age": "80336",
"X-Varnish": "468543935 465993753",
"Age": "245901",
"Via": "1.1 varnish-v4",
"Access-Control-Allow-Origin": "*",
"Access-Control-Allow-Methods": "HEAD, POST, GET, OPTIONS",
"Access-Control-Allow-Headers": "Content-Type, Access-Control-Allow-Headers, Authorization, X-Requested-With",
"Accept-Ranges": "bytes",
"CF-Cache-Status": "DYNAMIC",
"Server": "cloudflare",
"CF-RAY": "592614478d9b71b9-ORD",
"cf-request-id": "02abab00b8000071b9c9b45200000001"
"CF-RAY": "5935de644895817c-ORD",
"cf-request-id": "02b58952af0000817cf9283200000001"
},
"status_code": 200,
"type": "ok"
Expand Down
28 changes: 28 additions & 0 deletions test/fixtures/vcr_cassettes/geonames_fetch_failure.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
[
{
"request": {
"body": "",
"headers": {
"User-Agent": "Authoritex"
},
"method": "get",
"options": [],
"request_body": "",
"url": "http://api.geonames.org/getJSON?geonameId=43025619&<<geonames_username>>"
},
"response": {
"binary": false,
"body": "{\"status\":{\"message\":\"the geoname feature does not exist.\",\"value\":11}}",
"headers": {
"Date": "Fri, 15 May 2020 19:28:44 GMT",
"Server": "Apache/2.4.6 (CentOS) mod_jk/1.2.46 OpenSSL/1.0.2k-fips",
"Cache-Control": "no-cache",
"Access-Control-Allow-Origin": "*",
"Content-Length": "71",
"Content-Type": "application/json;charset=UTF-8"
},
"status_code": 404,
"type": "ok"
}
}
]

0 comments on commit a82b5c5

Please sign in to comment.