Skip to content

Commit

Permalink
Load all DataONE member nodes as publishers. Closes #374
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Fenner committed Jul 11, 2015
1 parent 6a0db4c commit c498ccc
Show file tree
Hide file tree
Showing 9 changed files with 669 additions and 458 deletions.
1 change: 1 addition & 0 deletions .env.example
Expand Up @@ -46,6 +46,7 @@ CONCURRENCY=25
# sample - sample of 20 works from CrossRef REST API
# member_sample - sample of 20 works from CrossRef REST API for publishers registered in application
# datacite - all works in DataCite metadata index
# dataone - all works in DataONE index
# plos - all PLOS articles
IMPORT=

Expand Down
30 changes: 16 additions & 14 deletions app/models/imports/dataone_import.rb
Expand Up @@ -39,13 +39,27 @@ def parse_data(result)

items = result.fetch('response', {}).fetch('docs', nil)
Array(items).map do |item|
symbol = item.fetch("authoritativeMN", "").split(":").last
publisher = symbol.present? ? Publisher.where(symbol: symbol).first : nil
if publisher.present?
member_id = publisher.member_id
publisher_title = publisher.title
publisher_url = publisher.url
else
member_id = nil
publisher_title = nil
publisher_url = nil
end

id = item.fetch("id", nil)
doi = get_doi_from_id(id)
ark = id.starts_with?("ark:/") ? id.split("/")[0..2].join("/") : nil
if doi.present?
if doi.present? || ark.present?
url = nil
elsif id.starts_with?("http://")
url = get_normalized_url(id)
elsif publisher_url.present?
url = publisher.url % { id: id }
else
url = nil
end
Expand All @@ -61,18 +75,6 @@ def parse_data(result)
year, month, day = date_parts.fetch("date-parts", []).first
title = item.fetch("title", nil)

publisher_title = item.fetch("authoritativeMN", nil)
publisher_name = item.fetch("authoritativeMN", nil)
if publisher_name
member_id = publisher_name.to_i(36)
publisher = Publisher.where(member_id: member_id).first_or_create(
title: publisher_title,
name: publisher_name,
service: "dataone")
else
member_id = nil
end

type = "dataset"
work_type_id = WorkType.where(name: type).pluck(:id).first

Expand All @@ -84,7 +86,7 @@ def parse_data(result)
"type" => type,
"DOI" => doi,
"URL" => url,
"publisher" => publisher
"publisher" => publisher_title
}

{ doi: doi,
Expand Down
11 changes: 11 additions & 0 deletions db/migrate/20150710002032_add_publisher_node_symbol.rb
@@ -0,0 +1,11 @@
# Adds a string `symbol` column and a text `url` column to the publishers table.
class AddPublisherNodeSymbol < ActiveRecord::Migration
  # Column name => column type, listed in the order the columns are added.
  NEW_COLUMNS = { symbol: :string, url: :text }.freeze

  # Creates every column listed in NEW_COLUMNS on publishers.
  def up
    NEW_COLUMNS.each do |column, type|
      add_column :publishers, column, type
    end
  end

  # Drops the columns again, in the same order they were added.
  def down
    NEW_COLUMNS.each_key do |column|
      remove_column :publishers, column
    end
  end
end
24 changes: 23 additions & 1 deletion db/schema.rb
Expand Up @@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema.define(version: 20150708003940) do
ActiveRecord::Schema.define(version: 20150710002032) do

create_table "alerts", force: :cascade do |t|
t.integer "source_id", limit: 4
Expand Down Expand Up @@ -78,6 +78,18 @@
add_index "api_responses", ["total"], name: "index_api_responses_on_total", using: :btree
add_index "api_responses", ["unresolved", "id"], name: "index_api_responses_unresolved_id", using: :btree

create_table "data_exports", force: :cascade do |t|
t.string "url", limit: 255
t.string "type", limit: 255
t.datetime "started_exporting_at"
t.datetime "finished_exporting_at"
t.text "data", limit: 65535
t.text "files", limit: 65535
t.datetime "created_at"
t.datetime "updated_at"
t.string "name", limit: 255
end

create_table "data_migrations", force: :cascade do |t|
t.string "version", limit: 191
end
Expand Down Expand Up @@ -160,6 +172,9 @@
t.datetime "cached_at", default: '1970-01-01 00:00:00', null: false
t.string "name", limit: 255, null: false
t.string "service", limit: 255
t.string "node", limit: 255
t.string "symbol", limit: 255
t.text "url", limit: 65535
end

add_index "publishers", ["member_id"], name: "index_publishers_on_member_id", unique: true, using: :btree
Expand Down Expand Up @@ -187,6 +202,13 @@
add_index "relations", ["level", "work_id", "related_work_id"], name: "index_relations_on_level_work_related_work", using: :btree
add_index "relations", ["work_id", "related_work_id"], name: "index_relationships_on_work_id_related_work_id", using: :btree

create_table "report_write_logs", force: :cascade do |t|
t.string "filepath", limit: 255
t.string "report_type", limit: 255
t.datetime "created_at"
t.datetime "updated_at"
end

create_table "reports", force: :cascade do |t|
t.string "name", limit: 255
t.datetime "created_at"
Expand Down
156 changes: 156 additions & 0 deletions db/seeds/publishers.rb
@@ -0,0 +1,156 @@
# Seeds one Publisher row per DataONE member node.
# Runs only when the IMPORT environment variable is exactly "dataone".
if ENV["IMPORT"] == "dataone"
  # URL template shared by most nodes. The string is single-quoted, so the
  # %{id} placeholder is stored verbatim rather than interpolated here.
  resolver = 'https://cn.dataone.org/cn/v1/resolve/%{id}'

  # One row per member node: name, title, symbol, member_id, url template.
  # A nil url means the publisher is created without a :url attribute at all.
  member_nodes = [
    ['cloebird', 'Cornell Lab of Ornithology - eBird', 'CLOEBIRD', '50001', resolver],
    ['dryad', 'Dryad Digital Repository', 'DRYAD', '50002', nil],
    ['edacgstore', 'Earth Data Analysis Center (EDAC)', 'EDACGSTORE', '50003', resolver],
    ['edora', 'Environmental Data for the Oak Ridge Area', 'EDORA', '50004', resolver],
    ['esa', 'ESA Data Registry', 'ESA', '50005', resolver],
    ['lter_europe', 'Europe Long-Term Ecosystem Research Network (LTER Europe)', 'LTER_EUROPE', '50006',
     'http://data.lter-europe.net/deims/dataset/%{id}'],
    ['gleon', 'Global Lake Ecological Observatory Network (GLEON)', 'GLEON', '50007', resolver],
    ['goa', 'Gulf of Alaska Data Portal', 'GOA', '50008', nil],
    ['iarc', 'International Arctic Research Center (IARC) Data Archive', 'IARC', '50009', resolver],
    ['knb', 'Knowledge Network for Biocomplexity', 'KNB', '50010', resolver],
    ['lter', 'LTER Network Member Node', 'LTER', '50011',
     'https://portal.lternet.edu/nis/mapbrowse?packageid=%{id}'],
    ['cdl', 'Merritt Repository', 'CDL', '50012', nil],
    ['us_mpc', 'Minnesota Population Center (MPC)', 'US_MPC', '50013', resolver],
    ['ioe', 'Montana IoE Data Repository', 'IOE', '50014', resolver],
    ['nmepscor', 'NM EPSCoR', 'NMEPSCOR', '50015', resolver],
    ['oneshare', 'ONEShare Repository', 'ONEShare', '50016', nil],
    ['ornldaac', 'ORNL DAAC', 'ORNLDAAC', '50017', resolver],
    ['pisco', 'PISCO MN', 'PISCO', '50018', nil],
    ['rgd', 'Regional and Global Biogeochemical Dynamics Data (RGD)', 'RGD', '50019', resolver],
    ['sanparks', 'SANParks Data Repository', 'SANPARKS', '50020', resolver],
    ['sead', 'SEAD Virtual Archive', 'SEAD', '50021', resolver],
    ['tfri', 'Taiwan Forestry Research Institute', 'TFRI', '50022', resolver],
    ['tern', 'Terrestrial Ecosystem Research Network', 'TERN', '50023', nil],
    ['kubi', 'University of Kansas - Biodiversity Institute', 'KUBI', '50024', resolver],
    ['usanpn', 'USA National Phenology Network', 'USANPN', '50025', nil],
    ['usgscsas', 'USGS Core Sciences Clearinghouse', 'USGSCSAS', '50026', nil],
    ['nkn', 'Northwest Knowledge Network', 'NKN', '50027', nil]
  ]

  member_nodes.each do |name, title, symbol, member_id, url|
    attributes = {
      :title => title,
      :service => 'dataone',
      :symbol => symbol,
      :member_id => member_id
    }
    # Only nodes with a known landing-page template get a :url attribute.
    attributes[:url] = url if url

    # Looks the publisher up by name; the attributes are passed for the create case.
    Publisher.where(name: name).first_or_create(attributes)
  end
end

0 comments on commit c498ccc

Please sign in to comment.