Skip to content

Commit

Permalink
added a bunch of new ones, tests probably wont work, need to fix those
Browse files Browse the repository at this point in the history
  • Loading branch information
sckott committed Nov 20, 2017
1 parent c48be28 commit a564a1b
Show file tree
Hide file tree
Showing 11 changed files with 292 additions and 0 deletions.
54 changes: 54 additions & 0 deletions ignore/ssrn.rb
@@ -0,0 +1,54 @@
require "logger"
require "test/unit"
require "multi_json"
require "faraday"
require "faraday_middleware"
require "faraday-cookie_jar"

# Tests for the SSRN publisher pattern file (src/ssrn.json): checks the set of
# top-level keys and that a PDF request built from the pattern completes.
#
# NOTE(review): test_ssrn_pdf performs live HTTP requests and writes
# 'faraday.log' in the working directory.
class TestSSRN < Test::Unit::TestCase
  # Top-level keys src/ssrn.json is expected to contain, already sorted.
  EXPECTED_KEYS = %w[
    components cookies crossref_member journals open_access prefixes
    publisher publisher_member publisher_parent regex urls
  ].freeze

  # Loads the fixture DOI and the parsed pattern file before every test.
  def setup
    @doi = "10.2139/ssrn.460001"
    # File.read closes the handle; File.open without a block left it open.
    @ssrn = MultiJson.load(File.read('src/ssrn.json'))
  end

  # The pattern file exposes exactly the expected keys, defines URLs, and has
  # no per-journal overrides.
  def test_ssrn_keys
    # test-unit's signature is assert_equal(expected, actual).
    assert_equal(EXPECTED_KEYS, @ssrn.keys.sort)
    assert_not_nil(@ssrn['urls'])
    assert_nil(@ssrn['journals'])
  end

  # Visits the DOI landing page first, then requests the PDF URL built from
  # the pattern, sharing one cookie jar between both connections.
  def test_ssrn_pdf
    # One jar for both connections so cookies set while resolving the DOI are
    # sent with the PDF request. HTTP::CookieJar comes from the http-cookie
    # gem, which faraday-cookie_jar depends on.
    jar = HTTP::CookieJar.new

    landing = Faraday.new(:url => "https://doi.org/" + @doi) do |f|
      # Redirect handling is outermost so every hop passes through the cookie
      # jar; the adapter must be the last entry in the stack.
      f.use FaradayMiddleware::FollowRedirects, limit: 3
      f.use :cookie_jar, jar: jar
      f.use Faraday::Response::Logger, Logger.new('faraday.log')
      f.adapter Faraday.default_adapter
    end
    landing.get

    # Fill the PDF URL pattern with the part of the DOI selected by the
    # publisher's DOI regex.
    doi_part = @doi.match(@ssrn['components']['doi']['regex']).to_s
    pdf = Faraday.new(:url => @ssrn['urls']['pdf'] % doi_part) do |f|
      f.use :cookie_jar, jar: jar
      f.adapter Faraday.default_adapter
    end

    # Middleware is configured on the connection above; the block given to
    # #get receives the request, so no `use`/`adapter` calls belong there.
    res = pdf.get
    assert_equal(Faraday::Response, res.class)
    assert_equal(String, res.body.class)
  end
end

# curl -c ssrncookies.txt 'http://ssrnoa.tandfonline.com/doi/pdf/10.1080/23312041.2015.1085296'
# curl -b ssrncookies.txt 'http://ssrnoa.tandfonline.com/doi/pdf/10.1080/23312041.2015.1085296'
22 changes: 22 additions & 0 deletions src/american_physical_society.json
@@ -0,0 +1,22 @@
{
"publisher": "american_physical_society",
"publisher_parent": null,
"crossref_member": 16,
"prefixes": [
"10.1103"
],
"urls": {
"pdf": "http://harvest.aps.org/v2/journals/articles/%s/fulltext"
},
"components": {
"html": null,
"doi": {
"regex": "[0-9]{5}$"
}
},
"cookies": false,
"regex": null,
"open_access": false,
"journals": null,
"notes": null
}
1 change: 1 addition & 0 deletions src/cogent.json
@@ -1,6 +1,7 @@
{
"publisher": "cogent",
"publisher_parent": "informa",
"publisher_member": 301,
"crossref_member": null,
"prefixes": ["10.1080"],
"urls": {
Expand Down
47 changes: 47 additions & 0 deletions src/elsevier.json
@@ -0,0 +1,47 @@
{
"publisher": "elsevier",
"publisher_parent": null,
"crossref_member": 78,
"prefixes": [
"10.7424",
"10.14219",
"10.7811",
"10.1580",
"10.1533",
"10.1529",
"10.3816",
"10.1602",
"10.3921",
"10.1240",
"10.1205",
"10.4065",
"10.1197",
"10.1157",
"10.1383",
"10.1367",
"10.2353",
"10.2111",
"10.2139",
"10.1006",
"10.1016",
"10.1054",
"10.1053",
"10.1067",
"10.1078",
"10.3182"
],
"urls": {
"xml": "http://api.elsevier.com/content/article/PII:%s?httpAccept=text/xml",
"plain": "http://api.elsevier.com/content/article/PII:%s?httpAccept=text/plain"
},
"components": {
"html": null,
"doi": null,
"id": "get `alternative-id` from Crossref API"
},
"cookies": false,
"regex": null,
"open_access": false,
"journals": null,
"notes": "need a different internal ID - get `alternative-id` from Crossref API"
}
24 changes: 24 additions & 0 deletions src/emerald.json
@@ -0,0 +1,24 @@
{
"publisher": "emerald",
"publisher_parent": null,
"crossref_member": 140,
"prefixes": [
"10.1108",
"10.5042"
],
"urls": {
"html": "http://www.emeraldinsight.com/doi/full/%s",
"pdf": "http://www.emeraldinsight.com/doi/pdfplus/%s"
},
"components": {
"html": null,
"doi": {
"regex": ".+"
}
},
"cookies": false,
"regex": null,
"open_access": false,
"journals": null,
"notes": "Crossref link gives URL for html, but in pubpatternsapi just construct by hand"
}
31 changes: 31 additions & 0 deletions src/f1000.json
@@ -0,0 +1,31 @@
{
"publisher": "f1000",
"publisher_parent": null,
"crossref_member": 4950,
"prefixes": ["10.12688"],
"urls": null,
"components": null,
"cookies": false,
"regex": null,
"open_access": true,
"journals": [
{
"journal": "f1000",
"open_access": true,
"issn": "2046-1402",
"urls": {
"pdf": "https://f1000research.com/articles/6-221/v2/pdf",
"xml": "https://f1000research.com/articles/6-221/v2/xml"
},
"components": {
"html": null,
"doi": {
"regex": "[0-9]{5}$"
}
}
}
],
"notes": "e.g. 10.12688/f1000research.10554.2"
}
22 changes: 22 additions & 0 deletions src/karger.json
@@ -0,0 +1,22 @@
{
"publisher": "karger",
"publisher_parent": null,
"crossref_member": 127,
"prefixes": [
"10.1159"
],
"urls": {
"pdf": "https://www.karger.com/Article/Pdf/%s"
},
"components": {
"html": null,
"doi": {
"regex": "[0-9]{5}$"
}
},
"cookies": false,
"regex": null,
"open_access": true,
"journals": null,
"notes": null
}
23 changes: 23 additions & 0 deletions src/pleiades.json
@@ -0,0 +1,23 @@
{
"publisher": "pleiades",
"publisher_parent": null,
"crossref_member": 137,
"prefixes": [
"10.1134"
],
"urls": {
"pdf": "https://link.springer.com/content/pdf/%s"
},
"components": {
"html": null,
"doi": {
"regex": ".+"
}
},
"cookies": false,
"regex": null,
"open_access": false,
"journals": null,
"notes": "follow redirects so that using a single URL pattern will work for new and old URLs"
}
22 changes: 22 additions & 0 deletions src/royal_society_chemistry.json
@@ -0,0 +1,22 @@
{
"publisher": "royal_society_chemistry",
"publisher_parent": null,
"crossref_member": 292,
"prefixes": [
"10.1039"
],
"urls": {
"pdf": "http://pubs.rsc.org/en/content/articlepdf/%s/JA/%s"
},
"components": {
"html": null,
"doi": {
"regex": "[0-9]{5}$"
}
},
"cookies": false,
"regex": null,
"open_access": false,
"journals": null,
"notes": null
}
24 changes: 24 additions & 0 deletions src/ssrn.json
@@ -0,0 +1,24 @@
{
"publisher": "ssrn",
"publisher_parent": "elsevier",
"publisher_member": 78,
"crossref_member": null,
"prefixes": ["10.2139"],
"urls": {
"pdf": "https://papers.ssrn.com/sol3/%s"
},
"components": {
"html": null,
"pdf" : {
"selector": "//a[@id=\"downloadPdf\"]",
"attribute": "href"
},
"doi": {
"regex": ".+"
}
},
"cookies": true,
"regex": null,
"open_access": true,
"journals": null
}
22 changes: 22 additions & 0 deletions src/transtech.json
@@ -0,0 +1,22 @@
{
"publisher": "trans-tech-publications",
"publisher_parent": null,
"crossref_member": 2457,
"prefixes": [
"10.4028"
],
"urls": {
"pdf": "http://%s.pdf"
},
"components": {
"html": null,
"doi": {
"regex": "www.scientific.net.+"
}
},
"cookies": false,
"regex": null,
"open_access": true,
"journals": null,
"notes": null
}

0 comments on commit a564a1b

Please sign in to comment.