Skip to content

Commit

Permalink
more generic parsing of authors. #191
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Fenner committed Dec 14, 2014
1 parent b77f448 commit 779f5d5
Show file tree
Hide file tree
Showing 12 changed files with 52 additions and 37 deletions.
27 changes: 27 additions & 0 deletions app/models/concerns/authorable.rb
@@ -0,0 +1,27 @@
# Shared helpers that turn author name strings into CSL
# (citation style language) name hashes.
module Authorable
  extend ActiveSupport::Concern

  included do
    # Parse a single author string into a CSL name hash.
    #
    # author   - the name string, e.g. "Martin Fenner" or "Fenner, Martin"
    # sep      - separator between name parts; the default " " makes
    #            String#split divide on any run of whitespace
    # reversed - true when the family name is the first part,
    #            false (default) when it is the last part
    #
    # Returns a hash with "family" and "given" keys (both titleized), or
    # "" when author is blank. The "" return is kept as-is so existing
    # callers see the same value for missing authors.
    def get_one_author(author, sep = " ", reversed = false)
      return "" if author.blank?

      # Strip every part and drop empty ones: with a non-whitespace
      # separator ("Fenner, Martin" split on ",") the raw parts would
      # otherwise carry their surrounding padding into the result.
      name_parts = author.split(sep).map(&:strip).reject(&:empty?)

      if reversed
        family, *given = name_parts
      else
        *given, family = name_parts
      end

      # String() turns a nil family (no usable name parts) into ""
      { "family" => String(family).titleize,
        "given" => given.join(" ").titleize }
    end

    # Parse a list of author strings into CSL format.
    #
    # authors  - array of name strings; nil is treated as an empty list
    #            and a single string as a one-element list
    # sep      - passed through to get_one_author
    # reversed - passed through to get_one_author
    #
    # Returns an array with one get_one_author result per input entry.
    def get_authors(authors, sep = " ", reversed = false)
      Array(authors).map { |author| get_one_author(author, sep, reversed) }
    end
  end
end
2 changes: 0 additions & 2 deletions app/models/concerns/dateable.rb
@@ -1,5 +1,3 @@
# encoding: UTF-8

module Dateable
extend ActiveSupport::Concern

Expand Down
18 changes: 4 additions & 14 deletions app/models/source.rb
@@ -1,5 +1,3 @@
# encoding: UTF-8

require 'cgi'
require "addressable/uri"

Expand All @@ -19,7 +17,10 @@ class Source < ActiveRecord::Base
# include CouchDB helpers
include Couchable

# include date methods concern
# include author methods
include Authorable

# include date methods
include Dateable

# include summary counts
Expand Down Expand Up @@ -221,17 +222,6 @@ def get_events_url(work)
end
end

# Parse one author string ("Given Names Family") into CSL format.
#
# Returns '' for a blank author, otherwise a one-element array holding
# a hash with titleized 'family' and 'given' names. The last
# whitespace-separated word is the family name, everything before it
# the given name.
def get_author(author)
  if author.blank?
    ''
  else
    *forenames, surname = author.split(' ')

    [{ 'family' => String(surname).titleize,
       'given' => String(forenames.join(' ')).titleize }]
  end
end

# fields with publisher-specific settings such as API keys,
# i.e. everything that is not a URL
def publisher_fields
Expand Down
4 changes: 2 additions & 2 deletions app/models/sources/cross_ref.rb
Expand Up @@ -60,7 +60,7 @@ def get_events(result)

# the rest is CSL (citation style language)
event_csl: {
'author' => get_author(item.fetch('contributors', {}).fetch('contributor', [])),
'author' => get_authors(item.fetch('contributors', {}).fetch('contributor', [])),
'title' => String(item.fetch('article_title') { '' }).titleize,
'container-title' => item.fetch('journal_title') { '' },
'issued' => get_date_parts_from_parts(item['year']),
Expand All @@ -70,7 +70,7 @@ def get_events(result)
end.compact
end

def get_author(contributors)
def get_authors(contributors)
contributors = [contributors] if contributors.is_a?(Hash)
contributors.map do |contributor|
{ 'family' => String(contributor['surname']).titleize,
Expand Down
4 changes: 2 additions & 2 deletions app/models/sources/plos_comments.rb
Expand Up @@ -32,8 +32,8 @@ def get_events(result, work)

# the rest is CSL (citation style language)
event_csl: {
'author' => get_author(item['creatorFormattedName']),
'title' => item.fetch('title') { '' },
'author' => get_authors([item.fetch('creatorFormattedName', "")]),
'title' => item.fetch('title', ""),
'container-title' => 'PLOS Comments',
'issued' => get_date_parts(event_time),
'url' => work.doi_as_url,
Expand Down
6 changes: 3 additions & 3 deletions app/models/sources/pmc_europe_data.rb
Expand Up @@ -42,9 +42,9 @@ def get_events(result)

# the rest is CSL (citation style language)
event_csl: {
'author' => get_author(item['authorString']),
'title' => item.fetch('title') { '' },
'container-title' => item.fetch('journalTitle') { '' },
'author' => get_authors([item.fetch('authorString', "")]),
'title' => item.fetch('title', ""),
'container-title' => item.fetch('journalTitle', ""),
'issued' => get_date_parts_from_parts((item['pubYear']).to_i),
'url' => url,
'type' => 'article-journal' }
Expand Down
4 changes: 2 additions & 2 deletions app/models/sources/reddit.rb
Expand Up @@ -32,8 +32,8 @@ def get_events(result)

# the rest is CSL (citation style language)
event_csl: {
'author' => get_author(data['author']),
'title' => data.fetch('title') { '' },
'author' => get_authors([data.fetch('author', "")]),
'title' => data.fetch('title', ""),
'container-title' => 'Reddit',
'issued' => get_date_parts(event_time),
'url' => url,
Expand Down
6 changes: 3 additions & 3 deletions app/models/sources/researchblogging.rb
Expand Up @@ -18,9 +18,9 @@ def get_events(result)

# the rest is CSL (citation style language)
event_csl: {
'author' => get_author(item['blogger_name']),
'title' => item.fetch('post_title') { '' },
'container-title' => item.fetch('blog_name') { '' },
'author' => get_authors([item.fetch('blogger_name', "")]),
'title' => item.fetch('post_title', ""),
'container-title' => item.fetch('blog_name', ""),
'issued' => get_date_parts(event_time),
'url' => url,
'type' => 'post'
Expand Down
6 changes: 3 additions & 3 deletions app/models/sources/science_seeker.rb
Expand Up @@ -19,9 +19,9 @@ def get_events(result)

# the rest is CSL (citation style language)
event_csl: {
'author' => get_author(item.deep_fetch('author', 'name') { '' }),
'title' => item.fetch('title') { '' },
'container-title' => item.deep_fetch('source', 'title') { '' },
'author' => get_authors([item.fetch('author', {}).fetch('name', "")]),
'title' => item.fetch('title', ""),
'container-title' => item.fetch('source', {}).fetch('title', ""),
'issued' => get_date_parts(event_time),
'url' => url,
'type' => 'post'
Expand Down
6 changes: 3 additions & 3 deletions app/models/sources/twitter.rb
Expand Up @@ -25,8 +25,8 @@ def get_events(result)
event_time = get_iso8601_from_time(data['created_at'])
url = "http://twitter.com/#{user}/status/#{data['id_str']}"

{ event: { id: data["id_str"],
text: data["text"],
{ event: { id: data.fetch("id_str", nil),
text: data.fetch("text", nil),
created_at: event_time,
user: user,
user_name: user_name,
Expand All @@ -36,7 +36,7 @@ def get_events(result)

# the rest is CSL (citation style language)
event_csl: {
'author' => get_author(user_name),
'author' => get_authors([user_name]),
'title' => data.fetch('text') { '' },
'container-title' => 'Twitter',
'issued' => get_date_parts(event_time),
Expand Down
4 changes: 2 additions & 2 deletions app/models/sources/twitter_search.rb
Expand Up @@ -58,8 +58,8 @@ def get_events(result)

# the rest is CSL (citation style language)
event_csl: {
'author' => get_author(user_name),
'title' => item.fetch('text') { '' },
'author' => get_authors([user_name]),
'title' => item.fetch('text', ""),
'container-title' => 'Twitter',
'issued' => get_date_parts(event_time),
'url' => url,
Expand Down
2 changes: 1 addition & 1 deletion app/models/sources/wordpress.rb
Expand Up @@ -13,7 +13,7 @@ def get_events(result)

# the rest is CSL (citation style language)
event_csl: {
'author' => get_author(item['author']),
'author' => get_authors([item.fetch('author', "")]),
'title' => item.fetch('title') { '' },
'container-title' => '',
'issued' => get_date_parts(event_time),
Expand Down

0 comments on commit 779f5d5

Please sign in to comment.