forked from openSNP/snpr
/
snps_phenotypes.rb
58 lines (52 loc) · 1.94 KB
/
snps_phenotypes.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# Sidekiq worker that scores phenotypes for a single SNP and refreshes the
# locally cached SNPedia paper summaries for that SNP.
class SnpToPhenotype
  include Sidekiq::Worker
  sidekiq_options :queue => :snp_phenotype, :retry => 5, :unique => true

  # NOTE(review): @client is read by perform_search but never assigned in this
  # file — presumably injected/initialized elsewhere (a MediaWiki API client);
  # verify before relying on it, otherwise perform_search raises NoMethodError
  # on nil.
  attr_reader :snp, :client

  # Sidekiq entry point.
  #
  # @param snp_id [Integer] primary key of the Snp record to process
  #
  # Only proceeds when the SNP's name is in the known-SNPedia list and its
  # phenotype data is stale (older than 31 days).
  def perform(snp_id)
    @snp = Snp.find(snp_id)
    # FIX: original called the misspelled `score_pehnotype`, which raised
    # NoMethodError on every run that passed this guard (and exhausted all
    # 5 retries). The intended method is `score_phenotype` below.
    # NOTE(review): `Snp.find` raises ActiveRecord::RecordNotFound rather than
    # returning nil, so the `snp &&` check is effectively always true — kept
    # for safety/readability.
    if snp && valid_snp_names.include?(snp.name) && snp.phenotypes_updated < 31.days.ago
      score_phenotype
    end
  end

  # Builds a phenotype => score hash for the current SNP.
  def score_phenotype
    # TODO:
    # - fetch papers metadata corresponding to the SNP
    # - fetch all the stored phenotypes
    # - search for each phenotype one by one in the papers' metadata
    # - maintain a hash of the phenotypes along with score
    # - return entire hash
  end

  # Returns the highest-scoring phenotypes.
  #
  # @param limit [Integer] maximum number of phenotypes to return
  def top_phenotypes(limit)
    # - return top phenotypes based on score from the hash
  end

  # Queries SNPedia for pages about the current SNP and upserts a
  # SnpediaPaper record for each genotype page, skipping pages whose
  # revision id we have already stored and pages that are redirects.
  # Marks the SNP as updated when at least one paper changed.
  def perform_search
    # return an array of page-titles
    pages = client.list("#{snp.name}(")
    snpedia_updated = false
    (pages || []).each do |page|
      # Genotype pages are titled like "rs1234(A;A)"; skip anything else.
      next unless page.include?('(')
      url = "http://www.snpedia.com/index.php/#{page}"
      # revision returns an int which grows with changes
      rev_id = client.revision(page).to_i
      snpedia_paper = SnpediaPaper.find_or_initialize_by(url: url)
      # Already stored and unchanged upstream — nothing to do.
      next if snpedia_paper.persisted? && snpedia_paper.revision == rev_id
      to_parse = client.get(page)
      next if to_parse.to_s.include?('#REDIRECT')
      # Named capture `summary` becomes a local variable (nil when no match).
      /summary=(?<summary>.*)\}\}/m =~ to_parse
      # NOTE(review): update_attributes! is removed in Rails 6.1+; switch to
      # update! if/when this app is upgraded.
      snpedia_paper.update_attributes!(
        url: url, summary: summary, revision: rev_id)
      snpedia_paper.snps << snp unless snpedia_paper.snps.include? snp
      snpedia_updated = true
    end
    snp.snpedia_updated! if snpedia_updated
    if Rails.env.production?
      # Increase this value if the following error keeps on showing up
      # 'MediaWiki::APIError: API error: code 'internal_api_error_DBConnectionError',
      # info 'Exception Caught: DB connection error: Too many connections'
      sleep(10)
    end
  end

  # Loads the marshalled array of SNP names known to SNPedia.
  # NOTE(review): Marshal.load is only safe on trusted data — this file is
  # generated locally, so acceptable here; do not point it at external input.
  def valid_snp_names
    Marshal.load(File.read(Rails.root.join('marshalled_snpedia_array')))
  end
end