-
Notifications
You must be signed in to change notification settings - Fork 0
/
agent.rb
138 lines (111 loc) · 3.7 KB
/
agent.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# encoding: UTF-8
require 'fileutils'
require 'uri'
require 'mechanize'
require 'gmusic/search/result'
require 'gmusic/search/errors'
module Gmusic
module Search
class Agent
class << self
# NOTE: not finished
# Runs a search and wraps what was scraped from the result page.
#
# The query is validated with query_valid? first; the page at
# SEARCH_URL + the formatted query is then fetched through the shared agent.
#
# @param query [Hash] search options (checked by query_valid?)
# @return [Result] built from the headline info and the per-song details
# @raise [InvalidParameter] when query_valid? rejects the query
def search(query)
  raise InvalidParameter unless query_valid?(query)

  # TODO: wrap the request in begin/rescue and retry or re-raise,
  # to make it more robust.
  page = agent.get(format_url(SEARCH_URL, query))
  summary = extract_info_from(page)
  songs = collect_details_from(page)
  Result.new(summary, songs)
end
# NOTE: not finished
# Downloads a song and saves it as "<title>.mp3" inside the target directory.
#
# The agent's default pluggable parser is switched to Mechanize::Download,
# the page at song.link is fetched, and the last link on that page is clicked
# to obtain the file.
#
# @param song [#link, #title] the song to fetch
# @param dir [String, nil] destination directory, resolved through mkdir
# @return [Boolean] true once the file is saved; false when clicking the link
#   keeps timing out (one initial attempt plus three retries)
#
# TODO: the initial `get` should be rescued too; maybe accept a list of songs
# and retry each of them up to three times.
def download(song, dir=nil)
  agent.pluggable_parser.default = Mechanize::Download
  agent.get(song.link) do |page|
    times = 0
    begin
      file = page.links.last.click
    rescue Errno::ETIMEDOUT
      # Give up after the third retry.
      return false if times > 2
      times += 1
      retry
    end
    target_dir = mkdir(dir)
    # Titles come from scraped pages; a "/" in one (e.g. "AC/DC") would
    # otherwise be treated as a path separator and redirect the save into
    # another directory.
    safe_title = song.title.to_s.tr('/', '_')
    file.save("#{target_dir}/#{safe_title}.mp3")
  end
  true
end
private
# Resolves the directory downloads are written to, creating it when missing.
#
# @param dirname [String, nil] requested directory; nil selects
#   "<home>/Downloads/gmusic"
# @return [String] the directory path. An already existing dirname is
#   returned untouched; otherwise the name goes through sanitize_dirname and
#   is created (relative names are placed under the current working directory).
def mkdir(dirname)
  return FileUtils.mkdir_p("#{Dir.home}/Downloads/gmusic").first unless dirname
  # Dir.exists? was removed in Ruby 3.2; Dir.exist? is the supported spelling.
  return dirname if Dir.exist?(dirname)

  dir = sanitize_dirname(dirname)
  # Only a leading slash makes the path absolute. Testing for "contains a
  # slash" would hand back nested relative paths unresolved.
  target = dir.start_with?('/') ? dir : File.join(Dir.pwd, dir)
  FileUtils.mkdir_p(target).first
end
# Produces a filesystem-friendly version of a user-supplied directory name.
#
# Surrounding whitespace is stripped, every character other than ".", "/" and
# word characters is replaced with "_", and a leading "~" is replaced with
# Dir.home.
#
# @param dirname [String] raw directory name
# @return [String] a new, sanitized string (the argument is not modified)
def sanitize_dirname(dirname)
  dir = dirname.strip
  in_home = dir.start_with?('~')
  dir = dir[1..-1] if in_home
  # Sanitize before prepending the home directory, so a home path containing
  # e.g. "-" or a space is not rewritten into a different directory.
  dir = dir.gsub(/[^.\/[:word:]]/, '_')
  in_home ? "#{Dir.home}#{dir}" : dir
end
# Builds one details hash per '#song_list tbody' element found on the page.
#
# @param page [#search] the fetched result page
# @return [Array<Hash>] hashes with :title, :artist and :link, where :link is
#   DOWNLOAD_URL formatted with the tbody's id attribute
def collect_details_from(page)
  rows = page.search('#song_list tbody')
  rows.map do |row|
    song_id = row.attributes['id'].text
    # TODO: exposing the link as a URI object might be better (more OO).
    {
      title: extract_text_from(row, '.Title b'),
      artist: extract_text_from(row, '.Artist a'),
      link: DOWNLOAD_URL % song_id
    }
  end
end
#def collect_links_from(page)
#page.search('#song_list tbody').map do |tbody|
#id = tbody.attributes['id'].text
#title = extract_text_from(tbody, '.Title b')
#url = DOWNLOAD_URL % id
##{ title: title, link: link }
#Link.new(title, url)
#end
#end
# Reads the result counters out of the page's '.topheadline' text.
#
# Every parenthesised number in the headline is taken in order and paired
# with the labels 歌曲, 专辑 and 歌手 (songs, albums, artists); a label with no
# matching number maps to nil.
#
# @param page [#search] the fetched result page
# @return [Hash{String => Integer, nil}] label => count
# @raise [NotFound] when not_found? reports the counters as empty
def extract_info_from(page)
  headline = extract_text_from(page, '.topheadline')
  counts = headline.scan(/\((\d+)\)/).map { |(digits)| digits.to_i }

  info = {}
  %w{歌曲 专辑 歌手}.each_with_index { |label, idx| info[label] = counts[idx] }

  raise NotFound if not_found?(info)
  info
end
# Returns the text of whatever `scope.search(element)` finds.
#
# @param scope [#search] object to search within
# @param element [String] selector handed to scope.search
# @return the value of `text` on the search result
def extract_text_from(scope, element)
  matches = scope.search(element)
  matches.text
end
#TODO
#set a logger for the agent
#http://mechanize.rubyforge.org/Mechanize.html
# Returns the Mechanize instance used for requests, creating it on first use
# and caching it in @agent for later calls.
def agent
  @agent = Mechanize.new unless @agent
  @agent
end
# Tells whether a query hash is usable: it must be non-empty and every key,
# converted with to_sym, must be listed in SEARCH_OPTSTIONS.
#
# @param hash [Hash] the query options
# @return [Boolean]
def query_valid?(hash)
  return false if hash.empty?

  hash.keys.all? { |key| SEARCH_OPTSTIONS.include?(key.to_sym) }
end
# True when no value in the hash differs from 0 (an empty hash included).
#
# @param hash [Hash] label => count
# @return [Boolean]
def not_found?(hash)
  hash.values.none? { |count| count != 0 }
end
# Appends the encoded query values to the base URL.
#
# @param base_url [String] URL prefix the encoded terms are appended to
# @param hash [Hash] query options; only the values are used
# @return [String] base_url followed by encode_www_form of the values
def format_url(base_url, hash)
  terms = hash.values
  encoded = encode_www_form(terms)
  base_url + encoded
end
# Turns a list of search terms into a "+"-separated, URL-safe query string.
#
# The terms are joined, downcased and split on whitespace; each resulting
# word is percent-encoded so characters such as "&", "#", "+" or "=" cannot
# alter the structure of the URL they are appended to.
#
# @param ary [Array<#to_s>] search terms
# @return [String] encoded words joined with "+" ("" when there are none)
def encode_www_form(ary)
  words = ary.join(' ').downcase.split
  words.map { |word| URI.encode_www_form_component(word) }.join('+')
end
end
end
end
end