/
maven_central.rb
51 lines (44 loc) · 1.98 KB
/
maven_central.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# frozen_string_literal: true
# Maven Central flavour of the Maven package manager. Everything is resolved
# against https://repo1.maven.org/maven2 (see .repository_base); version lists
# can be scraped from the repository's HTML index pages when the raw project
# data does not already carry them.
class PackageManager::Maven::MavenCentral < PackageManager::Maven::Common
  REPOSITORY_SOURCE_NAME = "Maven"
  HIDDEN = true

  # Folder names must start with "<digits>.<digit>" to be treated as versions.
  VERSION_FOLDER_PATTERN = /\A\d+\.\d/.freeze
  private_constant :VERSION_FOLDER_PATTERN

  # @return [String] root URL of the Maven Central repository
  def self.repository_base
    "https://repo1.maven.org/maven2"
  end

  # Fetches the "recent" feed from the libraries.io Maven service.
  #
  # @return whatever `get` returns for that endpoint
  #   (NOTE(review): presumably a list of package names; confirm against `get`)
  def self.recent_names
    get("https://maven.libraries.io/mavenCentral/recent")
  end

  # @return [Class] the remover class used for versions that no longer exist upstream
  def self.missing_version_remover
    PackageManager::Base::MissingVersionRemover
  end

  # Attempt to scrape MavenCentral's index HTML to infer the latest version.
  #
  # @param name [String] package name, as understood by MavenUrl.from_name
  # @return [String, nil] the highest version folder name found, or nil when
  #   the index lists no parsable version folders
  def self.latest_version_scraped(name)
    folders = get_html(MavenUrl.from_name(name, repository_base, NAME_DELIMITER).base)
      .css("#contents a") # scrape the list of file/folders
      .map(&:text) # get each innerText
      .select { |text| text.end_with?("/") } # only look at folders
      .map { |folder| folder.chomp("/") } # remove folder trailing slash
      .grep(VERSION_FOLDER_PATTERN) # only folders that look like versions

    # Maven versions range from 1 to many "." and may not be valid SemVer. Use the more forgiving Gem::Version to sort.
    # Parse up front and drop anything Gem::Version rejects: returning nil as a max_by key would make the
    # comparison itself raise, so unparsable entries must never reach the sort.
    parsed = folders.filter_map do |text|
      [text, Gem::Version.new(text)]
    rescue ArgumentError
      Bugsnag.notify("Couldn't find scraped HTML version for #{name}. Check the HTML and ensure scraping still works.")
      nil
    end

    parsed.max_by { |_text, version| version }&.first
  end

  # maven-metadata.xml for Maven Central does not appear to be guaranteed to contain all relevant versions for a package
  # So instead, if needed, we will retrieve the versions from the raw HTML index page
  #
  # @param raw_project [Hash, nil] project data; its :versions entry is returned as-is when present
  # @param name [String] package name used for the HTML fallback
  # @return the :versions entry of raw_project, or the result of retrieve_versions
  def self.versions(raw_project, name)
    if raw_project && raw_project[:versions]
      raw_project[:versions]
    else
      retrieve_versions(versions_from_html(name), name)
    end
  end

  # Lists every folder link on the package's index page, without the trailing
  # slash and excluding the parent-directory link ("../").
  #
  # @param name [String] package name, as understood by MavenUrl.from_name
  # @return [Array<String>] folder names found on the index page
  def self.versions_from_html(name)
    get_html(MavenUrl.from_name(name, repository_base, NAME_DELIMITER).base).css("a").filter_map do |a|
      text = a.text
      text.chomp("/") if text.end_with?("/") && text != "../"
    end
  end
end