Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use a Class To Pass Around Repository Host Data #3338

Merged
merged 12 commits into from Mar 27, 2024
8 changes: 4 additions & 4 deletions app/models/concerns/github_identity.rb
Expand Up @@ -43,17 +43,17 @@ def update_repo_permissions

update_column(:currently_syncing, true)
download_orgs
r = github_client.repos
r = github_client.repos.map { |repo_data| RepositoryHost::RawUpstreamDataConverter.convert_from_github_api(repo_data) }

current_repo_ids = []

existing_permissions = repository_permissions.all
new_repo_ids = r.map(&:id)
existing_repos = Repository.where(host_type: "GitHub").where(uuid: new_repo_ids).select(:id, :uuid)

r.each do |repo|
unless (github_repo = existing_repos.find { |re| re.uuid.to_s == repo.id.to_s })
github_repo = Repository.host("GitHub").find_by("lower(full_name) = ?", repo.full_name.downcase) || Repository.create_from_hash(repo)
r.each do |repo_data|
unless (github_repo = existing_repos.find { |re| re.uuid.to_s == repo_data.repository_uuid.to_s })
github_repo = Repository.host("GitHub").find_by("lower(full_name) = ?", repo_data.lower_name) || Repository.create_from_data(repo_data)
end
next if github_repo.nil?

Expand Down
32 changes: 17 additions & 15 deletions app/models/repository.rb
Expand Up @@ -211,6 +211,10 @@ def owner
repository_organisation_id.present? ? repository_organisation : repository_user
end

# Lower-cased form of full_name, used for case-insensitive name comparisons.
# Returns nil when full_name is nil.
def lower_name
  value = full_name
  value.nil? ? nil : value.downcase
end

# Uses the repository's full_name as its string representation.
def to_s
full_name
end
Expand Down Expand Up @@ -310,23 +314,21 @@ def self.create_from_host(host_type, full_name, token = nil)
RepositoryHost.const_get(host_type.capitalize).create(full_name, token)
end

def self.create_from_hash(repo_hash)
return unless repo_hash

repo_hash = repo_hash.to_hash.with_indifferent_access
def self.create_from_data(repo_host_data)
return unless repo_host_data

ActiveRecord::Base.transaction do
g = Repository.where(host_type: (repo_hash[:host_type] || "GitHub")).find_by(uuid: repo_hash[:id])
g = Repository.host(repo_hash[:host_type] || "GitHub").find_by("lower(full_name) = ?", repo_hash[:full_name].downcase) if g.nil?
g = Repository.new(uuid: repo_hash[:id], full_name: repo_hash[:full_name]) if g.nil?
g.host_type = repo_hash[:host_type] || "GitHub"
g.full_name = repo_hash[:full_name] if g.full_name.downcase != repo_hash[:full_name].downcase
g.uuid = repo_hash[:id] if g.uuid.nil?
g.license = repo_hash[:license][:key] if repo_hash[:license]
g.source_name = (repo_hash[:parent][:full_name] if repo_hash[:fork] && repo_hash[:parent])

g.status = g.correct_status_from_upstream(archived_upstream: repo_hash[:archived])
g.assign_attributes repo_hash.slice(*Repository::API_FIELDS)
g = Repository.where(host_type: (repo_host_data.host_type || "GitHub")).find_by(uuid: repo_host_data.repository_uuid)
g = Repository.host(repo_host_data.host_type || "GitHub").find_by("lower(full_name) = ?", repo_host_data.lower_name) if g.nil?
g = Repository.new(uuid: repo_host_data.repository_uuid, full_name: repo_host_data.full_name) if g.nil?
g.host_type = repo_host_data.host_type || "GitHub"
g.full_name = repo_host_data.full_name if g.lower_name != repo_host_data.lower_name
g.uuid = repo_host_data.repository_uuid if g.uuid.nil?
g.license = repo_host_data.formatted_license if repo_host_data.formatted_license
g.source_name = (repo_host_data.source_name if repo_host_data.source_name.present?)

g.status = g.correct_status_from_upstream(archived_upstream: repo_host_data.archived)
g.assign_attributes repo_host_data.to_repository_attrs

if g.changed?
g.save ? g : nil
Expand Down
19 changes: 8 additions & 11 deletions app/models/repository_host/base.rb
Expand Up @@ -7,7 +7,7 @@ def initialize(repository)
end

def self.create(full_name, token = nil)
Repository.create_from_hash(fetch_repo(full_name, token))
Repository.create_from_data(fetch_repo(full_name, token))
rescue *self::IGNORABLE_EXCEPTIONS
nil
end
Expand Down Expand Up @@ -85,22 +85,19 @@ def repository_owner_class
end

def update_from_host(token = nil)
r = self.class.fetch_repo(repository.id_or_name)
return unless r.present?
repo_data = self.class.fetch_repo(repository.id_or_name)
return unless repo_data.present?

repository.uuid = r[:id] unless repository.uuid.to_s == r[:id].to_s
if repository.full_name.downcase != r[:full_name].downcase
clash = Repository.host(r[:host_type]).where("lower(full_name) = ?", r[:full_name].downcase).first
if repository.lower_name != repo_data.lower_name
clash = Repository.host(repo_data.host_type).where("lower(full_name) = ?", repo_data.lower_name).first
clash.destroy if clash && (!clash.repository_host.update_from_host(token) || clash.status == "Removed")
repository.full_name = r[:full_name]
repository.full_name = repo_data.full_name
end
repository.license = Project.format_license(r[:license][:key]) if r[:license]
repository.source_name = (r[:parent][:full_name] if r[:fork])

# set unmaintained status for the Repository based on if the repository has been archived upstream
# if the Repository already has another status then just leave it alone
repository.status = repository.correct_status_from_upstream(archived_upstream: r[:archived])
repository.assign_attributes r.slice(*Repository::API_FIELDS)
repository.status = repository.correct_status_from_upstream(archived_upstream: repo_data.archived)
repository.assign_attributes(repo_data.to_repository_attrs.slice(*Repository::API_FIELDS))
repository.save! if repository.changed?
rescue self.class.api_missing_error_class
repository.update_attribute(:status, "Removed") unless repository.private?
Expand Down
23 changes: 2 additions & 21 deletions app/models/repository_host/bitbucket.rb
Expand Up @@ -182,27 +182,8 @@ def self.fetch_repo(full_name, token = nil)
client = api_client(token)
user_name, repo_name = full_name.split("/")
project = client.repos.get(user_name, repo_name)
v1_project = client.repos.get(user_name, repo_name, api_version: "1.0")
repo_hash = project.to_hash.with_indifferent_access.slice(:description, :language, :full_name, :name, :has_wiki, :has_issues, :scm)

repo_hash.merge!({
id: project.uuid,
host_type: "Bitbucket",
owner: {},
homepage: project.website,
fork: project.parent.present?,
created_at: project.created_on,
updated_at: project.updated_on,
subscribers_count: v1_project.followers_count,
forks_count: v1_project.forks_count,
default_branch: project.fetch("mainbranch", {}).try(:fetch, "name", nil),
private: project.is_private,
size: project[:size].to_f / 1000,
parent: {
full_name: project.fetch("parent", {}).fetch("full_name", nil),
},
archived: false,
})

RawUpstreamDataConverter.convert_from_bitbucket_api(project)
rescue *IGNORABLE_EXCEPTIONS
nil
end
Expand Down
7 changes: 2 additions & 5 deletions app/models/repository_host/github.rb
Expand Up @@ -59,10 +59,7 @@ def commits_url(author = nil)
def self.fetch_repo(id_or_name, token = nil)
id_or_name = id_or_name.to_i if id_or_name.match(/\A\d+\Z/)
hash = AuthToken.fallback_client(token).repo(id_or_name, accept: "application/vnd.github.drax-preview+json,application/vnd.github.mercy-preview+json").to_hash
hash[:keywords] = hash[:topics]
hash[:host_type] = "GitHub"
hash[:scm] = "git"
hash
RawUpstreamDataConverter.convert_from_github_api(hash)
rescue *IGNORABLE_EXCEPTIONS
nil
end
Expand Down Expand Up @@ -154,7 +151,7 @@ def download_forks(token = nil)
return true if repository.forks_count == repository.forked_repositories.host(repository.host_type).count

AuthToken.new_client(token).forks(repository.full_name).each do |fork|
Repository.create_from_hash(fork)
Repository.create_from_data(RawUpstreamDataConverter.convert_from_github_api(fork))
end
end

Expand Down
24 changes: 3 additions & 21 deletions app/models/repository_host/gitlab.rb
Expand Up @@ -162,27 +162,9 @@ def self.recursive_gitlab_repos(page_number = 1, limit = 5, order = "created_asc
end

def self.fetch_repo(full_name, token = nil)
project = api_client(token).project(full_name)
repo_hash = project.to_hash.with_indifferent_access.slice(:id, :description, :created_at, :name, :open_issues_count, :forks_count, :default_branch, :archived)

repo_hash.merge!({
host_type: "GitLab",
full_name: project.path_with_namespace,
owner: {},
fork: project.try(:forked_from_project).present?,
updated_at: project.last_activity_at,
stargazers_count: project.star_count,
has_issues: project.issues_enabled,
has_wiki: project.wiki_enabled,
scm: "git",
private: project.visibility != "public",
pull_requests_enabled: project.merge_requests_enabled,
logo_url: project.avatar_url,
keywords: project.tag_list,
parent: {
full_name: project.try(:forked_from_project).try(:path_with_namespace),
},
})
project = api_client(token).project(full_name, { license: true })

RawUpstreamDataConverter.convert_from_gitlab_api(project)
rescue *IGNORABLE_EXCEPTIONS
nil
end
Expand Down
49 changes: 49 additions & 0 deletions app/models/repository_host/raw_upstream_data.rb
@@ -0,0 +1,49 @@
# frozen_string_literal: true

# This class is meant to be a facade over the raw upstream data coming
# from the different repository hosts' repository data. Its main goal
# is to standardize the output from each repository host into a concrete
# set of data so we can make sure the raw data is being mapped to the same
# schema within the Libraries.io models and code.
RepositoryHost::RawUpstreamData = Struct.new(
  :archived, :default_branch, :description, :fork, :full_name, :has_issues, :has_wiki, :homepage, :host_type,
  :keywords, :language, :license, :name, :owner, :parent, :is_private, :repository_uuid, :scm, :repository_size,
  keyword_init: true
) do
  # Maps the standardized upstream fields onto a Hash keyed by attribute name.
  # A few members are renamed on the way out: is_private => :private,
  # repository_size => :size, repository_uuid => :uuid. :license carries the
  # formatted license rather than the raw value.
  #
  # :source_name is only added when the upstream repository is a fork, so a
  # non-fork payload leaves that attribute out of the Hash entirely.
  #
  # @return [Hash{Symbol => Object}]
  def to_repository_attrs
    attrs = {
      default_branch: default_branch,
      description: description,
      full_name: full_name,
      has_issues: has_issues,
      has_wiki: has_wiki,
      homepage: homepage,
      host_type: host_type,
      keywords: keywords,
      language: language,
      license: formatted_license,
      name: name,
      private: is_private,
      scm: scm,
      size: repository_size,
      uuid: repository_uuid,
    }
    attrs[:source_name] = source_name if fork

    attrs
  end

  # The raw license passed through Project.format_license, or nil when the
  # upstream data carried no license.
  def formatted_license
    Project.format_license(license) if license
  end

  # Full name of the repository this one was forked from.
  #
  # Returns nil when the repository is not a fork, and also when it is a fork
  # but no parent data was supplied: a payload can flag fork without embedding
  # the parent, and indexing into a nil parent would raise NoMethodError.
  def source_name
    parent[:full_name] if fork && parent
  end

  # Lower-cased full_name for case-insensitive comparisons; nil when
  # full_name is nil.
  def lower_name
    full_name&.downcase
  end
end
79 changes: 79 additions & 0 deletions app/models/repository_host/raw_upstream_data_converter.rb
@@ -0,0 +1,79 @@
# frozen_string_literal: true

class RepositoryHost::RawUpstreamDataConverter
def self.convert_from_github_api(upstream_repository_data_hash)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should the error handling get standardized in here too? Saw a few rescue *self::IGNORABLE_EXCEPTIONS nil in the code.

Alternatively, now that this is written out, it looks like there's some pretty tight coupling w/ the api return objects, so the convert_* methods could be moved to their respective methods in the repository_host/*.rb files, as there doesn't seem to be a ton that's similar between the convert methods besides the object that's tying everything together, unless stuff like error handling is getting standardized. I was thinking there'd be more similarities between the objects being converted that there could be more extracted out and shared during the conversion process, like defaulting values/data validations/logging/etc.

RepositoryHost::RawUpstreamData.new(
repository_uuid: upstream_repository_data_hash[:id],
archived: upstream_repository_data_hash[:archived],
default_branch: upstream_repository_data_hash[:default_branch],
description: upstream_repository_data_hash[:description],
fork: upstream_repository_data_hash[:fork],
full_name: upstream_repository_data_hash[:full_name],
has_issues: upstream_repository_data_hash[:has_issues],
has_wiki: upstream_repository_data_hash[:has_wiki],
homepage: upstream_repository_data_hash[:homepage],
host_type: "GitHub",
keywords: upstream_repository_data_hash[:topics],
language: upstream_repository_data_hash[:language],
license: upstream_repository_data_hash.dig(:license, :key),
name: upstream_repository_data_hash[:name],
owner: upstream_repository_data_hash[:owner],
parent: upstream_repository_data_hash[:parent],
is_private: upstream_repository_data_hash[:private],
scm: "git",
repository_size: upstream_repository_data_hash[:size]
)
end

# Builds the standardized RepositoryHost::RawUpstreamData for a GitLab project.
#
# @param api_project project object from the GitLab API client, read through
#   method-style accessors (id, path_with_namespace, visibility, ...)
# @return [RepositoryHost::RawUpstreamData]
def self.convert_from_gitlab_api(api_project)
  RepositoryHost::RawUpstreamData.new(
    repository_uuid: api_project.id,
    description: api_project.description,
    name: api_project.name,
    default_branch: api_project.default_branch,
    archived: api_project.archived,
    host_type: "GitLab",
    full_name: api_project.path_with_namespace,
    owner: {},
    fork: api_project.try(:forked_from_project).present?,
    has_issues: api_project.issues_enabled,
    has_wiki: api_project.wiki_enabled,
    scm: "git",
    is_private: api_project.visibility != "public",
    keywords: api_project.topics,
    parent: {
      full_name: api_project.try(:forked_from_project).try(:path_with_namespace),
    },
    homepage: api_project.web_url,
    # The license object can be nil (or missing) for a project with no license,
    # so read the key defensively, the same way forked_from_project is read,
    # instead of calling .key on nil.
    license: api_project.try(:license).try(:key),
    repository_size: 0, # locked to admins only?,
    language: nil # separate API endpoint that doesn't seem to be supported by the API gem we use
  )
end

# Translates a Bitbucket API project object into RepositoryHost::RawUpstreamData.
#
# archived, keywords and license are hard-coded here (false, [] and nil)
# rather than read from the project.
#
# @param api_project project object from the Bitbucket API client
# @return [RepositoryHost::RawUpstreamData]
def self.convert_from_bitbucket_api(api_project)
  main_branch_name = api_project.fetch("mainbranch", {}).try(:fetch, "name", nil)
  upstream_full_name = api_project.fetch("parent", {}).try(:fetch, "full_name", nil)
  scaled_size = api_project[:size].to_f / 1000

  RepositoryHost::RawUpstreamData.new(
    # fixed values
    host_type: "Bitbucket",
    archived: false,
    keywords: [],
    license: nil,
    # identity
    repository_uuid: api_project.uuid,
    full_name: api_project.full_name,
    name: api_project.name,
    owner: api_project.owner,
    # descriptive fields
    description: api_project.description,
    language: api_project.language,
    homepage: api_project.website,
    scm: api_project.scm,
    default_branch: main_branch_name,
    repository_size: scaled_size,
    # flags
    has_issues: api_project.has_issues,
    has_wiki: api_project.has_wiki,
    is_private: api_project.is_private,
    # fork relationship
    fork: api_project.parent.present?,
    parent: { full_name: upstream_full_name }
  )
end
end