From a05f64df70baf76dd14f0eb4c9d9fc461f02ee37 Mon Sep 17 00:00:00 2001 From: Mike Young Date: Mon, 25 Mar 2024 10:28:46 -0400 Subject: [PATCH 01/12] use a class to pass around repository host data --- app/models/repository.rb | 24 ++- app/models/repository_host/base.rb | 20 +- app/models/repository_host/bitbucket.rb | 23 +-- .../bitbucket_repository_host_data_factory.rb | 31 ++++ app/models/repository_host/github.rb | 5 +- .../github_repository_host_data_factory.rb | 29 +++ app/models/repository_host/gitlab.rb | 24 +-- .../gitlab_repository_host_data_factory.rb | 31 ++++ .../repository_host/repository_host_data.rb | 89 +++++++++ .../vcr_cassettes/bitbucket/node_iostat.yml | 174 ++++++++++++++++++ spec/fixtures/vcr_cassettes/gitlab/ase.yml | 84 +++++++++ .../google-maven/memory-advice.yml | 116 ++++++++++++ spec/models/repository_host/bitbucket_spec.rb | 16 ++ spec/models/repository_host/gitlab_spec.rb | 10 +- 14 files changed, 606 insertions(+), 70 deletions(-) create mode 100644 app/models/repository_host/bitbucket/bitbucket_repository_host_data_factory.rb create mode 100644 app/models/repository_host/github/github_repository_host_data_factory.rb create mode 100644 app/models/repository_host/gitlab/gitlab_repository_host_data_factory.rb create mode 100644 app/models/repository_host/repository_host_data.rb create mode 100644 spec/fixtures/vcr_cassettes/bitbucket/node_iostat.yml create mode 100644 spec/fixtures/vcr_cassettes/gitlab/ase.yml create mode 100644 spec/models/repository_host/bitbucket_spec.rb diff --git a/app/models/repository.rb b/app/models/repository.rb index 45c9c5fc3..36ad8742b 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -310,23 +310,21 @@ def self.create_from_host(host_type, full_name, token = nil) RepositoryHost.const_get(host_type.capitalize).create(full_name, token) end - def self.create_from_hash(repo_hash) - return unless repo_hash - - repo_hash = repo_hash.to_hash.with_indifferent_access + def 
self.create_from_hash(repo_host_data) + return unless repo_host_data ActiveRecord::Base.transaction do - g = Repository.where(host_type: (repo_hash[:host_type] || "GitHub")).find_by(uuid: repo_hash[:id]) - g = Repository.host(repo_hash[:host_type] || "GitHub").find_by("lower(full_name) = ?", repo_hash[:full_name].downcase) if g.nil? - g = Repository.new(uuid: repo_hash[:id], full_name: repo_hash[:full_name]) if g.nil? - g.host_type = repo_hash[:host_type] || "GitHub" - g.full_name = repo_hash[:full_name] if g.full_name.downcase != repo_hash[:full_name].downcase - g.uuid = repo_hash[:id] if g.uuid.nil? - g.license = repo_hash[:license][:key] if repo_hash[:license] - g.source_name = (repo_hash[:parent][:full_name] if repo_hash[:fork] && repo_hash[:parent]) + g = Repository.where(host_type: (repo_host_data.host_type || "GitHub")).find_by(uuid: repo_host_data.repository_uuid) + g = Repository.host(repo_host_data.host_type || "GitHub").find_by("lower(full_name) = ?", repo_host_data.full_name.downcase) if g.nil? + g = Repository.new(uuid: repo_host_data.repository_uuid, full_name: repo_host_data.full_name) if g.nil? + g.host_type = repo_host_data.host_type || "GitHub" + g.full_name = repo_host_data.full_name if g.full_name.downcase != repo_host_data.full_name.downcase + g.uuid = repo_host_data.repository_uuid if g.uuid.nil? + g.license = repo_host_data.formatted_license if repo_host_data.formatted_license + g.source_name = (repo_host_data.source_name if repo_host_data.source_name.present?) g.status = g.correct_status_from_upstream(archived_upstream: repo_hash[:archived]) - g.assign_attributes repo_hash.slice(*Repository::API_FIELDS) + g.assign_attributes repo_host_data.to_repository_attrs if g.changed? g.save ? 
g : nil diff --git a/app/models/repository_host/base.rb b/app/models/repository_host/base.rb index 92dead6db..4f7eb4bab 100644 --- a/app/models/repository_host/base.rb +++ b/app/models/repository_host/base.rb @@ -85,22 +85,22 @@ def repository_owner_class end def update_from_host(token = nil) - r = self.class.fetch_repo(repository.id_or_name) - return unless r.present? + repo_data = self.class.fetch_repo(repository.id_or_name) + return unless repo_data.present? - repository.uuid = r[:id] unless repository.uuid.to_s == r[:id].to_s - if repository.full_name.downcase != r[:full_name].downcase - clash = Repository.host(r[:host_type]).where("lower(full_name) = ?", r[:full_name].downcase).first + # repository.uuid = r[:id] unless repository.uuid.to_s == r[:id].to_s + if repository.full_name.downcase != repo_data.full_name.downcase + clash = Repository.host(repo_data.host_type).where("lower(full_name) = ?", repo_data.full_name.downcase).first clash.destroy if clash && (!clash.repository_host.update_from_host(token) || clash.status == "Removed") - repository.full_name = r[:full_name] + repository.full_name = repo_data.full_name end - repository.license = Project.format_license(r[:license][:key]) if r[:license] - repository.source_name = (r[:parent][:full_name] if r[:fork]) + # repository.license = Project.format_license(r[:license][:key]) if r[:license] + # repository.source_name = (r[:parent][:full_name] if r[:fork]) # set unmaintained status for the Repository based on if the repository has been archived upstream # if the Repository already has another status then just leave it alone - repository.status = repository.correct_status_from_upstream(archived_upstream: r[:archived]) - repository.assign_attributes r.slice(*Repository::API_FIELDS) + repository.status = repository.correct_status_from_upstream(archived_upstream: repo_data.archived) + repository.assign_attributes(repo_data.to_repository_attrs.slice(*Repository::API_FIELDS)) repository.save! if repository.changed? 
rescue self.class.api_missing_error_class repository.update_attribute(:status, "Removed") unless repository.private? diff --git a/app/models/repository_host/bitbucket.rb b/app/models/repository_host/bitbucket.rb index d90ad30d8..75ad46d4c 100644 --- a/app/models/repository_host/bitbucket.rb +++ b/app/models/repository_host/bitbucket.rb @@ -182,27 +182,8 @@ def self.fetch_repo(full_name, token = nil) client = api_client(token) user_name, repo_name = full_name.split("/") project = client.repos.get(user_name, repo_name) - v1_project = client.repos.get(user_name, repo_name, api_version: "1.0") - repo_hash = project.to_hash.with_indifferent_access.slice(:description, :language, :full_name, :name, :has_wiki, :has_issues, :scm) - - repo_hash.merge!({ - id: project.uuid, - host_type: "Bitbucket", - owner: {}, - homepage: project.website, - fork: project.parent.present?, - created_at: project.created_on, - updated_at: project.updated_on, - subscribers_count: v1_project.followers_count, - forks_count: v1_project.forks_count, - default_branch: project.fetch("mainbranch", {}).try(:fetch, "name", nil), - private: project.is_private, - size: project[:size].to_f / 1000, - parent: { - full_name: project.fetch("parent", {}).fetch("full_name", nil), - }, - archived: false, - }) + + BitbucketRepositoryHostDataFactory.generate_from_api(project) rescue *IGNORABLE_EXCEPTIONS nil end diff --git a/app/models/repository_host/bitbucket/bitbucket_repository_host_data_factory.rb b/app/models/repository_host/bitbucket/bitbucket_repository_host_data_factory.rb new file mode 100644 index 000000000..6bd1786ee --- /dev/null +++ b/app/models/repository_host/bitbucket/bitbucket_repository_host_data_factory.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +class RepositoryHost::Bitbucket::BitbucketRepositoryHostDataFactory + def self.generate_from_api(api_project) + input_hash = { + description: api_project.description, + language: api_project.language, + full_name: api_project.full_name, + name: 
api_project.name, + has_wiki: api_project.has_wiki, + has_issues: api_project.has_issues, + scm: api_project.scm, + repository_uuid: api_project.uuid, + host_type: "Bitbucket", + owner: api_project.owner, + homepage: api_project.website, + fork: api_project.parent.present?, + default_branch: api_project.fetch("mainbranch", {}).try(:fetch, "name", nil), + private: api_project.is_private, + size: api_project[:size].to_f / 1000, + parent: { + full_name: api_project.fetch("parent", {}).try(:fetch, "full_name", nil), + }, + archived: false, + keywords: [], + license: nil, + } + + RepositoryHost::RepositoryHostData.new(**input_hash) + end +end diff --git a/app/models/repository_host/github.rb b/app/models/repository_host/github.rb index 29fa66ab1..a28933d7c 100644 --- a/app/models/repository_host/github.rb +++ b/app/models/repository_host/github.rb @@ -59,10 +59,7 @@ def commits_url(author = nil) def self.fetch_repo(id_or_name, token = nil) id_or_name = id_or_name.to_i if id_or_name.match(/\A\d+\Z/) hash = AuthToken.fallback_client(token).repo(id_or_name, accept: "application/vnd.github.drax-preview+json,application/vnd.github.mercy-preview+json").to_hash - hash[:keywords] = hash[:topics] - hash[:host_type] = "GitHub" - hash[:scm] = "git" - hash + GithubRepositoryHostDataFactory.generate_from_api(hash) rescue *IGNORABLE_EXCEPTIONS nil end diff --git a/app/models/repository_host/github/github_repository_host_data_factory.rb b/app/models/repository_host/github/github_repository_host_data_factory.rb new file mode 100644 index 000000000..0028309d5 --- /dev/null +++ b/app/models/repository_host/github/github_repository_host_data_factory.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +class RepositoryHost::Github::GithubRepositoryHostDataFactory + def self.generate_from_api(upstream_repository_data_hash) + input_hash = { + repository_uuid: upstream_repository_data_hash[:id], + archived: upstream_repository_data_hash[:archived], + default_branch: 
upstream_repository_data_hash[:default_branch], + description: upstream_repository_data_hash[:description], + fork: upstream_repository_data_hash[:fork], + full_name: upstream_repository_data_hash[:full_name], + has_issues: upstream_repository_data_hash[:has_issues], + has_wiki: upstream_repository_data_hash[:has_wiki], + homepage: upstream_repository_data_hash[:homepage], + host_type: "GitHub", + keywords: upstream_repository_data_hash[:topics], + language: upstream_repository_data_hash[:language], + license: upstream_repository_data_hash.dig(:license, :key), + name: upstream_repository_data_hash[:name], + owner: upstream_repository_data_hash[:owner], + parent: upstream_repository_data_hash[:parent], + private: upstream_repository_data_hash[:private], + scm: "git", + size: upstream_repository_data_hash[:size], + } + + RepositoryHost::RepositoryHostData.new(**input_hash) + end +end diff --git a/app/models/repository_host/gitlab.rb b/app/models/repository_host/gitlab.rb index d8bda4a19..be9c37d63 100644 --- a/app/models/repository_host/gitlab.rb +++ b/app/models/repository_host/gitlab.rb @@ -162,27 +162,9 @@ def self.recursive_gitlab_repos(page_number = 1, limit = 5, order = "created_asc end def self.fetch_repo(full_name, token = nil) - project = api_client(token).project(full_name) - repo_hash = project.to_hash.with_indifferent_access.slice(:id, :description, :created_at, :name, :open_issues_count, :forks_count, :default_branch, :archived) - - repo_hash.merge!({ - host_type: "GitLab", - full_name: project.path_with_namespace, - owner: {}, - fork: project.try(:forked_from_project).present?, - updated_at: project.last_activity_at, - stargazers_count: project.star_count, - has_issues: project.issues_enabled, - has_wiki: project.wiki_enabled, - scm: "git", - private: project.visibility != "public", - pull_requests_enabled: project.merge_requests_enabled, - logo_url: project.avatar_url, - keywords: project.tag_list, - parent: { - full_name: 
project.try(:forked_from_project).try(:path_with_namespace), - }, - }) + project = api_client(token).project(full_name, { license: true }) + + GitlabRepositoryHostDataFactory.generate_from_api(project) rescue *IGNORABLE_EXCEPTIONS nil end diff --git a/app/models/repository_host/gitlab/gitlab_repository_host_data_factory.rb b/app/models/repository_host/gitlab/gitlab_repository_host_data_factory.rb new file mode 100644 index 000000000..7ca9f5362 --- /dev/null +++ b/app/models/repository_host/gitlab/gitlab_repository_host_data_factory.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +class RepositoryHost::Gitlab::GitlabRepositoryHostDataFactory + def self.generate_from_api(api_project) + repo_hash = { + repository_uuid: api_project.id, + description: api_project.description, + name: api_project.name, + default_branch: api_project.default_branch, + archived: api_project.archived, + host_type: "GitLab", + full_name: api_project.path_with_namespace, + owner: {}, + fork: api_project.try(:forked_from_project).present?, + has_issues: api_project.issues_enabled, + has_wiki: api_project.wiki_enabled, + scm: "git", + private: api_project.visibility != "public", + keywords: api_project.topics, + parent: { + full_name: api_project.try(:forked_from_project).try(:path_with_namespace), + }, + homepage: api_project.web_url, + license: api_project.license.key, + size: 0, # locked to admins only?, + language: nil, # separate API endpoint that doesn't seem to be supported by the API gem we use + } + + RepositoryHost::RepositoryHostData.new(**repo_hash) + end +end diff --git a/app/models/repository_host/repository_host_data.rb b/app/models/repository_host/repository_host_data.rb new file mode 100644 index 000000000..94c0e2dd2 --- /dev/null +++ b/app/models/repository_host/repository_host_data.rb @@ -0,0 +1,89 @@ +# frozen_string_literal: true + +# This class is meant to be a facade over the raw upstream data coming +# from the different repository hosts' repository data. 
Its main goal +# is to standardize the output from each repository host into a concrete +# set of data so we can make sure the raw data is being mapped to the same +# schema within the Libraries.io models and code. +class RepositoryHost::RepositoryHostData + attr_reader :archived, :default_branch, :description, :fork, :full_name, :has_issues, :has_wiki, :homepage, :host_type, + :keywords, :language, :license, :name, :owner, :parent, :private, :repository_uuid, :scm, :size + + def initialize( + archived:, + default_branch:, + description:, + fork:, + full_name:, + has_issues:, + has_wiki:, + homepage:, + host_type:, + keywords:, + language:, + license:, + name:, + owner:, + parent:, + private:, + repository_uuid:, + scm:, + size:, + **kwargs + ) + @archived = archived + @default_branch = default_branch + @description = description + @fork = fork + @full_name = full_name + @has_issues = has_issues + @has_wiki = has_wiki + @homepage = homepage + @host_type = host_type + @keywords = keywords + @language = language + @license = license + @name = name + @owner = owner + @parent = parent + @private = private + @repository_uuid = repository_uuid + @scm = scm + @size = size + + raise "Unexpected arguments sent: #{kwargs.keys.join(', ')}" unless kwargs.keys.empty? 
+ end + + def to_repository_attrs + attrs = { + default_branch: default_branch, + description: description, + full_name: full_name, + has_issues: has_issues, + has_wiki: has_wiki, + homepage: homepage, + host_type: host_type, + keywords: keywords, + language: language, + license: formatted_license, + name: name, + private: private, + scm: scm, + size: size, + uuid: repository_uuid, + } + attrs[:source_name] = source_name if fork + + attrs + end + + def formatted_license + if license + Project.format_license(license) + end + end + + def source_name + parent[:full_name] if fork + end +end diff --git a/spec/fixtures/vcr_cassettes/bitbucket/node_iostat.yml b/spec/fixtures/vcr_cassettes/bitbucket/node_iostat.yml new file mode 100644 index 000000000..28aa512bb --- /dev/null +++ b/spec/fixtures/vcr_cassettes/bitbucket/node_iostat.yml @@ -0,0 +1,174 @@ +--- +http_interactions: +- request: + method: get + uri: https://api.bitbucket.org/2.0/repositories/codekoala/node-iostat + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - BitBucket Ruby Gem 0.1.7 + X-Datadog-Trace-Id: + - '4557842422794699314' + X-Datadog-Parent-Id: + - '205080452599422352' + X-Datadog-Sampling-Priority: + - '1' + X-Datadog-Tags: + - _dd.p.dm=-0 + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Server: + - envoy + Vary: + - Authorization, Origin, Accept-Encoding + Cache-Control: + - max-age=900 + Content-Type: + - application/json; charset=utf-8 + X-B3-Traceid: + - de1f0bf40868eb74 + X-Usage-Output-Ops: + - '0' + X-Used-Mesh: + - 'False' + X-Dc-Location: + - Micros-3 + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Date: + - Fri, 22 Mar 2024 19:39:32 GMT + X-Request-Id: + - de1f0bf40868eb74 + X-Usage-User-Time: + - '0.045665' + X-Usage-System-Time: + - '0.002523' + X-Served-By: + - cc3f4f04269b + X-Xss-Protection: + - 1; mode=block + Transfer-Encoding: + - 
chunked + X-Envoy-Upstream-Service-Time: + - '75' + X-View-Name: + - bitbucket.apps.repo2.api.v20.repo.RepositoryHandler + X-B3-Spanid: + - 39ceb53a82d0b2bc + Etag: + - '"5f33a27eb5af6217d32855db5addc5ec"' + X-Static-Version: + - e379ea7f1d4a + X-Content-Type-Options: + - nosniff + X-Accepted-Oauth-Scopes: + - repository + X-Render-Time: + - '0.06647086143493652' + X-Trace-Id: + - de1f0bf40868eb74 + Connection: + - keep-alive + X-Usage-Input-Ops: + - '0' + X-Version: + - e379ea7f1d4a + X-Request-Count: + - '1938' + X-Frame-Options: + - SAMEORIGIN + X-Cache-Info: + - caching + body: + encoding: ASCII-8BIT + string: '{"type": "repository", "full_name": "codekoala/node-iostat", "links": + {"self": {"href": "https://api.bitbucket.org/2.0/repositories/codekoala/node-iostat"}, + "html": {"href": "https://bitbucket.org/codekoala/node-iostat"}, "avatar": + {"href": "https://bytebucket.org/ravatar/%7B219c49b1-9aad-4b14-a697-b672377baebb%7D?ts=js"}, + "pullrequests": {"href": "https://api.bitbucket.org/2.0/repositories/codekoala/node-iostat/pullrequests"}, + "commits": {"href": "https://api.bitbucket.org/2.0/repositories/codekoala/node-iostat/commits"}, + "forks": {"href": "https://api.bitbucket.org/2.0/repositories/codekoala/node-iostat/forks"}, + "watchers": {"href": "https://api.bitbucket.org/2.0/repositories/codekoala/node-iostat/watchers"}, + "branches": {"href": "https://api.bitbucket.org/2.0/repositories/codekoala/node-iostat/refs/branches"}, + "tags": {"href": "https://api.bitbucket.org/2.0/repositories/codekoala/node-iostat/refs/tags"}, + "downloads": {"href": "https://api.bitbucket.org/2.0/repositories/codekoala/node-iostat/downloads"}, + "source": {"href": "https://api.bitbucket.org/2.0/repositories/codekoala/node-iostat/src"}, + "clone": [{"name": "https", "href": "https://bitbucket.org/codekoala/node-iostat.git"}, + {"name": "ssh", "href": "git@bitbucket.org:codekoala/node-iostat.git"}], "issues": + {"href": 
"https://api.bitbucket.org/2.0/repositories/codekoala/node-iostat/issues"}, + "hooks": {"href": "https://api.bitbucket.org/2.0/repositories/codekoala/node-iostat/hooks"}}, + "name": "node-iostat", "slug": "node-iostat", "description": "Uses Node.js + and flot to produce a \"real-time\" graph of data from iostat.", "scm": "git", + "website": "http://www.codekoala.com/", "owner": {"display_name": "Josh VanderLinden", + "links": {"self": {"href": "https://api.bitbucket.org/2.0/users/%7B3afa667d-3308-4857-986d-fd84e463051c%7D"}, + "avatar": {"href": "https://secure.gravatar.com/avatar/dbfb3f4586ef3e9695911dd55c63da42?d=https%3A%2F%2Favatar-management--avatars.us-west-2.prod.public.atl-paas.net%2Finitials%2FJV-0.png"}, + "html": {"href": "https://bitbucket.org/%7B3afa667d-3308-4857-986d-fd84e463051c%7D/"}}, + "type": "user", "uuid": "{3afa667d-3308-4857-986d-fd84e463051c}", "account_id": + "557058:d443575b-8557-44c0-8542-f57d6f33c0ac", "nickname": "codekoala"}, "workspace": + {"type": "workspace", "uuid": "{3afa667d-3308-4857-986d-fd84e463051c}", "name": + "Josh VanderLinden", "slug": "codekoala", "links": {"avatar": {"href": "https://bitbucket.org/workspaces/codekoala/avatar/?ts=1543457468"}, + "html": {"href": "https://bitbucket.org/codekoala/"}, "self": {"href": "https://api.bitbucket.org/2.0/workspaces/codekoala"}}}, + "is_private": false, "project": {"type": "project", "key": "PROJ", "uuid": + "{66910831-6b20-41b2-9c5c-bed0000e9fff}", "name": "Untitled project", "links": + {"self": {"href": "https://api.bitbucket.org/2.0/workspaces/codekoala/projects/PROJ"}, + "html": {"href": "https://bitbucket.org/codekoala/workspace/projects/PROJ"}, + "avatar": {"href": "https://bitbucket.org/account/user/codekoala/projects/PROJ/avatar/32?ts=1543457468"}}}, + "fork_policy": "allow_forks", "created_on": "2012-02-08T20:33:39.336628+00:00", + "updated_on": "2012-02-12T07:02:41.642077+00:00", "size": 118925, "language": + "javascript", "uuid": 
"{219c49b1-9aad-4b14-a697-b672377baebb}", "mainbranch": + {"name": "master", "type": "branch"}, "override_settings": {"default_merge_strategy": + false, "branching_model": false}, "parent": null, "has_issues": true, "has_wiki": + false}' + recorded_at: Fri, 22 Mar 2024 19:39:32 GMT +- request: + method: get + uri: https://api.bitbucket.org/1.0/repositories/codekoala/node-iostat?api_version=1.0 + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - BitBucket Ruby Gem 0.1.7 + X-Datadog-Trace-Id: + - '1626514665168621202' + X-Datadog-Parent-Id: + - '1804328855498564914' + X-Datadog-Sampling-Priority: + - '1' + X-Datadog-Tags: + - _dd.p.dm=-0 + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 410 + message: Gone + headers: + Vary: + - User-Agent + Content-Type: + - application/json; charset=utf-8 + Date: + - Fri, 22 Mar 2024 19:39:32 GMT + Connection: + - Keep-Alive + Content-Length: + - '262' + body: + encoding: UTF-8 + string: '{"type": "error", "error": {"message": "Resource removed", "detail": + "This API is no longer supported.\n\nFor information about its removal, please + refer to the deprecation notice at: https://developer.atlassian.com/cloud/bitbucket/deprecation-notice-v1-apis/"}}' + recorded_at: Fri, 22 Mar 2024 19:39:32 GMT +recorded_with: VCR 6.2.0 diff --git a/spec/fixtures/vcr_cassettes/gitlab/ase.yml b/spec/fixtures/vcr_cassettes/gitlab/ase.yml new file mode 100644 index 000000000..7ee8f4749 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/gitlab/ase.yml @@ -0,0 +1,84 @@ +--- +http_interactions: +- request: + method: get + uri: https://gitlab.com/api/v4/projects/ase%2Fase?license=true + body: + encoding: US-ASCII + string: '' + headers: + Accept: + - application/json + Content-Type: + - application/x-www-form-urlencoded + User-Agent: + - Gitlab Ruby Gem 4.19.0 + Authorization: + - Bearer TEST_TOKEN + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + response: + 
status: + code: 200 + message: OK + headers: + Date: + - Fri, 22 Mar 2024 21:18:35 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Cache-Control: + - max-age=0, private, must-revalidate + Content-Security-Policy: + - default-src 'none' + Etag: + - W/"3eac09171bbe56185f17b20bee622a9a" + Vary: + - Origin, Accept-Encoding + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-Gitlab-Meta: + - '{"correlation_id":"c33be4ad4eb7e9d2be1e0ffc20797381","version":"1"}' + X-Request-Id: + - c33be4ad4eb7e9d2be1e0ffc20797381 + X-Runtime: + - '0.118458' + Strict-Transport-Security: + - max-age=31536000 + Referrer-Policy: + - strict-origin-when-cross-origin + Gitlab-Lb: + - haproxy-main-02-lb-gprd + Gitlab-Sv: + - gke-cny-api + Cf-Cache-Status: + - MISS + Report-To: + - '{"endpoints":[{"url":"https:\/\/a.nel.cloudflare.com\/report\/v4?s=%2F1UX%2BNiITUQW5GD6LHY2HJIb0eHicN3shpwWvhieKC%2BGZVWRAZ2ZS1pdaSU0REzRaDZNnwTjk8DZHSmG3jpScFXZpxVJ%2FmI03vji23Rf0F6h8dV%2FcvcOzYYrfrw%3D"}],"group":"cf-nel","max_age":604800}' + Nel: + - '{"success_fraction":0.01,"report_to":"cf-nel","max_age":604800}' + Set-Cookie: + - _cfuvid=QtJ9pS1HHDJr0YQaHysa2u6qay8jfEoThC5tm_4bJmY-1711142315021-0.0.1.1-604800000; + path=/; domain=.gitlab.com; HttpOnly; Secure; SameSite=None + Server: + - cloudflare + Cf-Ray: + - 8689370bc8418f79-BOS + body: + encoding: ASCII-8BIT + string: '{"id":470007,"description":"[Atomic Simulation Environment](https://wiki.fysik.dtu.dk/ase/): + A Python library for working with atoms","name":"ase","name_with_namespace":"ase + / ase","path":"ase","path_with_namespace":"ase/ase","created_at":"2015-09-17T12:32:33.814Z","default_branch":"master","tag_list":["Atomistic + simulations","chemistry","materials","physics"],"topics":["Atomistic 
simulations","chemistry","materials","physics"],"ssh_url_to_repo":"git@gitlab.com:ase/ase.git","http_url_to_repo":"https://gitlab.com/ase/ase.git","web_url":"https://gitlab.com/ase/ase","readme_url":"https://gitlab.com/ase/ase/-/blob/master/README.rst","forks_count":770,"license_url":"https://gitlab.com/ase/ase/-/blob/master/COPYING","license":{"key":"lgpl-2.1","name":"GNU + Lesser General Public License v2.1 only","nickname":"GNU LGPLv2.1","html_url":"https://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html","source_url":null},"avatar_url":"https://gitlab.com/uploads/-/system/project/avatar/470007/ase256.png","star_count":421,"last_activity_at":"2024-03-21T22:39:06.389Z","namespace":{"id":293845,"name":"ase","path":"ase","kind":"group","full_path":"ase","parent_id":null,"avatar_url":"/uploads/-/system/group/avatar/293845/users.png","web_url":"https://gitlab.com/groups/ase"},"container_registry_image_prefix":"registry.gitlab.com/ase/ase","_links":{"self":"https://gitlab.com/api/v4/projects/470007","issues":"https://gitlab.com/api/v4/projects/470007/issues","merge_requests":"https://gitlab.com/api/v4/projects/470007/merge_requests","repo_branches":"https://gitlab.com/api/v4/projects/470007/repository/branches","labels":"https://gitlab.com/api/v4/projects/470007/labels","events":"https://gitlab.com/api/v4/projects/470007/events","members":"https://gitlab.com/api/v4/projects/470007/members","cluster_agents":"https://gitlab.com/api/v4/projects/470007/cluster_agents"},"packages_enabled":true,"empty_repo":false,"archived":false,"visibility":"public","resolve_outdated_diff_discussions":false,"repository_object_format":"sha1","issues_enabled":true,"merge_requests_enabled":true,"wiki_enabled":false,"jobs_enabled":true,"snippets_enabled":false,"container_registry_enabled":true,"service_desk_enabled":null,"can_create_merge_request_in":true,"issues_access_level":"enabled","repository_access_level":"enabled","merge_requests_access_level":"enabled","forking_access_level
":"enabled","wiki_access_level":"disabled","builds_access_level":"enabled","snippets_access_level":"disabled","pages_access_level":"enabled","analytics_access_level":"enabled","container_registry_access_level":"enabled","security_and_compliance_access_level":"private","releases_access_level":"enabled","environments_access_level":"enabled","feature_flags_access_level":"enabled","infrastructure_access_level":"enabled","monitor_access_level":"enabled","model_experiments_access_level":"enabled","model_registry_access_level":"enabled","emails_disabled":false,"emails_enabled":true,"shared_runners_enabled":true,"lfs_enabled":true,"creator_id":71312,"import_status":"none","open_issues_count":515,"description_html":"\u003cp + data-sourcepos=\"1:1-1:104\" dir=\"auto\"\u003e\u003ca data-sourcepos=\"1:1-1:63\" + href=\"https://wiki.fysik.dtu.dk/ase/\" rel=\"nofollow noreferrer noopener\" + target=\"_blank\"\u003eAtomic Simulation Environment\u003c/a\u003e: A Python + library for working with atoms\u003c/p\u003e","updated_at":"2024-03-21T22:39:06.389Z","ci_config_path":"","public_jobs":true,"shared_with_groups":[],"only_allow_merge_if_pipeline_succeeds":false,"allow_merge_on_skipped_pipeline":null,"request_access_enabled":true,"only_allow_merge_if_all_discussions_are_resolved":false,"remove_source_branch_after_merge":true,"printing_merge_request_link_enabled":true,"merge_method":"merge","squash_option":"default_off","enforce_auth_checks_on_uploads":true,"suggestion_commit_message":null,"merge_commit_template":null,"squash_commit_template":null,"issue_branch_template":null,"warn_about_potentially_unwanted_characters":true,"autoclose_referenced_issues":true,"approvals_before_merge":0,"mirror":false,"external_authorization_classification_label":"","marked_for_deletion_at":null,"marked_for_deletion_on":null,"requirements_enabled":true,"requirements_access_level":"enabled","security_and_compliance_enabled":false,"compliance_frameworks":[],"issues_template":"","merge_requests_template
":"","merge_pipelines_enabled":false,"merge_trains_enabled":false,"merge_trains_skip_train_allowed":false,"only_allow_merge_if_all_status_checks_passed":false,"allow_pipeline_trigger_approve_deployment":false,"prevent_merge_without_jira_issue":false,"permissions":{"project_access":null,"group_access":null}}' + recorded_at: Fri, 22 Mar 2024 21:18:34 GMT +recorded_with: VCR 6.2.0 diff --git a/spec/fixtures/vcr_cassettes/google-maven/memory-advice.yml b/spec/fixtures/vcr_cassettes/google-maven/memory-advice.yml index b419f215b..7029f57f8 100644 --- a/spec/fixtures/vcr_cassettes/google-maven/memory-advice.yml +++ b/spec/fixtures/vcr_cassettes/google-maven/memory-advice.yml @@ -355,4 +355,120 @@ http_interactions: recorded_at: Tue, 14 Nov 2023 20:26:52 GMT +- request: + method: get + uri: https://dl.google.com/dl/android/maven2/com/google/android/games/memory-advice/0.22/memory-advice-0.22.pom + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - Faraday v0.17.6 + Accept-Encoding: + - gzip,deflate,br + X-Datadog-Trace-Id: + - '2463807712154666496' + X-Datadog-Parent-Id: + - '526226468481262336' + X-Datadog-Sampling-Priority: + - '1' + X-Datadog-Tags: + - _dd.p.dm=-0 + Expect: + - '' + response: + status: + code: 200 + message: '' + headers: + Accept-Ranges: + - bytes + Cache-Control: + - public,max-age=86400 + Content-Disposition: + - attachment + Content-Length: + - '2286' + Content-Security-Policy: + - default-src 'none' + Content-Type: + - application/octet-stream + Etag: + - '"aaf84f"' + Last-Modified: + - Mon, 06 Sep 2021 15:18:46 GMT + Server: + - downloads + Vary: + - Origin + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-Xss-Protection: + - '0' + Date: + - Fri, 22 Mar 2024 19:57:27 GMT + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + body: + encoding: ASCII-8BIT + string: | + + + + + + + + 4.0.0 + com.google.android.games + memory-advice + 0.22 + aar + The Android Memory Assistance API + An experimental 
library to help applications avoid exceeding safe limits of memory use on devices. + https://android.googlesource.com/platform/frameworks/opt/gamesdk/+/refs/heads/master/test/memoryadvice + + + The Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + + + + + com.google.android.games + memory-advice-common + 0.22 + runtime + + + androidx.lifecycle + lifecycle-extensions + 2.2.0 + runtime + + + androidx.multidex + multidex + 2.0.1 + runtime + + + com.fasterxml.jackson.core + jackson-databind + 2.12.2 + runtime + + + org.tensorflow + tensorflow-lite + 2.4.0 + runtime + + + + recorded_at: Fri, 22 Mar 2024 19:57:27 GMT recorded_with: VCR 6.2.0 diff --git a/spec/models/repository_host/bitbucket_spec.rb b/spec/models/repository_host/bitbucket_spec.rb new file mode 100644 index 000000000..ad76d8270 --- /dev/null +++ b/spec/models/repository_host/bitbucket_spec.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +require "rails_helper" + +describe RepositoryHost::Bitbucket do + let(:repository) { build(:repository, host_type: "Bitbucket", full_name: "codekoala/node-iostat") } + let(:repository_host) { described_class.new(repository) } + let(:api_token) { "TEST_TOKEN" } + + it "can fetch repository data" do + VCR.use_cassette("bitbucket/node_iostat") do + repository_data = repository_host.class.fetch_repo(repository.id_or_name, api_token) + expect(repository_data).not_to be_nil + end + end +end diff --git a/spec/models/repository_host/gitlab_spec.rb b/spec/models/repository_host/gitlab_spec.rb index feb03f1e0..c58caf08f 100644 --- a/spec/models/repository_host/gitlab_spec.rb +++ b/spec/models/repository_host/gitlab_spec.rb @@ -3,6 +3,14 @@ require "rails_helper" describe RepositoryHost::Gitlab do - let(:repository) { build(:repository, host_type: "GitLab") } + let(:repository) { build(:repository, host_type: "GitLab", full_name: "ase/ase") } let(:repository_host) { described_class.new(repository) } + let(:api_token) { "TEST_TOKEN" } 
+ + it "can fetch repository data" do + VCR.use_cassette("gitlab/ase") do + repository_data = described_class.fetch_repo(repository.id_or_name, api_token) + expect(repository_data).not_to be_nil + end + end end From 9cfed08009e90498cb94c459df4b479dc959ed18 Mon Sep 17 00:00:00 2001 From: Mike Young Date: Mon, 25 Mar 2024 11:02:00 -0400 Subject: [PATCH 02/12] don't use private as var name --- .../bitbucket/bitbucket_repository_host_data_factory.rb | 2 +- .../github/github_repository_host_data_factory.rb | 2 +- .../gitlab/gitlab_repository_host_data_factory.rb | 2 +- app/models/repository_host/repository_host_data.rb | 8 ++++---- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/app/models/repository_host/bitbucket/bitbucket_repository_host_data_factory.rb b/app/models/repository_host/bitbucket/bitbucket_repository_host_data_factory.rb index 6bd1786ee..9e9d22191 100644 --- a/app/models/repository_host/bitbucket/bitbucket_repository_host_data_factory.rb +++ b/app/models/repository_host/bitbucket/bitbucket_repository_host_data_factory.rb @@ -16,7 +16,7 @@ def self.generate_from_api(api_project) homepage: api_project.website, fork: api_project.parent.present?, default_branch: api_project.fetch("mainbranch", {}).try(:fetch, "name", nil), - private: api_project.is_private, + is_private: api_project.is_private, size: api_project[:size].to_f / 1000, parent: { full_name: api_project.fetch("parent", {}).try(:fetch, "full_name", nil), diff --git a/app/models/repository_host/github/github_repository_host_data_factory.rb b/app/models/repository_host/github/github_repository_host_data_factory.rb index 0028309d5..5b54e4a23 100644 --- a/app/models/repository_host/github/github_repository_host_data_factory.rb +++ b/app/models/repository_host/github/github_repository_host_data_factory.rb @@ -19,7 +19,7 @@ def self.generate_from_api(upstream_repository_data_hash) name: upstream_repository_data_hash[:name], owner: upstream_repository_data_hash[:owner], parent: 
upstream_repository_data_hash[:parent], - private: upstream_repository_data_hash[:private], + is_private: upstream_repository_data_hash[:private], scm: "git", size: upstream_repository_data_hash[:size], } diff --git a/app/models/repository_host/gitlab/gitlab_repository_host_data_factory.rb b/app/models/repository_host/gitlab/gitlab_repository_host_data_factory.rb index 7ca9f5362..ef74889a6 100644 --- a/app/models/repository_host/gitlab/gitlab_repository_host_data_factory.rb +++ b/app/models/repository_host/gitlab/gitlab_repository_host_data_factory.rb @@ -15,7 +15,7 @@ def self.generate_from_api(api_project) has_issues: api_project.issues_enabled, has_wiki: api_project.wiki_enabled, scm: "git", - private: api_project.visibility != "public", + is_private: api_project.visibility != "public", keywords: api_project.topics, parent: { full_name: api_project.try(:forked_from_project).try(:path_with_namespace), diff --git a/app/models/repository_host/repository_host_data.rb b/app/models/repository_host/repository_host_data.rb index 94c0e2dd2..ba0e8ae10 100644 --- a/app/models/repository_host/repository_host_data.rb +++ b/app/models/repository_host/repository_host_data.rb @@ -7,7 +7,7 @@ # schema within the Libraries.io models and code. 
class RepositoryHost::RepositoryHostData attr_reader :archived, :default_branch, :description, :fork, :full_name, :has_issues, :has_wiki, :homepage, :host_type, - :keywords, :language, :license, :name, :owner, :parent, :private, :repository_uuid, :scm, :size + :keywords, :language, :license, :name, :owner, :parent, :is_private, :repository_uuid, :scm, :size def initialize( archived:, @@ -25,7 +25,7 @@ def initialize( name:, owner:, parent:, - private:, + is_private:, repository_uuid:, scm:, size:, @@ -46,7 +46,7 @@ def initialize( @name = name @owner = owner @parent = parent - @private = private + @private = is_private @repository_uuid = repository_uuid @scm = scm @size = size @@ -67,7 +67,7 @@ def to_repository_attrs language: language, license: formatted_license, name: name, - private: private, + private: is_private, scm: scm, size: size, uuid: repository_uuid, From 5d4dedcd3c920922441889d9f453974343571b79 Mon Sep 17 00:00:00 2001 From: Mike Young Date: Mon, 25 Mar 2024 11:09:41 -0400 Subject: [PATCH 03/12] remove some commented code --- app/models/repository_host/base.rb | 3 --- 1 file changed, 3 deletions(-) diff --git a/app/models/repository_host/base.rb b/app/models/repository_host/base.rb index 4f7eb4bab..da6a63a89 100644 --- a/app/models/repository_host/base.rb +++ b/app/models/repository_host/base.rb @@ -88,14 +88,11 @@ def update_from_host(token = nil) repo_data = self.class.fetch_repo(repository.id_or_name) return unless repo_data.present? 
- # repository.uuid = r[:id] unless repository.uuid.to_s == r[:id].to_s if repository.full_name.downcase != repo_data.full_name.downcase clash = Repository.host(repo_data.host_type).where("lower(full_name) = ?", repo_data.full_name.downcase).first clash.destroy if clash && (!clash.repository_host.update_from_host(token) || clash.status == "Removed") repository.full_name = repo_data.full_name end - # repository.license = Project.format_license(r[:license][:key]) if r[:license] - # repository.source_name = (r[:parent][:full_name] if r[:fork]) # set unmaintained status for the Repository based on if the repository has been archived upstream # if the Repository already has another status then just leave it alone From 71337edc13dba4da41442938e68815c6ecaf14e5 Mon Sep 17 00:00:00 2001 From: Mike Young Date: Mon, 25 Mar 2024 11:22:08 -0400 Subject: [PATCH 04/12] add spec for github api call --- spec/models/repository_host/github_spec.rb | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 spec/models/repository_host/github_spec.rb diff --git a/spec/models/repository_host/github_spec.rb b/spec/models/repository_host/github_spec.rb new file mode 100644 index 000000000..cee2bda44 --- /dev/null +++ b/spec/models/repository_host/github_spec.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +require "rails_helper" + +describe RepositoryHost::Github do + let(:repository) { build(:repository, host_type: "GitLab", full_name: "vuejs/vue") } + let(:repository_host) { described_class.new(repository) } + let(:api_token) { "TEST_TOKEN" } + + it "can fetch repository data" do + VCR.insert_cassette("github/vue") do + repository_data = described_class.fetch_repo(repository.id_or_name, api_token) + expect(repository_data).not_to be_nil + end + end +end From 6c312c2f451fcd21e1d601a8f86e4633f6ed4237 Mon Sep 17 00:00:00 2001 From: Mike Young Date: Wed, 27 Mar 2024 11:07:21 -0400 Subject: [PATCH 05/12] change to struct and some fixes --- app/models/repository.rb | 2 +- 
.../bitbucket_repository_host_data_factory.rb | 2 +- .../github_repository_host_data_factory.rb | 2 +- .../gitlab_repository_host_data_factory.rb | 2 +- .../repository_host/repository_host_data.rb | 56 ++----------------- spec/models/repository_host/github_spec.rb | 2 +- 6 files changed, 11 insertions(+), 55 deletions(-) diff --git a/app/models/repository.rb b/app/models/repository.rb index 36ad8742b..058d530b1 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -323,7 +323,7 @@ def self.create_from_hash(repo_host_data) g.license = repo_host_data.formatted_license if repo_host_data.formatted_license g.source_name = (repo_host_data.source_name if repo_host_data.source_name.present?) - g.status = g.correct_status_from_upstream(archived_upstream: repo_hash[:archived]) + g.status = g.correct_status_from_upstream(archived_upstream: repo_host_data[:archived]) g.assign_attributes repo_host_data.to_repository_attrs if g.changed? diff --git a/app/models/repository_host/bitbucket/bitbucket_repository_host_data_factory.rb b/app/models/repository_host/bitbucket/bitbucket_repository_host_data_factory.rb index 9e9d22191..5e0d3a170 100644 --- a/app/models/repository_host/bitbucket/bitbucket_repository_host_data_factory.rb +++ b/app/models/repository_host/bitbucket/bitbucket_repository_host_data_factory.rb @@ -17,7 +17,7 @@ def self.generate_from_api(api_project) fork: api_project.parent.present?, default_branch: api_project.fetch("mainbranch", {}).try(:fetch, "name", nil), is_private: api_project.is_private, - size: api_project[:size].to_f / 1000, + repository_size: api_project[:size].to_f / 1000, parent: { full_name: api_project.fetch("parent", {}).try(:fetch, "full_name", nil), }, diff --git a/app/models/repository_host/github/github_repository_host_data_factory.rb b/app/models/repository_host/github/github_repository_host_data_factory.rb index 5b54e4a23..d757e0741 100644 --- a/app/models/repository_host/github/github_repository_host_data_factory.rb +++ 
b/app/models/repository_host/github/github_repository_host_data_factory.rb @@ -21,7 +21,7 @@ def self.generate_from_api(upstream_repository_data_hash) parent: upstream_repository_data_hash[:parent], is_private: upstream_repository_data_hash[:private], scm: "git", - size: upstream_repository_data_hash[:size], + repository_size: upstream_repository_data_hash[:size], } RepositoryHost::RepositoryHostData.new(**input_hash) diff --git a/app/models/repository_host/gitlab/gitlab_repository_host_data_factory.rb b/app/models/repository_host/gitlab/gitlab_repository_host_data_factory.rb index ef74889a6..957683cef 100644 --- a/app/models/repository_host/gitlab/gitlab_repository_host_data_factory.rb +++ b/app/models/repository_host/gitlab/gitlab_repository_host_data_factory.rb @@ -22,7 +22,7 @@ def self.generate_from_api(api_project) }, homepage: api_project.web_url, license: api_project.license.key, - size: 0, # locked to admins only?, + repository_size: 0, # locked to admins only?, language: nil, # separate API endpoint that doesn't seem to be supported by the API gem we use } diff --git a/app/models/repository_host/repository_host_data.rb b/app/models/repository_host/repository_host_data.rb index ba0e8ae10..9bbbba322 100644 --- a/app/models/repository_host/repository_host_data.rb +++ b/app/models/repository_host/repository_host_data.rb @@ -5,55 +5,11 @@ # is to standardize the output from each repository host into a concrete # set of data so we can make sure the raw data is being mapped to the same # schema within the Libraries.io models and code. 
-class RepositoryHost::RepositoryHostData - attr_reader :archived, :default_branch, :description, :fork, :full_name, :has_issues, :has_wiki, :homepage, :host_type, - :keywords, :language, :license, :name, :owner, :parent, :is_private, :repository_uuid, :scm, :size - - def initialize( - archived:, - default_branch:, - description:, - fork:, - full_name:, - has_issues:, - has_wiki:, - homepage:, - host_type:, - keywords:, - language:, - license:, - name:, - owner:, - parent:, - is_private:, - repository_uuid:, - scm:, - size:, - **kwargs - ) - @archived = archived - @default_branch = default_branch - @description = description - @fork = fork - @full_name = full_name - @has_issues = has_issues - @has_wiki = has_wiki - @homepage = homepage - @host_type = host_type - @keywords = keywords - @language = language - @license = license - @name = name - @owner = owner - @parent = parent - @private = is_private - @repository_uuid = repository_uuid - @scm = scm - @size = size - - raise "Unexpected arguments sent: #{kwargs.keys.join(', ')}" unless kwargs.keys.empty? 
- end - +RepositoryHost::RepositoryHostData = Struct.new( + :archived, :default_branch, :description, :fork, :full_name, :has_issues, :has_wiki, :homepage, :host_type, + :keywords, :language, :license, :name, :owner, :parent, :is_private, :repository_uuid, :scm, :repository_size, + keyword_init: true +) do def to_repository_attrs attrs = { default_branch: default_branch, @@ -69,7 +25,7 @@ def to_repository_attrs name: name, private: is_private, scm: scm, - size: size, + size: repository_size, uuid: repository_uuid, } attrs[:source_name] = source_name if fork diff --git a/spec/models/repository_host/github_spec.rb b/spec/models/repository_host/github_spec.rb index cee2bda44..620f9c309 100644 --- a/spec/models/repository_host/github_spec.rb +++ b/spec/models/repository_host/github_spec.rb @@ -8,7 +8,7 @@ let(:api_token) { "TEST_TOKEN" } it "can fetch repository data" do - VCR.insert_cassette("github/vue") do + VCR.use_cassette("github/vue") do repository_data = described_class.fetch_repo(repository.id_or_name, api_token) expect(repository_data).not_to be_nil end From b58405dbc31e3e592e7a41f220733ad0605dbc4a Mon Sep 17 00:00:00 2001 From: Mike Young Date: Wed, 27 Mar 2024 11:12:43 -0400 Subject: [PATCH 06/12] rename to create_from_data --- app/models/concerns/github_identity.rb | 4 ++-- app/models/repository.rb | 2 +- app/models/repository_host/base.rb | 2 +- app/models/repository_host/github.rb | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/app/models/concerns/github_identity.rb b/app/models/concerns/github_identity.rb index 59fe0e9de..114e431f4 100644 --- a/app/models/concerns/github_identity.rb +++ b/app/models/concerns/github_identity.rb @@ -43,7 +43,7 @@ def update_repo_permissions update_column(:currently_syncing, true) download_orgs - r = github_client.repos + r = github_client.repos.map { |repo_data| GithubRepositoryHostDataFactory.generate_from_api(repo_data) } current_repo_ids = [] @@ -53,7 +53,7 @@ def update_repo_permissions r.each do 
|repo| unless (github_repo = existing_repos.find { |re| re.uuid.to_s == repo.id.to_s }) - github_repo = Repository.host("GitHub").find_by("lower(full_name) = ?", repo.full_name.downcase) || Repository.create_from_hash(repo) + github_repo = Repository.host("GitHub").find_by("lower(full_name) = ?", repo.full_name.downcase) || Repository.create_from_data(repo) end next if github_repo.nil? diff --git a/app/models/repository.rb b/app/models/repository.rb index 058d530b1..2ce8c705d 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -310,7 +310,7 @@ def self.create_from_host(host_type, full_name, token = nil) RepositoryHost.const_get(host_type.capitalize).create(full_name, token) end - def self.create_from_hash(repo_host_data) + def self.create_from_data(repo_host_data) return unless repo_host_data ActiveRecord::Base.transaction do diff --git a/app/models/repository_host/base.rb b/app/models/repository_host/base.rb index da6a63a89..83562164f 100644 --- a/app/models/repository_host/base.rb +++ b/app/models/repository_host/base.rb @@ -7,7 +7,7 @@ def initialize(repository) end def self.create(full_name, token = nil) - Repository.create_from_hash(fetch_repo(full_name, token)) + Repository.create_from_data(fetch_repo(full_name, token)) rescue *self::IGNORABLE_EXCEPTIONS nil end diff --git a/app/models/repository_host/github.rb b/app/models/repository_host/github.rb index a28933d7c..9b7e067cf 100644 --- a/app/models/repository_host/github.rb +++ b/app/models/repository_host/github.rb @@ -151,7 +151,7 @@ def download_forks(token = nil) return true if repository.forks_count == repository.forked_repositories.host(repository.host_type).count AuthToken.new_client(token).forks(repository.full_name).each do |fork| - Repository.create_from_hash(fork) + Repository.create_from_data(GithubRepositoryHostDataFactory.generate_from_api(fork)) end end From 6218ca59afb78147b4565da26ba2755495d4d163 Mon Sep 17 00:00:00 2001 From: Mike Young Date: Wed, 27 Mar 2024 11:17:21 
-0400 Subject: [PATCH 07/12] rename to RawUpstreamData --- .../bitbucket/bitbucket_repository_host_data_factory.rb | 2 +- .../github/github_repository_host_data_factory.rb | 2 +- .../gitlab/gitlab_repository_host_data_factory.rb | 2 +- .../{repository_host_data.rb => raw_upstream_data.rb} | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) rename app/models/repository_host/{repository_host_data.rb => raw_upstream_data.rb} (96%) diff --git a/app/models/repository_host/bitbucket/bitbucket_repository_host_data_factory.rb b/app/models/repository_host/bitbucket/bitbucket_repository_host_data_factory.rb index 5e0d3a170..1c1665f98 100644 --- a/app/models/repository_host/bitbucket/bitbucket_repository_host_data_factory.rb +++ b/app/models/repository_host/bitbucket/bitbucket_repository_host_data_factory.rb @@ -26,6 +26,6 @@ def self.generate_from_api(api_project) license: nil, } - RepositoryHost::RepositoryHostData.new(**input_hash) + RepositoryHost::RawUpstreamData.new(**input_hash) end end diff --git a/app/models/repository_host/github/github_repository_host_data_factory.rb b/app/models/repository_host/github/github_repository_host_data_factory.rb index d757e0741..50fead595 100644 --- a/app/models/repository_host/github/github_repository_host_data_factory.rb +++ b/app/models/repository_host/github/github_repository_host_data_factory.rb @@ -24,6 +24,6 @@ def self.generate_from_api(upstream_repository_data_hash) repository_size: upstream_repository_data_hash[:size], } - RepositoryHost::RepositoryHostData.new(**input_hash) + RepositoryHost::RawUpstreamData.new(**input_hash) end end diff --git a/app/models/repository_host/gitlab/gitlab_repository_host_data_factory.rb b/app/models/repository_host/gitlab/gitlab_repository_host_data_factory.rb index 957683cef..5181340e0 100644 --- a/app/models/repository_host/gitlab/gitlab_repository_host_data_factory.rb +++ b/app/models/repository_host/gitlab/gitlab_repository_host_data_factory.rb @@ -26,6 +26,6 @@ def 
self.generate_from_api(api_project) language: nil, # separate API endpoint that doesn't seem to be supported by the API gem we use } - RepositoryHost::RepositoryHostData.new(**repo_hash) + RepositoryHost::RawUpstreamData.new(**repo_hash) end end diff --git a/app/models/repository_host/repository_host_data.rb b/app/models/repository_host/raw_upstream_data.rb similarity index 96% rename from app/models/repository_host/repository_host_data.rb rename to app/models/repository_host/raw_upstream_data.rb index 9bbbba322..0cf1a9c2d 100644 --- a/app/models/repository_host/repository_host_data.rb +++ b/app/models/repository_host/raw_upstream_data.rb @@ -5,7 +5,7 @@ # is to standardize the output from each repository host into a concrete # set of data so we can make sure the raw data is being mapped to the same # schema within the Libraries.io models and code. -RepositoryHost::RepositoryHostData = Struct.new( +RepositoryHost::RawUpstreamData = Struct.new( :archived, :default_branch, :description, :fork, :full_name, :has_issues, :has_wiki, :homepage, :host_type, :keywords, :language, :license, :name, :owner, :parent, :is_private, :repository_uuid, :scm, :repository_size, keyword_init: true From 5f85a015837df5237fb234d7304d3cbb5848e6dc Mon Sep 17 00:00:00 2001 From: Mike Young Date: Wed, 27 Mar 2024 11:26:33 -0400 Subject: [PATCH 08/12] move conversions into single factory --- app/models/repository_host/bitbucket.rb | 2 +- .../bitbucket_repository_host_data_factory.rb | 31 -------- app/models/repository_host/github.rb | 2 +- .../github_repository_host_data_factory.rb | 29 ------- app/models/repository_host/gitlab.rb | 2 +- .../gitlab_repository_host_data_factory.rb | 31 -------- .../raw_upstream_data_converter.rb | 79 +++++++++++++++++++ 7 files changed, 82 insertions(+), 94 deletions(-) delete mode 100644 app/models/repository_host/bitbucket/bitbucket_repository_host_data_factory.rb delete mode 100644 app/models/repository_host/github/github_repository_host_data_factory.rb 
delete mode 100644 app/models/repository_host/gitlab/gitlab_repository_host_data_factory.rb create mode 100644 app/models/repository_host/raw_upstream_data_converter.rb diff --git a/app/models/repository_host/bitbucket.rb b/app/models/repository_host/bitbucket.rb index 75ad46d4c..4a66d8fa5 100644 --- a/app/models/repository_host/bitbucket.rb +++ b/app/models/repository_host/bitbucket.rb @@ -183,7 +183,7 @@ def self.fetch_repo(full_name, token = nil) user_name, repo_name = full_name.split("/") project = client.repos.get(user_name, repo_name) - BitbucketRepositoryHostDataFactory.generate_from_api(project) + RawUpstreamDataConverter.convert_from_bitbucket_api(project) rescue *IGNORABLE_EXCEPTIONS nil end diff --git a/app/models/repository_host/bitbucket/bitbucket_repository_host_data_factory.rb b/app/models/repository_host/bitbucket/bitbucket_repository_host_data_factory.rb deleted file mode 100644 index 1c1665f98..000000000 --- a/app/models/repository_host/bitbucket/bitbucket_repository_host_data_factory.rb +++ /dev/null @@ -1,31 +0,0 @@ -# frozen_string_literal: true - -class RepositoryHost::Bitbucket::BitbucketRepositoryHostDataFactory - def self.generate_from_api(api_project) - input_hash = { - description: api_project.description, - language: api_project.language, - full_name: api_project.full_name, - name: api_project.name, - has_wiki: api_project.has_wiki, - has_issues: api_project.has_issues, - scm: api_project.scm, - repository_uuid: api_project.uuid, - host_type: "Bitbucket", - owner: api_project.owner, - homepage: api_project.website, - fork: api_project.parent.present?, - default_branch: api_project.fetch("mainbranch", {}).try(:fetch, "name", nil), - is_private: api_project.is_private, - repository_size: api_project[:size].to_f / 1000, - parent: { - full_name: api_project.fetch("parent", {}).try(:fetch, "full_name", nil), - }, - archived: false, - keywords: [], - license: nil, - } - - RepositoryHost::RawUpstreamData.new(**input_hash) - end -end diff --git 
a/app/models/repository_host/github.rb b/app/models/repository_host/github.rb index 9b7e067cf..cde5734cf 100644 --- a/app/models/repository_host/github.rb +++ b/app/models/repository_host/github.rb @@ -59,7 +59,7 @@ def commits_url(author = nil) def self.fetch_repo(id_or_name, token = nil) id_or_name = id_or_name.to_i if id_or_name.match(/\A\d+\Z/) hash = AuthToken.fallback_client(token).repo(id_or_name, accept: "application/vnd.github.drax-preview+json,application/vnd.github.mercy-preview+json").to_hash - GithubRepositoryHostDataFactory.generate_from_api(hash) + RawUpstreamDataConverter.convert_from_github_api(hash) rescue *IGNORABLE_EXCEPTIONS nil end diff --git a/app/models/repository_host/github/github_repository_host_data_factory.rb b/app/models/repository_host/github/github_repository_host_data_factory.rb deleted file mode 100644 index 50fead595..000000000 --- a/app/models/repository_host/github/github_repository_host_data_factory.rb +++ /dev/null @@ -1,29 +0,0 @@ -# frozen_string_literal: true - -class RepositoryHost::Github::GithubRepositoryHostDataFactory - def self.generate_from_api(upstream_repository_data_hash) - input_hash = { - repository_uuid: upstream_repository_data_hash[:id], - archived: upstream_repository_data_hash[:archived], - default_branch: upstream_repository_data_hash[:default_branch], - description: upstream_repository_data_hash[:description], - fork: upstream_repository_data_hash[:fork], - full_name: upstream_repository_data_hash[:full_name], - has_issues: upstream_repository_data_hash[:has_issues], - has_wiki: upstream_repository_data_hash[:has_wiki], - homepage: upstream_repository_data_hash[:homepage], - host_type: "GitHub", - keywords: upstream_repository_data_hash[:topics], - language: upstream_repository_data_hash[:language], - license: upstream_repository_data_hash.dig(:license, :key), - name: upstream_repository_data_hash[:name], - owner: upstream_repository_data_hash[:owner], - parent: upstream_repository_data_hash[:parent], - 
is_private: upstream_repository_data_hash[:private], - scm: "git", - repository_size: upstream_repository_data_hash[:size], - } - - RepositoryHost::RawUpstreamData.new(**input_hash) - end -end diff --git a/app/models/repository_host/gitlab.rb b/app/models/repository_host/gitlab.rb index be9c37d63..69270023a 100644 --- a/app/models/repository_host/gitlab.rb +++ b/app/models/repository_host/gitlab.rb @@ -164,7 +164,7 @@ def self.recursive_gitlab_repos(page_number = 1, limit = 5, order = "created_asc def self.fetch_repo(full_name, token = nil) project = api_client(token).project(full_name, { license: true }) - GitlabRepositoryHostDataFactory.generate_from_api(project) + RawUpstreamDataConverter.convert_from_gitlab_api(project) rescue *IGNORABLE_EXCEPTIONS nil end diff --git a/app/models/repository_host/gitlab/gitlab_repository_host_data_factory.rb b/app/models/repository_host/gitlab/gitlab_repository_host_data_factory.rb deleted file mode 100644 index 5181340e0..000000000 --- a/app/models/repository_host/gitlab/gitlab_repository_host_data_factory.rb +++ /dev/null @@ -1,31 +0,0 @@ -# frozen_string_literal: true - -class RepositoryHost::Gitlab::GitlabRepositoryHostDataFactory - def self.generate_from_api(api_project) - repo_hash = { - repository_uuid: api_project.id, - description: api_project.description, - name: api_project.name, - default_branch: api_project.default_branch, - archived: api_project.archived, - host_type: "GitLab", - full_name: api_project.path_with_namespace, - owner: {}, - fork: api_project.try(:forked_from_project).present?, - has_issues: api_project.issues_enabled, - has_wiki: api_project.wiki_enabled, - scm: "git", - is_private: api_project.visibility != "public", - keywords: api_project.topics, - parent: { - full_name: api_project.try(:forked_from_project).try(:path_with_namespace), - }, - homepage: api_project.web_url, - license: api_project.license.key, - repository_size: 0, # locked to admins only?, - language: nil, # separate API endpoint 
that doesn't seem to be supported by the API gem we use - } - - RepositoryHost::RawUpstreamData.new(**repo_hash) - end -end diff --git a/app/models/repository_host/raw_upstream_data_converter.rb b/app/models/repository_host/raw_upstream_data_converter.rb new file mode 100644 index 000000000..759d24a0d --- /dev/null +++ b/app/models/repository_host/raw_upstream_data_converter.rb @@ -0,0 +1,79 @@ +# frozen_string_literal: true + +class RepositoryHost::RawUpstreamDataConverter + def self.convert_from_github_api(upstream_repository_data_hash) + RepositoryHost::RawUpstreamData.new( + repository_uuid: upstream_repository_data_hash[:id], + archived: upstream_repository_data_hash[:archived], + default_branch: upstream_repository_data_hash[:default_branch], + description: upstream_repository_data_hash[:description], + fork: upstream_repository_data_hash[:fork], + full_name: upstream_repository_data_hash[:full_name], + has_issues: upstream_repository_data_hash[:has_issues], + has_wiki: upstream_repository_data_hash[:has_wiki], + homepage: upstream_repository_data_hash[:homepage], + host_type: "GitHub", + keywords: upstream_repository_data_hash[:topics], + language: upstream_repository_data_hash[:language], + license: upstream_repository_data_hash.dig(:license, :key), + name: upstream_repository_data_hash[:name], + owner: upstream_repository_data_hash[:owner], + parent: upstream_repository_data_hash[:parent], + is_private: upstream_repository_data_hash[:private], + scm: "git", + repository_size: upstream_repository_data_hash[:size] + ) + end + + def self.convert_from_gitlab_api(api_project) + RepositoryHost::RawUpstreamData.new( + repository_uuid: api_project.id, + description: api_project.description, + name: api_project.name, + default_branch: api_project.default_branch, + archived: api_project.archived, + host_type: "GitLab", + full_name: api_project.path_with_namespace, + owner: {}, + fork: api_project.try(:forked_from_project).present?, + has_issues: 
api_project.issues_enabled, + has_wiki: api_project.wiki_enabled, + scm: "git", + is_private: api_project.visibility != "public", + keywords: api_project.topics, + parent: { + full_name: api_project.try(:forked_from_project).try(:path_with_namespace), + }, + homepage: api_project.web_url, + license: api_project.license.key, + repository_size: 0, # locked to admins only?, + language: nil # separate API endpoint that doesn't seem to be supported by the API gem we use + ) + end + + def self.convert_from_bitbucket_api(api_project) + RepositoryHost::RawUpstreamData.new( + description: api_project.description, + language: api_project.language, + full_name: api_project.full_name, + name: api_project.name, + has_wiki: api_project.has_wiki, + has_issues: api_project.has_issues, + scm: api_project.scm, + repository_uuid: api_project.uuid, + host_type: "Bitbucket", + owner: api_project.owner, + homepage: api_project.website, + fork: api_project.parent.present?, + default_branch: api_project.fetch("mainbranch", {}).try(:fetch, "name", nil), + is_private: api_project.is_private, + repository_size: api_project[:size].to_f / 1000, + parent: { + full_name: api_project.fetch("parent", {}).try(:fetch, "full_name", nil), + }, + archived: false, + keywords: [], + license: nil + ) + end +end From 79ea0058c16e662595aebba3d0d277ab4eb5b9cc Mon Sep 17 00:00:00 2001 From: Mike Young Date: Wed, 27 Mar 2024 11:33:10 -0400 Subject: [PATCH 09/12] add method to standardize full_name.downcase --- app/models/repository.rb | 8 ++++++-- app/models/repository_host/base.rb | 4 ++-- app/models/repository_host/raw_upstream_data.rb | 4 ++++ 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/app/models/repository.rb b/app/models/repository.rb index 2ce8c705d..df48d2fa0 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -211,6 +211,10 @@ def owner repository_organisation_id.present? ? 
repository_organisation : repository_user end + def lower_name + full_name&.downcase + end + def to_s full_name end @@ -315,10 +319,10 @@ def self.create_from_data(repo_host_data) ActiveRecord::Base.transaction do g = Repository.where(host_type: (repo_host_data.host_type || "GitHub")).find_by(uuid: repo_host_data.repository_uuid) - g = Repository.host(repo_host_data.host_type || "GitHub").find_by("lower(full_name) = ?", repo_host_data.full_name.downcase) if g.nil? + g = Repository.host(repo_host_data.host_type || "GitHub").find_by("lower(full_name) = ?", repo_host_data.lower_name) if g.nil? g = Repository.new(uuid: repo_host_data.repository_uuid, full_name: repo_host_data.full_name) if g.nil? g.host_type = repo_host_data.host_type || "GitHub" - g.full_name = repo_host_data.full_name if g.full_name.downcase != repo_host_data.full_name.downcase + g.full_name = repo_host_data.full_name if g.lower_name != repo_host_data.lower_name g.uuid = repo_host_data.repository_uuid if g.uuid.nil? g.license = repo_host_data.formatted_license if repo_host_data.formatted_license g.source_name = (repo_host_data.source_name if repo_host_data.source_name.present?) diff --git a/app/models/repository_host/base.rb b/app/models/repository_host/base.rb index 83562164f..2e13eeca9 100644 --- a/app/models/repository_host/base.rb +++ b/app/models/repository_host/base.rb @@ -88,8 +88,8 @@ def update_from_host(token = nil) repo_data = self.class.fetch_repo(repository.id_or_name) return unless repo_data.present? 
- if repository.full_name.downcase != repo_data.full_name.downcase - clash = Repository.host(repo_data.host_type).where("lower(full_name) = ?", repo_data.full_name.downcase).first + if repository.lower_name != repo_data.lower_name + clash = Repository.host(repo_data.host_type).where("lower(full_name) = ?", repo_data.lower_name).first clash.destroy if clash && (!clash.repository_host.update_from_host(token) || clash.status == "Removed") repository.full_name = repo_data.full_name end diff --git a/app/models/repository_host/raw_upstream_data.rb b/app/models/repository_host/raw_upstream_data.rb index 0cf1a9c2d..a0b682bd6 100644 --- a/app/models/repository_host/raw_upstream_data.rb +++ b/app/models/repository_host/raw_upstream_data.rb @@ -42,4 +42,8 @@ def formatted_license def source_name parent[:full_name] if fork end + + def lower_name + full_name&.downcase + end end From f37b8657e160ff1994eed8dd1da68a11ab10a229 Mon Sep 17 00:00:00 2001 From: Mike Young Date: Wed, 27 Mar 2024 11:44:07 -0400 Subject: [PATCH 10/12] fix one more rename --- app/models/repository_host/github.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/repository_host/github.rb b/app/models/repository_host/github.rb index cde5734cf..218400c77 100644 --- a/app/models/repository_host/github.rb +++ b/app/models/repository_host/github.rb @@ -151,7 +151,7 @@ def download_forks(token = nil) return true if repository.forks_count == repository.forked_repositories.host(repository.host_type).count AuthToken.new_client(token).forks(repository.full_name).each do |fork| - Repository.create_from_data(GithubRepositoryHostDataFactory.generate_from_api(fork)) + Repository.create_from_data(RawUpstreamDataConverter.convert_from_github_api(fork)) end end From 7a6f86b7bc19c67622fff188b99097bb1ae6c432 Mon Sep 17 00:00:00 2001 From: Mike Young Date: Wed, 27 Mar 2024 11:45:52 -0400 Subject: [PATCH 11/12] call method for var instead of treating it like a hash --- app/models/repository.rb | 2 +- 1 file
changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/repository.rb b/app/models/repository.rb index df48d2fa0..e30f777f4 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -327,7 +327,7 @@ def self.create_from_data(repo_host_data) g.license = repo_host_data.formatted_license if repo_host_data.formatted_license g.source_name = (repo_host_data.source_name if repo_host_data.source_name.present?) - g.status = g.correct_status_from_upstream(archived_upstream: repo_host_data[:archived]) + g.status = g.correct_status_from_upstream(archived_upstream: repo_host_data.archived) g.assign_attributes repo_host_data.to_repository_attrs if g.changed? From d60d572a6d546bba4c5a41059563b621960972fa Mon Sep 17 00:00:00 2001 From: Mike Young Date: Wed, 27 Mar 2024 13:29:34 -0400 Subject: [PATCH 12/12] use lower name and fix class rename --- app/models/concerns/github_identity.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app/models/concerns/github_identity.rb b/app/models/concerns/github_identity.rb index 114e431f4..fab4cd5f1 100644 --- a/app/models/concerns/github_identity.rb +++ b/app/models/concerns/github_identity.rb @@ -43,7 +43,7 @@ def update_repo_permissions update_column(:currently_syncing, true) download_orgs - r = github_client.repos.map { |repo_data| GithubRepositoryHostDataFactory.generate_from_api(repo_data) } + r = github_client.repos.map { |repo_data| RepositoryHost::RawUpstreamDataConverter.convert_from_github_api(repo_data) } current_repo_ids = [] @@ -51,9 +51,9 @@ def update_repo_permissions new_repo_ids = r.map(&:id) existing_repos = Repository.where(host_type: "GitHub").where(uuid: new_repo_ids).select(:id, :uuid) - r.each do |repo| - unless (github_repo = existing_repos.find { |re| re.uuid.to_s == repo.id.to_s }) - github_repo = Repository.host("GitHub").find_by("lower(full_name) = ?", repo.full_name.downcase) || Repository.create_from_data(repo) + r.each do |repo_data| + unless (github_repo = 
existing_repos.find { |re| re.uuid.to_s == repo_data.repository_uuid.to_s }) + github_repo = Repository.host("GitHub").find_by("lower(full_name) = ?", repo_data.lower_name) || Repository.create_from_data(repo_data) end next if github_repo.nil?