From 8cafe8a17fa59ab3f6d0bd797f5f63fb31a74ad8 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Wed, 19 Jul 2017 18:54:48 +0100 Subject: [PATCH 01/26] WIP dependency activity collection --- Gemfile | 1 + Gemfile.lock | 4 + app/models/concerns/dependency_miner.rb | 84 +++++++++++++++++++ app/models/dependency_activity.rb | 4 + app/models/repository.rb | 1 + ...0719162634_create_dependency_activities.rb | 22 +++++ db/schema.rb | 46 ++++++++-- spec/models/dependency_activity_spec.rb | 5 ++ 8 files changed, 161 insertions(+), 6 deletions(-) create mode 100644 app/models/concerns/dependency_miner.rb create mode 100644 app/models/dependency_activity.rb create mode 100644 db/migrate/20170719162634_create_dependency_activities.rb create mode 100644 spec/models/dependency_activity_spec.rb diff --git a/Gemfile b/Gemfile index d9af3017c..2e0060a03 100644 --- a/Gemfile +++ b/Gemfile @@ -82,6 +82,7 @@ gem 'pghero' gem 'pg_query' gem 'schema_plus_pg_indexes' gem 'autoprefixer-rails', '~> 7.1.2.1' +gem 'repo_miner' group :development do gem 'spring' diff --git a/Gemfile.lock b/Gemfile.lock index e5e07b69f..3cea0a912 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -382,6 +382,9 @@ GEM rb-readline (0.5.4) rdoc (5.1.0) redis (3.3.3) + repo_miner (0.2.2) + bibliothecary + rugged rspec (3.6.0) rspec-core (~> 3.6.0) rspec-expectations (~> 3.6.0) @@ -616,6 +619,7 @@ DEPENDENCIES rb-readline rdoc redis + repo_miner rspec-rails rspec-sidekiq rspec_junit_formatter diff --git a/app/models/concerns/dependency_miner.rb b/app/models/concerns/dependency_miner.rb new file mode 100644 index 000000000..32947d3ee --- /dev/null +++ b/app/models/concerns/dependency_miner.rb @@ -0,0 +1,84 @@ +module DependencyMiner + def mine_dependencies + return if scm == 'hg' # only works with git repositories at the moment + + tmp_dir_name = "#{host_type}-#{owner_name}-#{project_name}".downcase + + tmp_path = Rails.root.join("tmp/#{tmp_dir_name}") + + # download code + `git clone #{url} #{tmp_path}` + + # mine dependency activity from git repository + miner = RepoMiner::Repository.new(tmp_path.to_s) + + # store activities as DependencyActivity records + commits = miner.analyse(default_branch) + + # only consider commits with dependency data + dependency_commits = commits.select{|c| c.data[:dependencies].present? } + + activities = [] + if dependency_commits.any? + dependency_commits.each do |commit| + dependency_data = commit.data[:dependencies] + + dependency_data[:added_manifests].each do |added_manifest| + added_manifest[:added_dependencies].each do |added_dependency| + activities << format_activity(commit, added_manifest, added_dependency, 'added') + end + end + + dependency_data[:modified_manifests].each do |modified_manifest| + modified_manifest[:added_dependencies].each do |added_dependency| + activities << format_activity(commit, modified_manifest, added_dependency, 'added') + end + + modified_manifest[:modified_dependencies].each do |modified_dependency| + activities << format_activity(commit, modified_manifest, modified_dependency, 'modified') + end + + modified_manifest[:removed_dependencies].each do |removed_dependency| + activities << format_activity(commit, modified_manifest, removed_dependency, 'removed') + end + end + + dependency_data[:removed_manifests].each do |removed_manifest| + removed_manifests[:removed_dependencies].each do |removed_dependency| + activities << format_activity(commit, removed_manifest, removed_dependency, 'removed') + end + end + end + end + + # delete code + `rm -rf #{tmp_path}` + + activities + end + + def find_project_id(project_name, platform) + project_id = Project.platform(platform).where(name: project_name.try(:strip)).limit(1).pluck(:id).first + return project_id if project_id + Project.lower_platform(platform).lower_name(project_name.try(:strip)).limit(1).pluck(:id).first + end + + def format_activity(commit, manifest, dependency, action) + { + repository_id: id, + project_id: find_project_id(dependency[:name], manifest[:platform]), + action: action, + project_name: dependency[:name], + commit_message: commit.message, + requirement: dependency[:requirement], + kind: dependency[:type], + manifest_path: manifest[:path], + manifest_kind: manifest[:kind], + commit_sha: commit.sha, + platform: manifest[:platform], + previous_requirement: dependency[:previous_requirement], + previous_kind: dependency[:previous_type], + committed_at: commit.timestamp + } + end +end diff --git a/app/models/dependency_activity.rb b/app/models/dependency_activity.rb new file mode 100644 index 000000000..4259cc54c --- /dev/null +++ b/app/models/dependency_activity.rb @@ -0,0 +1,4 @@ +class DependencyActivity < ApplicationRecord + belongs_to :repository + belongs_to :project +end diff --git a/app/models/repository.rb b/app/models/repository.rb index cf55e411b..8d0851d31 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -3,6 +3,7 @@ class Repository < ApplicationRecord include Status include RepoManifests include RepositorySourceRank + include DependencyMiner # eager load this module to avoid clashing with Gitlab gem in development RepositoryHost::Gitlab diff --git a/db/migrate/20170719162634_create_dependency_activities.rb b/db/migrate/20170719162634_create_dependency_activities.rb new file mode 100644 index 000000000..e1b186d40 --- /dev/null +++ b/db/migrate/20170719162634_create_dependency_activities.rb @@ -0,0 +1,22 @@ +class CreateDependencyActivities < ActiveRecord::Migration[5.0] + def change + create_table :dependency_activities do |t| + t.references :repository_id, index: true + t.references :project_id, index: true + t.string :action + t.string :project_name + t.string :commit_message + t.string :requirement + t.string :kind + t.string :manifest_path + t.string :manifest_kind + t.string :commit_sha + t.string :platform + t.string :previous_requirement + t.string :previous_kind + t.datetime :committed_at, index: true + + t.timestamps + end + end +end diff --git a/db/schema.rb b/db/schema.rb index 63ae2dc43..1eec59b98 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20170607094720) do +ActiveRecord::Schema.define(version: 20170719162634) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -54,6 +54,25 @@ t.datetime "updated_at" end + create_table "dependency_activities", force: :cascade do |t| + t.integer "repository_id_id", :index=>{:name=>"index_dependency_activities_on_repository_id_id"} + t.integer "project_id_id", :index=>{:name=>"index_dependency_activities_on_project_id_id"} + t.string "action" + t.string "project_name" + t.string "commit_message" + t.string "requirement" + t.string "kind" + t.string "manifest_path" + t.string "manifest_kind" + t.string "commit_sha" + t.string "platform" + t.string "previous_requirement" + t.string "previous_kind" + t.datetime "committed_at", :index=>{:name=>"index_dependency_activities_on_committed_at"} + t.datetime "created_at", :null=>false + t.datetime "updated_at", :null=>false + end + create_table "identities", force: :cascade do |t| t.string "uid", :index=>{:name=>"index_identities_on_uid"} t.string "provider" @@ -205,13 +224,13 @@ end create_table "projects", force: :cascade do |t| - t.string "name" - t.string "platform", :index=>{:name=>"index_projects_on_platform_and_name", :with=>["name"], :unique=>true} + t.string "name", :limit=>255 + t.string "platform", :limit=>255, :index=>{:name=>"index_projects_on_platform_and_name", :with=>["name"], :unique=>true} t.datetime "created_at", :index=>{:name=>"index_projects_on_created_at"} - t.datetime "updated_at", :index=>{:name=>"index_projects_on_updated_at"} + t.datetime "updated_at" t.text "description" t.text "keywords" - t.string "homepage" + t.string "homepage", :limit=>255 t.string "licenses" t.string "repository_url" t.integer "repository_id", :index=>{:name=>"index_projects_on_repository_id"} @@ -238,8 +257,23 @@ t.datetime "updated_at", :null=>false end + create_table "registry_permissions", force: :cascade do |t| + t.integer "registry_user_id" + t.integer "project_id" + t.string "kind" + end + + create_table "registry_users", force: :cascade do |t| + t.string "platform" + t.integer "uuid" + t.string "login" + t.string "email" + t.string "name" + t.string "url" + end + create_table "repositories", force: :cascade do |t| - t.string "full_name" + t.string "full_name", :index=>{:name=>"index_repositories_on_full_name"} t.string "description" t.boolean "fork" t.datetime "created_at", :null=>false diff --git a/spec/models/dependency_activity_spec.rb b/spec/models/dependency_activity_spec.rb new file mode 100644 index 000000000..7351be9e8 --- /dev/null +++ b/spec/models/dependency_activity_spec.rb @@ -0,0 +1,5 @@ +require 'rails_helper' + +RSpec.describe DependencyActivity, type: :model do + pending "add some examples to (or delete) #{__FILE__}" +end From e96a851da4f492ee2e28061b0a78d498fe51cf35 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Thu, 20 Jul 2017 13:30:49 +0100 Subject: [PATCH 02/26] Update repo_miner --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index 3cea0a912..fedd9b18b 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -382,7 +382,7 @@ GEM rb-readline (0.5.4) rdoc (5.1.0) redis (3.3.3) - repo_miner (0.2.2) + repo_miner (0.3.0) bibliothecary rugged rspec (3.6.0) From 8d710bba91f44780f996ff774030806b00ba63b2 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Thu, 20 Jul 2017 14:05:48 +0100 Subject: [PATCH 03/26] Save mined activities to the database --- app/models/concerns/dependency_miner.rb | 5 +++-- app/models/repository.rb | 2 +- db/migrate/20170719162634_create_dependency_activities.rb | 4 ++-- db/schema.rb | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/app/models/concerns/dependency_miner.rb b/app/models/concerns/dependency_miner.rb index 32947d3ee..1c0d28d79 100644 --- a/app/models/concerns/dependency_miner.rb +++ b/app/models/concerns/dependency_miner.rb @@ -51,10 +51,11 @@ def mine_dependencies end end + # write activities to the database + activities.each {|activity| dependency_activities.create(activity) } + # delete code `rm -rf #{tmp_path}` - - activities end def find_project_id(project_name, platform) diff --git a/app/models/repository.rb b/app/models/repository.rb index 8d0851d31..4e8f0e41d 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -24,7 +24,7 @@ class Repository < ApplicationRecord has_many :dependencies, through: :manifests, source: :repository_dependencies has_many :dependency_projects, -> { group('projects.id').order("COUNT(projects.id) DESC") }, through: :dependencies, source: :project has_many :dependency_repos, -> { group('repositories.id') }, through: :dependency_projects, source: :repository - + has_many :dependency_activities has_many :repository_subscriptions, dependent: :delete_all has_many :web_hooks, dependent: :delete_all has_many :issues, dependent: :delete_all diff --git a/db/migrate/20170719162634_create_dependency_activities.rb b/db/migrate/20170719162634_create_dependency_activities.rb index e1b186d40..c8ed8c3bb 100644 --- a/db/migrate/20170719162634_create_dependency_activities.rb +++ b/db/migrate/20170719162634_create_dependency_activities.rb @@ -1,8 +1,8 @@ class CreateDependencyActivities < ActiveRecord::Migration[5.0] def change create_table :dependency_activities do |t| - t.references :repository_id, index: true - t.references :project_id, index: true + t.references :repository, index: true + t.references :project, index: true t.string :action t.string :project_name t.string :commit_message diff --git a/db/schema.rb b/db/schema.rb index 1eec59b98..499a8e119 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -55,8 +55,8 @@ end create_table "dependency_activities", force: :cascade do |t| - t.integer "repository_id_id", :index=>{:name=>"index_dependency_activities_on_repository_id_id"} - t.integer "project_id_id", :index=>{:name=>"index_dependency_activities_on_project_id_id"} + t.integer "repository_id", :index=>{:name=>"index_dependency_activities_on_repository_id"} + t.integer "project_id", :index=>{:name=>"index_dependency_activities_on_project_id"} t.string "action" t.string "project_name" t.string "commit_message" From f87e4fd84697fc75c0ac257148fa4505c5272753 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Thu, 20 Jul 2017 14:06:08 +0100 Subject: [PATCH 04/26] Skip previously mined commits --- app/models/concerns/dependency_miner.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/app/models/concerns/dependency_miner.rb b/app/models/concerns/dependency_miner.rb index 1c0d28d79..4a94d6e5d 100644 --- a/app/models/concerns/dependency_miner.rb +++ b/app/models/concerns/dependency_miner.rb @@ -12,8 +12,11 @@ def mine_dependencies # mine dependency activity from git repository miner = RepoMiner::Repository.new(tmp_path.to_s) + # Find last commit analysed + last_commit_sha = dependency_activities.order('committed_at DESC').first.try(:commit_sha) + # store activities as DependencyActivity records - commits = miner.analyse(default_branch) + commits = miner.analyse(default_branch, last_commit_sha) # only consider commits with dependency data dependency_commits = commits.select{|c| c.data[:dependencies].present? } From dd1265e175d03dba308366c74ba6d7fd6933a28d Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Thu, 20 Jul 2017 14:22:33 +0100 Subject: [PATCH 05/26] Fix typo --- app/models/concerns/dependency_miner.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/concerns/dependency_miner.rb b/app/models/concerns/dependency_miner.rb index 4a94d6e5d..6b8b8b840 100644 --- a/app/models/concerns/dependency_miner.rb +++ b/app/models/concerns/dependency_miner.rb @@ -47,7 +47,7 @@ def mine_dependencies end dependency_data[:removed_manifests].each do |removed_manifest| - removed_manifests[:removed_dependencies].each do |removed_dependency| + removed_manifest[:removed_dependencies].each do |removed_dependency| activities << format_activity(commit, removed_manifest, removed_dependency, 'removed') end end From 6293412bcc6bfc46525e9ddddf50ecdc2d9a9212 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Thu, 20 Jul 2017 16:08:10 +0100 Subject: [PATCH 06/26] Better error handling --- app/models/concerns/dependency_miner.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/app/models/concerns/dependency_miner.rb b/app/models/concerns/dependency_miner.rb index 6b8b8b840..3672955d8 100644 --- a/app/models/concerns/dependency_miner.rb +++ b/app/models/concerns/dependency_miner.rb @@ -9,6 +9,8 @@ def mine_dependencies # download code `git clone #{url} #{tmp_path}` + return unless tmp_path.exist? # handle failed clones + # mine dependency activity from git repository miner = RepoMiner::Repository.new(tmp_path.to_s) @@ -57,6 +59,9 @@ def mine_dependencies # write activities to the database activities.each {|activity| dependency_activities.create(activity) } + + + ensure # delete code `rm -rf #{tmp_path}` end From 04d5236f69a96f0f0125e3d84a226ce7ff97e2a3 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Tue, 25 Jul 2017 11:56:24 -0400 Subject: [PATCH 07/26] Update bibliothecary and repo_miner --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index fedd9b18b..f91d792b9 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -382,7 +382,7 @@ GEM rb-readline (0.5.4) rdoc (5.1.0) redis (3.3.3) - repo_miner (0.3.0) + repo_miner (0.3.1) bibliothecary rugged rspec (3.6.0) From 7359657f1e230dd434b6bd7acf67b0289ce97d73 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Tue, 25 Jul 2017 11:56:37 -0400 Subject: [PATCH 08/26] Projects have dependency activities --- app/models/project.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/app/models/project.rb b/app/models/project.rb index 975462383..40f0b8700 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -30,6 +30,7 @@ class Project < ApplicationRecord has_many :dependent_repositories, -> { group('repositories.id').order('repositories.rank DESC NULLS LAST, repositories.stargazers_count DESC') }, through: :dependent_manifests, source: :repository has_many :subscriptions has_many :project_suggestions, dependent: :delete_all + has_many :dependency_activities belongs_to :repository has_one :readme, through: :repository From c2329d5fec24baca2c89c182ef4b81fd01916d36 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Fri, 28 Jul 2017 10:32:14 +0100 Subject: [PATCH 09/26] Only clone the default branch --- app/models/concerns/dependency_miner.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/concerns/dependency_miner.rb b/app/models/concerns/dependency_miner.rb index 3672955d8..ed7693158 100644 --- a/app/models/concerns/dependency_miner.rb +++ b/app/models/concerns/dependency_miner.rb @@ -7,7 +7,7 @@ def mine_dependencies tmp_path = Rails.root.join("tmp/#{tmp_dir_name}") # download code - `git clone #{url} #{tmp_path}` + system "git clone -b #{default_branch} --single-branch #{url} #{tmp_path}" return unless tmp_path.exist? # handle failed clones From 10a7113a793172cc4812f306327291ffc602ba25 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Fri, 28 Jul 2017 10:33:05 +0100 Subject: [PATCH 10/26] Import dependency activities in one sql query using activerecord-import --- Gemfile | 1 + Gemfile.lock | 3 +++ app/models/concerns/dependency_miner.rb | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Gemfile b/Gemfile index 2e0060a03..af3b9a0bf 100644 --- a/Gemfile +++ b/Gemfile @@ -83,6 +83,7 @@ gem 'pg_query' gem 'schema_plus_pg_indexes' gem 'autoprefixer-rails', '~> 7.1.2.1' gem 'repo_miner' +gem 'activerecord-import' group :development do gem 'spring' diff --git a/Gemfile.lock b/Gemfile.lock index f91d792b9..8adeaf0a6 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -72,6 +72,8 @@ GEM activemodel (= 5.0.3) activesupport (= 5.0.3) arel (~> 7.0) + activerecord-import (0.19.1) + activerecord (>= 3.2) activesupport (5.0.3) concurrent-ruby (~> 1.0, >= 1.0.2) i18n (~> 0.7) @@ -541,6 +543,7 @@ PLATFORMS DEPENDENCIES RedCloth active_model_serializers + activerecord-import api-pagination appsignal (~> 2.3.0) asciidoctor diff --git a/app/models/concerns/dependency_miner.rb b/app/models/concerns/dependency_miner.rb index ed7693158..30dc9d4ea 100644 --- a/app/models/concerns/dependency_miner.rb +++ b/app/models/concerns/dependency_miner.rb @@ -57,7 +57,7 @@ def mine_dependencies end # write activities to the database - activities.each {|activity| dependency_activities.create(activity) } + DependencyActivity.import(activities.map{|a| DependencyActivity.new(a) }) From d03922bbb179099bd933c1b70508dedf47b5b483 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Fri, 28 Jul 2017 10:35:57 +0100 Subject: [PATCH 11/26] Update appsignal --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index 8adeaf0a6..3c2faf3f0 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -87,7 +87,7 @@ GEM airbrussh (1.3.0) sshkit (>= 1.6.1, != 1.7.0) api-pagination (4.6.3) - appsignal (2.3.0) + appsignal (2.3.1) rack arel (7.1.4) asciidoctor (1.5.6.1) From e69c83b81f0d7fd69885e6b1127e5288bb511e77 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Fri, 28 Jul 2017 11:29:12 +0100 Subject: [PATCH 12/26] Skip mining dependency activities from forks --- app/models/concerns/dependency_miner.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/app/models/concerns/dependency_miner.rb b/app/models/concerns/dependency_miner.rb index 30dc9d4ea..1ee50e1ac 100644 --- a/app/models/concerns/dependency_miner.rb +++ b/app/models/concerns/dependency_miner.rb @@ -1,6 +1,7 @@ module DependencyMiner def mine_dependencies return if scm == 'hg' # only works with git repositories at the moment + return if fork? tmp_dir_name = "#{host_type}-#{owner_name}-#{project_name}".downcase From d208c3cb871707370c21ab4e2f1a2e21d57c8002 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Wed, 19 Jul 2017 18:54:48 +0100 Subject: [PATCH 13/26] WIP dependency activity collection --- Gemfile | 1 + Gemfile.lock | 4 + app/models/concerns/dependency_miner.rb | 84 +++++++++++++++++++ app/models/dependency_activity.rb | 4 + app/models/repository.rb | 1 + ...0719162634_create_dependency_activities.rb | 22 +++++ db/schema.rb | 46 ++++++++-- spec/models/dependency_activity_spec.rb | 5 ++ 8 files changed, 161 insertions(+), 6 deletions(-) create mode 100644 app/models/concerns/dependency_miner.rb create mode 100644 app/models/dependency_activity.rb create mode 100644 db/migrate/20170719162634_create_dependency_activities.rb create mode 100644 spec/models/dependency_activity_spec.rb diff --git a/Gemfile b/Gemfile index b3a38e458..5aead9344 100644 --- a/Gemfile +++ b/Gemfile @@ -82,6 +82,7 @@ gem 'pghero' gem 'pg_query' gem 'schema_plus_pg_indexes' gem 'autoprefixer-rails', '~> 7.1.2.1' +gem 'repo_miner' group :development do gem 'spring' diff --git a/Gemfile.lock b/Gemfile.lock index 3528086eb..f58101f86 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -382,6 +382,9 @@ GEM rb-readline (0.5.5) rdoc (5.1.0) redis (3.3.3) + repo_miner (0.2.2) + bibliothecary + rugged rspec (3.6.0) rspec-core (~> 3.6.0) rspec-expectations (~> 3.6.0) @@ -616,6 +619,7 @@ DEPENDENCIES rb-readline rdoc redis + repo_miner rspec-rails rspec-sidekiq rspec_junit_formatter diff --git a/app/models/concerns/dependency_miner.rb b/app/models/concerns/dependency_miner.rb new file mode 100644 index 000000000..32947d3ee --- /dev/null +++ b/app/models/concerns/dependency_miner.rb @@ -0,0 +1,84 @@ +module DependencyMiner + def mine_dependencies + return if scm == 'hg' # only works with git repositories at the moment + + tmp_dir_name = "#{host_type}-#{owner_name}-#{project_name}".downcase + + tmp_path = Rails.root.join("tmp/#{tmp_dir_name}") + + # download code + `git clone #{url} #{tmp_path}` + + # mine dependency activity from git repository + miner = RepoMiner::Repository.new(tmp_path.to_s) + + # store activities as DependencyActivity records + commits = miner.analyse(default_branch) + + # only consider commits with dependency data + dependency_commits = commits.select{|c| c.data[:dependencies].present? } + + activities = [] + if dependency_commits.any? + dependency_commits.each do |commit| + dependency_data = commit.data[:dependencies] + + dependency_data[:added_manifests].each do |added_manifest| + added_manifest[:added_dependencies].each do |added_dependency| + activities << format_activity(commit, added_manifest, added_dependency, 'added') + end + end + + dependency_data[:modified_manifests].each do |modified_manifest| + modified_manifest[:added_dependencies].each do |added_dependency| + activities << format_activity(commit, modified_manifest, added_dependency, 'added') + end + + modified_manifest[:modified_dependencies].each do |modified_dependency| + activities << format_activity(commit, modified_manifest, modified_dependency, 'modified') + end + + modified_manifest[:removed_dependencies].each do |removed_dependency| + activities << format_activity(commit, modified_manifest, removed_dependency, 'removed') + end + end + + dependency_data[:removed_manifests].each do |removed_manifest| + removed_manifests[:removed_dependencies].each do |removed_dependency| + activities << format_activity(commit, removed_manifest, removed_dependency, 'removed') + end + end + end + end + + # delete code + `rm -rf #{tmp_path}` + + activities + end + + def find_project_id(project_name, platform) + project_id = Project.platform(platform).where(name: project_name.try(:strip)).limit(1).pluck(:id).first + return project_id if project_id + Project.lower_platform(platform).lower_name(project_name.try(:strip)).limit(1).pluck(:id).first + end + + def format_activity(commit, manifest, dependency, action) + { + repository_id: id, + project_id: find_project_id(dependency[:name], manifest[:platform]), + action: action, + project_name: dependency[:name], + commit_message: commit.message, + requirement: dependency[:requirement], + kind: dependency[:type], + manifest_path: manifest[:path], + manifest_kind: manifest[:kind], + commit_sha: commit.sha, + platform: manifest[:platform], + previous_requirement: dependency[:previous_requirement], + previous_kind: dependency[:previous_type], + committed_at: commit.timestamp + } + end +end diff --git a/app/models/dependency_activity.rb b/app/models/dependency_activity.rb new file mode 100644 index 000000000..4259cc54c --- /dev/null +++ b/app/models/dependency_activity.rb @@ -0,0 +1,4 @@ +class DependencyActivity < ApplicationRecord + belongs_to :repository + belongs_to :project +end diff --git a/app/models/repository.rb b/app/models/repository.rb index dddd4b8c5..9862697ab 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -3,6 +3,7 @@ class Repository < ApplicationRecord include Status include RepoManifests include RepositorySourceRank + include DependencyMiner # eager load this module to avoid clashing with Gitlab gem in development RepositoryHost::Gitlab diff --git a/db/migrate/20170719162634_create_dependency_activities.rb b/db/migrate/20170719162634_create_dependency_activities.rb new file mode 100644 index 000000000..e1b186d40 --- /dev/null +++ b/db/migrate/20170719162634_create_dependency_activities.rb @@ -0,0 +1,22 @@ +class CreateDependencyActivities < ActiveRecord::Migration[5.0] + def change + create_table :dependency_activities do |t| + t.references :repository_id, index: true + t.references :project_id, index: true + t.string :action + t.string :project_name + t.string :commit_message + t.string :requirement + t.string :kind + t.string :manifest_path + t.string :manifest_kind + t.string :commit_sha + t.string :platform + t.string :previous_requirement + t.string :previous_kind + t.datetime :committed_at, index: true + + t.timestamps + end + end +end diff --git a/db/schema.rb b/db/schema.rb index 63ae2dc43..1eec59b98 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20170607094720) do +ActiveRecord::Schema.define(version: 20170719162634) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -54,6 +54,25 @@ t.datetime "updated_at" end + create_table "dependency_activities", force: :cascade do |t| + t.integer "repository_id_id", :index=>{:name=>"index_dependency_activities_on_repository_id_id"} + t.integer "project_id_id", :index=>{:name=>"index_dependency_activities_on_project_id_id"} + t.string "action" + t.string "project_name" + t.string "commit_message" + t.string "requirement" + t.string "kind" + t.string "manifest_path" + t.string "manifest_kind" + t.string "commit_sha" + t.string "platform" + t.string "previous_requirement" + t.string "previous_kind" + t.datetime "committed_at", :index=>{:name=>"index_dependency_activities_on_committed_at"} + t.datetime "created_at", :null=>false + t.datetime "updated_at", :null=>false + end + create_table "identities", force: :cascade do |t| t.string "uid", :index=>{:name=>"index_identities_on_uid"} t.string "provider" @@ -205,13 +224,13 @@ end create_table "projects", force: :cascade do |t| - t.string "name" - t.string "platform", :index=>{:name=>"index_projects_on_platform_and_name", :with=>["name"], :unique=>true} + t.string "name", :limit=>255 + t.string "platform", :limit=>255, :index=>{:name=>"index_projects_on_platform_and_name", :with=>["name"], :unique=>true} t.datetime "created_at", :index=>{:name=>"index_projects_on_created_at"} - t.datetime "updated_at", :index=>{:name=>"index_projects_on_updated_at"} + t.datetime "updated_at" t.text "description" t.text "keywords" - t.string "homepage" + t.string "homepage", :limit=>255 t.string "licenses" t.string "repository_url" t.integer "repository_id", :index=>{:name=>"index_projects_on_repository_id"} @@ -238,8 +257,23 @@ t.datetime "updated_at", :null=>false end + create_table "registry_permissions", force: :cascade do |t| + t.integer "registry_user_id" + t.integer "project_id" + t.string "kind" + end + + create_table "registry_users", force: :cascade do |t| + t.string "platform" + t.integer "uuid" + t.string "login" + t.string "email" + t.string "name" + t.string "url" + end + create_table "repositories", force: :cascade do |t| - t.string "full_name" + t.string "full_name", :index=>{:name=>"index_repositories_on_full_name"} t.string "description" t.boolean "fork" t.datetime "created_at", :null=>false diff --git a/spec/models/dependency_activity_spec.rb b/spec/models/dependency_activity_spec.rb new file mode 100644 index 000000000..7351be9e8 --- /dev/null +++ b/spec/models/dependency_activity_spec.rb @@ -0,0 +1,5 @@ +require 'rails_helper' + +RSpec.describe DependencyActivity, type: :model do + pending "add some examples to (or delete) #{__FILE__}" +end From 345fb1a16bd41a7eee4c8e1f10e44720193676ed Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Thu, 20 Jul 2017 13:30:49 +0100 Subject: [PATCH 14/26] Update repo_miner --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index f58101f86..1e2fe1c3d 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -382,7 +382,7 @@ GEM rb-readline (0.5.5) rdoc (5.1.0) redis (3.3.3) - repo_miner (0.2.2) + repo_miner (0.3.0) bibliothecary rugged rspec (3.6.0) From cde76416770fcd71111278d4873f24d92e61b022 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Thu, 20 Jul 2017 14:05:48 +0100 Subject: [PATCH 15/26] Save mined activities to the database --- app/models/concerns/dependency_miner.rb | 5 +++-- app/models/repository.rb | 2 +- db/migrate/20170719162634_create_dependency_activities.rb | 4 ++-- db/schema.rb | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/app/models/concerns/dependency_miner.rb b/app/models/concerns/dependency_miner.rb index 32947d3ee..1c0d28d79 100644 --- a/app/models/concerns/dependency_miner.rb +++ b/app/models/concerns/dependency_miner.rb @@ -51,10 +51,11 @@ def mine_dependencies end end + # write activities to the database + activities.each {|activity| dependency_activities.create(activity) } + # delete code `rm -rf #{tmp_path}` - - activities end def find_project_id(project_name, platform) diff --git a/app/models/repository.rb b/app/models/repository.rb index 9862697ab..e46ee4e5c 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -24,7 +24,7 @@ class Repository < ApplicationRecord has_many :dependencies, through: :manifests, source: :repository_dependencies has_many :dependency_projects, -> { group('projects.id').order("COUNT(projects.id) DESC") }, through: :dependencies, source: :project has_many :dependency_repos, -> { group('repositories.id') }, through: :dependency_projects, source: :repository - + has_many :dependency_activities has_many :repository_subscriptions, dependent: :delete_all has_many :web_hooks, dependent: :delete_all has_many :issues, dependent: :delete_all diff --git a/db/migrate/20170719162634_create_dependency_activities.rb b/db/migrate/20170719162634_create_dependency_activities.rb index e1b186d40..c8ed8c3bb 100644 --- a/db/migrate/20170719162634_create_dependency_activities.rb +++ b/db/migrate/20170719162634_create_dependency_activities.rb @@ -1,8 +1,8 @@ class CreateDependencyActivities < ActiveRecord::Migration[5.0] def change create_table :dependency_activities do |t| - t.references :repository_id, index: true - t.references :project_id, index: true + t.references :repository, index: true + t.references :project, index: true t.string :action t.string :project_name t.string :commit_message diff --git a/db/schema.rb b/db/schema.rb index 1eec59b98..499a8e119 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -55,8 +55,8 @@ end create_table "dependency_activities", force: :cascade do |t| - t.integer "repository_id_id", :index=>{:name=>"index_dependency_activities_on_repository_id_id"} - t.integer "project_id_id", :index=>{:name=>"index_dependency_activities_on_project_id_id"} + t.integer "repository_id", :index=>{:name=>"index_dependency_activities_on_repository_id"} + t.integer "project_id", :index=>{:name=>"index_dependency_activities_on_project_id"} t.string "action" t.string "project_name" t.string "commit_message" From f9066ef83b539a618e922388c4c30a03439e2517 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Thu, 20 Jul 2017 14:06:08 +0100 Subject: [PATCH 16/26] Skip previously mined commits --- app/models/concerns/dependency_miner.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/app/models/concerns/dependency_miner.rb b/app/models/concerns/dependency_miner.rb index 1c0d28d79..4a94d6e5d 100644 --- a/app/models/concerns/dependency_miner.rb +++ b/app/models/concerns/dependency_miner.rb @@ -12,8 +12,11 @@ def mine_dependencies # mine dependency activity from git repository miner = RepoMiner::Repository.new(tmp_path.to_s) + # Find last commit analysed + last_commit_sha = dependency_activities.order('committed_at DESC').first.try(:commit_sha) + # store activities as DependencyActivity records - commits = miner.analyse(default_branch) + commits = miner.analyse(default_branch, last_commit_sha) # only consider commits with dependency data dependency_commits = commits.select{|c| c.data[:dependencies].present? } From 3858e37198539ca38e0bca8622ac94288d4dfd35 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Thu, 20 Jul 2017 14:22:33 +0100 Subject: [PATCH 17/26] Fix typo --- app/models/concerns/dependency_miner.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/concerns/dependency_miner.rb b/app/models/concerns/dependency_miner.rb index 4a94d6e5d..6b8b8b840 100644 --- a/app/models/concerns/dependency_miner.rb +++ b/app/models/concerns/dependency_miner.rb @@ -47,7 +47,7 @@ def mine_dependencies end dependency_data[:removed_manifests].each do |removed_manifest| - removed_manifests[:removed_dependencies].each do |removed_dependency| + removed_manifest[:removed_dependencies].each do |removed_dependency| activities << format_activity(commit, removed_manifest, removed_dependency, 'removed') end end From 506670e9e4fec8b7edf61a67b3856d1936c544e6 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Thu, 20 Jul 2017 16:08:10 +0100 Subject: [PATCH 18/26] Better error handling --- app/models/concerns/dependency_miner.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/app/models/concerns/dependency_miner.rb b/app/models/concerns/dependency_miner.rb index 6b8b8b840..3672955d8 100644 --- a/app/models/concerns/dependency_miner.rb +++ b/app/models/concerns/dependency_miner.rb @@ -9,6 +9,8 @@ def mine_dependencies # download code `git clone #{url} #{tmp_path}` + return unless tmp_path.exist? # handle failed clones + # mine dependency activity from git repository miner = RepoMiner::Repository.new(tmp_path.to_s) @@ -57,6 +59,9 @@ def mine_dependencies # write activities to the database activities.each {|activity| dependency_activities.create(activity) } + + + ensure # delete code `rm -rf #{tmp_path}` end From 0b1f532250075822bf26384cfc63ccb68563dff9 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Tue, 25 Jul 2017 11:56:24 -0400 Subject: [PATCH 19/26] Update bibliothecary and repo_miner --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index 1e2fe1c3d..9e3e7a013 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -382,7 +382,7 @@ GEM rb-readline (0.5.5) rdoc (5.1.0) redis (3.3.3) - repo_miner (0.3.0) + repo_miner (0.3.1) bibliothecary rugged rspec (3.6.0) From 817600b007d00e0bf428e33bb149f22c9c63d5a2 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Tue, 25 Jul 2017 11:56:37 -0400 Subject: [PATCH 20/26] Projects have dependency activities --- app/models/project.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/app/models/project.rb b/app/models/project.rb index af2a92b19..4a34c4be6 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -31,6 +31,7 @@ class Project < ApplicationRecord has_many :dependent_repositories, -> { group('repositories.id').order('repositories.rank DESC NULLS LAST, repositories.stargazers_count DESC') }, through: :dependent_manifests, source: :repository has_many :subscriptions has_many :project_suggestions, dependent: :delete_all + has_many :dependency_activities has_one :readme, through: :repository scope :platform, ->(platform) { where(platform: PackageManager::Base.format_name(platform)) } From 3658a9caa646b8590a291b75face48511c423625 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Fri, 28 Jul 2017 10:32:14 +0100 Subject: [PATCH 21/26] Only clone the default branch --- app/models/concerns/dependency_miner.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/concerns/dependency_miner.rb b/app/models/concerns/dependency_miner.rb index 3672955d8..ed7693158 100644 --- a/app/models/concerns/dependency_miner.rb +++ b/app/models/concerns/dependency_miner.rb @@ -7,7 +7,7 @@ def mine_dependencies tmp_path = Rails.root.join("tmp/#{tmp_dir_name}") # download code - `git clone #{url} #{tmp_path}` + system "git clone -b #{default_branch} --single-branch #{url} #{tmp_path}" return unless tmp_path.exist? # handle failed clones From dcf8604243aa2f3e4ac02b3b5ee3fbd98dfbc35c Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Fri, 28 Jul 2017 10:33:05 +0100 Subject: [PATCH 22/26] Import dependency activities in one sql query using activerecord-import --- Gemfile | 1 + Gemfile.lock | 3 +++ app/models/concerns/dependency_miner.rb | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Gemfile b/Gemfile index 5aead9344..7e27d0617 100644 --- a/Gemfile +++ b/Gemfile @@ -83,6 +83,7 @@ gem 'pg_query' gem 'schema_plus_pg_indexes' gem 'autoprefixer-rails', '~> 7.1.2.1' gem 'repo_miner' +gem 'activerecord-import' group :development do gem 'spring' diff --git a/Gemfile.lock b/Gemfile.lock index 9e3e7a013..23dac651d 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -72,6 +72,8 @@ GEM activemodel (= 5.0.4) activesupport (= 5.0.4) arel (~> 7.0) + activerecord-import (0.19.1) + activerecord (>= 3.2) activesupport (5.0.4) concurrent-ruby (~> 1.0, >= 1.0.2) i18n (~> 0.7) @@ -541,6 +543,7 @@ PLATFORMS DEPENDENCIES RedCloth active_model_serializers + activerecord-import api-pagination appsignal (~> 2.3.0) asciidoctor diff --git a/app/models/concerns/dependency_miner.rb b/app/models/concerns/dependency_miner.rb index ed7693158..30dc9d4ea 100644 --- a/app/models/concerns/dependency_miner.rb +++ b/app/models/concerns/dependency_miner.rb @@ -57,7 +57,7 @@ def mine_dependencies end # write activities to the database - activities.each {|activity| dependency_activities.create(activity) } + DependencyActivity.import(activities.map{|a| DependencyActivity.new(a) }) From 29b0a58697ec7c4aff51993d833ced46af3f9840 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Fri, 28 Jul 2017 11:29:12 +0100 Subject: [PATCH 23/26] Skip mining dependency activities from forks --- app/models/concerns/dependency_miner.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/app/models/concerns/dependency_miner.rb b/app/models/concerns/dependency_miner.rb index 30dc9d4ea..1ee50e1ac 100644 --- a/app/models/concerns/dependency_miner.rb +++ b/app/models/concerns/dependency_miner.rb @@ -1,6 +1,7 @@ module DependencyMiner def mine_dependencies return if scm == 'hg' # only works with git repositories at the moment + return if fork? tmp_dir_name = "#{host_type}-#{owner_name}-#{project_name}".downcase From 797c1057a02b9aae08b298228aa16d8dccbb73b4 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Fri, 24 Nov 2017 10:41:20 +0000 Subject: [PATCH 24/26] Fix elasticsearch import --- Gemfile | 2 +- config/application.rb | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/Gemfile b/Gemfile index 4ed6aa456..b0e78b498 100644 --- a/Gemfile +++ b/Gemfile @@ -12,6 +12,7 @@ gem 'jquery-rails' gem 'octokit' gem 'bootstrap-sass' gem 'will_paginate-bootstrap' +gem 'activerecord-import' gem 'elasticsearch', '~> 2' gem 'elasticsearch-model' gem 'elasticsearch-rails' @@ -81,7 +82,6 @@ gem 'pg_query' gem 'schema_plus_pg_indexes' gem 'autoprefixer-rails', '~> 7.1.2.1' gem 'repo_miner' -gem 'activerecord-import' gem 'amatch' gem 'concurrent-ruby-ext' gem 'charlock_holmes', '>= 0.7.5' diff --git a/config/application.rb b/config/application.rb index 162fb76f4..3ac54ab3c 100644 --- a/config/application.rb +++ b/config/application.rb @@ -9,6 +9,15 @@ require "action_view/railtie" require "sprockets/railtie" +require 'activerecord-import/base' + +class ActiveRecord::Base + class << self + alias :ar_import :import + remove_method :import + end +end + # Require the gems listed in Gemfile, including any gems # you've limited to :test, :development, or :production. Bundler.require(*Rails.groups) From 10abc648bc96564632ba316fb5442c9cb8b6d8d6 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Fri, 24 Nov 2017 10:42:31 +0000 Subject: [PATCH 25/26] Avoid conflict with elasticsearch import method --- app/models/concerns/dependency_miner.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/concerns/dependency_miner.rb b/app/models/concerns/dependency_miner.rb index 1ee50e1ac..09d77329c 100644 --- a/app/models/concerns/dependency_miner.rb +++ b/app/models/concerns/dependency_miner.rb @@ -58,7 +58,7 @@ def mine_dependencies end # write activities to the database - DependencyActivity.import(activities.map{|a| DependencyActivity.new(a) }) + DependencyActivity.ar_import(activities.map{|a| DependencyActivity.new(a) }) From 46cfb2d7cd5a8afeea9ec14135d75f0b9100a278 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Mon, 27 Nov 2017 12:07:10 +0000 Subject: [PATCH 26/26] Store branch when mining dependency activities --- app/models/concerns/dependency_miner.rb | 3 ++- .../20171127120330_add_branch_to_dependency_activities.rb | 5 +++++ db/schema.rb | 3 ++- 3 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 db/migrate/20171127120330_add_branch_to_dependency_activities.rb diff --git a/app/models/concerns/dependency_miner.rb b/app/models/concerns/dependency_miner.rb index 09d77329c..a6b7fc1df 100644 --- a/app/models/concerns/dependency_miner.rb +++ b/app/models/concerns/dependency_miner.rb @@ -88,7 +88,8 @@ def format_activity(commit, manifest, dependency, action) platform: manifest[:platform], previous_requirement: dependency[:previous_requirement], previous_kind: dependency[:previous_type], - committed_at: commit.timestamp + committed_at: commit.timestamp, + branch: default_branch } end end diff --git a/db/migrate/20171127120330_add_branch_to_dependency_activities.rb b/db/migrate/20171127120330_add_branch_to_dependency_activities.rb new file mode 100644 index 000000000..cc3ecb0f0 --- /dev/null +++ b/db/migrate/20171127120330_add_branch_to_dependency_activities.rb @@ -0,0 +1,5 @@ +class AddBranchToDependencyActivities < ActiveRecord::Migration[5.0] + def change + add_column :dependency_activities, :branch, :string + end +end diff --git a/db/schema.rb b/db/schema.rb index aabb9d15f..6502d8ce1 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20171109154509) do +ActiveRecord::Schema.define(version: 20171127120330) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -71,6 +71,7 @@ t.datetime "committed_at", :index=>{:name=>"index_dependency_activities_on_committed_at"} t.datetime "created_at", :null=>false t.datetime "updated_at", :null=>false + t.string "branch" end create_table "identities", force: :cascade do |t|