Skip to content

Commit

Permalink
Update gem typo logic and tests
Browse files Browse the repository at this point in the history
Use downloads count as threshold instead of static list.
Limit validation to new gems.
Update distance threshold as mentioned in segiddins' PR.

Existing gems we may consider blocking:
https://gist.github.com/sonalkr132/af05b030af793ce17a69245152d5aa5f
total: 4859 (2.98%)
  • Loading branch information
sonalkr132 committed Jun 24, 2019
1 parent f667870 commit 827a2e8
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 107 deletions.
55 changes: 25 additions & 30 deletions app/models/gem_typo.rb
@@ -1,46 +1,41 @@
require 'rubygems/text'
require "rubygems/text"

class GemTypo
PROTECTED_GEMS = [
'rspec-core',
'diff-lcs',
'rspec-expectations',
'rspec-mocks',
'rspec',
'bundler',
'rspec-support',
'multi_json',
'rack',
'rake'
].freeze

DISTANCE_THRESHOLD = 1

GEM_EXCEPTIONS = [
'rspec-coreZ'
# Add exceptions here to manage gems which share a close distance,
# but are manually reviewed and accepted by rubygems team
].freeze
attr_reader :protected_gem

include Gem::Text

def initialize(rubygem_name, opts = {})
@rubygem_name = rubygem_name
@protected_gems = opts[:protected_gems] || GemTypo::PROTECTED_GEMS
@distance_threshold = opts[:distance_threshold] || GemTypo::DISTANCE_THRESHOLD
@gem_exceptions = opts[:gem_exceptions] || GemTypo::GEM_EXCEPTIONS
DOWNLOADS_THRESHOLD = 10_000_000
SIZE_THRESHOLD = 4

def initialize(rubygem_name)
@rubygem_name = rubygem_name.downcase
end

def protected_typo?
@protected_gems.each do |protected_gem|
return false if @rubygem_name == protected_gem
return false if @rubygem_name.size < GemTypo::SIZE_THRESHOLD

protected_gems.each do |protected_gem|
distance = levenshtein_distance(@rubygem_name, protected_gem)
if distance <= @distance_threshold &&
!@gem_exceptions.include?(@rubygem_name)
if distance <= distance_threshold
@protected_gem = protected_gem
return true
end
end

false
end

private

# Largest Levenshtein distance that still counts as a typo of a protected gem.
#
# A name whose length is exactly GemTypo::SIZE_THRESHOLD gets the tighter
# limit of 1; every other length gets 2.
#
# @return [Integer] 1 or 2
def distance_threshold
  return 1 if @rubygem_name.size == GemTypo::SIZE_THRESHOLD

  2
end

# Names of the gems a new name is compared against.
#
# Takes the names from Rubygem.by_downloads whose "count" exceeds
# GemTypo::DOWNLOADS_THRESHOLD, then drops any entry identical to the name
# under test so that it is never compared against itself.
# NOTE(review): "count" is presumably the download counter exposed by the
# by_downloads scope -- confirm against the Rubygem model.
#
# @return [Array<String>] names of the protected gems, excluding @rubygem_name
def protected_gems
  popular_names = Rubygem.by_downloads
                         .where("count > ?", GemTypo::DOWNLOADS_THRESHOLD)
                         .pluck(:name)

  popular_names.select { |candidate| candidate != @rubygem_name }
end
end
6 changes: 3 additions & 3 deletions app/models/rubygem.rb
Expand Up @@ -18,7 +18,7 @@ class Rubygem < ApplicationRecord
uniqueness: { case_sensitive: false },
if: :needs_name_validation?
validate :blacklist_names_exclusion
validate :protected_gem_typo_protection
validate :protected_gem_typo, on: :create

after_create :update_unresolved
before_destroy :mark_unresolved
Expand Down Expand Up @@ -309,10 +309,10 @@ def blacklist_names_exclusion
errors.add :name, "'#{name}' is a reserved gem name."
end

def protected_gem_typo_protection
def protected_gem_typo
gem_typo = GemTypo.new(name)
return unless gem_typo.protected_typo?
errors.add :name, "'#{name}' is too close to a typo-protected gem."
errors.add :name, "'#{name}' is too close to typo-protected gem: #{gem_typo.protected_gem} "
end

def update_unresolved
Expand Down
114 changes: 40 additions & 74 deletions test/unit/gem_typo_test.rb
@@ -1,78 +1,44 @@
require 'test_helper'
require 'gem_typo'
require "test_helper"

class GemTypoTest < ActiveSupport::TestCase
teardown do
Rails.cache.clear
end

should 'return false for exact match' do
gem_typo = GemTypo.new('rspec-core')
assert_equal false, gem_typo.protected_typo?
end

should 'return true for 1 char distance match' do
gem_typo = GemTypo.new('rspec-core2')
assert_equal true, gem_typo.protected_typo?
end

should 'return false for 2 char distance match' do
gem_typo = GemTypo.new('rspec-core12')
assert_equal false, gem_typo.protected_typo?
end

should 'return false for 3 char distance match' do
gem_typo = GemTypo.new('rspec-core123')
assert_equal false, gem_typo.protected_typo?
end

should 'return false for 1 char distance match on the exception list' do
gem_typo = GemTypo.new('rspec-coreZ')
assert_equal false, gem_typo.protected_typo?
end

should 'allow customized protected_gems' do
opts = {
protected_gems: ["hello"]
}

gem_typo = GemTypo.new('hello', opts)
assert_equal false, gem_typo.protected_typo?

gem_typo = GemTypo.new('hello1', opts)
assert_equal true, gem_typo.protected_typo?
end

should 'allow customized distance_threshold' do
opts = {
distance_threshold: 3
}

gem_typo = GemTypo.new('rack', opts)
assert_equal false, gem_typo.protected_typo?

gem_typo = GemTypo.new('rack1', opts)
assert_equal true, gem_typo.protected_typo?

gem_typo = GemTypo.new('rack12', opts)
assert_equal true, gem_typo.protected_typo?

gem_typo = GemTypo.new('rack123', opts)
assert_equal true, gem_typo.protected_typo?

gem_typo = GemTypo.new('rack1234', opts)
assert_equal false, gem_typo.protected_typo?
end

should 'allow customized protected_gem_exceptions' do
opts = {
gem_exceptions: ["rake1"]
}

gem_typo = GemTypo.new('rake', opts)
assert_equal false, gem_typo.protected_typo?

gem_typo = GemTypo.new('rake1', opts)
assert_equal false, gem_typo.protected_typo?
context "with above downloads threshold gem" do
setup do
above_downloads_thres = GemTypo::DOWNLOADS_THRESHOLD + 1
create(:rubygem, name: "four", downloads: above_downloads_thres)
end

should "return false for exact match" do
gem_typo = GemTypo.new("four")
assert_equal false, gem_typo.protected_typo?
end

should "return false for gem name size below protected threshold" do
gem_typo = GemTypo.new("fou")
assert_equal false, gem_typo.protected_typo?
end

context "size equals protected threshold" do
should "return true for one character distance" do
gem_typo = GemTypo.new("fous")
assert_equal true, gem_typo.protected_typo?
end

should "return false for two character distance" do
gem_typo = GemTypo.new("foss")
assert_equal false, gem_typo.protected_typo?
end
end

context "size above protected threshold" do
should "return true for two character distance" do
gem_typo = GemTypo.new("fourss")
assert_equal true, gem_typo.protected_typo?
end

# "foursss" is three edits away from the protected gem "four", one more than
# the limit of 2 expected for names longer than the size threshold.
should "return false for three character distance" do
  gem_typo = GemTypo.new("foursss")
  assert_equal false, gem_typo.protected_typo?
end
end
end
end

0 comments on commit 827a2e8

Please sign in to comment.