Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Language improvements, replace whatlanguage with CLD #2753

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion Gemfile
Expand Up @@ -20,6 +20,7 @@ gem 'paperclip', '~> 5.1'
gem 'paperclip-av-transcoder'

gem 'addressable'
gem 'cld2', require: 'cld'
gem 'devise'
gem 'devise-two-factor'
gem 'doorkeeper'
Expand Down Expand Up @@ -56,7 +57,6 @@ gem 'statsd-instrument'
gem 'twitter-text'
gem 'tzinfo-data'
gem 'webpacker', '~>1.2'
gem 'whatlanguage'

# For some reason the view specs start failing without this
gem 'react-rails'
Expand Down
6 changes: 4 additions & 2 deletions Gemfile.lock
Expand Up @@ -102,6 +102,8 @@ GEM
rack-test (>= 0.5.4)
xpath (~> 2.0)
chunky_png (1.3.8)
cld2 (1.0.3)
ffi (~> 1.9.3)
climate_control (0.1.0)
cocaine (0.5.8)
climate_control (>= 0.0.3, < 1.0)
Expand Down Expand Up @@ -153,6 +155,7 @@ GEM
faker (1.7.3)
i18n (~> 0.5)
fast_blank (1.0.0)
ffi (1.9.18)
fuubar (2.2.0)
rspec-core (~> 3.0)
ruby-progressbar (~> 1.4)
Expand Down Expand Up @@ -463,7 +466,6 @@ GEM
websocket-driver (0.6.5)
websocket-extensions (>= 0.1.0)
websocket-extensions (0.1.2)
whatlanguage (1.0.6)
xpath (2.0.0)
nokogiri (~> 1.3)

Expand All @@ -484,6 +486,7 @@ DEPENDENCIES
capistrano-rbenv
capistrano-yarn
capybara
cld2
devise
devise-two-factor
doorkeeper
Expand Down Expand Up @@ -549,7 +552,6 @@ DEPENDENCIES
uglifier (>= 1.3.0)
webmock
webpacker (~> 1.2)
whatlanguage

RUBY VERSION
ruby 2.4.1p111
Expand Down
14 changes: 13 additions & 1 deletion app/lib/language_detector.rb
Expand Up @@ -9,11 +9,23 @@ def initialize(text, account = nil)
end

def to_iso_s
WhatLanguage.new(:all).language_iso(text_without_urls) || default_locale.to_sym
detected_language_code || default_locale.to_sym
end

private

# Returns the :code entry of the detection result as a Symbol,
# or nil when the detection is not flagged as reliable.
def detected_language_code
  return unless detected_language_reliable?

  detected_language[:code].to_sym
end

# Runs CLD language detection on the text with URLs removed and memoizes
# the result in @_detected_language. Because `||=` is used, a nil/false
# result is not cached and detection is attempted again on the next call.
def detected_language
  @_detected_language ||= begin
    sanitized_text = text_without_urls
    CLD.detect_language(sanitized_text)
  end
end

# Returns the :reliable entry of the detection result, unchanged
# (nil if the result carries no such key).
def detected_language_reliable?
  detection = detected_language
  detection[:reliable]
end

def text_without_urls
text.dup.tap do |new_text|
URI.extract(new_text).each do |url|
Expand Down
24 changes: 15 additions & 9 deletions spec/lib/language_detector_spec.rb
Expand Up @@ -3,11 +3,17 @@

describe LanguageDetector do
describe 'to_iso_s' do
it 'detects english language' do
string = 'Hello and welcome to mastodon'
result = described_class.new(string).to_iso_s

expect(result).to eq :en
it 'detects english language for basic strings' do
strings = [
"Hello and welcome to mastodon",
"I'd rather not!",
"a lot of people just want to feel righteous all the time and that's all that matters",
]
strings.each do |string|
result = described_class.new(string).to_iso_s

expect(result).to eq(:en), string
end
end

it 'detects spanish language' do
Expand All @@ -19,15 +25,15 @@

describe 'when language can\'t be detected' do
it 'confirm language engine cant detect' do
result = WhatLanguage.new(:all).language_iso('')
expect(result).to be_nil
result = CLD.detect_language('')
expect(result[:reliable]).to be false
end

describe 'because of a URL' do
it 'uses default locale when sent just a URL' do
string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4'
wl_result = WhatLanguage.new(:all).language_iso(string)
expect(wl_result).not_to eq :en
cld_result = CLD.detect_language(string)[:code]
expect(cld_result).not_to eq :en

result = described_class.new(string).to_iso_s

Expand Down