From cd709522040957c14bc067c8ae3bdc660c302b24 Mon Sep 17 00:00:00 2001 From: Matt Jankowski Date: Wed, 3 May 2017 09:52:10 -0400 Subject: [PATCH 1/3] add failing en specs --- spec/lib/language_detector_spec.rb | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/spec/lib/language_detector_spec.rb b/spec/lib/language_detector_spec.rb index 5fb19a1e7d4a3a..1b42d0adbb3f91 100644 --- a/spec/lib/language_detector_spec.rb +++ b/spec/lib/language_detector_spec.rb @@ -3,11 +3,17 @@ describe LanguageDetector do describe 'to_iso_s' do - it 'detects english language' do - string = 'Hello and welcome to mastodon' - result = described_class.new(string).to_iso_s - - expect(result).to eq :en + it 'detects english language for basic strings' do + strings = [ + "Hello and welcome to mastodon", + "I'd rather not!", + "a lot of people just want to feel righteous all the time and that's all that matters", + ] + strings.each do |string| + result = described_class.new(string).to_iso_s + + expect(result).to eq(:en), string + end end it 'detects spanish language' do From d736cc0be6f1cea917888e750244a4f7042647a9 Mon Sep 17 00:00:00 2001 From: Matt Jankowski Date: Wed, 3 May 2017 09:52:17 -0400 Subject: [PATCH 2/3] add cld2 gem --- Gemfile | 1 + Gemfile.lock | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/Gemfile b/Gemfile index 1287afe4414ada..5d7824394dcb83 100644 --- a/Gemfile +++ b/Gemfile @@ -20,6 +20,7 @@ gem 'paperclip', '~> 5.1' gem 'paperclip-av-transcoder' gem 'addressable' +gem 'cld2', require: 'cld' gem 'devise' gem 'devise-two-factor' gem 'doorkeeper' diff --git a/Gemfile.lock b/Gemfile.lock index 218e17237c2fff..e58d89c4af93f5 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -102,6 +102,8 @@ GEM rack-test (>= 0.5.4) xpath (~> 2.0) chunky_png (1.3.8) + cld2 (1.0.3) + ffi (~> 1.9.3) climate_control (0.1.0) cocaine (0.5.8) climate_control (>= 0.0.3, < 1.0) @@ -153,6 +155,7 @@ GEM faker (1.7.3) i18n (~> 0.5) fast_blank (1.0.0) + ffi (1.9.18) fuubar (2.2.0) rspec-core (~> 3.0) ruby-progressbar (~> 1.4) @@ -484,6 +487,7 @@ DEPENDENCIES capistrano-rbenv capistrano-yarn capybara + cld2 devise devise-two-factor doorkeeper From fd301e568b55896e39f043dae15c9d1f5bfe08da Mon Sep 17 00:00:00 2001 From: Matt Jankowski Date: Wed, 3 May 2017 09:58:55 -0400 Subject: [PATCH 3/3] Replace WhatLanguage with CLD --- Gemfile | 1 - Gemfile.lock | 2 -- app/lib/language_detector.rb | 14 +++++++++++++- spec/lib/language_detector_spec.rb | 8 ++++---- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/Gemfile b/Gemfile index 5d7824394dcb83..d84597a787994b 100644 --- a/Gemfile +++ b/Gemfile @@ -57,7 +57,6 @@ gem 'statsd-instrument' gem 'twitter-text' gem 'tzinfo-data' gem 'webpacker', '~>1.2' -gem 'whatlanguage' # For some reason the view specs start failing without this gem 'react-rails' diff --git a/Gemfile.lock b/Gemfile.lock index e58d89c4af93f5..f4b307cec9874c 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -466,7 +466,6 @@ GEM websocket-driver (0.6.5) websocket-extensions (>= 0.1.0) websocket-extensions (0.1.2) - whatlanguage (1.0.6) xpath (2.0.0) nokogiri (~> 1.3) @@ -553,7 +552,6 @@ DEPENDENCIES uglifier (>= 1.3.0) webmock webpacker (~> 1.2) - whatlanguage RUBY VERSION ruby 2.4.1p111 diff --git a/app/lib/language_detector.rb b/app/lib/language_detector.rb index 9a32d6a6423940..8c1751beb8019f 100644 --- a/app/lib/language_detector.rb +++ b/app/lib/language_detector.rb @@ -9,11 +9,23 @@ def initialize(text, account = nil) end def to_iso_s - WhatLanguage.new(:all).language_iso(text_without_urls) || default_locale.to_sym + detected_language_code || default_locale.to_sym end private + def detected_language_code + detected_language[:code].to_sym if detected_language_reliable? + end + + def detected_language + @_detected_language ||= CLD.detect_language(text_without_urls) + end + + def detected_language_reliable? + detected_language[:reliable] + end + def text_without_urls text.dup.tap do |new_text| URI.extract(new_text).each do |url| diff --git a/spec/lib/language_detector_spec.rb b/spec/lib/language_detector_spec.rb index 1b42d0adbb3f91..bd4e65ef8e2e66 100644 --- a/spec/lib/language_detector_spec.rb +++ b/spec/lib/language_detector_spec.rb @@ -25,15 +25,15 @@ describe 'when language can\'t be detected' do it 'confirm language engine cant detect' do - result = WhatLanguage.new(:all).language_iso('') - expect(result).to be_nil + result = CLD.detect_language('') + expect(result[:reliable]).to be false end describe 'because of a URL' do it 'uses default locale when sent just a URL' do string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4' - wl_result = WhatLanguage.new(:all).language_iso(string) - expect(wl_result).not_to eq :en + cld_result = CLD.detect_language(string)[:code] + expect(cld_result).not_to eq :en result = described_class.new(string).to_iso_s