Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Made detect_language support CLD's underlying HTML mode.

  • Loading branch information...
commit 2056c318668c6dbc5395f382524a95760ed321ed 1 parent 97bf5c5
@rojotek rojotek authored
Showing 3 changed files with 19 additions and 5 deletions.
  1. +1 −2  ext/cld/thunk.cc
  2. +3 −3 lib/cld.rb
  3. +15 −0 spec/cld_spec.rb
View
3  ext/cld/thunk.cc
@@ -12,8 +12,7 @@ typedef struct {
} RESULT;
extern "C" {
- RESULT detectLanguageThunkInt(const char * src) {
- bool is_plain_text = true;
+ RESULT detectLanguageThunkInt(const char * src, bool is_plain_text) {
bool do_allow_extended_languages = true;
bool do_pick_summary_language = false;
bool do_remove_weak_matches = false;
View
6 lib/cld.rb
@@ -4,8 +4,8 @@
module CLD
extend FFI::Library
- def self.detect_language(text)
- result = detect_language_ext(text.to_s)
+ def self.detect_language(text, is_plain_text=true)
+ result = detect_language_ext(text.to_s, is_plain_text)
Hash[ result.members.map {|member| [member.to_sym, result[member]]} ]
end
@@ -17,5 +17,5 @@ class ReturnValue < FFI::Struct
GEM_ROOT = File.expand_path("../../", __FILE__)
ffi_lib "#{GEM_ROOT}/ext/cld/cld.so"
- attach_function "detect_language_ext","detectLanguageThunkInt", [:buffer_in], ReturnValue.by_value
+ attach_function "detect_language_ext","detectLanguageThunkInt", [:buffer_in, :bool], ReturnValue.by_value
end
View
15 spec/cld_spec.rb
@@ -19,6 +19,21 @@
it { subject[:reliable].should be_true }
end
+ context "French in HTML - using CLD html " do
+ subject { CLD.detect_language("<html><head><body><script>A large amount of english in the script which should be ignored if using html in detect_language.</script><p>plus ça change, plus c'est la même chose</p></body></html>", false) }
+
+ it { subject[:name].should eq("FRENCH") }
+ it { subject[:code].should eq("fr") }
+
+ end
+ context "French in HTML - using CLD text " do
+ subject { CLD.detect_language("<html><head><body><script>A large amount of english in the script which should be ignored if using html in detect_language.</script><p>plus ça change, plus c'est la même chose</p></body></html>", true) }
+
+ it { subject[:name].should eq("ENGLISH") }
+ it { subject[:code].should eq("en") }
+
+ end
+
context "Simplified Chinese text" do
subject { CLD.detect_language("你好吗箭体") }
Please sign in to comment.
Something went wrong with that request. Please try again.