Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Merge branch 'henrik/master'

  • Loading branch information...
commit b97113e938724f7916f664e37335ff06db125336 2 parents 168f445 + 6b4684a
Peter Cooper authored
3  build_lang_from_wordlists.rb
View
@@ -6,7 +6,8 @@
wordlists_folder = File.join(File.dirname(__FILE__), "wordlists")
Dir.entries(wordlists_folder).grep(/\w/).each do |lang|
+ next if lang == 'generators'
puts "Doing #{lang}"
filter = WhatLanguage.filter_from_dictionary(File.join(wordlists_folder, lang))
File.open(File.join(languages_folder, lang + ".lang"), 'w') { |f| f.write filter.dump }
-end
+end
BIN  lang/swedish.lang
View
Binary file not shown
4 test/test_whatlanguage.rb
View
@@ -18,6 +18,10 @@ def test_french
def test_spanish
assert_equal :spanish, @wl.language("La palabra mezquita se usa en español para referirse a todo tipo de edificios dedicados.")
end
+
+ def test_swedish
+ assert_equal :swedish, @wl.language("Den spanska räven rev en annan räv alldeles lagom.")
+ end
def test_nothing
assert_nil @wl.language("")
30 wordlists/generators/swedish.rb
View
@@ -0,0 +1,30 @@
+#!/usr/bin/env ruby
+
+# Run this script to regenerate the Swedish wordlist (the file named by WORDLIST below).
+
+# Data is from http://www.dsso.se/download.html
+# under a Creative Commons ShareAlike license (http://creativecommons.org/licenses/sa/1.0/).
+
+URL = "http://hem.bredband.net/dsso1/dsso-1.29.txt" # source data, fetched over HTTP on every run
+WORDLIST = File.join(File.dirname(__FILE__), '../swedish') # output file: "swedish" in the parent of this script's directory
+
+require "open-uri" # lets the plain open(URL) call below fetch a URL (Ruby < 3.0; later versions need URI.open)
+require "iconv" # NOTE(review): iconv left the standard library in Ruby 2.0; String#encode is the replacement there
+
+puts "Fetching source data..."
+data = open(URL) # NOTE(review): this handle is never explicitly closed
+
+puts "Writing to word list..."
+open(WORDLIST, 'w') do |file| # block form closes the word list file when the block exits
+ data.each do |line|
+ next unless line =~ /^\d+r\d+<.+?>([^:]+)/ # keep only "<digits>r<digits><...>text" lines; capture the text up to the first ":"
+ line = $1 # continue with just the captured text
+
+ line.gsub!(/\s*,\s*/, "\n") # Some word variations are written like "word, variation"; put each on its own line
+ line = Iconv.iconv('UTF-8', 'ISO-8859-1', line) # Convert Latin-1 to UTF-8 (Iconv.iconv returns an Array of strings)
+
+ file.puts(line) # puts writes each element of the array on its own line
+ end
+end
+
+puts "All done."
54,818 wordlists/swedish
View
54,818 additions, 0 deletions not shown
Please sign in to comment.
Something went wrong with that request. Please try again.