# frozen_string_literal: true

# tasks/extract_code_translations.rb
#
# Parses an HTML table of language names and prints, for every table row, the
# text of its first <td> (the "ISO 639-1" column in the sample table) mapped
# to the texts of the remaining <td> cells of that row.
#
# Usage: ruby extract_code_translations.rb [path/to/table.html]
#
# Output on stdout, in order:
#   1. the first-cell text of every row,
#   2. one line per <tr> with the first <td> node (blank when the row has none),
#   3. the resulting Hash of code => [labels].

require 'nokogiri'

# Defaults to the file name the script has always read; an optional first
# command-line argument overrides it.
# NOTE(review): the same commit adds tasks/language_extract.html, not
# language_table.html -- confirm which file is the intended input.
input_path = ARGV.first || 'language_table.html'

# File.read closes the handle when done (open(path).read leaked it) and, unlike
# Kernel#open, never interprets a leading "|" in the path as a command to run.
html = File.read(input_path, encoding: 'utf-8')
doc = Nokogiri::HTML(html, nil, 'utf-8')

codes = doc.css('tr td:first').map(&:content)
puts codes

translations = {}

doc.css('tr').each do |row|
  cells = row.css('td')
  code_cell = cells.first
  puts code_cell

  # Rows without any <td> (presumably header rows built from <th>) carry no code.
  next unless code_cell

  # Skip the code column by position rather than by value, so a label that
  # happens to be spelled exactly like the code is not silently dropped.
  translations[code_cell.content] = cells.drop(1).map(&:content)
end

puts translations

# Everything after __END__ is ignored by Ruby. It is the diff of the sample
# input table that accompanies this script, kept verbatim.
__END__
diff --git a/tasks/language_extract.html b/tasks/language_extract.html new file mode 100644 index 0000000..e0ff308 --- /dev/null +++ b/tasks/language_extract.html @@ -0,0 +1,73 @@ +
ISO 639-1 | +bg | +cs | +da | +de | +el | +en | +es | +et | +fi | +
---|---|---|---|---|---|---|---|---|---|
ab | +абхазки | +abchazština | +Abkhazian[citation needed] | +Abchasisch | +αμπχαζικά | +Abkhazian | +abjazio | +abhaasi | +abhaasi | +
af | +африканс | +afrikánština | +Afrikaans | +Afrikaans | +αφρικάνς | +Afrikaans | +afrikaans | +afrikaani | +afrikaans | +
an | +арагонски | +aragonština | +Aragonesisk | +Aragonesisch | +γλώσσα της Aragon | +Aragonese | +aragonés | +aragoni | +aragonia | +
ar | +арабски | +arabština | +Arabisk | +Arabisch | +αραβικά | +Arabic | +árabe | +araabia | +arabia | +
as | +асамски | +ásámština | +Assamesisk | +Assamesisch | +ασαμέζικα | +Assamese | +asamés | +assami | +assami | +