# frozen_string_literal: true

# Reconstructed from the git diff that adds tasks/extract_code_translations.rb
# (new file, index 0000000..e8e838c).
#
# Parses an HTML table and prints:
#   1. the text of the first <td> of every row (the language codes),
#   2. for each row, the raw markup of that first cell,
#   3. a Hash mapping each code to the texts of the other cells in its row.
#
# Usage: ruby extract_code_translations.rb [path/to/table.html]
#
# NOTE(review): the same diff adds tasks/language_extract.html, while the
# default input below is language_table.html -- confirm which file is intended.

require 'nokogiri'

# Input file; a relative path is resolved against the current working
# directory. Defaults to the name the script originally hard-coded.
link = ARGV.fetch(0, 'language_table.html')

# File.read closes the handle before returning (the former `open(link).read`
# left it open) and, unlike Kernel#open, never interprets a leading "|" in the
# path as a command to run.
doc = Nokogiri::HTML(File.read(link), nil, 'utf-8')

codes = doc.css('tr td:first').map(&:content)

puts codes

hash = {}

doc.css('tr').each do |tr|
  cells = tr.css('td')
  code_cell = cells.first

  # Echo the code cell's markup; this prints an empty line for rows that
  # contain no <td> at all.
  puts code_cell
  next unless code_cell

  code = code_cell.content

  # Skip the code cell by position instead of comparing texts, so a
  # translation that happens to equal the code is no longer discarded.
  hash[code] = cells.drop(1).map(&:content)
end

puts hash

# Everything after __END__ is non-Ruby residue of the diff this script was
# recovered from (apparently the tag-stripped rows of the HTML table). Ruby
# stops parsing at the marker, and this script never reads it back.
__END__
diff --git a/tasks/language_extract.html b/tasks/language_extract.html new file mode 100644 index 0000000..e0ff308 --- /dev/null +++ b/tasks/language_extract.html @@ -0,0 +1,73 @@
ISO 639-1bgcsdadeelenesetfi
abабхазкиabchazštinaAbkhazian[citation needed]AbchasischαμπχαζικάAbkhazianabjazioabhaasiabhaasi
afафрикансafrikánštinaAfrikaansAfrikaansαφρικάνςAfrikaansafrikaansafrikaaniafrikaans
anарагонскиaragonštinaAragonesiskAragonesischγλώσσα της AragonAragonesearagonésaragoniaragonia
arарабскиarabštinaArabiskArabischαραβικάArabicárabearaabiaarabia
asасамскиásámštinaAssamesiskAssamesischασαμέζικαAssameseasamésassamiassami