Skip to content

Commit

Permalink
Merge pull request #2508 from diowa/encoding-fix
Browse files Browse the repository at this point in the history
Improve encoding support in CSV converter
  • Loading branch information
bbenezech committed Feb 4, 2016
2 parents ff6246c + 293bf76 commit 3fd3b32
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 10 deletions.
2 changes: 1 addition & 1 deletion app/views/rails_admin/main/export.html.haml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
%label.col-sm-2.control-label{for: "csv_options_encoding_to"}= t('admin.export.csv.encoding_to')
.col-sm-10.controls
-# from http://books.google.com/support/partner/bin/answer.py?answer=30990 :
= select_tag 'csv_options[encoding_to]', options_for_select(RailsAdmin::CSVConverter::TARGET_ENCODINGS), include_blank: true, placeholder: t('admin.misc.search'), :'data-enumeration' => true
= select_tag 'csv_options[encoding_to]', options_for_select(Encoding.name_list.sort), include_blank: true, placeholder: t('admin.misc.search'), :'data-enumeration' => true
%p.help-block= t('admin.export.csv.encoding_to_help', name: guessed_encoding)

.form-group.control-group
Expand Down
120 changes: 111 additions & 9 deletions lib/rails_admin/support/csv_converter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,104 @@

module RailsAdmin
class CSVConverter
UTF8_ENCODINGS = [nil, '', 'utf8', 'utf-8', 'unicode', 'UTF8', 'UTF-8', 'UNICODE', 'utf8mb4']
TARGET_ENCODINGS = %w(UTF-8 UTF-16LE UTF-16BE UTF-32LE UTF-32BE UTF-7 ISO-8859-1 ISO-8859-15 IBM850 MacRoman Windows-1252 ISO-8859-3 IBM852 ISO-8859-2 Windows-1250 IBM855 ISO-8859-5 KOI8-R MacCyrillic Windows-1251 IBM866 GB2312 GBK GB18030 Big5 Big5-HKSCS EUC-TW EUC-JP ISO-2022-JP Shift_JIS EUC-KR)
class DbEncodingMap
# The mapping from canonical encoding names in PostgreSQL to ones in Ruby.
# Taken from here:
# https://bitbucket.org/ged/ruby-pg/src/master/ext/pg.c
PG_ENCODINGS = {
'BIG5' => Encoding::Big5,
'EUC_CN' => Encoding::GB2312,
'EUC_JP' => Encoding::EUC_JP,
'EUC_JIS_2004' => Encoding::EUC_JP,
'EUC_KR' => Encoding::EUC_KR,
'EUC_TW' => Encoding::EUC_TW,
'GB18030' => Encoding::GB18030,
'GBK' => Encoding::GBK,
'ISO_8859_5' => Encoding::ISO_8859_5,
'ISO_8859_6' => Encoding::ISO_8859_6,
'ISO_8859_7' => Encoding::ISO_8859_7,
'ISO_8859_8' => Encoding::ISO_8859_8,
'KOI8' => Encoding::KOI8_R,
'KOI8R' => Encoding::KOI8_R,
'KOI8U' => Encoding::KOI8_U,
'LATIN1' => Encoding::ISO_8859_1,
'LATIN2' => Encoding::ISO_8859_2,
'LATIN3' => Encoding::ISO_8859_3,
'LATIN4' => Encoding::ISO_8859_4,
'LATIN5' => Encoding::ISO_8859_9,
'LATIN6' => Encoding::ISO_8859_10,
'LATIN7' => Encoding::ISO_8859_13,
'LATIN8' => Encoding::ISO_8859_14,
'LATIN9' => Encoding::ISO_8859_15,
'LATIN10' => Encoding::ISO_8859_16,
'MULE_INTERNAL' => Encoding::Emacs_Mule,
'SJIS' => Encoding::Windows_31J,
'SHIFT_JIS_2004' => Encoding::Windows_31J,
'SQL_ASCII' => nil,
'UHC' => Encoding::CP949,
'UTF8' => Encoding::UTF_8,
'WIN866' => Encoding::IBM866,
'WIN874' => Encoding::Windows_874,
'WIN1250' => Encoding::Windows_1250,
'WIN1251' => Encoding::Windows_1251,
'WIN1252' => Encoding::Windows_1252,
'WIN1253' => Encoding::Windows_1253,
'WIN1254' => Encoding::Windows_1254,
'WIN1255' => Encoding::Windows_1255,
'WIN1256' => Encoding::Windows_1256,
'WIN1257' => Encoding::Windows_1257,
'WIN1258' => Encoding::Windows_1258,
}

# The mapping from canonical encoding names in MySQL to ones in Ruby.
# Taken from here:
# https://github.com/tmtm/ruby-mysql/blob/master/lib/mysql/charset.rb
# Author: TOMITA Masahiro <tommy@tmtm.org>
MYSQL_ENCODINGS = {
'armscii8' => nil,
'ascii' => Encoding::US_ASCII,
'big5' => Encoding::Big5,
'binary' => Encoding::ASCII_8BIT,
'cp1250' => Encoding::Windows_1250,
'cp1251' => Encoding::Windows_1251,
'cp1256' => Encoding::Windows_1256,
'cp1257' => Encoding::Windows_1257,
'cp850' => Encoding::CP850,
'cp852' => Encoding::CP852,
'cp866' => Encoding::IBM866,
'cp932' => Encoding::Windows_31J,
'dec8' => nil,
'eucjpms' => Encoding::EucJP_ms,
'euckr' => Encoding::EUC_KR,
'gb2312' => Encoding::EUC_CN,
'gbk' => Encoding::GBK,
'geostd8' => nil,
'greek' => Encoding::ISO_8859_7,
'hebrew' => Encoding::ISO_8859_8,
'hp8' => nil,
'keybcs2' => nil,
'koi8r' => Encoding::KOI8_R,
'koi8u' => Encoding::KOI8_U,
'latin1' => Encoding::ISO_8859_1,
'latin2' => Encoding::ISO_8859_2,
'latin5' => Encoding::ISO_8859_9,
'latin7' => Encoding::ISO_8859_13,
'macce' => Encoding::MacCentEuro,
'macroman' => Encoding::MacRoman,
'sjis' => Encoding::SHIFT_JIS,
'swe7' => nil,
'tis620' => Encoding::TIS_620,
'ucs2' => Encoding::UTF_16BE,
'ujis' => Encoding::EucJP_ms,
'utf8' => Encoding::UTF_8,
'utf8mb4' => Encoding::UTF_8,
}

def self.encodings
@_encodings ||= PG_ENCODINGS.merge MYSQL_ENCODINGS
end
end

def initialize(objects = [], schema = {})
return self if (@objects = objects).blank?

Expand Down Expand Up @@ -35,23 +131,28 @@ def initialize(objects = [], schema = {})

def to_csv(options = {})
# encoding shenanigans first
@encoding_from = Encoding.find(UTF8_ENCODINGS.include?(@abstract_model.encoding) ? 'UTF-8' : @abstract_model.encoding)
@encoding_to = Encoding.find(options[:encoding_to].presence || @encoding_from)
encoding_from = DbEncodingMap.encodings[@abstract_model.encoding] || Encoding::UTF_8
encoding_to =
if options[:encoding_to].present?
Encoding.find(options[:encoding_to])
else
encoding_from
end

csv_string = generate_csv_string(options)

if @encoding_to != @encoding_from
csv_string = csv_string.encode(@encoding_to, @encoding_from, invalid: :replace, undef: :replace, replace: '?')
if encoding_to != encoding_from
csv_string = csv_string.encode(encoding_to, encoding_from, invalid: :replace, undef: :replace, replace: '?')
end
# Add a BOM for utf8 encodings, helps with utf8 auto-detect for some versions of Excel.
# Don't add if utf8 but user don't want to touch input encoding:
# If user chooses utf8, they will open it in utf8 and BOM will disappear at reading.
# But that way "English" users who don't bother and chooses to let utf8 by default won't get BOM added
# and will not see it if Excel opens the file with a different encoding.
if options[:encoding_to].present? && @encoding_to == Encoding::UTF_8
if options[:encoding_to].present? && encoding_to == Encoding::UTF_8
csv_string = "\xEF\xBB\xBF#{csv_string}"
end
[!options[:skip_header], @encoding_to.to_s, csv_string]
[!options[:skip_header], encoding_to.to_s, csv_string]
end

private
Expand All @@ -66,10 +167,11 @@ def export_fields_for(method, model_config = @model_config)

def generate_csv_string(options)
generator_options = (options[:generator] || {}).symbolize_keys.delete_if { |_, value| value.blank? }
method = @objects.respond_to?(:find_each) ? :find_each : :each

CSV.generate(generator_options) do |csv|
csv << generate_csv_header unless options[:skip_header]

method = @objects.respond_to?(:find_each) ? :find_each : :each
@objects.send(method) do |object|
csv << generate_csv_row(object)
end
Expand Down
13 changes: 13 additions & 0 deletions spec/rails_admin/support/csv_converter_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,19 @@
end
end

context 'when encoding FROM MySQL latin1' do
let(:encoding) { '' }
let(:objects) { FactoryGirl.create_list :player, 1, number: 1, name: 'Josè'.encode('ISO-8859-1') }

it 'exports to ISO-8859-1', active_record: true do
expect(::ActiveRecord::Base.connection).to receive(:encoding) { 'latin1' }
expect(subject[1]).to eq 'ISO-8859-1'
expect(subject[2].encoding).to eq Encoding::ISO_8859_1
expect(subject[2].unpack('H*').first).
to eq '4e756d6265722c4e616d650a312c4a6f73e80a'
end
end

context 'when encoding to UTF-8' do
let(:encoding) { 'UTF-8' }

Expand Down

0 comments on commit 3fd3b32

Please sign in to comment.