Skip to content

Commit

Permalink
Simplify export by trusting DB driver's encoding handling
Browse files Browse the repository at this point in the history
Refs. #2508
  • Loading branch information
mshibuya committed Feb 27, 2016
1 parent 1364dec commit dca8911
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 177 deletions.
11 changes: 8 additions & 3 deletions lib/rails_admin/adapters/active_record.rb
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,14 @@ def properties
delegate :primary_key, :table_name, to: :model, prefix: false

# Reports the string encoding in use for the ActiveRecord database connection.
#
# NOTE(review): this span is a merged diff view (the hunk header lists
# 8 additions and 3 deletions). The three `encoding` lines directly below are
# the implementation being removed; the `case` statement is its replacement.
# Read as one method, only the value of the `case` expression is returned.
def encoding
# Removed approach: ask the connection for an encoding name, fall back to
# `charset`, and default to the string 'UTF-8' when neither is available.
encoding = ::ActiveRecord::Base.connection.try(:encoding)
encoding ||= ::ActiveRecord::Base.connection.try(:charset) # mysql2
encoding || 'UTF-8'
# Added approach: branch on the configured adapter name and return whatever
# `.encoding` yields for a value produced by the driver itself.
case ::ActiveRecord::Base.connection_config[:adapter]
when 'postgresql'
# Select an empty text literal and inspect the encoding of the value returned
# (presumably a String tagged by the pg driver; verify against the driver).
::ActiveRecord::Base.connection.select_one("SELECT ''::text AS str;").values.first.encoding
when 'mysql2'
# Reads the adapter's @connection instance variable and asks it for its
# encoding (presumably the raw Mysql2 client; this bypasses the public API).
::ActiveRecord::Base.connection.instance_variable_get(:@connection).encoding
else
# Any other adapter: same probe as PostgreSQL, without the ::text cast.
::ActiveRecord::Base.connection.select_one("SELECT '' AS str;").values.first.encoding
end
end

def embedded?
Expand Down
2 changes: 1 addition & 1 deletion lib/rails_admin/adapters/mongoid.rb
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def table_name
end

# Encoding reported by the Mongoid adapter: always UTF-8.
#
# NOTE(review): merged diff view (1 addition, 1 deletion). The 'UTF-8' string
# literal is the removed line and Encoding::UTF_8 is the added one; as the last
# expression, the Encoding constant is what the method returns.
def encoding
'UTF-8'
Encoding::UTF_8
end

def embedded?
Expand Down
120 changes: 7 additions & 113 deletions lib/rails_admin/support/csv_converter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,104 +3,6 @@

module RailsAdmin
class CSVConverter
# Lookup tables that translate database encoding names (as strings) into Ruby
# Encoding constants. Names with a nil value have no Ruby Encoding assigned in
# these tables.
#
# NOTE(review): this whole class appears in the commit as removed lines; it is
# the table the old `to_csv` consulted via `DbEncodingMap.encodings[...]`.
class DbEncodingMap
# The mapping from canonical encoding names in PostgreSQL to ones in Ruby.
# Taken from here:
# https://bitbucket.org/ged/ruby-pg/src/master/ext/pg.c
PG_ENCODINGS = {
'BIG5' => Encoding::Big5,
'EUC_CN' => Encoding::GB2312,
'EUC_JP' => Encoding::EUC_JP,
'EUC_JIS_2004' => Encoding::EUC_JP,
'EUC_KR' => Encoding::EUC_KR,
'EUC_TW' => Encoding::EUC_TW,
'GB18030' => Encoding::GB18030,
'GBK' => Encoding::GBK,
'ISO_8859_5' => Encoding::ISO_8859_5,
'ISO_8859_6' => Encoding::ISO_8859_6,
'ISO_8859_7' => Encoding::ISO_8859_7,
'ISO_8859_8' => Encoding::ISO_8859_8,
'KOI8' => Encoding::KOI8_R,
'KOI8R' => Encoding::KOI8_R,
'KOI8U' => Encoding::KOI8_U,
'LATIN1' => Encoding::ISO_8859_1,
'LATIN2' => Encoding::ISO_8859_2,
'LATIN3' => Encoding::ISO_8859_3,
'LATIN4' => Encoding::ISO_8859_4,
'LATIN5' => Encoding::ISO_8859_9,
'LATIN6' => Encoding::ISO_8859_10,
'LATIN7' => Encoding::ISO_8859_13,
'LATIN8' => Encoding::ISO_8859_14,
'LATIN9' => Encoding::ISO_8859_15,
'LATIN10' => Encoding::ISO_8859_16,
'MULE_INTERNAL' => Encoding::Emacs_Mule,
'SJIS' => Encoding::Windows_31J,
'SHIFT_JIS_2004' => Encoding::Windows_31J,
'SQL_ASCII' => nil,
'UHC' => Encoding::CP949,
'UTF8' => Encoding::UTF_8,
'WIN866' => Encoding::IBM866,
'WIN874' => Encoding::Windows_874,
'WIN1250' => Encoding::Windows_1250,
'WIN1251' => Encoding::Windows_1251,
'WIN1252' => Encoding::Windows_1252,
'WIN1253' => Encoding::Windows_1253,
'WIN1254' => Encoding::Windows_1254,
'WIN1255' => Encoding::Windows_1255,
'WIN1256' => Encoding::Windows_1256,
'WIN1257' => Encoding::Windows_1257,
'WIN1258' => Encoding::Windows_1258,
}

# The mapping from canonical encoding names in MySQL to ones in Ruby.
# Taken from here:
# https://github.com/tmtm/ruby-mysql/blob/master/lib/mysql/charset.rb
# Author: TOMITA Masahiro <tommy@tmtm.org>
MYSQL_ENCODINGS = {
'armscii8' => nil,
'ascii' => Encoding::US_ASCII,
'big5' => Encoding::Big5,
'binary' => Encoding::ASCII_8BIT,
'cp1250' => Encoding::Windows_1250,
'cp1251' => Encoding::Windows_1251,
'cp1256' => Encoding::Windows_1256,
'cp1257' => Encoding::Windows_1257,
'cp850' => Encoding::CP850,
'cp852' => Encoding::CP852,
'cp866' => Encoding::IBM866,
'cp932' => Encoding::Windows_31J,
'dec8' => nil,
'eucjpms' => Encoding::EucJP_ms,
'euckr' => Encoding::EUC_KR,
'gb2312' => Encoding::EUC_CN,
'gbk' => Encoding::GBK,
'geostd8' => nil,
'greek' => Encoding::ISO_8859_7,
'hebrew' => Encoding::ISO_8859_8,
'hp8' => nil,
'keybcs2' => nil,
'koi8r' => Encoding::KOI8_R,
'koi8u' => Encoding::KOI8_U,
'latin1' => Encoding::ISO_8859_1,
'latin2' => Encoding::ISO_8859_2,
'latin5' => Encoding::ISO_8859_9,
'latin7' => Encoding::ISO_8859_13,
'macce' => Encoding::MacCentEuro,
'macroman' => Encoding::MacRoman,
'sjis' => Encoding::SHIFT_JIS,
'swe7' => nil,
'tis620' => Encoding::TIS_620,
'ucs2' => Encoding::UTF_16BE,
'ujis' => Encoding::EucJP_ms,
'utf8' => Encoding::UTF_8,
'utf8mb4' => Encoding::UTF_8,
}

# Returns one hash combining both tables, built on first call and cached in
# the class-level @_encodings. PostgreSQL names are upper-case and MySQL names
# lower-case in the tables above, so the merge does not overwrite any entry.
def self.encodings
@_encodings ||= PG_ENCODINGS.merge MYSQL_ENCODINGS
end
end

def initialize(objects = [], schema = {})
return self if (@objects = objects).blank?

Expand Down Expand Up @@ -130,29 +32,21 @@ def initialize(objects = [], schema = {})
end

# Builds the CSV export and returns a three-element array:
# [whether a header row is included, the output encoding name, the CSV string].
#
# Options read here: :encoding_to (name of the target encoding; blank means
# "do not transcode") and :skip_header. The options hash is also passed on to
# generate_csv_string.
#
# NOTE(review): this span is a merged diff view in which removed and added
# lines are interleaved without markers, so the `if`/`end` pairs do not balance
# as displayed. The removed lines are the ones using `encoding_from` and
# `DbEncodingMap`; the added lines rely on the string's own encoding.
def to_csv(options = {})
# Removed: resolve the source encoding from the DB encoding name (default
# UTF-8) and use it as the target when no :encoding_to was given.
encoding_from = DbEncodingMap.encodings[@abstract_model.encoding] || Encoding::UTF_8
encoding_to =
if options[:encoding_to].present?
Encoding.find(options[:encoding_to])
else
encoding_from
end
# Added: resolve the target encoding only when one was requested; otherwise
# encoding_to stays nil.
encoding_to = Encoding.find(options[:encoding_to]) if options[:encoding_to].present?

csv_string = generate_csv_string(options)

# Removed: transcode only when source and target differ.
if encoding_to != encoding_from
csv_string = csv_string.encode(encoding_to, encoding_from, invalid: :replace, undef: :replace, replace: '?')
# Added: transcode whenever a target was requested, replacing invalid or
# unmappable characters with '?'.
if encoding_to
csv_string = csv_string.encode(encoding_to, invalid: :replace, undef: :replace, replace: '?')
end

# Add a BOM for UTF-8 output; it helps some versions of Excel auto-detect UTF-8.
# Do not add it when the output is UTF-8 only because the user left the encoding untouched:
# a user who explicitly chooses UTF-8 will open the file as UTF-8, and the BOM disappears on reading,
# whereas "English" users who simply keep the default get no BOM added
# and so will not see stray BOM bytes if Excel opens the file with a different encoding.
if options[:encoding_to].present? && encoding_to == Encoding::UTF_8
csv_string = "\xEF\xBB\xBF#{csv_string}"
end
[!options[:skip_header], encoding_to.to_s, csv_string]
csv_string = "\xEF\xBB\xBF#{csv_string}" if encoding_to == Encoding::UTF_8

# When no target encoding was requested, report the CSV string's own encoding.
[!options[:skip_header], (encoding_to || csv_string.encoding).to_s, csv_string]
end

private
Expand Down
28 changes: 9 additions & 19 deletions spec/dummy_app/Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,18 @@ gem 'rack-cache', require: 'rack/cache'

group :active_record do
platforms :jruby do
case ENV['CI_DB_ADAPTER']
when 'mysql'
gem 'activerecord-jdbcmysql-adapter', '>= 1.2'
gem 'jdbc-mysql', '>= 5.1'
when 'postgresql'
gem 'activerecord-jdbcpostgresql-adapter', '>= 1.2'
gem 'jdbc-postgres', '>= 9.2'
else
gem 'activerecord-jdbcsqlite3-adapter', '>= 1.2'
gem 'jdbc-sqlite3', '>= 3.7'
end
gem 'activerecord-jdbcmysql-adapter', '>= 1.2'
gem 'jdbc-mysql', '>= 5.1'
gem 'activerecord-jdbcpostgresql-adapter', '>= 1.2'
gem 'jdbc-postgres', '>= 9.2'
gem 'activerecord-jdbcsqlite3-adapter', '>= 1.2'
gem 'jdbc-sqlite3', '>= 3.7'
end

platforms :ruby, :mswin, :mingw do
case ENV['CI_DB_ADAPTER']
when 'mysql2'
gem 'mysql2', '~> 0.3.14'
when 'postgresql'
gem 'pg', '>= 0.14'
else
gem 'sqlite3', '>= 1.3'
end
gem 'mysql2', '~> 0.3.14'
gem 'pg', '>= 0.14'
gem 'sqlite3', '>= 1.3'
end

gem 'paper_trail', '~> 3.0'
Expand Down
39 changes: 23 additions & 16 deletions spec/dummy_app/config/database.yml
Original file line number Diff line number Diff line change
@@ -1,25 +1,32 @@
# SQLite version 3.x
# gem install sqlite3
#
# Ensure the SQLite 3 gem is defined in your Gemfile
# gem 'sqlite3'
development:
sqlite: &sqlite
adapter: sqlite3
database: db/development.sqlite3
pool: 5
timeout: 5000

# Warning: The database defined as "test" will be erased and
# re-generated from your development database when you run "rake".
# Do not set this db to the same as development or production.
test:
adapter: sqlite3
database: db/test.sqlite3
postgresql:
adapter: postgresql
database: rails_admin
username: postgres
password:
host: localhost
encoding: utf8
pool: 5
timeout: 5000

production:
adapter: sqlite3
database: db/production.sqlite3
mysql:
adapter: mysql2
database: rails_admin
username: root
password:
host: localhost
encoding: utf8
pool: 5
timeout: 5000

development:
<<: *sqlite
database: db/development.sqlite3

test:
<<: *sqlite
database: db/test.sqlite3
48 changes: 23 additions & 25 deletions spec/rails_admin/support/csv_converter_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,35 +32,33 @@

subject { RailsAdmin::CSVConverter.new(objects, schema).to_csv(encoding_to: encoding) }

context 'when encoding FROM MySQL utf8mb4' do
let(:encoding) { 'UTF-8' } # default

it 'exports to UTF-8 with BOM', active_record: true do
# MySQL connection may report its encoding as 'utf8mb4'
expect(::ActiveRecord::Base.connection).to receive(:encoding) { 'utf8mb4' }
expect(subject[1]).to eq 'UTF-8'
expect(subject[2].encoding).to eq Encoding::UTF_8
expect(subject[2].unpack('H*').first).
to eq 'efbbbf4e756d6265722c4e616d650a312ce381aae381bee381880a' # have BOM
end
end

context 'when encoding FROM MySQL latin1' do
context 'when encoding FROM latin1', active_record: true do
let(:encoding) { '' }
let(:objects) { FactoryGirl.create_list :player, 1, number: 1, name: 'Josè'.encode('ISO-8859-1') }

it 'exports to ISO-8859-1', active_record: true do
# postgresql cannot load invalid data: invalid byte sequence for encoding "UTF8": 0xe8
if ActiveRecord::Base.connection_config[:adapter] != 'postgresql'
expect(::ActiveRecord::Base.connection).to receive(:encoding) { 'latin1' }
expect(subject[1]).to eq 'ISO-8859-1'
expect(subject[2].encoding).to eq Encoding::ISO_8859_1
expect(subject[2].unpack('H*').first).
to eq '4e756d6265722c4e616d650a312c4a6f73e80a'
else
expect(true).to eq(true)
before do
case ActiveRecord::Base.connection_config[:adapter]
when 'postgresql'
@connection = ActiveRecord::Base.connection.instance_variable_get(:@connection)
@connection.set_client_encoding('latin1')
when 'mysql2'
ActiveRecord::Base.connection.execute('SET NAMES latin1;')
end
end
after do
case ActiveRecord::Base.connection_config[:adapter]
when 'postgresql'
@connection.set_client_encoding('utf8')
when 'mysql2'
ActiveRecord::Base.connection.execute('SET NAMES utf8;')
end
end

it 'exports to ISO-8859-1' do
expect(subject[1]).to eq 'ISO-8859-1'
expect(subject[2].encoding).to eq Encoding::ISO_8859_1
expect(subject[2].unpack('H*').first).
to eq '4e756d6265722c4e616d650a312c4a6f73e80a'
end
end

context 'when encoding to UTF-8' do
Expand Down

0 comments on commit dca8911

Please sign in to comment.