Merge branch 'import-xlsx-encoding' into test

commercetools · Nov 10, 2016 · 3a121e4 · 3a121e4
2 parents 4fd03d4 + 68d9b49
commit 3a121e4
Show file tree

Hide file tree

Showing 6 changed files with 315 additions and 16 deletions.
diff --git a/Gruntfile.coffee b/Gruntfile.coffee
@@ -56,10 +56,10 @@ module.exports = (grunt) ->
     # watching for changes
     watch:
       default:
-        files: ['src/coffee/*.coffee']
+        files: ['src/**/**/*.coffee']
         tasks: ['build']
       test:
-        files: ['src/**/*.coffee']
+        files: ['src/**/**/*.coffee']
         tasks: ['test']
 
     shell:

diff --git a/package.json b/package.json
@@ -46,7 +46,10 @@
   "dependencies": {
     "bluebird": "2.9.x",
     "csv": "0.3.x",
+    "exceljs": "^0.2.36",
+    "iconv-lite": "^0.4.13",
     "jszip": "2.5.x",
+    "lodash": "^4.16.6",
     "optimist": "0.6.x",
     "sphere-node-sdk": "1.3.x",
     "sphere-node-utils": "0.8.1",
@@ -67,7 +70,8 @@
     "grunt-shell": "1.1.x",
     "istanbul": "0.3.x",
     "mocha": "^3.0.2",
-    "sphere-coffeelint": "git://github.com/sphereio/sphere-coffeelint.git#master"
+    "sphere-coffeelint": "git://github.com/sphereio/sphere-coffeelint.git#master",
+    "tmp": "0.0.30"
   },
   "keywords": [
     "sphere",

diff --git a/src/coffee/io/reader.coffee b/src/coffee/io/reader.coffee
@@ -0,0 +1,107 @@
+_ = require 'underscore'
+Csv = require 'csv'
+path = require 'path'
+Promise = require 'bluebird'
+iconv = require 'iconv-lite'
+fs = Promise.promisifyAll require('fs')
+Excel = require 'exceljs'
+
+# Debug tracing switch: debugLog is first bound to console.log and then
+# immediately replaced by a no-op, so tracing is off by default.
+# Remove the second assignment to see the READER:: messages.
+debugLog = console.log
+debugLog = _.noop
+
+
+# Reads tabular input (CSV or XLSX) and converts it into an array of row
+# objects keyed by the column names found in the first row.
+class Reader
+
+  # @param {Object} [options]
+  # @param {string} [options.encoding] encoding of CSV input, 'utf-8' when falsy
+  # @param {string} [options.csvDelimiter] CSV column delimiter, ',' when falsy
+  # @param {string} [options.importFormat] 'xlsx' selects the XLSX reader,
+  #   any other value the CSV reader
+  # @throws {Error} when iconv-lite does not know the requested encoding
+  constructor: (@options = {}) ->
+    debugLog "READER::options:", JSON.stringify(@options)
+    @options.encoding = @options.encoding || 'utf-8'
+    @options.csvDelimiter = @options.csvDelimiter || ','
+    @header = null
+    @rows = []
+
+    if not iconv.encodingExists @options.encoding
+      throw new Error 'Encoding does not exist: ' + @options.encoding
+
+  # Opens `file` as a read stream and parses it according to
+  # @options.importFormat.
+  # @param {string} file path of the file to read
+  # @return {Promise} resolves with an array of row objects
+  read: (file) =>
+    debugLog "READER::stream file %s", file
+    @inputStream = fs.createReadStream file
+
+    if @options.importFormat == 'xlsx'
+      @_readXlsx(@inputStream)
+    else
+      @_readCsv(@inputStream)
+
+  # Pairs every header name with the value at the same index of `row`.
+  # Header names without a matching value map to undefined.
+  # @param {Array} header column names
+  # @param {Array} row cell values
+  # @return {Object}
+  @mapRow: (header, row) ->
+    res = {}
+    header.forEach (item, index) ->
+      res[item] = row[index]
+    res
+
+  # Parses CSV content; the first record is used as header, every further
+  # record becomes one row object. Empty lines are skipped.
+  # @param {string|Buffer} csv content; only a Buffer is decoded with `encoding`
+  # @param {string} delimiter column delimiter
+  # @param {string} encoding source encoding, used for Buffer input only
+  # @return {Promise} resolves with an array of row objects
+  @parseCsv: (csv, delimiter, encoding) ->
+    header = null
+    rows = []
+    options =
+      delimiter: delimiter
+      skip_empty_lines: true
+
+    # only buffer can be decoded from another encoding
+    # (Reader.decode instead of @decode so a detached reference keeps working)
+    if csv instanceof Buffer
+      csv = Reader.decode(csv, encoding)
+
+    new Promise (resolve, reject) ->
+      Csv()
+      .from.string(csv, options)
+      .on 'record', (row) ->
+        if not header
+          header = row
+        else
+          rows.push Reader.mapRow(header, row)
+      .on 'error', (err) ->
+        reject(err)
+      .on 'end', ->
+        resolve(rows)
+
+  # Buffers the whole stream and hands the bytes to Reader.parseCsv.
+  # @param {Stream} stream readable stream with CSV content
+  # @return {Promise} resolves with an array of row objects
+  _readCsv: (stream) =>
+    new Promise (resolve, reject) =>
+      buffers = []
+
+      # stream whole file to buffer because we need to decode it first from buffer
+      # - iconv-lite does not support string to string decoding
+      stream.on 'data', (buffer) ->
+        buffers.push buffer
+      stream.on 'error', (err) -> reject(err)
+      stream.on 'end', =>
+        buffer = Buffer.concat(buffers)
+        Reader.parseCsv(buffer, @options.csvDelimiter, @options.encoding)
+        .then (parsed) -> resolve(parsed)
+        .catch (err) -> reject(err)
+
+  # Reads the worksheet with id 1 of an XLSX stream. The first row is used as
+  # header; every cell is converted to a string, where missing values and
+  # object values become "".
+  # @param {Stream} stream readable stream with XLSX content
+  # @return {Promise} resolves with an array of row objects, rejects when the
+  #   workbook has no worksheet with id 1
+  _readXlsx: (stream) ->
+    workbook = new Excel.Workbook()
+    workbook.xlsx.read(stream)
+    .then (workbook) ->
+      worksheet = workbook.getWorksheet(1)
+      # fail with a readable message instead of a TypeError on `undefined`
+      if not worksheet
+        throw new Error 'Worksheet with id 1 was not found in the XLSX file'
+
+      header = null
+      rows = []
+      worksheet.eachRow (row) ->
+        # drop the leading element (ExcelJS row values are indexed from 1);
+        # slice() keeps the array returned by row.values untouched
+        rowValues = _.map row.values.slice(1), (item) ->
+          if not item? or _.isObject(item) then "" else String(item)
+
+        if not header
+          header = rowValues
+        else
+          rows.push Reader.mapRow(header, rowValues)
+      rows
+
+  # Decodes `buffer` into a string. Every encoding, including UTF-8, goes
+  # through iconv-lite so that a leading byte order mark is stripped and
+  # 'utf-8' and 'utf8' behave the same. A BOM left in place would become part
+  # of the first header name.
+  # @param {Buffer} buffer raw bytes
+  # @param {string} [encoding='utf-8'] source encoding
+  # @return {string}
+  @decode: (buffer, encoding = 'utf-8') ->
+    debugLog "READER:decode from %s",encoding
+    iconv.decode buffer, encoding
+
+module.exports = Reader
diff --git a/src/coffee/product-type-generator.coffee b/src/coffee/product-type-generator.coffee
@@ -196,8 +196,22 @@ class ProductTypeGenerator
     i18n = {}
     languages = @_languages header, _.keys row
     for language in languages
-      if row["#{header}.#{language}"].trim() isnt ''
-        i18n[language] = row["#{header}.#{language}"].trim()
+      # The emptiness check below is disabled because skipping empty values made the API reject the product type with:
+      # attributes -> type -> elementType -> values -> label: Values of LocalizedString must not be empty.
+      # which was caused by empty value label:
+      #  "values": [
+      #    {
+      #      "key": "",
+      #      "label": {}
+      #    }, ....
+      #  ]
+      # whereas a valid label keeps the language key even when its value is empty, for example:
+      #  "label": {
+      #    "de": ""
+      #  }
+
+      # if row["#{header}.#{language}"].trim() isnt ''
+      i18n[language] = row["#{header}.#{language}"].trim()
     i18n
 
   ###*

diff --git a/src/coffee/run.coffee b/src/coffee/run.coffee
@@ -1,11 +1,15 @@
 _ = require 'underscore'
 Promise = require 'bluebird'
 fs = Promise.promisifyAll require('fs')
+Path = require 'path'
 Csv = require 'csv'
 JSZip = require 'jszip'
 ProductTypeGenerator = require './product-type-generator'
+Reader = require './io/reader'
 ProductTypeImporter = require './product-type-import'
 
+supportedFileTypes = ['csv', 'xlsx']
+
 argv = require('optimist')
   .usage('Usage: $0 --types [CSV] --attributes [CSV] --target [folder] --withRetailer --zip --zipFileName [name]')
   .describe('types', 'path to CSV file describing product-type general info')
@@ -14,6 +18,9 @@ argv = require('optimist')
   .describe('withRetailer', 'whether to generate an extra file for master<->retailer support with a "mastersku" attribute or not')
   .describe('zip', 'whether to zip the target folder or not')
   .describe('zipFileName', 'the zipped file name (without extension)')
+  .describe('encoding', 'encoding used when importing data (default: utf8)')
+  .describe('importFormat', 'data format of imported data, supported are: csv, xlsx (default: csv)')
+  .describe('csvDelimiter', 'delimiter used in CSV file (default: ,)')
 
   # product type import tool config
   .describe('projectKey', 'your SPHERE.IO project-key')
@@ -33,6 +40,9 @@ argv = require('optimist')
   .default('logSilent', false)
   .default('logDir', '.')
   .default('logLevel', 'info')
+  .default('importFormat', 'csv')
+  .default('csvDelimiter', ',')
+  .default('encoding', 'utf8')
 
   .default('withRetailer', false)
   .default('zip', false)
@@ -48,15 +58,29 @@ argv = require('optimist')
   .argv
 
 ###
-Reads a CSV file by given path and returns a promise for the result.
-@param {string} path The path of the CSV file.
-@return Promise of csv read result.
+Reads a CSV or XLSX file by given path and returns a promise for the result.
+@param {string} path The path of the file.
+@return Promise of csv/xlsx read result.
 ###
-readCsvAsync = (path) ->
-  new Promise (resolve, reject) ->
-    Csv().from.path(path, {columns: true, trim: true})
-    .to.array (data, count) -> resolve data
-    .on 'error', (error) -> reject error
+readFileContent = (path) ->
+  # the file extension alone decides which parser the Reader uses
+  format = getFileType(path)
+
+  unless isSupportedFileType(format)
+    message = "File type #{format} is not supported. Use one of #{supportedFileTypes}"
+    return Promise.reject(Error(message))
+
+  readerOptions =
+    csvDelimiter: argv.csvDelimiter
+    encoding: argv.encoding
+    importFormat: format
+  (new Reader(readerOptions)).read(path)
+
+# True when `type` is one of the entries of supportedFileTypes.
+isSupportedFileType = (type) ->
+  type in supportedFileTypes
+
+# Lower-cased extension of `filePath` without the leading dot;
+# undefined when the path has no extension.
+getFileType = (filePath) ->
+  extension = Path.extname(filePath)
+  return undefined if extension.length is 0
+  extension.slice(1).toLocaleLowerCase()
 
 writeFileAsync = (productTypeDefinition, target, prefix = 'product-type') ->
   prettified = JSON.stringify productTypeDefinition, null, 2
@@ -119,8 +143,8 @@ importSphereProductTypes = (data) ->
   .then ->
     importer.import data
 
-console.log 'About to read CSV files...'
-Promise.all [readCsvAsync(argv.types), readCsvAsync(argv.attributes)]
+console.log 'About to read files...'
+Promise.all [readFileContent(argv.types), readFileContent(argv.attributes)]
 .spread (types, attributes) ->
   console.log 'Running generator...'
   generator = new ProductTypeGenerator
@@ -142,6 +166,6 @@ Promise.all [readCsvAsync(argv.types), readCsvAsync(argv.attributes)]
     console.dir e.stack
     process.exit 1
 .catch (e) ->
-  console.error "Could not read CSV files: #{e.message}"
+  console.error "Could not read files: #{e.message}"
   process.exit 1
 .done()