New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add xlsx support #26
add xlsx support #26
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
package docconv | ||
|
||
import ( | ||
"fmt" | ||
"github.com/tealeg/xlsx" | ||
"io" | ||
) | ||
|
||
// Convert MS Excel Spreadsheet | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For exported functions the comment should begin with the name of the function: // ConvertXLSX ... This looks like it converts XLSX to something similar to CSV, but isn't (i.e. there doesn't seem to be any escaping): if a cell includes a comma or a newline, the output can no longer be split back into cells unambiguously. You probably want to use https://golang.org/pkg/encoding/csv/#Writer from the standard library if you need CSV. |
||
// ConvertXLSX reads an XLSX spreadsheet from r and returns its cell text, a | ||
// metadata map, and an error. | ||
// | ||
// The text is built by joining the cells of each row with "," and the rows of | ||
// each sheet with "\n". Cell text is written verbatim: commas or newlines | ||
// inside a cell are not quoted or escaped, so the result is CSV-like but not | ||
// valid CSV. No separator is written between sheets, so the first row of a | ||
// sheet continues on the same line as the last row of the previous sheet. | ||
// | ||
// The metadata map holds a single key, "ModifiedDate", set to the Unix | ||
// timestamp (seconds, decimal string) of the local file's modification time. | ||
// | ||
// A non-nil error is returned when the local file cannot be created, its | ||
// stats cannot be read, or the xlsx library fails to open it; in those cases | ||
// the text is "" and the map is nil. | ||
func ConvertXLSX(r io.Reader) (string, map[string]string, error) { | ||
// NewLocalFile is defined elsewhere in the package; presumably it copies r | ||
// into a temp file under "/tmp" with the given name prefix -- TODO confirm. | ||
// A file-backed handle is needed because OpenReaderAt below takes the | ||
// handle together with its total size. | ||
f, err := NewLocalFile(r, "/tmp", "sajari-convert-") | ||
if err != nil { | ||
return "", nil, fmt.Errorf("error creating local file: %v", err) | ||
} | ||
// NOTE(review): Done presumably closes and removes the local file -- confirm. | ||
defer f.Done() | ||
|
||
// The stat result is used twice: Size() for OpenReaderAt and ModTime() for | ||
// the metadata map. | ||
fileStat, err := f.Stat() | ||
if err != nil { | ||
return "", nil, fmt.Errorf("error on getting file stats: %v", err) | ||
} | ||
xlsFile, err := xlsx.OpenReaderAt(f, fileStat.Size()) | ||
if err != nil { | ||
return "", nil, fmt.Errorf("error on xlsx parsing: %v", err) | ||
} | ||
|
||
// Meta data | ||
// Built in a goroutine and handed back over a channel with capacity 1, so | ||
// the send never blocks. The result is received before returning, so the | ||
// goroutine does not outlive this call. | ||
mc := make(chan map[string]string, 1) | ||
go func() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You don't need to use a go routine here. |
||
meta := make(map[string]string) | ||
// NOTE(review): fileStat describes the local file f, so this is presumably | ||
// the temp copy's modification time rather than the original document's -- | ||
// confirm against NewLocalFile. | ||
meta["ModifiedDate"] = fmt.Sprintf("%d", fileStat.ModTime().Unix()) | ||
mc <- meta | ||
}() | ||
|
||
// Document body | ||
// Also built in its own goroutine and returned over a channel with | ||
// capacity 1. | ||
bc := make(chan string, 1) | ||
go func() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't need a go routine here either. |
||
// Accumulated with += below, which re-allocates the string on every append. | ||
var body string | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Potentially a lot of string concat operations here, you should use a |
||
for _, sheet := range xlsFile.Sheets { | ||
for rowIndex, row := range sheet.Rows { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's a convention to use smaller names in Go, so |
||
for cellIndex, cell := range row.Cells { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
// The error from cell.String() is discarded, so whatever text it returns | ||
// alongside an error is appended without any indication of failure. | ||
text, _ := cell.String() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't ignore the error here. From the docs for the XLSX lib you're using, it will return the raw value of the cell, and hence incorrectly formatted. I would assume that the formatting string in the spreadsheet is correct, and the XLSX library is failing to parse it and so the output won't properly represent the value as shown in the spreadsheet: you don't want to fail silently. |
||
body += text | ||
// Separator goes between cells only: none after the last cell of a row. | ||
if cellIndex < len(row.Cells)-1 { | ||
body += "," | ||
} | ||
} | ||
// Newline goes between rows only: none after the last row of a sheet, | ||
// and nothing is written between one sheet and the next. | ||
if rowIndex < len(sheet.Rows)-1 { | ||
body += "\n" | ||
} | ||
} | ||
} | ||
bc <- body | ||
}() | ||
|
||
// Wait for both goroutines to deliver their results. | ||
body := <-bc | ||
meta := <-mc | ||
|
||
return body, meta, nil | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Put this import underneath the others (it's non-std lib).