Skip to content

Commit

Permalink
This closes #833, closes #845, and closes #1022, breaking changes
Browse files Browse the repository at this point in the history
- Closing the spreadsheet and the row iterator is now required
- New option `WorksheetUnzipMemLimit` has been added
- Improved streaming read performance; memory usage decreased by about 93.7%
  • Loading branch information
xuri committed Sep 19, 2021
1 parent 2add938 commit 790c363
Show file tree
Hide file tree
Showing 24 changed files with 322 additions and 36 deletions.
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -5,6 +5,7 @@ test/Test*.xlsm
test/BadEncrypt.xlsx
test/BadWorkbook.SaveAsEmptyStruct.xlsx
test/*.png
test/excelize-*
*.out
*.test
.idea
8 changes: 8 additions & 0 deletions README.md
Expand Up @@ -96,6 +96,10 @@ func main() {
}
fmt.Println()
}
// Close the spreadsheet.
if err = f.Close(); err != nil {
fmt.Println(err)
}
}
```

Expand Down Expand Up @@ -203,6 +207,10 @@ func main() {
if err = f.Save(); err != nil {
fmt.Println(err)
}
// Close the spreadsheet.
if err = f.Close(); err != nil {
fmt.Println(err)
}
}
```

Expand Down
10 changes: 9 additions & 1 deletion README_zh.md
Expand Up @@ -96,6 +96,10 @@ func main() {
}
fmt.Println()
}
// 关闭工作簿
if err = f.Close(); err != nil {
fmt.Println(err)
}
}
```

Expand Down Expand Up @@ -199,10 +203,14 @@ func main() {
}`); err != nil {
fmt.Println(err)
}
// 保存文件
// 保存工作簿
if err = f.Save(); err != nil {
fmt.Println(err)
}
// 关闭工作簿
if err = f.Close(); err != nil {
fmt.Println(err)
}
}
```

Expand Down
4 changes: 4 additions & 0 deletions cell_test.go
Expand Up @@ -72,6 +72,7 @@ func TestConcurrency(t *testing.T) {
}
assert.Equal(t, "1", val)
assert.NoError(t, f.SaveAs(filepath.Join("test", "TestConcurrency.xlsx")))
assert.NoError(t, f.Close())
}

func TestCheckCellInArea(t *testing.T) {
Expand Down Expand Up @@ -325,6 +326,7 @@ func TestOverflowNumericCell(t *testing.T) {
assert.NoError(t, err)
// GOARCH=amd64 - all ok; GOARCH=386 - actual: "-2147483648"
assert.Equal(t, "8595602512225", val, "A1 should be 8595602512225")
assert.NoError(t, f.Close())
}

func TestSetCellFormula(t *testing.T) {
Expand All @@ -340,6 +342,7 @@ func TestSetCellFormula(t *testing.T) {
assert.EqualError(t, f.SetCellFormula("Sheet1", "C", "SUM(Sheet2!D2,Sheet2!D9)"), `cannot convert cell "C" to coordinates: invalid cell name "C"`)

assert.NoError(t, f.SaveAs(filepath.Join("test", "TestSetCellFormula1.xlsx")))
assert.NoError(t, f.Close())

f, err = OpenFile(filepath.Join("test", "CalcChain.xlsx"))
if !assert.NoError(t, err) {
Expand All @@ -351,6 +354,7 @@ func TestSetCellFormula(t *testing.T) {
// Test remove all cell formula.
assert.NoError(t, f.SetCellFormula("Sheet1", "B1", ""))
assert.NoError(t, f.SaveAs(filepath.Join("test", "TestSetCellFormula3.xlsx")))
assert.NoError(t, f.Close())

// Test set shared formula for the cells.
f = NewFile()
Expand Down
2 changes: 2 additions & 0 deletions chart_test.go
Expand Up @@ -206,6 +206,7 @@ func TestAddChart(t *testing.T) {
assert.EqualError(t, f.AddChart("Sheet2", "BD32", `{"type":"col","series":[{"name":"Sheet1!$A$30","categories":"Sheet1!$B$29:$D$29","values":"Sheet1!$B$30:$D$30"},{"name":"Sheet1!$A$31","categories":"Sheet1!$B$29:$D$29","values":"Sheet1!$B$31:$D$31"},{"name":"Sheet1!$A$32","categories":"Sheet1!$B$29:$D$29","values":"Sheet1!$B$32:$D$32"},{"name":"Sheet1!$A$33","categories":"Sheet1!$B$29:$D$29","values":"Sheet1!$B$33:$D$33"},{"name":"Sheet1!$A$34","categories":"Sheet1!$B$29:$D$29","values":"Sheet1!$B$34:$D$34"},{"name":"Sheet1!$A$35","categories":"Sheet1!$B$29:$D$29","values":"Sheet1!$B$35:$D$35"},{"name":"Sheet1!$A$36","categories":"Sheet1!$B$29:$D$29","values":"Sheet1!$B$36:$D$36"},{"name":"Sheet1!$A$37","categories":"Sheet1!$B$29:$D$29","values":"Sheet1!$B$37:$D$37"}],"format":{"x_scale":1.0,"y_scale":1.0,"x_offset":15,"y_offset":10,"print_obj":true,"lock_aspect_ratio":false,"locked":false},"legend":{"position":"left","show_legend_key":false},"title":{"name":"2D Column Chart"},"plotarea":{"show_bubble_size":true,"show_cat_name":false,"show_leader_lines":false,"show_percent":true,"show_series_name":true,"show_val":true},"show_blanks_as":"zero"}`, ""), "unexpected end of JSON input")
// Test add combo chart with unsupported chart type
assert.EqualError(t, f.AddChart("Sheet2", "BD64", `{"type":"barOfPie","series":[{"name":"Sheet1!$A$30","categories":"Sheet1!$A$30:$D$37","values":"Sheet1!$B$30:$B$37"}],"format":{"x_scale":1.0,"y_scale":1.0,"x_offset":15,"y_offset":10,"print_obj":true,"lock_aspect_ratio":false,"locked":false},"legend":{"position":"left","show_legend_key":false},"title":{"name":"Bar of Pie Chart"},"plotarea":{"show_bubble_size":true,"show_cat_name":false,"show_leader_lines":false,"show_percent":true,"show_series_name":true,"show_val":true},"show_blanks_as":"zero","x_axis":{"major_grid_lines":true},"y_axis":{"major_grid_lines":true}}`, `{"type":"unknown","series":[{"name":"Sheet1!$A$30","categories":"Sheet1!$A$30:$D$37","values":"Sheet1!$B$30:$B$37"}],"format":{"x_scale":1.0,"y_scale":1.0,"x_offset":15,"y_offset":10,"print_obj":true,"lock_aspect_ratio":false,"locked":false},"legend":{"position":"left","show_legend_key":false},"title":{"name":"Bar of Pie Chart"},"plotarea":{"show_bubble_size":true,"show_cat_name":false,"show_leader_lines":false,"show_percent":true,"show_series_name":true,"show_val":true},"show_blanks_as":"zero","x_axis":{"major_grid_lines":true},"y_axis":{"major_grid_lines":true}}`), "unsupported chart type unknown")
assert.NoError(t, f.Close())
}

func TestAddChartSheet(t *testing.T) {
Expand Down Expand Up @@ -254,6 +255,7 @@ func TestDeleteChart(t *testing.T) {
assert.EqualError(t, f.DeleteChart("Sheet1", ""), `cannot convert cell "" to coordinates: invalid cell name ""`)
// Test delete chart on no chart worksheet.
assert.NoError(t, NewFile().DeleteChart("Sheet1", "A1"))
assert.NoError(t, f.Close())
}

func TestChartWithLogarithmicBase(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion col.go
Expand Up @@ -209,7 +209,7 @@ func (f *File) Cols(sheet string) (*Cols, error) {
f.saveFileList(name, f.replaceNameSpaceBytes(name, output))
}
var colIterator columnXMLIterator
colIterator.cols.sheetXML = f.readXML(name)
colIterator.cols.sheetXML = f.readBytes(name)
decoder := f.xmlNewDecoder(bytes.NewReader(colIterator.cols.sheetXML))
for {
token, _ := decoder.Token()
Expand Down
5 changes: 5 additions & 0 deletions col_test.go
Expand Up @@ -39,6 +39,7 @@ func TestCols(t *testing.T) {
if !assert.Equal(t, collectedRows, returnedColumns) {
t.FailNow()
}
assert.NoError(t, f.Close())

f = NewFile()
cells := []string{"C2", "C3", "C4"}
Expand Down Expand Up @@ -75,6 +76,7 @@ func TestColumnsIterator(t *testing.T) {
require.True(t, colCount <= expectedNumCol, "colCount is greater than expected")
}
assert.Equal(t, expectedNumCol, colCount)
assert.NoError(t, f.Close())

f = NewFile()
cells := []string{"C2", "C3", "C4", "D2", "D3", "D4"}
Expand All @@ -99,6 +101,7 @@ func TestColsError(t *testing.T) {
}
_, err = f.Cols("SheetN")
assert.EqualError(t, err, "sheet SheetN is not exist")
assert.NoError(t, f.Close())
}

func TestGetColsError(t *testing.T) {
Expand All @@ -108,6 +111,7 @@ func TestGetColsError(t *testing.T) {
}
_, err = f.GetCols("SheetN")
assert.EqualError(t, err, "sheet SheetN is not exist")
assert.NoError(t, f.Close())

f = NewFile()
f.Sheet.Delete("xl/worksheets/sheet1.xml")
Expand Down Expand Up @@ -283,6 +287,7 @@ func TestOutlineLevel(t *testing.T) {
f, err = OpenFile(filepath.Join("test", "Book1.xlsx"))
assert.NoError(t, err)
assert.NoError(t, f.SetColOutlineLevel("Sheet2", "B", 2))
assert.NoError(t, f.Close())
}

func TestSetColStyle(t *testing.T) {
Expand Down
1 change: 1 addition & 0 deletions crypt_test.go
Expand Up @@ -22,6 +22,7 @@ func TestEncrypt(t *testing.T) {
f, err := OpenFile(filepath.Join("test", "encryptSHA1.xlsx"), Options{Password: "password"})
assert.NoError(t, err)
assert.EqualError(t, f.SaveAs(filepath.Join("test", "BadEncrypt.xlsx"), Options{Password: "password"}), "not support encryption currently")
assert.NoError(t, f.Close())
}

func TestEncryptionMechanism(t *testing.T) {
Expand Down
2 changes: 2 additions & 0 deletions docProps_test.go
Expand Up @@ -44,6 +44,7 @@ func TestSetDocProps(t *testing.T) {
assert.NoError(t, f.SaveAs(filepath.Join("test", "TestSetDocProps.xlsx")))
f.Pkg.Store("docProps/core.xml", nil)
assert.NoError(t, f.SetDocProps(&DocProperties{}))
assert.NoError(t, f.Close())

// Test unsupported charset
f = NewFile()
Expand All @@ -62,6 +63,7 @@ func TestGetDocProps(t *testing.T) {
f.Pkg.Store("docProps/core.xml", nil)
_, err = f.GetDocProps()
assert.NoError(t, err)
assert.NoError(t, f.Close())

// Test unsupported charset
f = NewFile()
Expand Down
3 changes: 3 additions & 0 deletions errors.go
Expand Up @@ -131,4 +131,7 @@ var (
// ErrCellCharsLength defined the error message for receiving a cell
// characters length that exceeds the limit.
ErrCellCharsLength = fmt.Errorf("cell value must be 0-%d characters", TotalCellChars)
// ErrOptionsUnzipSizeLimit defines the error message returned when the
// UnzipSizeLimit option is less than the WorksheetUnzipMemLimit option.
ErrOptionsUnzipSizeLimit = errors.New("the value of UnzipSizeLimit should be greater than or equal to WorksheetUnzipMemLimit")
)
53 changes: 39 additions & 14 deletions excelize.go
Expand Up @@ -37,6 +37,7 @@ type File struct {
checked map[string]bool
sheetMap map[string]string
streams map[string]*StreamWriter
tempFiles sync.Map
CalcChain *xlsxCalcChain
Comments map[string]*xlsxComments
ContentTypes *xlsxTypes
Expand All @@ -58,13 +59,26 @@ type File struct {

type charsetTranscoderFn func(charset string, input io.Reader) (rdr io.Reader, err error)

// Options define the options for open and reading spreadsheet. RawCellValue
// specify if apply the number format for the cell value or get the raw
// value.
// Options define the options for open and reading spreadsheet.
//
// Password specifies the password of the spreadsheet in plain text.
//
// RawCellValue specifies whether to apply the number format to the cell
// value or to get the raw value.
//
// UnzipSizeLimit specifies the unzip size limit in bytes when opening the
// spreadsheet. This value should be greater than or equal to
// WorksheetUnzipMemLimit; the default size limit is 16GB.
//
// WorksheetUnzipMemLimit specifies the memory limit in bytes for unzipping a
// worksheet. The worksheet XML will be extracted to the system temporary
// directory when the file size exceeds this value. This value should be less
// than or equal to UnzipSizeLimit; the default value is 16MB.
type Options struct {
Password string
RawCellValue bool
UnzipSizeLimit int64
Password string
RawCellValue bool
UnzipSizeLimit int64
WorksheetUnzipMemLimit int64
}

// OpenFile take the name of an spreadsheet file and returns a populated
Expand All @@ -78,10 +92,8 @@ type Options struct {
//
// Note that excelize currently supports only decryption, not encryption;
// the spreadsheet saved by Save and SaveAs will be without
// password protection.
//
// UnzipSizeLimit specified the unzip size limit in bytes on open the
// spreadsheet, the default size limit is 16GB.
// password protection. Call Close to close the file after opening the
// spreadsheet.
func OpenFile(filename string, opt ...Options) (*File, error) {
file, err := os.Open(filepath.Clean(filename))
if err != nil {
Expand All @@ -99,10 +111,11 @@ func OpenFile(filename string, opt ...Options) (*File, error) {
// newFile is object builder
func newFile() *File {
return &File{
options: &Options{UnzipSizeLimit: UnzipSizeLimit},
options: &Options{UnzipSizeLimit: UnzipSizeLimit, WorksheetUnzipMemLimit: StreamChunkSize},
xmlAttr: make(map[string][]xml.Attr),
checked: make(map[string]bool),
sheetMap: make(map[string]string),
tempFiles: sync.Map{},
Comments: make(map[string]*xlsxComments),
Drawings: sync.Map{},
sharedStringsMap: make(map[string]int),
Expand All @@ -125,6 +138,18 @@ func OpenReader(r io.Reader, opt ...Options) (*File, error) {
f.options = parseOptions(opt...)
if f.options.UnzipSizeLimit == 0 {
f.options.UnzipSizeLimit = UnzipSizeLimit
if f.options.WorksheetUnzipMemLimit > f.options.UnzipSizeLimit {
f.options.UnzipSizeLimit = f.options.WorksheetUnzipMemLimit
}
}
if f.options.WorksheetUnzipMemLimit == 0 {
f.options.WorksheetUnzipMemLimit = StreamChunkSize
if f.options.UnzipSizeLimit < f.options.WorksheetUnzipMemLimit {
f.options.WorksheetUnzipMemLimit = f.options.UnzipSizeLimit
}
}
if f.options.WorksheetUnzipMemLimit > f.options.UnzipSizeLimit {
return nil, ErrOptionsUnzipSizeLimit
}
if bytes.Contains(b, oleIdentifier) {
b, err = Decrypt(b, f.options)
Expand All @@ -136,7 +161,7 @@ func OpenReader(r io.Reader, opt ...Options) (*File, error) {
if err != nil {
return nil, err
}
file, sheetCount, err := ReadZipReader(zr, f.options)
file, sheetCount, err := f.ReadZipReader(zr)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -210,10 +235,10 @@ func (f *File) workSheetReader(sheet string) (ws *xlsxWorksheet, err error) {
}
ws = new(xlsxWorksheet)
if _, ok := f.xmlAttr[name]; !ok {
d := f.xmlNewDecoder(bytes.NewReader(namespaceStrictToTransitional(f.readXML(name))))
d := f.xmlNewDecoder(bytes.NewReader(namespaceStrictToTransitional(f.readBytes(name))))
f.xmlAttr[name] = append(f.xmlAttr[name], getRootElement(d)...)
}
if err = f.xmlNewDecoder(bytes.NewReader(namespaceStrictToTransitional(f.readXML(name)))).
if err = f.xmlNewDecoder(bytes.NewReader(namespaceStrictToTransitional(f.readBytes(name)))).
Decode(ws); err != nil && err != io.EOF {
err = fmt.Errorf("xml decode error: %s", err)
return
Expand Down

0 comments on commit 790c363

Please sign in to comment.