From 55b527a7f07464b93cfada3200a5fea22c893dac Mon Sep 17 00:00:00 2001 From: Suyash Kumar Date: Sat, 1 Apr 2023 22:48:09 -0400 Subject: [PATCH] Add basic Parser.Next based benchmark --- parse.go | 11 ++++----- parse_test.go | 34 ++++++++++++++++++++++++++++ read.go | 62 ++++++++++++++++++++++++++++++++++++++------------- 3 files changed, 86 insertions(+), 21 deletions(-) diff --git a/parse.go b/parse.go index 89e3776b..0937c2ea 100644 --- a/parse.go +++ b/parse.go @@ -64,10 +64,11 @@ func Parse(in io.Reader, bytesToRead int64, frameChan chan *frame.Frame, opts .. } for !p.reader.rawReader.IsLimitExhausted() { - _, err := p.Next() + elem, err := p.Next() if err != nil { return p.dataset, err } + p.dataset.Elements = append(p.dataset.Elements, elem) } // Close the frameChannel if needed @@ -115,8 +116,9 @@ func NewParser(in io.Reader, bytesToRead int64, frameChannel chan *frame.Frame, optSet := toParseOptSet(opts...) p := Parser{ reader: &reader{ - rawReader: dicomio.NewReader(bufio.NewReader(in), binary.LittleEndian, bytesToRead), - opts: optSet, + rawReader: dicomio.NewReader(bufio.NewReader(in), binary.LittleEndian, bytesToRead), + opts: optSet, + datasetCtx: &Dataset{}, }, frameChannel: frameChannel, } @@ -167,7 +169,7 @@ func (p *Parser) Next() (*Element, error) { } return nil, ErrorEndOfDICOM } - elem, err := p.reader.readElement(&p.dataset, p.frameChannel) + elem, err := p.reader.ReadElement(p.frameChannel) if err != nil { // TODO: tolerate some kinds of errors and continue parsing return nil, err @@ -186,7 +188,6 @@ func (p *Parser) Next() (*Element, error) { p.reader.rawReader.SetCodingSystem(cs) } - p.dataset.Elements = append(p.dataset.Elements, elem) return elem, nil } diff --git a/parse_test.go b/parse_test.go index 7b64533f..c253fa31 100644 --- a/parse_test.go +++ b/parse_test.go @@ -237,6 +237,40 @@ func BenchmarkParser_NextAPI(b *testing.B) { } } +func BenchmarkParser_NextAPI(b *testing.B) { + files, err := ioutil.ReadDir("./testdata") + if err != nil { + b.Fatalf("unable to read testdata/: %v", err) + } + for _, f := range files { + if !f.IsDir() && strings.HasSuffix(f.Name(), ".dcm") { + b.Run(f.Name(), func(b *testing.B) { + + dcm, err := os.Open("./testdata/" + f.Name()) + if err != nil { + b.Errorf("Unable to open %s. Error: %v", f.Name(), err) + } + defer dcm.Close() + + data, err := ioutil.ReadAll(dcm) + if err != nil { + b.Errorf("Unable to read file into memory for benchmark: %v", err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + r := bytes.NewReader(data) + p, _ := dicom.NewParser(r, int64(len(data)), nil) + var err error + for err == nil { + _, err = p.Next() + } + } + }) + } + } +} + func Example_readFile() { // See also: dicom.Parse, which uses a more generic io.Reader API. dataset, _ := dicom.ParseFile("testdata/1.dcm", nil) diff --git a/read.go b/read.go index 6e653be5..795ce1dd 100644 --- a/read.go +++ b/read.go @@ -41,6 +41,11 @@ var ( type reader struct { rawReader dicomio.Reader opts parseOptSet + // datasetCtx is the top-level context Dataset holding only elements that + // may be needed to parse future elements (e.g. like tag.Rows). See + // datasetContextElementPassList for elements that will be automatically + // included in this context. + datasetCtx *Dataset } func (r *reader) readTag() (*tag.Tag, error) { @@ -154,7 +159,7 @@ func (r *reader) readHeader() ([]*Element, error) { // Must read metadata as LittleEndian explicit VR // Read the length of the metadata elements: (0002,0000) MetaElementGroupLength - maybeMetaLen, err := r.readElement(nil, nil) + maybeMetaLen, err := r.readElementInternal(nil, nil) if err != nil { return nil, err } @@ -174,7 +179,7 @@ func (r *reader) readHeader() ([]*Element, error) { } defer r.rawReader.PopLimit() for !r.rawReader.IsLimitExhausted() { - elem, err := r.readElement(nil, nil) + elem, err := r.readElementInternal(nil, nil) if err != nil { // TODO: see if we can skip over malformed elements somehow return nil, err @@ -466,7 +471,7 @@ func (r *reader) readSequence(t tag.Tag, vr string, vl uint32, d *Dataset) (Valu seqElements := &Dataset{} if vl == tag.VLUndefinedLength { for { - subElement, err := r.readElement(seqElements, nil) + subElement, err := r.readElementInternal(seqElements, nil) if err != nil { // Stop reading due to error log.Println("error reading subitem, ", err) @@ -493,7 +498,7 @@ func (r *reader) readSequence(t tag.Tag, vr string, vl uint32, d *Dataset) (Valu return nil, err } for !r.rawReader.IsLimitExhausted() { - subElement, err := r.readElement(seqElements, nil) + subElement, err := r.readElementInternal(seqElements, nil) if err != nil { // TODO: option to ignore errors parsing subelements? return nil, err @@ -519,7 +524,7 @@ func (r *reader) readSequenceItem(t tag.Tag, vr string, vl uint32, d *Dataset) ( if vl == tag.VLUndefinedLength { for { - subElem, err := r.readElement(&seqElements, nil) + subElem, err := r.readElementInternal(&seqElements, nil) if err != nil { return nil, err } @@ -537,7 +542,7 @@ func (r *reader) readSequenceItem(t tag.Tag, vr string, vl uint32, d *Dataset) ( } for !r.rawReader.IsLimitExhausted() { - subElem, err := r.readElement(&seqElements, nil) + subElem, err := r.readElementInternal(&seqElements, nil) if err != nil { return nil, err } @@ -693,17 +698,24 @@ func (r *reader) readInt(t tag.Tag, vr string, vl uint32) (Value, error) { return retVal, err } -// readElement reads the next element. If the next element is a sequence element, -// it may result in a collection of Elements. It takes a pointer to the Dataset of -// elements read so far, since previously read elements may be needed to parse +// ReadElement reads the next element. This is the top level function that should +// be called to read elements. If the next element is a sequence element, +// it may result in reading a collection of Elements. +func (r *reader) ReadElement(fc chan<- *frame.Frame) (*Element, error) { + return r.readElementInternal(r.datasetCtx, fc) +} + +// readElementInternal reads the next element. If the next element is a sequence element, +// it may result in reading a collection of Elements. It takes a pointer to the Dataset of +// context elements, since previously read elements may be needed to parse // certain Elements (like native PixelData). If the Dataset is nil, it is // treated as an empty Dataset. -func (r *reader) readElement(d *Dataset, fc chan<- *frame.Frame) (*Element, error) { +func (r *reader) readElementInternal(datasetCtx *Dataset, fc chan<- *frame.Frame) (*Element, error) { t, err := r.readTag() if err != nil { return nil, err } - debug.Logf("readElement: tag: %s", t.String()) + debug.Logf("readElementInternal: tag: %s", t.String()) readImplicit := r.rawReader.IsImplicit() if *t == tag.Item { @@ -715,22 +727,23 @@ func (r *reader) readElement(d *Dataset, fc chan<- *frame.Frame) (*Element, erro if err != nil { return nil, err } - debug.Logf("readElement: vr: %s", vr) + debug.Logf("readElementInternal: vr: %s", vr) vl, err := r.readVL(readImplicit, *t, vr) if err != nil { return nil, err } - debug.Logf("readElement: vl: %d", vl) + debug.Logf("readElementInternal: vl: %datasetCtx", vl) - val, err := r.readValue(*t, vr, vl, readImplicit, d, fc) + val, err := r.readValue(*t, vr, vl, readImplicit, datasetCtx, fc) if err != nil { log.Println("error reading value ", err) return nil, err } - return &Element{Tag: *t, ValueRepresentation: tag.GetVRKind(*t, vr), RawValueRepresentation: vr, ValueLength: vl, Value: val}, nil - + elem := &Element{Tag: *t, ValueRepresentation: tag.GetVRKind(*t, vr), RawValueRepresentation: vr, ValueLength: vl, Value: val} + addToContextIfNeeded(datasetCtx, elem) + return elem, nil } // Read an Item object as raw bytes, useful when parsing encapsulated PixelData. @@ -789,3 +802,20 @@ func (r *reader) readRawItem(shouldSkip bool) ([]byte, bool, error) { func (r *reader) moreToRead() bool { return !r.rawReader.IsLimitExhausted() } + +// datasetContextElementPassList holds the set of DICOM Element Tags that should +// be included in the context Dataset. These are elements that may be needed to +// read downstream elements in the future. +var datasetContextElementPassList = tag.Tags{ + &tag.Rows, + &tag.Columns, + &tag.NumberOfFrames, + &tag.BitsAllocated, + &tag.SamplesPerPixel, +} + +func addToContextIfNeeded(datasetCtx *Dataset, e *Element) { + if datasetContextElementPassList.Contains(&e.Tag) { + datasetCtx.Elements = append(datasetCtx.Elements, e) + } +}