Skip to content

Commit 1cedc72

Browse files
committed
Debian control file parser
0 parents  commit 1cedc72

File tree

6 files changed

+1147
-0
lines changed

6 files changed

+1147
-0
lines changed

go.mod

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
module zombiezen.com/go/aptblob
2+
3+
go 1.15
4+
5+
require (
6+
github.com/google/go-cmp v0.4.1
7+
github.com/spf13/cobra v1.0.0
8+
gocloud.dev v0.20.0
9+
)

go.sum

Lines changed: 557 additions & 0 deletions
Large diffs are not rendered by default.

internal/apt/control.go

Lines changed: 304 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,304 @@
1+
package apt
2+
3+
import (
4+
"bufio"
5+
"bytes"
6+
"errors"
7+
"fmt"
8+
"io"
9+
"strings"
10+
)
11+
12+
// A Parser reads fields from a control file.
13+
// The syntax is documented at https://www.debian.org/doc/debian-policy/ch-controlfields.html#syntax-of-control-files
14+
type Parser struct {
15+
// Fields specifies the type of possible fields.
16+
Fields map[string]FieldType
17+
18+
scan *bufio.Scanner
19+
lineno int
20+
para Paragraph
21+
err error
22+
}
23+
24+
// NewParser returns a new parser that reads from r.
25+
func NewParser(r io.Reader) *Parser {
26+
p := &Parser{
27+
scan: bufio.NewScanner(r),
28+
lineno: 1,
29+
}
30+
// Split by paragraph.
31+
p.scan.Split(func(data []byte, atEOF bool) (advance int, token []byte, err error) {
32+
for advance < len(data) {
33+
start := advance
34+
var line []byte
35+
if i := bytes.IndexByte(data[advance:], '\n'); i != -1 {
36+
line = data[advance : advance+i]
37+
advance += i + 1
38+
} else if atEOF {
39+
line = data[advance:]
40+
advance = len(data)
41+
} else {
42+
// Not enough buffered for complete line.
43+
return 0, nil, nil
44+
}
45+
if isEmptyLine(line) {
46+
if token == nil {
47+
// Advance lineno for leading empty lines.
48+
p.lineno++
49+
}
50+
return
51+
}
52+
token = data[:start+len(line)]
53+
}
54+
if !atEOF {
55+
// Not enough buffered.
56+
return 0, nil, nil
57+
}
58+
return
59+
})
60+
return p
61+
}
62+
63+
// Single parses a single-paragraph control file, which will then be available
64+
// through the Paragraph method. It returns false if the method is called after
65+
// any call to Next, the parser stops before reading a paragraph, or the parser
66+
// encounters a syntax error.
67+
func (p *Parser) Single() bool {
68+
if p.err != nil {
69+
return false
70+
}
71+
if p.lineno != 1 {
72+
p.clear()
73+
p.err = errors.New("parse debian control file: Parser.Single called after Parser.Next")
74+
return false
75+
}
76+
if !p.Next() {
77+
return false
78+
}
79+
80+
// Check for trailing data.
81+
if p.scan.Scan() {
82+
p.clear()
83+
p.err = fmt.Errorf("parse debian control file: line %d: multiple paragraphs encountered", p.lineno)
84+
return false
85+
}
86+
if err := p.scan.Err(); err != nil {
87+
p.clear()
88+
p.err = fmt.Errorf("parse debian control file: line %d: %w", p.lineno, err)
89+
return false
90+
}
91+
return true
92+
}
93+
94+
// Next advances the parser to the next paragraph, which will then be available
95+
// through the Paragraph method. It returns false when the parser stops, either
96+
// by reaching the end of input or an error.
97+
func (p *Parser) Next() bool {
98+
if p.err != nil {
99+
return false
100+
}
101+
p.clear()
102+
if !p.scan.Scan() {
103+
if err := p.scan.Err(); err != nil {
104+
p.err = fmt.Errorf("parse debian control file: line %d: %w", p.lineno, err)
105+
}
106+
return false
107+
}
108+
text := p.scan.Text()
109+
for len(text) > 0 {
110+
valueEnd := len(text)
111+
if i := strings.IndexByte(text, '\n'); i != -1 {
112+
// Always i > 0, since paragraph separators are scanned out.
113+
valueEnd = i
114+
}
115+
if text[0] == '#' {
116+
p.clear()
117+
p.err = fmt.Errorf("parse debian control file: line %d: comments not allowed", p.lineno)
118+
return false
119+
}
120+
121+
// Parse field name.
122+
colon := strings.IndexByte(text[:valueEnd], ':')
123+
if colon == -1 {
124+
p.clear()
125+
p.err = fmt.Errorf("parse debian control file: line %d: missing colon", p.lineno)
126+
return false
127+
}
128+
field := Field{Name: text[:colon]}
129+
if err := validateFieldName(field.Name); err != nil {
130+
p.clear()
131+
p.err = fmt.Errorf("parse debian control file: line %d: %w", p.lineno, err)
132+
return false
133+
}
134+
if p.para.find(field.Name) != -1 {
135+
p.clear()
136+
p.err = fmt.Errorf("parse debian control file: line %d: multiple fields for %q", p.lineno, field.Name)
137+
return false
138+
}
139+
140+
// Locate end of field value, considering any continuation lines.
141+
startLine := p.lineno
142+
for valueEnd+1 < len(text) && strings.IndexByte(" \t#", text[valueEnd+1]) != -1 {
143+
p.lineno++
144+
if text[valueEnd+1] == '#' {
145+
p.clear()
146+
p.err = fmt.Errorf("parse debian control file: line %d: comments not allowed", p.lineno)
147+
return false
148+
}
149+
i := strings.IndexByte(text[valueEnd+1:], '\n')
150+
if i == -1 {
151+
valueEnd = len(text)
152+
} else {
153+
valueEnd += 1 + i
154+
}
155+
}
156+
switch p.Fields[field.Name] {
157+
case Simple:
158+
if p.lineno != startLine {
159+
p.clear()
160+
p.err = fmt.Errorf("parse debian control file: line %d: field %q must be a single line", startLine, field.Name)
161+
return false
162+
163+
}
164+
field.Value = strings.TrimSpace(text[colon+1 : valueEnd])
165+
case Folded:
166+
field.Value = strings.ReplaceAll(strings.TrimSpace(text[colon+1:valueEnd]), "\n", "")
167+
case Multiline:
168+
field.Value = text[colon+1 : valueEnd]
169+
default:
170+
panic("unknown field type")
171+
}
172+
if field.Value == "" {
173+
p.clear()
174+
p.err = fmt.Errorf("parse debian control file: line %d: empty field %q", startLine, field.Name)
175+
return false
176+
}
177+
178+
// Add field to paragraph and advance to following line.
179+
p.para = append(p.para, field)
180+
text = strings.TrimPrefix(text[valueEnd:], "\n")
181+
p.lineno++
182+
}
183+
return true
184+
}
185+
186+
// validateFieldName reports whether name is a legal control file field name.
//
// A field name must be non-empty, must not begin with a hyphen or with the
// comment character '#', and may only contain US-ASCII characters in the
// range '!' through '~' other than ':'. It returns nil for a valid name and
// a descriptive error otherwise.
func validateFieldName(name string) error {
	if name == "" {
		return errors.New("empty field name")
	}
	switch name[0] {
	case '-':
		return fmt.Errorf("field name %q begins with hyphen", name)
	case '#':
		return fmt.Errorf("field name %q begins with comment character", name)
	}
	// Iterate by rune so that a non-ASCII character is reported as itself
	// rather than as the first byte of its UTF-8 encoding.
	for _, r := range name {
		if r < '!' || r > '~' || r == ':' {
			return fmt.Errorf("field name %q has forbidden character %q", name, r)
		}
	}
	return nil
}
201+
202+
// FieldType describes how a Parser interprets the value of a field.
type FieldType int

// The field types understood by Parser.
const (
	// Simple is a field whose value must fit on a single line; surrounding
	// whitespace is trimmed. It is the zero value of FieldType.
	Simple FieldType = iota
	// Multiline is a field whose value may span several lines and is
	// returned as written, newlines included.
	Multiline
	// Folded is a field whose value may span several lines; surrounding
	// whitespace is trimmed and newlines are removed before the value is
	// returned.
	Folded
)
214+
215+
func (p *Parser) clear() {
216+
for i := range p.para {
217+
p.para[i] = Field{}
218+
}
219+
p.para = p.para[:0]
220+
}
221+
222+
func (p *Parser) Paragraph() Paragraph {
223+
return p.para[:len(p.para):len(p.para)]
224+
}
225+
226+
func (p *Parser) Err() error {
227+
return p.err
228+
}
229+
230+
// Field is a single field in a control file.
type Field struct {
	// Name is the field name, i.e. the text before the colon.
	Name string
	// Value is the field value. It may contain newlines.
	Value string
}

// String formats the field as a line in a "Release" file:
// the name, a colon and a space, then the value written verbatim.
func (f Field) String() string {
	var sb strings.Builder
	f.appendTo(&sb)
	return sb.String()
}

// appendTo writes the formatted field to sb without a trailing newline.
func (f Field) appendTo(sb *strings.Builder) {
	sb.WriteString(f.Name)
	sb.WriteString(": ")
	sb.WriteString(f.Value)
}

// Paragraph is an ordered mapping of fields in a control file.
// Lookups are linear and match field names exactly (case-sensitively).
type Paragraph []Field

// find returns the index of the first field called name, or -1 if the
// paragraph has no such field.
func (para Paragraph) find(name string) int {
	for i := range para {
		if para[i].Name == name {
			return i
		}
	}
	return -1
}

// Get returns the value of the field with the given name or the empty string
// if the field is not present in the paragraph.
func (para Paragraph) Get(name string) string {
	if i := para.find(name); i != -1 {
		return para[i].Value
	}
	return ""
}

// Set sets the value of the named field, appending it to the paragraph if necessary.
// An existing field keeps its position.
func (para *Paragraph) Set(name, value string) {
	if i := para.find(name); i != -1 {
		(*para)[i].Value = value
		return
	}
	*para = append(*para, Field{Name: name, Value: value})
}

// String formats the fields as lines in a "Release" file, in order and
// separated by newlines. The result has no trailing newline.
func (para Paragraph) String() string {
	var sb strings.Builder
	para.appendTo(&sb)
	return sb.String()
}

// appendTo writes the formatted fields to sb, putting a newline between
// consecutive fields but not after the last one.
func (para Paragraph) appendTo(sb *strings.Builder) {
	for i, f := range para {
		if i > 0 {
			sb.WriteByte('\n')
		}
		f.appendTo(sb)
	}
}
296+
297+
// isEmptyLine reports whether line consists solely of spaces, tabs and
// newlines. A zero-length line is empty.
func isEmptyLine(line []byte) bool {
	for i := 0; i < len(line); i++ {
		switch line[i] {
		case ' ', '\t', '\n':
			// Blank character; keep looking.
		default:
			return false
		}
	}
	return true
}

0 commit comments

Comments
 (0)