|
| 1 | +package apt |
| 2 | + |
| 3 | +import ( |
| 4 | + "bufio" |
| 5 | + "bytes" |
| 6 | + "errors" |
| 7 | + "fmt" |
| 8 | + "io" |
| 9 | + "strings" |
| 10 | +) |
| 11 | + |
// A Parser reads fields from a control file.
// The syntax is documented at https://www.debian.org/doc/debian-policy/ch-controlfields.html#syntax-of-control-files
//
// Create a Parser with NewParser, optionally set Fields, then call Next (or
// Single) and read the result with Paragraph. After Next or Single returns
// false, Err reports whether parsing stopped because of an error.
type Parser struct {
	// Fields specifies the type of possible fields.
	// Next looks every field name up in this map; a name that is absent (or a
	// nil map) yields the zero FieldType, which is Simple.
	Fields map[string]FieldType

	scan   *bufio.Scanner // yields one paragraph per token; split function installed by NewParser
	lineno int            // line number used in error messages; starts at 1
	para   Paragraph      // fields of the current paragraph; storage is reused by clear
	err    error          // sticky error; once set, Next and Single return false
}
| 23 | + |
| 24 | +// NewParser returns a new parser that reads from r. |
| 25 | +func NewParser(r io.Reader) *Parser { |
| 26 | + p := &Parser{ |
| 27 | + scan: bufio.NewScanner(r), |
| 28 | + lineno: 1, |
| 29 | + } |
| 30 | + // Split by paragraph. |
| 31 | + p.scan.Split(func(data []byte, atEOF bool) (advance int, token []byte, err error) { |
| 32 | + for advance < len(data) { |
| 33 | + start := advance |
| 34 | + var line []byte |
| 35 | + if i := bytes.IndexByte(data[advance:], '\n'); i != -1 { |
| 36 | + line = data[advance : advance+i] |
| 37 | + advance += i + 1 |
| 38 | + } else if atEOF { |
| 39 | + line = data[advance:] |
| 40 | + advance = len(data) |
| 41 | + } else { |
| 42 | + // Not enough buffered for complete line. |
| 43 | + return 0, nil, nil |
| 44 | + } |
| 45 | + if isEmptyLine(line) { |
| 46 | + if token == nil { |
| 47 | + // Advance lineno for leading empty lines. |
| 48 | + p.lineno++ |
| 49 | + } |
| 50 | + return |
| 51 | + } |
| 52 | + token = data[:start+len(line)] |
| 53 | + } |
| 54 | + if !atEOF { |
| 55 | + // Not enough buffered. |
| 56 | + return 0, nil, nil |
| 57 | + } |
| 58 | + return |
| 59 | + }) |
| 60 | + return p |
| 61 | +} |
| 62 | + |
| 63 | +// Single parses a single-paragraph control file, which will then be available |
| 64 | +// through the Paragraph method. It returns false if the method is called after |
| 65 | +// any call to Next, the parser stops before reading a paragraph, or the parser |
| 66 | +// encounters a syntax error. |
| 67 | +func (p *Parser) Single() bool { |
| 68 | + if p.err != nil { |
| 69 | + return false |
| 70 | + } |
| 71 | + if p.lineno != 1 { |
| 72 | + p.clear() |
| 73 | + p.err = errors.New("parse debian control file: Parser.Single called after Parser.Next") |
| 74 | + return false |
| 75 | + } |
| 76 | + if !p.Next() { |
| 77 | + return false |
| 78 | + } |
| 79 | + |
| 80 | + // Check for trailing data. |
| 81 | + if p.scan.Scan() { |
| 82 | + p.clear() |
| 83 | + p.err = fmt.Errorf("parse debian control file: line %d: multiple paragraphs encountered", p.lineno) |
| 84 | + return false |
| 85 | + } |
| 86 | + if err := p.scan.Err(); err != nil { |
| 87 | + p.clear() |
| 88 | + p.err = fmt.Errorf("parse debian control file: line %d: %w", p.lineno, err) |
| 89 | + return false |
| 90 | + } |
| 91 | + return true |
| 92 | +} |
| 93 | + |
| 94 | +// Next advances the parser to the next paragraph, which will then be available |
| 95 | +// through the Paragraph method. It returns false when the parser stops, either |
| 96 | +// by reaching the end of input or an error. |
| 97 | +func (p *Parser) Next() bool { |
| 98 | + if p.err != nil { |
| 99 | + return false |
| 100 | + } |
| 101 | + p.clear() |
| 102 | + if !p.scan.Scan() { |
| 103 | + if err := p.scan.Err(); err != nil { |
| 104 | + p.err = fmt.Errorf("parse debian control file: line %d: %w", p.lineno, err) |
| 105 | + } |
| 106 | + return false |
| 107 | + } |
| 108 | + text := p.scan.Text() |
| 109 | + for len(text) > 0 { |
| 110 | + valueEnd := len(text) |
| 111 | + if i := strings.IndexByte(text, '\n'); i != -1 { |
| 112 | + // Always i > 0, since paragraph separators are scanned out. |
| 113 | + valueEnd = i |
| 114 | + } |
| 115 | + if text[0] == '#' { |
| 116 | + p.clear() |
| 117 | + p.err = fmt.Errorf("parse debian control file: line %d: comments not allowed", p.lineno) |
| 118 | + return false |
| 119 | + } |
| 120 | + |
| 121 | + // Parse field name. |
| 122 | + colon := strings.IndexByte(text[:valueEnd], ':') |
| 123 | + if colon == -1 { |
| 124 | + p.clear() |
| 125 | + p.err = fmt.Errorf("parse debian control file: line %d: missing colon", p.lineno) |
| 126 | + return false |
| 127 | + } |
| 128 | + field := Field{Name: text[:colon]} |
| 129 | + if err := validateFieldName(field.Name); err != nil { |
| 130 | + p.clear() |
| 131 | + p.err = fmt.Errorf("parse debian control file: line %d: %w", p.lineno, err) |
| 132 | + return false |
| 133 | + } |
| 134 | + if p.para.find(field.Name) != -1 { |
| 135 | + p.clear() |
| 136 | + p.err = fmt.Errorf("parse debian control file: line %d: multiple fields for %q", p.lineno, field.Name) |
| 137 | + return false |
| 138 | + } |
| 139 | + |
| 140 | + // Locate end of field value, considering any continuation lines. |
| 141 | + startLine := p.lineno |
| 142 | + for valueEnd+1 < len(text) && strings.IndexByte(" \t#", text[valueEnd+1]) != -1 { |
| 143 | + p.lineno++ |
| 144 | + if text[valueEnd+1] == '#' { |
| 145 | + p.clear() |
| 146 | + p.err = fmt.Errorf("parse debian control file: line %d: comments not allowed", p.lineno) |
| 147 | + return false |
| 148 | + } |
| 149 | + i := strings.IndexByte(text[valueEnd+1:], '\n') |
| 150 | + if i == -1 { |
| 151 | + valueEnd = len(text) |
| 152 | + } else { |
| 153 | + valueEnd += 1 + i |
| 154 | + } |
| 155 | + } |
| 156 | + switch p.Fields[field.Name] { |
| 157 | + case Simple: |
| 158 | + if p.lineno != startLine { |
| 159 | + p.clear() |
| 160 | + p.err = fmt.Errorf("parse debian control file: line %d: field %q must be a single line", startLine, field.Name) |
| 161 | + return false |
| 162 | + |
| 163 | + } |
| 164 | + field.Value = strings.TrimSpace(text[colon+1 : valueEnd]) |
| 165 | + case Folded: |
| 166 | + field.Value = strings.ReplaceAll(strings.TrimSpace(text[colon+1:valueEnd]), "\n", "") |
| 167 | + case Multiline: |
| 168 | + field.Value = text[colon+1 : valueEnd] |
| 169 | + default: |
| 170 | + panic("unknown field type") |
| 171 | + } |
| 172 | + if field.Value == "" { |
| 173 | + p.clear() |
| 174 | + p.err = fmt.Errorf("parse debian control file: line %d: empty field %q", startLine, field.Name) |
| 175 | + return false |
| 176 | + } |
| 177 | + |
| 178 | + // Add field to paragraph and advance to following line. |
| 179 | + p.para = append(p.para, field) |
| 180 | + text = strings.TrimPrefix(text[valueEnd:], "\n") |
| 181 | + p.lineno++ |
| 182 | + } |
| 183 | + return true |
| 184 | +} |
| 185 | + |
// validateFieldName reports whether name is a syntactically valid control
// file field name. It returns nil for a valid name and a descriptive error
// otherwise.
//
// A valid name is non-empty, does not begin with a hyphen or with the comment
// character '#', and consists only of bytes in the ranges '!' through '9' and
// ';' through '~' (printable ASCII without space and colon).
func validateFieldName(name string) error {
	if name == "" {
		return errors.New("empty field name")
	}
	switch name[0] {
	case '-':
		return fmt.Errorf("field name %q begins with hyphen", name)
	case '#':
		return fmt.Errorf("field name %q begins with comment character", name)
	}
	for i := 0; i < len(name); i++ {
		c := name[i]
		// The gap between '9' and ';' excludes the colon, which ends a name.
		if !('!' <= c && c <= '9' || ';' <= c && c <= '~') {
			return fmt.Errorf("field name %q has forbidden character %q", name, c)
		}
	}
	return nil
}
| 201 | + |
// FieldType is an enumeration of the types of fields.
// It selects how Parser.Next treats continuation lines and whitespace in a
// field's value. The zero value is Simple.
type FieldType int

const (
	// Simple indicates a single-line field.
	// Parser.Next rejects a Simple field that has continuation lines and
	// trims surrounding whitespace from its value.
	Simple FieldType = iota
	// Multiline indicates a field that may contain multiple lines.
	// Parser.Next returns everything after the colon unchanged, including
	// leading whitespace and embedded newlines.
	Multiline
	// Folded indicates a field that may span multiple lines, but newlines are
	// stripped before being returned.
	Folded
)
| 214 | + |
| 215 | +func (p *Parser) clear() { |
| 216 | + for i := range p.para { |
| 217 | + p.para[i] = Field{} |
| 218 | + } |
| 219 | + p.para = p.para[:0] |
| 220 | +} |
| 221 | + |
| 222 | +func (p *Parser) Paragraph() Paragraph { |
| 223 | + return p.para[:len(p.para):len(p.para)] |
| 224 | +} |
| 225 | + |
// Err returns the error that stopped the parser, or nil if there is none.
// Reaching the end of input is not an error. Once an error has been
// recorded, Next and Single return false without replacing it.
func (p *Parser) Err() error {
	return p.err
}
| 229 | + |
// Field is a single field in a control file.
type Field struct {
	// Name is the field name, the text before the colon.
	Name string

	// Value is the field's value. When formatted, it is written after the
	// name, a colon and a single space, without further processing.
	Value string
}
| 235 | + |
| 236 | +// String formats the field as a line in a "Release" file. |
| 237 | +func (f Field) String() string { |
| 238 | + sb := new(strings.Builder) |
| 239 | + f.appendTo(sb) |
| 240 | + return sb.String() |
| 241 | +} |
| 242 | + |
| 243 | +func (f Field) appendTo(sb *strings.Builder) { |
| 244 | + sb.WriteString(f.Name) |
| 245 | + sb.WriteString(": ") |
| 246 | + sb.WriteString(f.Value) |
| 247 | +} |
| 248 | + |
// Paragraph is an ordered mapping of fields in a control file.
// Fields keep the order in which they were added. Lookups by name scan the
// slice from the front and compare names exactly.
type Paragraph []Field
| 251 | + |
| 252 | +func (m Paragraph) find(name string) int { |
| 253 | + for i, f := range m { |
| 254 | + if f.Name == name { |
| 255 | + return i |
| 256 | + } |
| 257 | + } |
| 258 | + return -1 |
| 259 | +} |
| 260 | + |
| 261 | +// Get returns the value of the field with the given name or the empty string |
| 262 | +// if the field is not present in the paragraph. |
| 263 | +func (para Paragraph) Get(name string) string { |
| 264 | + i := para.find(name) |
| 265 | + if i == -1 { |
| 266 | + return "" |
| 267 | + } |
| 268 | + return para[i].Value |
| 269 | +} |
| 270 | + |
| 271 | +// Set sets the value of the named field, appending it to the paragraph if necessary. |
| 272 | +func (para *Paragraph) Set(name, value string) { |
| 273 | + i := para.find(name) |
| 274 | + if i == -1 { |
| 275 | + *para = append(*para, Field{name, value}) |
| 276 | + return |
| 277 | + } |
| 278 | + (*para)[i].Value = value |
| 279 | +} |
| 280 | + |
| 281 | +// String formats the fields as lines in a "Release" file. |
| 282 | +func (m Paragraph) String() string { |
| 283 | + sb := new(strings.Builder) |
| 284 | + m.appendTo(sb) |
| 285 | + return sb.String() |
| 286 | +} |
| 287 | + |
| 288 | +func (m Paragraph) appendTo(sb *strings.Builder) { |
| 289 | + for i, f := range m { |
| 290 | + if i > 0 { |
| 291 | + sb.WriteByte('\n') |
| 292 | + } |
| 293 | + f.appendTo(sb) |
| 294 | + } |
| 295 | +} |
| 296 | + |
// isEmptyLine reports whether line consists solely of spaces, tabs and
// newlines. A zero-length line counts as empty.
func isEmptyLine(line []byte) bool {
	for i := 0; i < len(line); i++ {
		switch line[i] {
		case ' ', '\t', '\n':
			// Blank so far; keep looking.
		default:
			return false
		}
	}
	return true
}
0 commit comments