-
Notifications
You must be signed in to change notification settings - Fork 0
/
csv.go
116 lines (108 loc) · 3.21 KB
/
csv.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
package csv
import (
"encoding/csv"
"fmt"
"reflect"
)
// Reader is a structured data reader from CSV. It wraps a raw
// encoding/csv reader and stores the fields of each record into a
// value of type T.
type Reader[T any] struct {
rd *csv.Reader // Underlying CSV reader
fieldIndex map[int]int // Maps a record column index to a struct field index; filled by parseHeader
parsedHeader bool // Set by Read once the header row has been consumed and parsed
}
// NewReader wraps the raw CSV record reader r in a structured reader
// that decodes records into values of type T. The type argument is
// validated up front; if T is not a valid type to store the parsed
// data, a nil reader and the validation error are returned.
func NewReader[T any](r *csv.Reader) (*Reader[T], error) {
	reader := &Reader[T]{rd: r}
	err := reader.validateFields()
	if err != nil {
		return nil, err
	}
	return reader, nil
}
// Errors reported by validateFields when the type argument T cannot be
// used as a row destination.
var (
errNotPointer = fmt.Errorf("fields should be a pointer") // T is not a pointer type
errNotStructPointer = fmt.Errorf("fields should be a pointer to a struct") // T points at something other than a struct
errFieldNotAssignable = fmt.Errorf("field is not assignable") // a tagged struct field cannot hold a record value; wrapped with the field name
)
// validateFields checks that the generic type T can be used to store
// record field values of a CSV file. T must be a pointer to a struct, and
// every field whose csv tag yields a non-empty header name must be an
// exported field of kind string.
//
// It returns errNotPointer when T is not a pointer type,
// errNotStructPointer when T points at something other than a struct, and
// an error wrapping errFieldNotAssignable (prefixed with the field name)
// when a tagged field cannot receive a string value.
func (r *Reader[T]) validateFields() error {
	// Derive the type from *T instead of from a zero value of T:
	// reflect.TypeOf returns nil for a nil interface value, so an
	// interface type argument such as any would panic on Kind below.
	rowPtrType := reflect.TypeOf((*T)(nil)).Elem()
	if rowPtrType.Kind() != reflect.Pointer {
		return errNotPointer
	}
	rowStruct := rowPtrType.Elem()
	if rowStruct.Kind() != reflect.Struct {
		return errNotStructPointer
	}
	for i := 0; i < rowStruct.NumField(); i++ {
		f := rowStruct.Field(i)
		tag := ParseTag(f.Tag.Get("csv"))
		if tag.FieldHeader == "" {
			// Field takes no part in CSV decoding.
			continue
		}
		// Values are stored with reflect's SetString, which panics on
		// unexported fields and on fields that are not of kind string,
		// so both are rejected here.
		if !f.IsExported() || f.Type.Kind() != reflect.String {
			return fmt.Errorf("invalid field %s: %w", f.Name, errFieldNotAssignable)
		}
	}
	return nil
}
// parseHeader parses the header row of the CSV and records, in
// r.fieldIndex, which struct field of T each header column is stored to.
//
// Only the type of rowPtr is inspected, so a nil pointer is accepted here.
// Struct fields whose csv tag yields no header name are ignored, and
// tagged fields whose header name does not appear in header are left
// unmapped, so every record keeps the zero value for them. When a header
// name occurs more than once, the last occurrence wins. The returned
// error is currently always nil.
func (r *Reader[T]) parseHeader(header []string, rowPtr T) error {
	headerToIndex := make(map[string]int, len(header))
	for i, name := range header {
		headerToIndex[name] = i
	}

	rowType := reflect.TypeOf(rowPtr)
	if rowType.Kind() == reflect.Pointer {
		rowType = rowType.Elem()
	}

	if r.fieldIndex == nil {
		r.fieldIndex = make(map[int]int, rowType.NumField())
	}
	for i := 0; i < rowType.NumField(); i++ {
		tag := ParseTag(rowType.Field(i).Tag.Get("csv"))
		if tag.FieldHeader == "" {
			// Without this guard an untagged field would be bound to a
			// header column whose name is empty (e.g. a trailing comma),
			// even though validateFields never checked that field.
			continue
		}
		column, found := headerToIndex[tag.FieldHeader]
		if !found {
			// Tag specifies a field that isn't in the header, all
			// records will use zero value for that struct field.
			continue
		}
		r.fieldIndex[column] = i
	}
	return nil
}
// assignFields takes a record and stores each mapped column into the
// struct that rowPtr points to. Columns with no entry in r.fieldIndex are
// skipped, and struct fields whose column is absent from record are left
// untouched.
//
// It returns an error when rowPtr is nil.
func (r *Reader[T]) assignFields(record []string, rowPtr T) error {
	// Resolve the destination struct once rather than once per column.
	// reflect.Indirect yields the zero Value for a nil pointer, which is
	// reported as an error instead of panicking on field access.
	rowStruct := reflect.Indirect(reflect.ValueOf(rowPtr))
	if !rowStruct.IsValid() {
		return fmt.Errorf("rowPtr should be a non-nil pointer to a struct")
	}
	for i, field := range record {
		sfIndex, exists := r.fieldIndex[i]
		if !exists {
			continue
		}
		rowStruct.Field(sfIndex).SetString(field)
	}
	return nil
}
// Read reads one record and stores its fields into the struct that rowPtr
// points to. The first call also consumes the header row and uses it to
// map columns to struct fields.
//
// Errors from the underlying CSV reader are returned unchanged; in
// particular it returns io.EOF if there's no more record to read. A nil
// rowPtr is rejected with an error before any input is consumed.
func (r *Reader[T]) Read(rowPtr T) error {
	// Check the destination first so that a bad call neither panics nor
	// swallows the header row or a data row.
	if v := reflect.ValueOf(rowPtr); v.Kind() != reflect.Pointer || v.IsNil() {
		return fmt.Errorf("rowPtr should be a non-nil pointer to a struct")
	}

	if !r.parsedHeader {
		header, err := r.rd.Read()
		if err != nil {
			return err
		}
		if err := r.parseHeader(header, rowPtr); err != nil {
			return err
		}
		r.parsedHeader = true
	}

	record, err := r.rd.Read()
	if err != nil {
		return err
	}
	return r.assignFields(record, rowPtr)
}