/
three-column.go
80 lines (66 loc) · 1.77 KB
/
three-column.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
package demographics
import (
"os"
"strings"
"time"
mapset "github.com/deckarep/golang-set"
"github.com/gocarina/gocsv"
log "github.com/sirupsen/logrus"
)
// Compile-time assertion that *ThreeColumnCsv implements CsvInput.
var _ CsvInput = (*ThreeColumnCsv)(nil)

// ThreeColumnCsv holds demographics read from a CSV file with the columns
// ID, Sex and BirthYear. Both maps are keyed by individual ID.
type ThreeColumnCsv struct {
	ages  map[string]Age // age per individual ID
	sexes map[string]Sex // sex per individual ID
	indvs []string       // each individual ID seen in the input, listed once
}
// NewThreeColumnCsv reads demographics from f, a CSV file whose header names
// the columns ID, Sex and BirthYear, and returns them indexed by ID.
//
// Sex is matched case-insensitively against F/FEMALE, M/MALE and U/UNKNOWN;
// any other value is logged as a warning and stored as Unknown. Age is
// obtained from CalculateAge using the current calendar year and BirthYear.
//
// When an ID occurs more than once a warning is logged and the values of the
// last such row are kept. Each ID is recorded once for Indvs, in the order of
// its first appearance in the file, so the listing is reproducible between
// runs rather than following set iteration order.
//
// If the file cannot be unmarshalled the error is reported through
// log.Fatalf (logrus), which ends the process. As a side effect the
// package-level gocsv.FailIfUnmatchedStructTags is set to true.
func NewThreeColumnCsv(f *os.File) *ThreeColumnCsv {
	// entry mirrors one CSV row; the tags are the required header names.
	type entry struct {
		ID        string `csv:"ID"`
		Sex       string `csv:"Sex"`
		BirthYear uint   `csv:"BirthYear"`
	}

	y := uint(time.Now().Year())

	entries := make([]*entry, 0, 100)
	gocsv.FailIfUnmatchedStructTags = true
	if err := gocsv.UnmarshalFile(f, &entries); err != nil {
		log.Fatalf("Misread in CSV: %s, rename column to match names used here\n", err)
	}

	c := &ThreeColumnCsv{
		ages:  make(map[string]Age, len(entries)),
		sexes: make(map[string]Sex, len(entries)),
	}

	ids := mapset.NewSet()
	for _, e := range entries {
		if ids.Contains(e.ID) {
			log.Warnf("Demographics for ID %q duplicated, using: %+v\n", e.ID, e)
		} else {
			// First sighting: remember the ID in file order. Appending here,
			// instead of dumping the set afterwards, keeps Indvs deterministic.
			ids.Add(e.ID)
			c.indvs = append(c.indvs, e.ID)
		}

		switch strings.ToUpper(e.Sex) {
		case "F", "FEMALE":
			c.sexes[e.ID] = Female
		case "M", "MALE":
			c.sexes[e.ID] = Male
		case "U", "UNKNOWN":
			c.sexes[e.ID] = Unknown
		default:
			log.Warnf("Did not understand Sex in entry: %v; setting Sex to Unknown\n", e)
			c.sexes[e.ID] = Unknown
		}
		c.ages[e.ID] = CalculateAge(y, e.BirthYear)
	}
	return c
}
// Age looks up the age stored for id. The boolean result is true when id has
// an entry; otherwise the zero Age and false are returned.
func (c *ThreeColumnCsv) Age(id string) (Age, bool) {
	if found, present := c.ages[id]; present {
		return found, true
	}
	var zero Age
	return zero, false
}
// Sex looks up the sex stored for id. The boolean result is true when id has
// an entry; otherwise the zero Sex and false are returned.
func (c *ThreeColumnCsv) Sex(id string) (Sex, bool) {
	if found, present := c.sexes[id]; present {
		return found, true
	}
	var zero Sex
	return zero, false
}
// Indvs returns the individual IDs held by c. The internal slice is returned
// as-is, without copying, so changes made by the caller are visible to c.
func (c *ThreeColumnCsv) Indvs() []string {
	return c.indvs
}