-
Notifications
You must be signed in to change notification settings - Fork 7
/
article.go
106 lines (101 loc) · 3.77 KB
/
article.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
package dblp
import (
"encoding/base64"
"encoding/xml"
"fmt"
"strings"
"time"
"github.com/miku/span"
"github.com/miku/span/formats/finc"
)
// Article is the decoding target for a single <article> XML element of the
// DBLP data. Attributes of the element map to the fields tagged ",attr",
// child elements map to the remaining fields, and repeatable children
// (author, ee, note, editor, cite) are decoded into slices.
//
// The struct was generated 2021-07-30 13:08:22 by tir on optiplex. The
// trailing comments on the fields appear to be truncated sample values taken
// from the data the struct was generated from.
type Article struct {
	XMLName xml.Name `xml:"article"`
	Text string `xml:",chardata"`
	Mdate string `xml:"mdate,attr"`
	// Key is the record key attribute; ID derives the record identifier from it.
	Key string `xml:"key,attr"`
	Publtype string `xml:"publtype,attr"`
	Cdate string `xml:"cdate,attr"`
	// Author lists the author elements; Text holds the element's character data.
	Author []struct {
		Text string `xml:",chardata"` // Paul Kocher, Daniel Genki...
		Orcid string `xml:"orcid,attr"`
		Aux string `xml:"aux,attr"`
	} `xml:"author"`
	// Title holds the title text plus any nested <tt> elements.
	Title struct {
		Text string `xml:",chardata"` // Spectre Attacks: Exploiti...
		Bibtex string `xml:"bibtex,attr"`
		Tt []string `xml:"tt"` // PROP, IIPBF, MATLAB, Repi...
	} `xml:"title"`
	Journal string `xml:"journal"` // meltdownattack.com, meltd...
	// Year is kept as a string; ToIntermediateSchema parses it with the layout "2006".
	Year string `xml:"year"` // 2018, 2018, 1994, 1993, 1...
	// Ee holds additional links; ToIntermediateSchema keeps those starting with "http".
	Ee []struct {
		Text string `xml:",chardata"` // https://spectreattack.com...
		Type string `xml:"type,attr"`
	} `xml:"ee"`
	Volume string `xml:"volume"` // TR-0263-08-94-165, TR-022...
	Month string `xml:"month"` // August, March, December, ...
	// URL is either absolute ("http...") or a site-relative path such as "db/journals/...".
	URL string `xml:"url"` // db/journals/gtelab/index....
	Note []struct {
		Text string `xml:",chardata"` // This report is also avail...
		Type string `xml:"type,attr"`
	} `xml:"note"`
	Cdrom string `xml:"cdrom"` // SQL/X3H2-90-412.pdf, SQL/...
	Publisher string `xml:"publisher"` // IBM Germany Science Cente...
	Editor []struct {
		Text string `xml:",chardata"` // Thomas Wetter, Rolf Engel...
		Orcid string `xml:"orcid,attr"`
	} `xml:"editor"`
	Pages string `xml:"pages"` // 2947-2962, 1-27, 409-421,...
	Number string `xml:"number"` // 4, 1, 5, 3, 1, 1, 4, 1, 3...
	Cite []struct {
		Text string `xml:",chardata"` // journals/nca/HuangCL14a, ...
		Label string `xml:"label,attr"`
	} `xml:"cite"`
	Crossref string `xml:"crossref"` // journals/tcci/2011-3, jou...
	Publnr string `xml:"publnr"` // TR11-015, 1806.06017, 200...
	Booktitle string `xml:"booktitle"` // Logical Methods in Comput...
}
// ID returns the record identifier for the article: the fixed prefix
// "ai-210-" followed by the article key encoded as unpadded, URL-safe base64.
func (article *Article) ID() string {
	encodedKey := base64.RawURLEncoding.EncodeToString([]byte(article.Key))
	return fmt.Sprintf("ai-210-%s", encodedKey)
}
// ToIntermediateSchema converts a DBLP article into a finc intermediate
// schema record.
//
// The publication date is derived from the Year field alone, parsed with the
// layout "2006"; if the year cannot be parsed, no record is returned and the
// error is a span.Skip carrying the offending value.
//
// The title is stored as an article title for absolute ("http") URLs and for
// paths under "db/journals" or "db/conf", and as a book title for paths under
// "db/books". Relative paths are resolved against https://dblp.org/. For any
// other URL value neither a title nor a primary URL is set. Links from the Ee
// field that start with "http" are appended to the URL list afterwards.
func (article *Article) ToIntermediateSchema() (*finc.IntermediateSchema, error) {
	output := finc.NewIntermediateSchema()

	// Validate the date first: without a usable year the record is skipped.
	published, err := time.Parse("2006", article.Year)
	if err != nil {
		return nil, span.Skip{Reason: fmt.Sprintf("unparsable date: %s", article.Year)}
	}
	output.Date = published
	output.RawDate = published.Format("2006-01-02")

	// Fixed source metadata and fields that are copied over verbatim.
	output.ID = article.ID()
	output.SourceID = "210"
	output.MegaCollections = []string{"DBLP", "sid-210-coll-dblp"}
	output.Volume = article.Volume
	output.Issue = article.Number
	if article.Publisher != "" {
		output.Publishers = []string{article.Publisher}
	}
	for _, author := range article.Author {
		output.Authors = append(output.Authors, finc.Author{Name: author.Text})
	}

	// The URL prefix decides which title field is filled and whether the
	// link has to be made absolute.
	switch {
	case strings.HasPrefix(article.URL, "http"):
		output.ArticleTitle = article.Title.Text
		output.URL = []string{article.URL}
	case strings.HasPrefix(article.URL, "db/journals"),
		strings.HasPrefix(article.URL, "db/conf"):
		output.ArticleTitle = article.Title.Text
		output.URL = []string{fmt.Sprintf("https://dblp.org/%s", article.URL)}
	case strings.HasPrefix(article.URL, "db/books"):
		output.BookTitle = article.Title.Text
		output.URL = []string{fmt.Sprintf("https://dblp.org/%s", article.URL)}
	}

	// Only absolute links from the ee elements are kept.
	for _, link := range article.Ee {
		if !strings.HasPrefix(link.Text, "http") {
			continue
		}
		output.URL = append(output.URL, link.Text)
	}
	return output, nil
}