-
Notifications
You must be signed in to change notification settings - Fork 13
/
client.go
138 lines (120 loc) · 3.46 KB
/
client.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
package metha
import (
"bytes"
"compress/gzip"
"encoding/xml"
"fmt"
"io"
"io/ioutil"
"log"
"net/http"
"strings"
"time"
"github.com/sethgrid/pester"
)
// Defaults applied to the Doer behind DefaultClient.
const (
	// DefaultTimeout is the timeout passed to CreateDoer for DefaultClient.
	DefaultTimeout time.Duration = 5 * time.Minute
	// DefaultMaxRetries is the retry count passed to CreateDoer for
	// DefaultClient.
	DefaultMaxRetries = 8
)
var (
	// StdClient wraps http.DefaultClient directly; none of the timeout or
	// retry settings of CreateDoer are applied.
	StdClient = Client{Doer: http.DefaultClient}
	// DefaultClient uses a Doer created from DefaultTimeout and
	// DefaultMaxRetries.
	DefaultClient = Client{Doer: CreateDoer(DefaultTimeout, DefaultMaxRetries)}
	// ControlCharReplacer deletes control characters that lie outside the XML
	// 1.0 character range, i.e. U+0000 to U+001F except tab (U+0009), line
	// feed (U+000A) and carriage return (U+000D). Those three are legal XML
	// whitespace and must be kept: removing a tab that separates a tag name
	// from an attribute would corrupt otherwise valid markup.
	//
	// Example for broken XML: http://eprints.vu.edu.au/perl/oai2. Add more
	// weird things to be cleaned before XML parsing here. Another faulty:
	// http://digitalcommons.gardner-webb.edu/do/oai/?from=2016-02-29&metadataPr
	// efix=oai_dc&until=2016-03-31&verb=ListRecords.
	ControlCharReplacer = strings.NewReplacer(
		"\u0000", "", "\u0001", "", "\u0002", "",
		"\u0003", "", "\u0004", "", "\u0005", "",
		"\u0006", "", "\u0007", "", "\u0008", "",
		"\u000B", "", "\u000C", "", "\u000E", "",
		"\u000F", "", "\u0010", "", "\u0011", "",
		"\u0012", "", "\u0013", "", "\u0014", "",
		"\u0015", "", "\u0016", "", "\u0017", "",
		"\u0018", "", "\u0019", "", "\u001A", "",
		"\u001B", "", "\u001C", "", "\u001D", "",
		"\u001E", "", "\u001F", "",
	)
)
// CreateDoer builds the HTTP backend for a Client. When both timeout and
// retries are zero, http.DefaultClient is returned as is. Otherwise a pester
// client is returned, configured with the given timeout, the given maximum
// number of retries and exponential backoff.
func CreateDoer(timeout time.Duration, retries int) Doer {
	if timeout != 0 || retries != 0 {
		resilient := pester.New()
		resilient.Backoff = pester.ExponentialBackoff
		resilient.MaxRetries = retries
		resilient.Timeout = timeout
		return resilient
	}
	return http.DefaultClient
}
// CreateClient returns a Client whose Doer was built by CreateDoer from the
// given timeout and retry count.
func CreateClient(timeout time.Duration, retries int) Client {
	doer := CreateDoer(timeout, retries)
	return Client{Doer: doer}
}
// Doer is the smallest HTTP abstraction a Client needs: something that can
// turn an *http.Request into an *http.Response. *http.Client satisfies it.
type Doer interface {
	Do(req *http.Request) (*http.Response, error)
}
// Client executes OAI requests. The HTTP round trip itself is delegated to
// the Doer field.
type Client struct {
// Doer performs the HTTP request built by Client.Do.
Doer Doer
}
// Do runs the request through the package-level DefaultClient and hands back
// whatever DefaultClient.Do returns.
func Do(r *Request) (*Response, error) {
	resp, err := DefaultClient.Do(r)
	return resp, err
}
// maybeCompressed reads r completely and returns a reader over its content.
// If the content starts with a valid gzip header, the returned reader
// decompresses it (and a log line is written); otherwise the bytes are served
// unchanged. An error is returned only if reading r fails.
func maybeCompressed(r io.Reader) (io.ReadCloser, error) {
	raw, err := ioutil.ReadAll(r)
	if err != nil {
		return nil, err
	}
	zr, zerr := gzip.NewReader(bytes.NewReader(raw))
	if zerr != nil {
		// Not gzip: hand the buffered bytes back untouched.
		return ioutil.NopCloser(bytes.NewReader(raw)), nil
	}
	log.Println("decompress-on-the-fly")
	return zr, nil
}
// Do executes a single OAI request and decodes the XML body into a Response.
// ResumptionToken handling must happen in the caller. Only Identify and
// GetRecord requests will return a complete response.
//
// The request URL is logged before the request is sent. A status code of 400
// or above is reported as an error. Gzip-compressed bodies are decompressed
// transparently, and if r.CleanBeforeDecode is set, control characters are
// removed with ControlCharReplacer before the XML is parsed.
func (c *Client) Do(r *Request) (*Response, error) {
	link, err := r.URL()
	if err != nil {
		return nil, err
	}
	log.Println(link)

	req, err := http.NewRequest("GET", link.String(), nil)
	if err != nil {
		return nil, err
	}
	resp, err := c.Doer.Do(req)
	if err != nil {
		return nil, err
	}
	// Register the close before any further return, so the body is also
	// released when the server answers with an error status.
	defer resp.Body.Close()

	if resp.StatusCode >= 400 {
		return nil, fmt.Errorf("error: server returned %s for %s", http.StatusText(resp.StatusCode), link)
	}

	// detect compressed response
	reader, err := maybeCompressed(resp.Body)
	if err != nil {
		return nil, err
	}
	defer reader.Close()

	if r.CleanBeforeDecode {
		// remove some chars, that the XML decoder will complain about
		b, err := ioutil.ReadAll(reader)
		if err != nil {
			return nil, err
		}
		reader = ioutil.NopCloser(strings.NewReader(ControlCharReplacer.Replace(string(b))))
	}

	var response Response
	if err := xml.NewDecoder(reader).Decode(&response); err != nil {
		return nil, err
	}
	return &response, nil
}