forked from googleapis/google-cloud-go
-
Notifications
You must be signed in to change notification settings - Fork 0
/
job.go
348 lines (289 loc) · 9.84 KB
/
job.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bigquery
import (
"errors"
"math/rand"
"os"
"sync"
"time"
"cloud.google.com/go/internal"
gax "github.com/googleapis/gax-go"
"golang.org/x/net/context"
bq "google.golang.org/api/bigquery/v2"
)
// A Job represents an operation which has been submitted to BigQuery for processing.
type Job struct {
c *Client
projectID string
jobID string
isQuery bool
destinationTable *bq.TableReference // table to read query results from
}
// JobFromID creates a Job which refers to an existing BigQuery job. The job
// need not have been created by this package. For example, the job may have
// been created in the BigQuery console.
func (c *Client) JobFromID(ctx context.Context, id string) (*Job, error) {
job, err := c.service.getJob(ctx, c.projectID, id)
if err != nil {
return nil, err
}
job.c = c
return job, nil
}
// ID returns the job's ID.
func (j *Job) ID() string {
return j.jobID
}
// State is one of a sequence of states that a Job progresses through as it is processed.
type State int
const (
Pending State = iota
Running
Done
)
// JobStatus contains the current State of a job, and errors encountered while processing that job.
type JobStatus struct {
State State
err error
// All errors encountered during the running of the job.
// Not all Errors are fatal, so errors here do not necessarily mean that the job has completed or was unsuccessful.
Errors []*Error
// Statistics about the job.
Statistics *JobStatistics
}
// setJobRef initializes job's JobReference if given a non-empty jobID.
// projectID must be non-empty.
func setJobRef(job *bq.Job, jobID, projectID string) {
if jobID == "" {
// Generate an ID on the client so that insertJob can be idempotent.
jobID = randomJobID()
}
// We don't check whether projectID is empty; the server will return an
// error when it encounters the resulting JobReference.
job.JobReference = &bq.JobReference{
JobId: jobID,
ProjectId: projectID,
}
}
const alphanum = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
var (
rngMu sync.Mutex
rng = rand.New(rand.NewSource(time.Now().UnixNano() ^ int64(os.Getpid())))
)
func randomJobID() string {
// As of August 2017, the BigQuery service uses 27 alphanumeric characters.
var b [27]byte
rngMu.Lock()
for i := 0; i < len(b); i++ {
b[i] = alphanum[rng.Intn(len(alphanum))]
}
rngMu.Unlock()
return string(b[:])
}
// Done reports whether the job has completed.
// After Done returns true, the Err method will return an error if the job completed unsuccesfully.
func (s *JobStatus) Done() bool {
return s.State == Done
}
// Err returns the error that caused the job to complete unsuccesfully (if any).
func (s *JobStatus) Err() error {
return s.err
}
// Status returns the current status of the job. It fails if the Status could not be determined.
func (j *Job) Status(ctx context.Context) (*JobStatus, error) {
js, err := j.c.service.jobStatus(ctx, j.projectID, j.jobID)
if err != nil {
return nil, err
}
// Fill in the client field of Tables in the statistics.
if js.Statistics != nil {
if qs, ok := js.Statistics.Details.(*QueryStatistics); ok {
for _, t := range qs.ReferencedTables {
t.c = j.c
}
}
}
return js, nil
}
// Cancel requests that a job be cancelled. This method returns without waiting for
// cancellation to take effect. To check whether the job has terminated, use Job.Status.
// Cancelled jobs may still incur costs.
func (j *Job) Cancel(ctx context.Context) error {
return j.c.service.jobCancel(ctx, j.projectID, j.jobID)
}
// Wait blocks until the job or the context is done. It returns the final status
// of the job.
// If an error occurs while retrieving the status, Wait returns that error. But
// Wait returns nil if the status was retrieved successfully, even if
// status.Err() != nil. So callers must check both errors. See the example.
func (j *Job) Wait(ctx context.Context) (*JobStatus, error) {
if j.isQuery {
// We can avoid polling for query jobs.
if _, err := j.c.service.waitForQuery(ctx, j.projectID, j.jobID); err != nil {
return nil, err
}
// Note: extra RPC even if you just want to wait for the query to finish.
js, err := j.Status(ctx)
if err != nil {
return nil, err
}
return js, nil
}
// Non-query jobs must poll.
var js *JobStatus
err := internal.Retry(ctx, gax.Backoff{}, func() (stop bool, err error) {
js, err = j.Status(ctx)
if err != nil {
return true, err
}
if js.Done() {
return true, nil
}
return false, nil
})
if err != nil {
return nil, err
}
return js, nil
}
// Read fetches the results of a query job.
// If j is not a query job, Read returns an error.
func (j *Job) Read(ctx context.Context) (*RowIterator, error) {
if !j.isQuery {
return nil, errors.New("bigquery: cannot read from a non-query job")
}
var projectID string
if j.destinationTable != nil {
projectID = j.destinationTable.ProjectId
} else {
projectID = j.c.projectID
}
schema, err := j.c.service.waitForQuery(ctx, projectID, j.jobID)
if err != nil {
return nil, err
}
// The destination table should only be nil if there was a query error.
if j.destinationTable == nil {
return nil, errors.New("bigquery: query job missing destination table")
}
return newRowIterator(ctx, j.c.service, &readTableConf{
projectID: j.destinationTable.ProjectId,
datasetID: j.destinationTable.DatasetId,
tableID: j.destinationTable.TableId,
schema: schema,
}), nil
}
// JobStatistics contains statistics about a job.
type JobStatistics struct {
CreationTime time.Time
StartTime time.Time
EndTime time.Time
TotalBytesProcessed int64
Details Statistics
}
// Statistics is one of ExtractStatistics, LoadStatistics or QueryStatistics.
type Statistics interface {
implementsStatistics()
}
// ExtractStatistics contains statistics about an extract job.
type ExtractStatistics struct {
// The number of files per destination URI or URI pattern specified in the
// extract configuration. These values will be in the same order as the
// URIs specified in the 'destinationUris' field.
DestinationURIFileCounts []int64
}
// LoadStatistics contains statistics about a load job.
type LoadStatistics struct {
// The number of bytes of source data in a load job.
InputFileBytes int64
// The number of source files in a load job.
InputFiles int64
// Size of the loaded data in bytes. Note that while a load job is in the
// running state, this value may change.
OutputBytes int64
// The number of rows imported in a load job. Note that while an import job is
// in the running state, this value may change.
OutputRows int64
}
// QueryStatistics contains statistics about a query job.
type QueryStatistics struct {
// Billing tier for the job.
BillingTier int64
// Whether the query result was fetched from the query cache.
CacheHit bool
// The type of query statement, if valid.
StatementType string
// Total bytes billed for the job.
TotalBytesBilled int64
// Total bytes processed for the job.
TotalBytesProcessed int64
// Describes execution plan for the query.
QueryPlan []*ExplainQueryStage
// The number of rows affected by a DML statement. Present only for DML
// statements INSERT, UPDATE or DELETE.
NumDMLAffectedRows int64
// ReferencedTables: [Output-only, Experimental] Referenced tables for
// the job. Queries that reference more than 50 tables will not have a
// complete list.
ReferencedTables []*Table
// The schema of the results. Present only for successful dry run of
// non-legacy SQL queries.
Schema Schema
// Standard SQL: list of undeclared query parameter names detected during a
// dry run validation.
UndeclaredQueryParameterNames []string
}
// ExplainQueryStage describes one stage of a query.
type ExplainQueryStage struct {
// Relative amount of the total time the average shard spent on CPU-bound tasks.
ComputeRatioAvg float64
// Relative amount of the total time the slowest shard spent on CPU-bound tasks.
ComputeRatioMax float64
// Unique ID for stage within plan.
ID int64
// Human-readable name for stage.
Name string
// Relative amount of the total time the average shard spent reading input.
ReadRatioAvg float64
// Relative amount of the total time the slowest shard spent reading input.
ReadRatioMax float64
// Number of records read into the stage.
RecordsRead int64
// Number of records written by the stage.
RecordsWritten int64
// Current status for the stage.
Status string
// List of operations within the stage in dependency order (approximately
// chronological).
Steps []*ExplainQueryStep
// Relative amount of the total time the average shard spent waiting to be scheduled.
WaitRatioAvg float64
// Relative amount of the total time the slowest shard spent waiting to be scheduled.
WaitRatioMax float64
// Relative amount of the total time the average shard spent on writing output.
WriteRatioAvg float64
// Relative amount of the total time the slowest shard spent on writing output.
WriteRatioMax float64
}
// ExplainQueryStep describes one step of a query stage.
type ExplainQueryStep struct {
// Machine-readable operation type.
Kind string
// Human-readable stage descriptions.
Substeps []string
}
func (*ExtractStatistics) implementsStatistics() {}
func (*LoadStatistics) implementsStatistics() {}
func (*QueryStatistics) implementsStatistics() {}