{
"name": "getHTTPData_filebased_final",
"description": "Data Pipeline Application",
"artifact": {
"name": "cdap-data-pipeline",
"version": "6.5.1",
"scope": "SYSTEM"
},
"config": {
"resources": {
"memoryMB": 2048,
"virtualCores": 1
},
"driverResources": {
"memoryMB": 2048,
"virtualCores": 1
},
"connections": [
{
"from": "HTTP",
"to": "BigQuery"
},
{
"from": "BigQuery",
"to": "FileDelete"
}
],
"comments": [],
"postActions": [],
"properties": {},
"processTimingEnabled": true,
"stageLoggingEnabled": false,
"stages": [
{
"name": "HTTP",
"plugin": {
"name": "HTTP",
"type": "batchsource",
"label": "HTTP",
"artifact": {
"name": "http-plugins",
"version": "1.2.2",
"scope": "USER"
},
"properties": {
"referenceName": "GetF1",
"url": "${start_url}",
"httpMethod": "GET",
"format": "json",
"oauth2Enabled": "false",
"httpErrorsHandling": "2..:Success,.*:Fail",
"errorHandling": "stopOnError",
"retryPolicy": "exponential",
"linearRetryInterval": "30",
"maxRetryDuration": "600",
"connectTimeout": "120",
"readTimeout": "120",
"paginationType": "Custom",
"verifyHttps": "false",
"keystoreType": "Java KeyStore (JKS)",
"keystoreKeyAlgorithm": "SunX509",
"trustStoreType": "Java KeyStore (JKS)",
"trustStoreKeyAlgorithm": "SunX509",
"transportProtocols": "TLSv1.2",
"schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"from\",\"type\":\"string\"},{\"name\":\"to\",\"type\":\"string\"},{\"name\":\"exchange\",\"type\":\"string\"},{\"name\":\"price\",\"type\":\"string\"}]}",
"resultPath": "/tickers",
"fieldsMapping": "from:/from,to:/to,exchange:/exchange,price:/price",
"customPaginationCode": "import json\n\ndef get_next_page_url(url, page, headers):\n\n    with open('/tmp/url_output/part-r-00000', 'r') as url_file:\n        urls = [u.strip() for u in url_file.readlines()]\n    if url in urls:\n        position_index=urls.index(url)\n        if len(urls)==position_index+1:\n            return None\n        else:\n            next_url = urls[position_index+1]\n            return next_url\n    else:\n        next_url = urls[0]\n        return next_url\n"
}
},
"outputSchema": [
{
"name": "etlSchemaBody",
"schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"from\",\"type\":\"string\"},{\"name\":\"to\",\"type\":\"string\"},{\"name\":\"exchange\",\"type\":\"string\"},{\"name\":\"price\",\"type\":\"string\"}]}"
}
],
"id": "HTTP"
},
{
"name": "BigQuery",
"plugin": {
"name": "BigQueryTable",
"type": "batchsink",
"label": "BigQuery",
"artifact": {
"name": "google-cloud",
"version": "0.18.1",
"scope": "SYSTEM"
},
"properties": {
"referenceName": "BQ",
"project": "auto-detect",
"dataset": "demo_data",
"table": "f123_table",
"serviceAccountType": "filePath",
"serviceFilePath": "auto-detect",
"operation": "insert",
"truncateTable": "true",
"allowSchemaRelaxation": "false",
"location": "US",
"createPartitionedTable": "false",
"partitioningType": "NONE",
"schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"from\",\"type\":\"string\"},{\"name\":\"to\",\"type\":\"string\"},{\"name\":\"exchange\",\"type\":\"string\"},{\"name\":\"price\",\"type\":\"string\"}]}"
}
},
"outputSchema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"from\",\"type\":\"string\"},{\"name\":\"to\",\"type\":\"string\"},{\"name\":\"exchange\",\"type\":\"string\"},{\"name\":\"price\",\"type\":\"string\"}]}",
"inputSchema": [
{
"name": "HTTP",
"schema": "{\"type\":\"record\",\"name\":\"etlSchemaBody\",\"fields\":[{\"name\":\"from\",\"type\":\"string\"},{\"name\":\"to\",\"type\":\"string\"},{\"name\":\"exchange\",\"type\":\"string\"},{\"name\":\"price\",\"type\":\"string\"}]}"
}
],
"id": "BigQuery"
},
{
"name": "FileDelete",
"plugin": {
"name": "FileDelete",
"type": "action",
"label": "FileDelete",
"artifact": {
"name": "core-plugins",
"version": "2.7.1",
"scope": "SYSTEM"
},
"properties": {
"path": "/tmp/url_output/part-r-00000",
"continueOnError": "false"
}
},
"outputSchema": [
{
"name": "etlSchemaBody",
"schema": ""
}
],
"id": "FileDelete"
}
],
"schedule": "0 1 */1 * *",
"engine": "spark",
"numOfRecordsPreview": 100,
"description": "Data Pipeline Application",
"maxConcurrentRuns": 1
}
}