Permalink
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
212 lines (157 sloc) 9.6 KB

Rest API

Note

  • Experimental API: there is no production use case for it yet.

Create Hive Table pointing to Rest API

The following hive table points to a Rest API

create external table pcatalog.youtube
(payload string )
LOCATION '/tmp/youtube'
TBLPROPERTIES
(
  'gimel.restapi.baseURL' = 'https://www.googleapis.com/youtube'
  ,'gimel.restapi.apiVersion' = 'v3'
  ,'gimel.restapi.accessKey' = 'YOURKEY'
  ,'gimel.restapi.url.pattern' = 'gimel.restapi.pattern.subscriptions'
  ,'gimel.restapi.videoId' = 'F7C0xojv2fE'
  ,'gimel.restapi.channelId' = 'UCXe1qKfGweMKTnmRrMw9yOg'
  ,'gimel.restapi.pattern.subscriptions' = '{gimel.restapi.baseURL}/{gimel.restapi.apiVersion}/subscriptions/?channelId={gimel.restapi.channelId}&part=snippet%2CcontentDetails&key={gimel.restapi.accessKey}'
  ,'gimel.restapi.pattern.channelsById' = '{gimel.restapi.baseURL}/{gimel.restapi.apiVersion}/channels/?id={gimel.restapi.channelId}&part=snippet%2CcontentDetails%2Cstatistics&key={gimel.restapi.accessKey}'
  ,'gimel.restapi.pattern.channelsByUserName' = '{gimel.restapi.baseURL}/{gimel.restapi.apiVersion}/channels?key={gimel.restapi.accessKey}&forUsername={gimel.restapi.userName}&part=id'
  ,'gimel.restapi.pattern.commentsByChannelId' = '{gimel.restapi.baseURL}/{gimel.restapi.apiVersion}/comments/?parentId={gimel.restapi.channelId}&part=snippet&key={gimel.restapi.accessKey}'
  ,'gimel.restapi.pattern.commentThreads' = '{gimel.restapi.baseURL}/{gimel.restapi.apiVersion}/commentThreads/?videoId={gimel.restapi.videoId}&part=snippet%2Creplies&key={gimel.restapi.accessKey}'
)

Catalog Properties

Property Mandatory? Description Example Default
gimel.restapi.parse.payload N if set to true, the resulting dataframe will show the json payload parsed into fields true/false false
gimel.restapi.use.payload N if set to true, only the payload column from the dataframe will be used to write via Post/Put true/false false
gimel.restapi.url N if set, this complete URL is used directly for reads and writes; all other properties are ignored (except the two above) complete URL Empty

Common Imports in all Rest API Usages

import com.paypal.gimel._
import com.paypal.gimel.common.catalog.{DataSetProperties,Field}
val dataset = DataSet(spark);

Rest API Usage

// Setting catalog provider as user

spark.conf.set("gimel.catalog.provider" , "USER");
spark.conf.set("gimel.logging.level" , "CONSOLE");

// Properties, that can go into either Hive TBLPROPERTIES or as a Map programmatically

val baseDetailsYoutube = Map(
  "gimel.restapi.baseURL" -> "https://www.googleapis.com/youtube"
  , "gimel.restapi.apiVersion" -> "v3"
  , "gimel.restapi.accessKey" -> "YOURKEY"
  , "gimel.restapi.url.pattern" -> "gimel.restapi.pattern.subscriptions"
  , "gimel.restapi.videoId" -> "F7C0xojv2fE"
  , "gimel.restapi.channelId" -> "UCXe1qKfGweMKTnmRrMw9yOg"
  , "gimel.restapi.pattern.subscriptions" -> "{gimel.restapi.baseURL}/{gimel.restapi.apiVersion}/subscriptions/?channelId={gimel.restapi.channelId}&part=snippet%2CcontentDetails&key={gimel.restapi.accessKey}"
  , "gimel.restapi.pattern.channelsById" -> "{gimel.restapi.baseURL}/{gimel.restapi.apiVersion}/channels/?id={gimel.restapi.channelId}&part=snippet%2CcontentDetails%2Cstatistics&key={gimel.restapi.accessKey}"
  , "gimel.restapi.pattern.channelsByUserName" -> "{gimel.restapi.baseURL}/{gimel.restapi.apiVersion}/channels?key={gimel.restapi.accessKey}&forUsername={gimel.restapi.userName}&part=id"
  , "gimel.restapi.pattern.commentsByChannelId" -> "{gimel.restapi.baseURL}/{gimel.restapi.apiVersion}/comments/?parentId={gimel.restapi.channelId}&part=snippet&key={gimel.restapi.accessKey}"
  , "gimel.restapi.pattern.commentThreads" -> "{gimel.restapi.baseURL}/{gimel.restapi.apiVersion}/commentThreads/?videoId={gimel.restapi.videoId}&part=snippet%2Creplies&key={gimel.restapi.accessKey}"
)

// Constructing DataSetProperties object programmatically

val dataSetProperties = DataSetProperties("RESTAPI",Array(),Array(),baseDetailsYoutube)

// Setting dataSetProperties

val props = Map("youtube.dataSetProperties" ->dataSetProperties )

// Data API - Read

val urlData = dataset.read("youtube",  props)

// With parsing of the response payload, the resulting DataFrame exposes the JSON payload parsed into fields

spark.conf.set("gimel.restapi.parse.payload" , "true");
val urlData = dataset.read("youtube",  props)
urlData.printSchema
root
 |-- etag: string (nullable = true)
 |-- items: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- contentDetails: struct (nullable = true)
 |    |    |    |-- activityType: string (nullable = true)
 |    |    |    |-- newItemCount: long (nullable = true)
 |    |    |    |-- totalItemCount: long (nullable = true)
 |    |    |-- etag: string (nullable = true)
 |    |    |-- id: string (nullable = true)
 |    |    |-- kind: string (nullable = true)
 |    |    |-- snippet: struct (nullable = true)
 |    |    |    |-- channelId: string (nullable = true)
 |    |    |    |-- description: string (nullable = true)
 |    |    |    |-- publishedAt: string (nullable = true)
 |    |    |    |-- resourceId: struct (nullable = true)
 |    |    |    |    |-- channelId: string (nullable = true)
 |    |    |    |    |-- kind: string (nullable = true)
 |    |    |    |-- thumbnails: struct (nullable = true)
 |    |    |    |    |-- default: struct (nullable = true)
 |    |    |    |    |    |-- url: string (nullable = true)
 |    |    |    |    |-- high: struct (nullable = true)
 |    |    |    |    |    |-- url: string (nullable = true)
 |    |    |    |    |-- medium: struct (nullable = true)
 |    |    |    |    |    |-- url: string (nullable = true)
 |    |    |    |-- title: string (nullable = true)
 |-- kind: string (nullable = true)
 |-- nextPageToken: string (nullable = true)
 |-- pageInfo: struct (nullable = true)
 |    |-- resultsPerPage: long (nullable = true)
 |    |-- totalResults: long (nullable = true)


// Without parsing the response payload, the resulting DataFrame has just one column - "payload"

spark.conf.set("gimel.restapi.parse.payload" , "false");
val urlData = dataset.read("youtube",  props)
urlData.printSchema
root
 |-- payload: string (nullable = true)



// Adding additional runtime props as example to showcase overriding options

spark.conf.set("gimel.restapi.url.pattern","gimel.restapi.pattern.channelsById")
spark.conf.set("gimel.restapi.channelId", "UCXe1qKfGweMKTnmRrMw9yOg")
spark.conf.set("gimel.restapi.parse.payload" , "false");
val urlData = dataset.read("youtube",  props)
urlData.collect.foreach(println)

// Override all properties and just set the complete-URL directly

spark.conf.set("gimel.restapi.url","https://www.googleapis.com/youtube/v3/activities/?maxResults=10&channelId=UC_x5XG1OV2P6uZZ5FSM9Ttw&part=snippet%2CcontentDetails&key=YOURKEY")
val urlData = dataset.read("youtube",  props)
urlData.collect.foreach(println)

Rest API Usage GSQL


* GSQL

```scala
val ddl = """
|create external table pcatalog.youtube
|(payload string )
|LOCATION '/tmp/youtube'
|TBLPROPERTIES
|(
|  'gimel.restapi.baseURL' = 'https://www.googleapis.com/youtube'
|  ,'gimel.restapi.apiVersion' = 'v3'
|  ,'gimel.restapi.accessKey' = 'YOURKEY'
|  ,'gimel.restapi.url.pattern' = 'gimel.restapi.pattern.subscriptions'
|  ,'gimel.restapi.videoId' = 'F7C0xojv2fE'
|  ,'gimel.restapi.channelId' = 'UCXe1qKfGweMKTnmRrMw9yOg'
|  ,'gimel.restapi.pattern.subscriptions' = '{gimel.restapi.baseURL}/{gimel.restapi.apiVersion}/subscriptions/?channelId={gimel.restapi.channelId}&part=snippet%2CcontentDetails&key={gimel.restapi.accessKey}'
|  ,'gimel.restapi.pattern.channelsById' = '{gimel.restapi.baseURL}/{gimel.restapi.apiVersion}/channels/?id={gimel.restapi.channelId}&part=snippet%2CcontentDetails%2Cstatistics&key={gimel.restapi.accessKey}'
|  ,'gimel.restapi.pattern.channelsByUserName' = '{gimel.restapi.baseURL}/{gimel.restapi.apiVersion}/channels?key={gimel.restapi.accessKey}&forUsername={gimel.restapi.userName}&part=id'
|  ,'gimel.restapi.pattern.commentsByChannelId' = '{gimel.restapi.baseURL}/{gimel.restapi.apiVersion}/comments/?parentId={gimel.restapi.channelId}&part=snippet&key={gimel.restapi.accessKey}'
|  ,'gimel.restapi.pattern.commentThreads' = '{gimel.restapi.baseURL}/{gimel.restapi.apiVersion}/commentThreads/?videoId={gimel.restapi.videoId}&part=snippet%2Creplies&key={gimel.restapi.accessKey}'
|)
|""".stripMargin

val gsql = com.paypal.gimel.sql.GimelQueryProcessor.executeBatch(_:String,spark)
gsql: String => org.apache.spark.sql.DataFrame = <function1>

// Create DDL
gsql(ddl)

// Set Catalog Provider Hive
gsql("set gimel.catalog.provider=HIVE")
gsql("select * from pcatalog.youtube")