In [4]:
import requests, json
from pyspark.sql import SparkSession

# Obtain the notebook's Spark session via the builder's getOrCreate().
spark = (
    SparkSession.builder
    .appName("PySpark API Test")
    .getOrCreate()
)

def read_api(url: str, timeout: float = 10.0) -> str:
    """Fetch a JSON document from ``url`` and wrap it under a ``"_data"`` key.

    Args:
        url: Endpoint to GET; its response body must be JSON.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get`` so a stalled endpoint cannot hang
            the notebook indefinitely.

    Returns:
        A JSON string of the form ``{"_data": <decoded response>}``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    # Query the `url` argument itself. The earlier version read the
    # notebook-level `api_url` here and silently ignored its parameter.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of wrapping an error payload.
    response.raise_for_status()
    return json.dumps({"_data": response.json()})

In [5]:
 # CoinDesk `bpi/currentprice` endpoint queried by this notebook (responds with JSON).
 api_url = "https://api.coindesk.com/v1/bpi/currentprice.json"

In [3]:
# Keep the raw JSON text returned by read_api: `spark.read.json` expects an
# RDD of JSON *strings*. Parallelizing the parsed dict instead makes PySpark
# stringify it with str(), i.e. Python repr rather than JSON, which stops
# parsing as soon as a value is None/True/False or contains an apostrophe.
payload_json = read_api(api_url)
payload = json.loads(payload_json)  # parsed copy, kept for ad-hoc inspection
payload_rdd = spark.sparkContext.parallelize([payload_json])

# Let Spark infer the nested schema from the single JSON record.
df = spark.read.json(payload_rdd)
df.select("_data").printSchema()

root
 |-- _data: struct (nullable = true)
 |    |-- bpi: struct (nullable = true)
 |    |    |-- EUR: struct (nullable = true)
 |    |    |    |-- code: string (nullable = true)
 |    |    |    |-- description: string (nullable = true)
 |    |    |    |-- rate: string (nullable = true)
 |    |    |    |-- rate_float: double (nullable = true)
 |    |    |    |-- symbol: string (nullable = true)
 |    |    |-- GBP: struct (nullable = true)
 |    |    |    |-- code: string (nullable = true)
 |    |    |    |-- description: string (nullable = true)
 |    |    |    |-- rate: string (nullable = true)
 |    |    |    |-- rate_float: double (nullable = true)
 |    |    |    |-- symbol: string (nullable = true)
 |    |    |-- USD: struct (nullable = true)
 |    |    |    |-- code: string (nullable = true)
 |    |    |    |-- description: string (nullable = true)
 |    |    |    |-- rate: string (nullable = true)
 |    |    |    |-- rate_float: double (nullable = true)
 |    |    |    |-- symbol: string (nullable = true)

In [6]:
 # Flatten one level: promote every field of the `_data` struct to its own column.
 (
     df
     .select("_data.*")
     .show(truncate=False)
 )

+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------+-----------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------+
|bpi                                                                                                                                                                      |chartName|disclaimer                                                                                                                                                 |time                                                                           |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [8]:
# Peel the nesting one struct at a time (_data -> bpi -> USD) so the final
# frame has the USD entry's fields as columns.
(
    df
    .select("_data.*")
    .select("bpi.*")
    .select("USD.*")
    .show(truncate=False)
)

+----+--------------------+----------+----------+------+
|code|description         |rate      |rate_float|symbol|
+----+--------------------+----------+----------+------+
|USD |United States Dollar|98,716.605|98716.6054|&#36; |
+----+--------------------+----------+----------+------+

