# PySpark - Create Data Frame from API

In [1]:
# Create Spark Session

from pyspark.sql import SparkSession

# Build (or reuse) the session. "local[*]" runs Spark in-process with as many
# worker threads as there are logical cores.
spark = (
    SparkSession.builder
    .appName("Read data from API")
    .master("local[*]")
    .getOrCreate()
)

# Bare expression so the notebook renders the session summary.
spark

In [78]:
# Create Python function to read data from API
import requests, json

def read_api(url: str, timeout: float = 30.0) -> str:
    """Fetch JSON from ``url`` and return it wrapped under a ``_data`` key.

    The decoded response is nested under a single top-level key so the
    result is always a JSON object, even when the API answers with a bare
    array.

    Args:
        url: Endpoint to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A JSON string of the form ``{"_data": <decoded response>}``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    # Request the `url` argument, not the notebook-level `api_url` global,
    # so the function works for any endpoint and on a fresh kernel.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error statuses instead of parsing an error page.
    response.raise_for_status()
    # Normalize payload to handle array situations
    return json.dumps({"_data": response.json()})

In [80]:
api_url = r"https://api.coindesk.com/v1/bpi/currentprice.json"
# api_url = "https://api.wazirx.com/sapi/v1/tickers/24hr"

# Read data into Data Frame
# Create payload rdd
# read_api already returns JSON text, so hand it to Spark unchanged.
# spark.read.json expects an RDD of JSON strings; parsing the text into a dict
# first would make Spark read the dict's Python repr (None/True/False, single
# quotes) instead of real JSON.
payload = read_api(api_url)
payload_rdd = spark.sparkContext.parallelize([payload])

# Read from JSON
df = spark.read.json(payload_rdd)
df.select("_data").printSchema()

root
 |-- _data: struct (nullable = true)
 |    |-- bpi: struct (nullable = true)
 |    |    |-- EUR: struct (nullable = true)
 |    |    |    |-- code: string (nullable = true)
 |    |    |    |-- description: string (nullable = true)
 |    |    |    |-- rate: string (nullable = true)
 |    |    |    |-- rate_float: double (nullable = true)
 |    |    |    |-- symbol: string (nullable = true)
 |    |    |-- GBP: struct (nullable = true)
 |    |    |    |-- code: string (nullable = true)
 |    |    |    |-- description: string (nullable = true)
 |    |    |    |-- rate: string (nullable = true)
 |    |    |    |-- rate_float: double (nullable = true)
 |    |    |    |-- symbol: string (nullable = true)
 |    |    |-- USD: struct (nullable = true)
 |    |    |    |-- code: string (nullable = true)
 |    |    |    |-- description: string (nullable = true)
 |    |    |    |-- rate: string (nullable = true)
 |    |    |    |-- rate_float: double (nullable = true)
 |    |    |    |-- symbol

In [81]:
# Flatten the top-level `_data` struct so each of its fields becomes a column
(
    df
    .select("_data.*")
    .show(truncate=False)
)

+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------+-----------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------+
|bpi                                                                                                                                                                        |chartName|disclaimer                                                                                                                                                 |time                                                                           |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------

In [82]:
# Drill down one struct level per select: _data -> bpi -> USD
(
    df
    .select("_data.*")
    .select("bpi.*")
    .select("USD.*")
    .show(truncate=False)
)

+----+--------------------+-----------+----------+------+
|code|description         |rate       |rate_float|symbol|
+----+--------------------+-----------+----------+------+
|USD |United States Dollar|20,239.6509|20239.6509|&#36; |
+----+--------------------+-----------+----------+------+

