In [0]:
import requests
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DateType
from pyspark.sql import Row
from datetime import datetime
from pyspark.sql.functions import lit, current_date

# Target database and table names, plus the DBFS directory that backs the table
db_name, table_name = "site_info", "person_info"
output_path = "/" + "/".join((db_name, table_name))

# Define schema
# Explicit column layout for the collected rows; every column is nullable.
schema = (
    StructType()
    .add("id", IntegerType(), True)
    .add("email", StringType(), True)
    .add("first_name", StringType(), True)
    .add("last_name", StringType(), True)
    .add("avatar", StringType(), True)
    .add("site_address", StringType(), True)
    .add("load_date", DateType(), True)
)

# Fetch paginated data from API
#
# Pages are requested one at a time, starting at page 1, until the API returns
# a page whose "data" entry is empty or missing. Each user record becomes one
# Row with the fields declared in `schema`; `site_address` is the part of the
# email after the last "@".
API_URL = "https://reqres.in/api/users"
REQUEST_TIMEOUT_S = 30  # fail instead of hanging the notebook on a stalled connection

# Compute the load date once so every row of this run carries the same value,
# even if the fetch happens to run across midnight.
load_date = datetime.today().date()

page = 1
rows = []

while True:
    response = requests.get(API_URL, params={"page": page}, timeout=REQUEST_TIMEOUT_S)
    # Surface HTTP errors (4xx/5xx) here: an error payload has no "data" key,
    # so without this check it would be mistaken for "no more pages" and the
    # load would silently end with partial or empty data.
    response.raise_for_status()
    data = response.json()

    # Stop if no more data
    if not data.get("data"):
        break

    # Process each user row
    for item in data["data"]:
        site_address = item["email"].split("@")[-1]
        rows.append(Row(
            id=item["id"],
            email=item["email"],
            first_name=item["first_name"],
            last_name=item["last_name"],
            avatar=item["avatar"],
            site_address=site_address,
            load_date=load_date
        ))

    page += 1

# Create DataFrame from collected rows
#
# Refuse to continue with an empty result: the write below uses
# mode("overwrite"), so an empty fetch would replace whatever is currently
# stored at output_path with zero rows.
if not rows:
    raise ValueError(
        "No rows were fetched from the API; refusing to overwrite the Delta table with an empty dataset"
    )

df = spark.createDataFrame(rows, schema=schema)

# Save to DBFS in Delta format, overwriting any data previously written to output_path
df.write.format("delta") \
    .mode("overwrite") \
    .save(output_path)

# Register table in metastore
# Both statements use IF NOT EXISTS; the table is declared on top of the
# Delta files at output_path rather than copying them.
create_database_sql = f"CREATE DATABASE IF NOT EXISTS {db_name}"
create_table_sql = f"""
    CREATE TABLE IF NOT EXISTS {db_name}.{table_name}
    USING DELTA
    LOCATION '{output_path}'
"""

spark.sql(create_database_sql)
spark.sql(create_table_sql)

# Report where the data ended up
print(f" Data written to `{db_name}.{table_name}` at {output_path}")

 Data written to `site_info.person_info` at /site_info/person_info


In [0]:
%sql
-- Show the contents of the table registered by the previous cell
SELECT *
FROM site_info.person_info

id,email,first_name,last_name,avatar,site_address,load_date
7,michael.lawson@reqres.in,Michael,Lawson,https://reqres.in/img/faces/7-image.jpg,reqres.in,2025-04-19
10,byron.fields@reqres.in,Byron,Fields,https://reqres.in/img/faces/10-image.jpg,reqres.in,2025-04-19
11,george.edwards@reqres.in,George,Edwards,https://reqres.in/img/faces/11-image.jpg,reqres.in,2025-04-19
12,rachel.howell@reqres.in,Rachel,Howell,https://reqres.in/img/faces/12-image.jpg,reqres.in,2025-04-19
8,lindsay.ferguson@reqres.in,Lindsay,Ferguson,https://reqres.in/img/faces/8-image.jpg,reqres.in,2025-04-19
9,tobias.funke@reqres.in,Tobias,Funke,https://reqres.in/img/faces/9-image.jpg,reqres.in,2025-04-19
1,george.bluth@reqres.in,George,Bluth,https://reqres.in/img/faces/1-image.jpg,reqres.in,2025-04-19
5,charles.morris@reqres.in,Charles,Morris,https://reqres.in/img/faces/5-image.jpg,reqres.in,2025-04-19
6,tracey.ramos@reqres.in,Tracey,Ramos,https://reqres.in/img/faces/6-image.jpg,reqres.in,2025-04-19
2,janet.weaver@reqres.in,Janet,Weaver,https://reqres.in/img/faces/2-image.jpg,reqres.in,2025-04-19
