Reading in credentials 

In [0]:
from pyspark.sql import functions as F
from pyspark.sql.types import *
from pyspark.sql import SparkSession
import urllib

In [0]:
# Define the path to the Delta table
delta_table_path = "dbfs:/user/hive/warehouse/authentication_credentials"

# Read the Delta table to a Spark DataFrame
aws_keys_df = spark.read.format("delta").load(delta_table_path)

In [0]:
# Get the AWS access key and secret key from the spark dataframe
ACCESS_KEY = aws_keys_df.select('Access key ID').collect()[0]['Access key ID']
SECRET_KEY = aws_keys_df.select('Secret access key').collect()[0]['Secret access key']
# Encode the secrete key
ENCODED_SECRET_KEY = urllib.parse.quote(string=SECRET_KEY, safe="")

In [0]:
%sql
-- Disable format checks during the reading of Delta tables
SET spark.databricks.delta.formatCheck.enabled=false

key,value
spark.databricks.delta.formatCheck.enabled,False


Getting the pin stream from kinesis, cleaning it, then writing it to a Delta Table

In [0]:
df_pin_raw = spark \
.readStream \
.format('kinesis') \
.option('streamName','streaming-0ebb0073c95b-pin') \
.option('initialPosition','earliest') \
.option('region','us-east-1') \
.option('awsAccessKey', ACCESS_KEY) \
.option('awsSecretKey', SECRET_KEY) \
.load()

(display(df_pin))

partitionKey,data,stream,shardId,sequenceNumber,approximateArrivalTimestamp
471,eyJpbmRleCI6MTQ5LCJ1bmlxdWVfaWQiOiIzNTE5NDQzMi0zNWY5LTQ2ZmItYTU1Ny0yN2MxNjBjNjIwODgiLCJ0aXRsZSI6IkZhbGwgQXJ0IFByb2plY3RzLUZhbGwgQ3JhZnRzIGZyb20gTmF0dXJlIC0gVGhlIEtpdGNoZW4= (truncated),streaming-0ebb0073c95b-pin,shardId-000000000002,49657351103706729435675262446134520444477546845934977058,2024-11-06T08:19:47.019+0000
471,eyJpbmRleCI6MTQ5LCJ1bmlxdWVfaWQiOiIzNTE5NDQzMi0zNWY5LTQ2ZmItYTU1Ny0yN2MxNjBjNjIwODgiLCJ0aXRsZSI6IkZhbGwgQXJ0IFByb2plY3RzLUZhbGwgQ3JhZnRzIGZyb20gTmF0dXJlIC0gVGhlIEtpdGNoZW4= (truncated),streaming-0ebb0073c95b-pin,shardId-000000000002,49657351103706729435675262686055520753557630540440404002,2024-11-06T08:21:29.284+0000
471,eyJpbmRleCI6MTQ5LCJ1bmlxdWVfaWQiOiIzNTE5NDQzMi0zNWY5LTQ2ZmItYTU1Ny0yN2MxNjBjNjIwODgiLCJ0aXRsZSI6IkZhbGwgQXJ0IFByb2plY3RzLUZhbGwgQ3JhZnRzIGZyb20gTmF0dXJlIC0gVGhlIEtpdGNoZW4= (truncated),streaming-0ebb0073c95b-pin,shardId-000000000002,49657351103706729435675262800612122494420279598799388706,2024-11-06T08:22:13.430+0000
471,eyJpbmRleCI6MTQ5LCJ1bmlxdWVfaWQiOiIzNTE5NDQzMi0zNWY5LTQ2ZmItYTU1Ny0yN2MxNjBjNjIwODgiLCJ0aXRsZSI6IkZhbGwgQXJ0IFByb2plY3RzLUZhbGwgQ3JhZnRzIGZyb20gTmF0dXJlIC0gVGhlIEtpdGNoZW4= (truncated),streaming-0ebb0073c95b-pin,shardId-000000000002,49657351103706729435675262895437845933352565690620444706,2024-11-06T08:22:55.510+0000
471,eyJpbmRleCI6MTQ5LCJ1bmlxdWVfaWQiOiIzNTE5NDQzMi0zNWY5LTQ2ZmItYTU1Ny0yN2MxNjBjNjIwODgiLCJ0aXRsZSI6IkZhbGwgQXJ0IFByb2plY3RzLUZhbGwgQ3JhZnRzIGZyb20gTmF0dXJlIC0gVGhlIEtpdGNoZW4= (truncated),streaming-0ebb0073c95b-pin,shardId-000000000002,49657351103706729435675263615985439717522711314679463970,2024-11-06T08:28:00.651+0000
471,eyJpbmRleCI6MTQ5LCJ1bmlxdWVfaWQiOiIzNTE5NDQzMi0zNWY5LTQ2ZmItYTU1Ny0yN2MxNjBjNjIwODgiLCJ0aXRsZSI6IkZhbGwgQXJ0IFByb2plY3RzLUZhbGwgQ3JhZnRzIGZyb20gTmF0dXJlIC0gVGhlIEtpdGNoZW4= (truncated),streaming-0ebb0073c95b-pin,shardId-000000000002,49657351103706729435675263636133397427220121415210500130,2024-11-06T08:28:06.015+0000
471,eyJpbmRleCI6MTQ5LCJ1bmlxdWVfaWQiOiIzNTE5NDQzMi0zNWY5LTQ2ZmItYTU1Ny0yN2MxNjBjNjIwODgiLCJ0aXRsZSI6IkZhbGwgQXJ0IFByb2plY3RzLUZhbGwgQ3JhZnRzIGZyb20gTmF0dXJlIC0gVGhlIEtpdGNoZW4= (truncated),streaming-0ebb0073c95b-pin,shardId-000000000002,49657351103706729435675263652102098578509758527501762594,2024-11-06T08:28:11.043+0000
471,eyJpbmRleCI6MTQ5LCJ1bmlxdWVfaWQiOiIzNTE5NDQzMi0zNWY5LTQ2ZmItYTU1Ny0yN2MxNjBjNjIwODgiLCJ0aXRsZSI6IkZhbGwgQXJ0IFByb2plY3RzLUZhbGwgQ3JhZnRzIGZyb20gTmF0dXJlIC0gVGhlIEtpdGNoZW4= (truncated),streaming-0ebb0073c95b-pin,shardId-000000000002,49657351103706729435675264070086991784448193147615313954,2024-11-06T08:31:06.674+0000
471,eyJpbmRleCI6MTQ5LCJ1bmlxdWVfaWQiOiIzNTE5NDQzMi0zNWY5LTQ2ZmItYTU1Ny0yN2MxNjBjNjIwODgiLCJ0aXRsZSI6IkZhbGwgQXJ0IFByb2plY3RzLUZhbGwgQ3JhZnRzIGZyb20gTmF0dXJlIC0gVGhlIEtpdGNoZW4= (truncated),streaming-0ebb0073c95b-pin,shardId-000000000002,49657351103706729435675264208583951531139344646033899554,2024-11-06T08:32:05.480+0000
471,eyJpbmRleCI6MTQ5LCJ1bmlxdWVfaWQiOiIzNTE5NDQzMi0zNWY5LTQ2ZmItYTU1Ny0yN2MxNjBjNjIwODgiLCJ0aXRsZSI6IkZhbGwgQXJ0IFByb2plY3RzLUZhbGwgQ3JhZnRzIGZyb20gTmF0dXJlIC0gVGhlIEtpdGNoZW4= (truncated),streaming-0ebb0073c95b-pin,shardId-000000000002,49657351103706729435675264299630572857956299594126065698,2024-11-06T08:32:41.184+0000


In [0]:
df_pin_string = df_pin_raw.selectExpr("CAST(data as STRING)")
display(df_pin_string)

data
"{""index"":149,""unique_id"":""35194432-35f9-46fb-a557-27c160c62088"",""title"":""Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom"",""description"":""These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!"",""poster_name"":""The Kitchen Table Classroom"",""follower_count"":""221k"",""tag_list"":""Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting"",""is_image_or_video"":""image"",""image_src"":""https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png"",""downloaded"":1,""save_location"":""Local save in /data/art"",""category"":""art""}"
"{""index"":149,""unique_id"":""35194432-35f9-46fb-a557-27c160c62088"",""title"":""Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom"",""description"":""These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!"",""poster_name"":""The Kitchen Table Classroom"",""follower_count"":""221k"",""tag_list"":""Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting"",""is_image_or_video"":""image"",""image_src"":""https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png"",""downloaded"":1,""save_location"":""Local save in /data/art"",""category"":""art""}"
"{""index"":149,""unique_id"":""35194432-35f9-46fb-a557-27c160c62088"",""title"":""Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom"",""description"":""These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!"",""poster_name"":""The Kitchen Table Classroom"",""follower_count"":""221k"",""tag_list"":""Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting"",""is_image_or_video"":""image"",""image_src"":""https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png"",""downloaded"":1,""save_location"":""Local save in /data/art"",""category"":""art""}"
"{""index"":149,""unique_id"":""35194432-35f9-46fb-a557-27c160c62088"",""title"":""Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom"",""description"":""These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!"",""poster_name"":""The Kitchen Table Classroom"",""follower_count"":""221k"",""tag_list"":""Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting"",""is_image_or_video"":""image"",""image_src"":""https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png"",""downloaded"":1,""save_location"":""Local save in /data/art"",""category"":""art""}"
"{""index"":149,""unique_id"":""35194432-35f9-46fb-a557-27c160c62088"",""title"":""Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom"",""description"":""These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!"",""poster_name"":""The Kitchen Table Classroom"",""follower_count"":""221k"",""tag_list"":""Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting"",""is_image_or_video"":""image"",""image_src"":""https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png"",""downloaded"":1,""save_location"":""Local save in /data/art"",""category"":""art""}"
"{""index"":149,""unique_id"":""35194432-35f9-46fb-a557-27c160c62088"",""title"":""Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom"",""description"":""These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!"",""poster_name"":""The Kitchen Table Classroom"",""follower_count"":""221k"",""tag_list"":""Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting"",""is_image_or_video"":""image"",""image_src"":""https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png"",""downloaded"":1,""save_location"":""Local save in /data/art"",""category"":""art""}"
"{""index"":149,""unique_id"":""35194432-35f9-46fb-a557-27c160c62088"",""title"":""Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom"",""description"":""These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!"",""poster_name"":""The Kitchen Table Classroom"",""follower_count"":""221k"",""tag_list"":""Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting"",""is_image_or_video"":""image"",""image_src"":""https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png"",""downloaded"":1,""save_location"":""Local save in /data/art"",""category"":""art""}"
"{""index"":149,""unique_id"":""35194432-35f9-46fb-a557-27c160c62088"",""title"":""Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom"",""description"":""These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!"",""poster_name"":""The Kitchen Table Classroom"",""follower_count"":""221k"",""tag_list"":""Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting"",""is_image_or_video"":""image"",""image_src"":""https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png"",""downloaded"":1,""save_location"":""Local save in /data/art"",""category"":""art""}"
"{""index"":149,""unique_id"":""35194432-35f9-46fb-a557-27c160c62088"",""title"":""Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom"",""description"":""These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!"",""poster_name"":""The Kitchen Table Classroom"",""follower_count"":""221k"",""tag_list"":""Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting"",""is_image_or_video"":""image"",""image_src"":""https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png"",""downloaded"":1,""save_location"":""Local save in /data/art"",""category"":""art""}"
"{""index"":149,""unique_id"":""35194432-35f9-46fb-a557-27c160c62088"",""title"":""Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom"",""description"":""These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!"",""poster_name"":""The Kitchen Table Classroom"",""follower_count"":""221k"",""tag_list"":""Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting"",""is_image_or_video"":""image"",""image_src"":""https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png"",""downloaded"":1,""save_location"":""Local save in /data/art"",""category"":""art""}"


In [0]:
# Define a custom schema
custom_schema = StructType([
    StructField("index", LongType(), True),
    StructField("unique_id", StringType(), True),
    StructField("title", StringType(), True),
    StructField("description", StringType(), True),
    StructField("follower_count", StringType(), True),
    StructField("tag_list", StringType(), True),
    StructField("is_image_or_video", StringType(), True),
    StructField("image_src", StringType(), True),
    StructField("downloaded", LongType(), True),
    StructField("save_location", StringType(), True),
    StructField("category", StringType(), True),
    StructField("poster_name", StringType(), True),
    # Add more fields as needed
])

# Parse the JSON data in the `data` column using the custom schema
df_pin_parsed = df_pin_string.withColumn("data_parsed", F.from_json(F.col("data"), custom_schema))

# Expand the parsed JSON fields into individual columns
df_pin = df_pin_parsed.select("data_parsed.*")  # Expands all JSON fields as columns

# Display the resulting DataFrame
display(df_pin)





index,unique_id,title,description,follower_count,tag_list,is_image_or_video,image_src,downloaded,save_location,category,poster_name
149,35194432-35f9-46fb-a557-27c160c62088,Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom,These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!,221k,"Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting",image,https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png,1,Local save in /data/art,art,The Kitchen Table Classroom
149,35194432-35f9-46fb-a557-27c160c62088,Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom,These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!,221k,"Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting",image,https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png,1,Local save in /data/art,art,The Kitchen Table Classroom
149,35194432-35f9-46fb-a557-27c160c62088,Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom,These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!,221k,"Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting",image,https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png,1,Local save in /data/art,art,The Kitchen Table Classroom
149,35194432-35f9-46fb-a557-27c160c62088,Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom,These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!,221k,"Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting",image,https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png,1,Local save in /data/art,art,The Kitchen Table Classroom
149,35194432-35f9-46fb-a557-27c160c62088,Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom,These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!,221k,"Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting",image,https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png,1,Local save in /data/art,art,The Kitchen Table Classroom
149,35194432-35f9-46fb-a557-27c160c62088,Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom,These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!,221k,"Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting",image,https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png,1,Local save in /data/art,art,The Kitchen Table Classroom
149,35194432-35f9-46fb-a557-27c160c62088,Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom,These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!,221k,"Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting",image,https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png,1,Local save in /data/art,art,The Kitchen Table Classroom
149,35194432-35f9-46fb-a557-27c160c62088,Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom,These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!,221k,"Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting",image,https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png,1,Local save in /data/art,art,The Kitchen Table Classroom
149,35194432-35f9-46fb-a557-27c160c62088,Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom,These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!,221k,"Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting",image,https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png,1,Local save in /data/art,art,The Kitchen Table Classroom
149,35194432-35f9-46fb-a557-27c160c62088,Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom,These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!,221k,"Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting",image,https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png,1,Local save in /data/art,art,The Kitchen Table Classroom


In [0]:
#Cleaning the pin data

# Replacing empty and null values
df_pin = df_pin.fillna("None")    
df_pin = df_pin.replace("", "None") 

# Removing the text from follower count and turning it into an int
# Use regexp_replace to remove letters
df_pin = df_pin.withColumn("follower_count", regexp_replace("follower_count", "[^0-9]", ""))

# Cast the column to integer type
df_pin = df_pin.withColumn("follower_count", col("follower_count").cast("int"))

# Clean the data in the save_location column to include only the save location path
df_pin = df_pin.withColumn("save_location", regexp_replace("save_location", "Local save in ", ""))   

#rename index column to ind
df_pin = df_pin.withColumnRenamed("index", "ind")

#reorder columns 
df_pin = df_pin.select("ind", "unique_id", "title", "description", "follower_count", "poster_name", "tag_list", "is_image_or_video", "image_src", "save_location", "category")

display(df_pin)

ind,unique_id,title,description,follower_count,poster_name,tag_list,is_image_or_video,image_src,save_location,category
149,35194432-35f9-46fb-a557-27c160c62088,Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom,These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!,221,The Kitchen Table Classroom,"Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting",image,https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png,/data/art,art
149,35194432-35f9-46fb-a557-27c160c62088,Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom,These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!,221,The Kitchen Table Classroom,"Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting",image,https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png,/data/art,art
149,35194432-35f9-46fb-a557-27c160c62088,Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom,These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!,221,The Kitchen Table Classroom,"Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting",image,https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png,/data/art,art
149,35194432-35f9-46fb-a557-27c160c62088,Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom,These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!,221,The Kitchen Table Classroom,"Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting",image,https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png,/data/art,art
149,35194432-35f9-46fb-a557-27c160c62088,Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom,These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!,221,The Kitchen Table Classroom,"Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting",image,https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png,/data/art,art
149,35194432-35f9-46fb-a557-27c160c62088,Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom,These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!,221,The Kitchen Table Classroom,"Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting",image,https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png,/data/art,art
149,35194432-35f9-46fb-a557-27c160c62088,Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom,These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!,221,The Kitchen Table Classroom,"Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting",image,https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png,/data/art,art
149,35194432-35f9-46fb-a557-27c160c62088,Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom,These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!,221,The Kitchen Table Classroom,"Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting",image,https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png,/data/art,art
149,35194432-35f9-46fb-a557-27c160c62088,Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom,These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!,221,The Kitchen Table Classroom,"Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting",image,https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png,/data/art,art
149,35194432-35f9-46fb-a557-27c160c62088,Fall Art Projects-Fall Crafts from Nature - The Kitchen Table Classroom,These fall art projects showcase the best the season has to offer. These simple fall crafts use simeple materials to take advantage of nature's beauty!,221,The Kitchen Table Classroom,"Fall Paper Crafts,Fall Crafts For Kids,Arts And Crafts,Art Crafts,Nature Crafts,Autumn Crafts,Summer Crafts,Easter Crafts,Paper Crafting",image,https://i.pinimg.com/originals/4f/a7/55/4fa75527fe7de2dac148e006f8f401fe.png,/data/art,art


In [0]:
display(df_pin.dtypes)

_1,_2
ind,bigint
unique_id,string
title,string
description,string
follower_count,int
poster_name,string
tag_list,string
is_image_or_video,string
image_src,string
save_location,string


In [0]:
#Writing the data to Delta Table

df_pin.writeStream \
  .format("delta") \
  .outputMode("append") \
  .option("checkpointLocation", "/tmp/kinesis/_checkpoints/") \
  .table("0ebb0073c95b_pin_table")

Getting the geo stream from kinesis, cleaning it, then writing it to a Delta Table

In [0]:
df_geo_raw = spark \
.readStream \
.format('kinesis') \
.option('streamName','streaming-0ebb0073c95b-geo') \
.option('initialPosition','earliest') \
.option('region','us-east-1') \
.option('awsAccessKey', ACCESS_KEY) \
.option('awsSecretKey', SECRET_KEY) \
.load()

display(df_geo)

data
"{""ind"":465,""timestamp"":""2018-10-22T03:15:31"",""latitude"":-17.5166,""longitude"":-70.4578,""country"":""Ghana""}"
"{""ind"":465,""timestamp"":""2018-10-22T03:15:31"",""latitude"":-17.5166,""longitude"":-70.4578,""country"":""Ghana""}"
"{""ind"":465,""timestamp"":""2018-10-22T03:15:31"",""latitude"":-17.5166,""longitude"":-70.4578,""country"":""Ghana""}"
"{""ind"":465,""timestamp"":""2018-10-22T03:15:31"",""latitude"":-17.5166,""longitude"":-70.4578,""country"":""Ghana""}"
"{""ind"":465,""timestamp"":""2018-10-22T03:15:31"",""latitude"":-17.5166,""longitude"":-70.4578,""country"":""Ghana""}"
"{""ind"":465,""timestamp"":""2018-10-22T03:15:31"",""latitude"":-17.5166,""longitude"":-70.4578,""country"":""Ghana""}"
"{""ind"":465,""timestamp"":""2018-10-22T03:15:31"",""latitude"":-17.5166,""longitude"":-70.4578,""country"":""Ghana""}"
"{""ind"":465,""timestamp"":""2018-10-22T03:15:31"",""latitude"":-17.5166,""longitude"":-70.4578,""country"":""Ghana""}"
"{""ind"":465,""timestamp"":""2018-10-22T03:15:31"",""latitude"":-17.5166,""longitude"":-70.4578,""country"":""Ghana""}"
"{""ind"":465,""timestamp"":""2018-10-22T03:15:31"",""latitude"":-17.5166,""longitude"":-70.4578,""country"":""Ghana""}"


In [0]:
#cast binary form to string 
df_geo_string = df_geo_raw.selectExpr("CAST(data as STRING)")

# Define a custom schema
custom_schema = StructType([
    StructField("country", StringType(), True),
    StructField("ind", LongType(), True),
    StructField("latitude", DoubleType(), True), 
    StructField("longitude", DoubleType(), True),
    StructField("timestamp", StringType(), True),
])

# Parse the JSON data in the `data` column using the custom schema
df_geo_parsed = df_geo_string.withColumn("data_parsed", F.from_json(F.col("data"), custom_schema))

# Expand the parsed JSON fields into individual columns
df_geo = df_geo_parsed.select("data_parsed.*")  # Expands all JSON fields as columns

# Display the resulting DataFrame
display(df_geo)



country,ind,latitude,longitude,timestamp
Ghana,465,-17.5166,-70.4578,2018-10-22T03:15:31
Ghana,465,-17.5166,-70.4578,2018-10-22T03:15:31
Ghana,465,-17.5166,-70.4578,2018-10-22T03:15:31
Ghana,465,-17.5166,-70.4578,2018-10-22T03:15:31
Ghana,465,-17.5166,-70.4578,2018-10-22T03:15:31
Ghana,465,-17.5166,-70.4578,2018-10-22T03:15:31
Ghana,465,-17.5166,-70.4578,2018-10-22T03:15:31
Ghana,465,-17.5166,-70.4578,2018-10-22T03:15:31
Ghana,465,-17.5166,-70.4578,2018-10-22T03:15:31
Ghana,465,-17.5166,-70.4578,2018-10-22T03:15:31


In [0]:
#Cleaning the geo dataframe 

# Create a new column 'coordinates' by combining the latitude and longitude columns 
df_geo = df_geo.withColumn("coordinates", array(col("latitude"), col("longitude")))

# Drop the latitude and longitude column as they're no longer needed 
df_geo = df_geo.drop("latitude", "longitude")

# Change 'timestamp' column to datatype timestamp
df_geo = df_geo.withColumn("timestamp", col("timestamp").cast("timestamp"))

# Reorder the columns
df_geo = df_geo.select("ind", "country", "coordinates", "timestamp")

display(df_geo.limit(10))

ind,country,coordinates,timestamp
465,Ghana,"List(-17.5166, -70.4578)",2018-10-22T03:15:31.000+0000
465,Ghana,"List(-17.5166, -70.4578)",2018-10-22T03:15:31.000+0000
465,Ghana,"List(-17.5166, -70.4578)",2018-10-22T03:15:31.000+0000
465,Ghana,"List(-17.5166, -70.4578)",2018-10-22T03:15:31.000+0000
465,Ghana,"List(-17.5166, -70.4578)",2018-10-22T03:15:31.000+0000
465,Ghana,"List(-17.5166, -70.4578)",2018-10-22T03:15:31.000+0000
465,Ghana,"List(-17.5166, -70.4578)",2018-10-22T03:15:31.000+0000
465,Ghana,"List(-17.5166, -70.4578)",2018-10-22T03:15:31.000+0000
465,Ghana,"List(-17.5166, -70.4578)",2018-10-22T03:15:31.000+0000
465,Ghana,"List(-17.5166, -70.4578)",2018-10-22T03:15:31.000+0000


In [0]:
df_geo.writeStream \
  .format("delta") \
  .outputMode("append") \
  .option("checkpointLocation", "/tmp/kinesis/_checkpoints/") \
  .table("0ebb0073c95b_geo_table")

Getting the user data stream from kinesis, cleaning it, then writing it to a Delta Table

In [0]:
df_user_raw = spark \
.readStream \
.format('kinesis') \
.option('streamName','streaming-0ebb0073c95b-user') \
.option('initialPosition','earliest') \
.option('region','us-east-1') \
.option('awsAccessKey', ACCESS_KEY) \
.option('awsSecretKey', SECRET_KEY) \
.load()

display(df_user_raw)

partitionKey,data,stream,shardId,sequenceNumber,approximateArrivalTimestamp
179,eyJpbmQiOjc4OSwiZmlyc3RfbmFtZSI6IkFkYW0iLCJsYXN0X25hbWUiOiJBY29zdGEiLCJhZ2UiOjIwLCJkYXRlX2pvaW5lZCI6IjIwMTUtMTAtMjFUMjE6MjY6NDUifQ==,streaming-0ebb0073c95b-user,shardId-000000000002,49657350016500799516901886668286799735057762559167823906,2024-11-06T08:19:48.929+0000
179,eyJpbmQiOjc4OSwiZmlyc3RfbmFtZSI6IkFkYW0iLCJsYXN0X25hbWUiOiJBY29zdGEiLCJhZ2UiOjIwLCJkYXRlX2pvaW5lZCI6IjIwMTUtMTAtMjFUMjE6MjY6NDUifQ==,streaming-0ebb0073c95b-user,shardId-000000000002,49657350016500799516901886821040621672464238938553319458,2024-11-06T08:21:31.118+0000
179,eyJpbmQiOjc4OSwiZmlyc3RfbmFtZSI6IkFkYW0iLCJsYXN0X25hbWUiOiJBY29zdGEiLCJhZ2UiOjIwLCJkYXRlX2pvaW5lZCI6IjIwMTUtMTAtMjFUMjE6MjY6NDUifQ==,streaming-0ebb0073c95b-user,shardId-000000000002,49657350016500799516901886887533959602908075829399388194,2024-11-06T08:22:15.255+0000
179,eyJpbmQiOjc4OSwiZmlyc3RfbmFtZSI6IkFkYW0iLCJsYXN0X25hbWUiOiJBY29zdGEiLCJhZ2UiOjIwLCJkYXRlX2pvaW5lZCI6IjIwMTUtMTAtMjFUMjE6MjY6NDUifQ==,streaming-0ebb0073c95b-user,shardId-000000000002,49657350016500799516901886952356562050644495132082044962,2024-11-06T08:22:57.319+0000
179,eyJpbmQiOjc4OSwiZmlyc3RfbmFtZSI6IkFkYW0iLCJsYXN0X25hbWUiOiJBY29zdGEiLCJhZ2UiOjIwLCJkYXRlX2pvaW5lZCI6IjIwMTUtMTAtMjFUMjE6MjY6NDUifQ==,streaming-0ebb0073c95b-user,shardId-000000000002,49657350016500799516901887409936193700601273346984771618,2024-11-06T08:28:02.449+0000
179,eyJpbmQiOjc4OSwiZmlyc3RfbmFtZSI6IkFkYW0iLCJsYXN0X25hbWUiOiJBY29zdGEiLCJhZ2UiOjIwLCJkYXRlX2pvaW5lZCI6IjIwMTUtMTAtMjFUMjE6MjY6NDUifQ==,streaming-0ebb0073c95b-user,shardId-000000000002,49657350016500799516901887417293716238775906847843942434,2024-11-06T08:28:07.805+0000
179,eyJpbmQiOjc4OSwiZmlyc3RfbmFtZSI6IkFkYW0iLCJsYXN0X25hbWUiOiJBY29zdGEiLCJhZ2UiOjIwLCJkYXRlX2pvaW5lZCI6IjIwMTUtMTAtMjFUMjE6MjY6NDUifQ==,streaming-0ebb0073c95b-user,shardId-000000000002,49657350016500799516901887424559360414659828600144920610,2024-11-06T08:28:12.809+0000
179,eyJpbmQiOjc4OSwiZmlyc3RfbmFtZSI6IkFkYW0iLCJsYXN0X25hbWUiOiJBY29zdGEiLCJhZ2UiOjIwLCJkYXRlX2pvaW5lZCI6IjIwMTUtMTAtMjFUMjE6MjY6NDUifQ==,streaming-0ebb0073c95b-user,shardId-000000000002,49657350016500799516901887710273261844922455888586866722,2024-11-06T08:31:08.456+0000
179,eyJpbmQiOjc4OSwiZmlyc3RfbmFtZSI6IkFkYW0iLCJsYXN0X25hbWUiOiJBY29zdGEiLCJhZ2UiOjIwLCJkYXRlX2pvaW5lZCI6IjIwMTUtMTAtMjFUMjE6MjY6NDUifQ==,streaming-0ebb0073c95b-user,shardId-000000000002,49657350016500799516901887800201626788595880361904308258,2024-11-06T08:32:07.256+0000
179,eyJpbmQiOjc4OSwiZmlyc3RfbmFtZSI6IkFkYW0iLCJsYXN0X25hbWUiOiJBY29zdGEiLCJhZ2UiOjIwLCJkYXRlX2pvaW5lZCI6IjIwMTUtMTAtMjFUMjE6MjY6NDUifQ==,streaming-0ebb0073c95b-user,shardId-000000000002,49657350016500799516901887855974210550697185112980717602,2024-11-06T08:32:42.921+0000


In [0]:
#cast binary form to string 
df_user_string = df_user_raw.selectExpr("CAST(data as STRING)")

# Define a custom schema
custom_schema = StructType([
    StructField("age", LongType(), True),
    StructField("date_joined", StringType(), True),
    StructField("first_name", StringType(), True), 
    StructField("ind", LongType(), True),
    StructField("last_name", StringType(), True), 
])

# Parse the JSON data in the `data` column using the custom schema
df_user_parsed = df_user_string.withColumn("data_parsed", F.from_json(F.col("data"), custom_schema))

# Expand the parsed JSON fields into individual columns
df_user = df_user_parsed.select("data_parsed.*")  # Expands all JSON fields as columns

# Display the resulting DataFrame
display(df_user)

age,date_joined,first_name,ind,last_name
20,2015-10-21T21:26:45,Adam,789,Acosta
20,2015-10-21T21:26:45,Adam,789,Acosta
20,2015-10-21T21:26:45,Adam,789,Acosta
20,2015-10-21T21:26:45,Adam,789,Acosta
20,2015-10-21T21:26:45,Adam,789,Acosta
20,2015-10-21T21:26:45,Adam,789,Acosta
20,2015-10-21T21:26:45,Adam,789,Acosta
20,2015-10-21T21:26:45,Adam,789,Acosta
20,2015-10-21T21:26:45,Adam,789,Acosta
20,2015-10-21T21:26:45,Adam,789,Acosta


In [0]:
#Cleaning the user data 

#Create a new column user_name that concatenates the information found in the first_name and last_name columns
df_user = df_user.withColumn("user_name", concat(col("first_name"), lit(" "), col("last_name"))) 

# Drop first and last name columns as we don't need them 
df_user = df_user.drop("first_name", "last_name")

# Change column 'date_joined' column to timestamp data format 
df_user = df_user.withColumn("date_joined", col("date_joined").cast("timestamp"))

# Rearrange the order of the columns 
df_user = df_user.select("ind", "user_name", "age", "date_joined")

display(df_user)

ind,user_name,age,date_joined
789,Adam Acosta,20,2015-10-21T21:26:45.000+0000
789,Adam Acosta,20,2015-10-21T21:26:45.000+0000
789,Adam Acosta,20,2015-10-21T21:26:45.000+0000
789,Adam Acosta,20,2015-10-21T21:26:45.000+0000
789,Adam Acosta,20,2015-10-21T21:26:45.000+0000
789,Adam Acosta,20,2015-10-21T21:26:45.000+0000
789,Adam Acosta,20,2015-10-21T21:26:45.000+0000
789,Adam Acosta,20,2015-10-21T21:26:45.000+0000
789,Adam Acosta,20,2015-10-21T21:26:45.000+0000
789,Adam Acosta,20,2015-10-21T21:26:45.000+0000


In [0]:

df_user.writeStream \
  .format("delta") \
  .outputMode("append") \
  .option("checkpointLocation", "/tmp/kinesis/_checkpoints/") \
  .table("0ebb0073c95b_user_table")


In [0]:
#Before running the writeStream function again, you will need to delete the checkpoint folder using the following command:

dbutils.fs.rm("/tmp/kinesis/_checkpoints/", True)