In [0]:
from pyspark.sql.functions import col, from_json, regexp_replace, when
from pyspark.sql.types import StructType, StructField, StringType, TimestampType

In [0]:
%run "/Workspace/Users/cronies-02gorilla@icloud.com/Pinterest Access"

key,value
spark.databricks.delta.formatCheck.enabled,False


In [0]:
# Configure a streaming reader on the Kinesis stream, starting from the latest
# records. ACCESS_KEY / SECRET_KEY must already be defined in the session
# (presumably by the notebook pulled in via %run -- confirm).
kinesis_reader = (
    spark.readStream
    .format('kinesis')
    .option('streamName', 'streaming-0affea73130b-pin')
    .option('initialPosition', 'latest')
    .option('region', 'us-east-1')
    .option('awsAccessKey', ACCESS_KEY)
    .option('awsSecretKey', SECRET_KEY)
)
stream_df = kinesis_reader.load()

In [0]:
# Schema used by from_json to parse each pin record. Every field is read as a
# nullable string; the numeric columns are cast during the cleaning step.
pin_field_names = [
    "index",
    "unique_id",
    "title",
    "description",
    "poster_name",
    "follower_count",
    "tag_list",
    "is_image_or_video",
    "image_src",
    "downloaded",
    "save_location",
    "category",
]
jsonSchema = StructType(
    [StructField(field_name, StringType(), True) for field_name in pin_field_names]
)


In [0]:
# Keep only the record payload, converted to a string so it can be parsed as JSON.
payload_as_text = "CAST(data as STRING)"
string_df = stream_df.selectExpr(payload_as_text)

In [0]:
# Parse the JSON text into a single struct column. The column is left unaliased
# on purpose: later cells refer to it by its generated name "from_json(data)".
parsed_payload = from_json(col("data"), jsonSchema)
json_df = string_df.select(parsed_payload)

In [0]:
# Promote every field of the parsed struct to its own top-level column, in
# schema order. The struct column itself is kept here and dropped later.
parsed_record = col("from_json(data)")
pin_stream_df = json_df
for field_name in jsonSchema.fieldNames():
    pin_stream_df = pin_stream_df.withColumn(field_name, parsed_record[field_name])

In [0]:
# Clean the flattened pin stream: null out placeholder strings, convert the
# numeric columns, tidy save_location, rename index -> ind, de-duplicate and
# put the columns in their final order.

# Drop the parsed struct column; its fields already exist as top-level columns.
cleaned_df = pin_stream_df.drop("from_json(data)")

# Placeholder strings that stand for "no data", paired with the column they
# appear in. Each one is replaced with null in that column only.
missing_markers = [
    ("description", "No description available Story format"),
    ("follower_count", "User Info Error"),
    ("image_src", "Image src error."),
    ("poster_name", "User Info Error"),
    ("tag_list", "N,o, ,T,a,g,s, ,A,v,a,i,l,a,b,l,e"),
    ("title", "No Title Data Available"),
]
for column_name, marker in missing_markers:
    cleaned_df = cleaned_df.replace({marker: None}, subset=[column_name])

# Convert follower_count ("39", "228k", "5M") to an integer.
# The suffix is turned into a multiplier rather than substituted as text, so a
# fractional value such as "1.5k" becomes 1500 (text substitution would give
# "1.5000", which casts to 1). The numeric part is parsed as a decimal to keep
# the multiplication exact. Null or unparseable values stay null.
follower_text = col("follower_count")
follower_scale = (
    when(follower_text.endswith("k"), 1000)
    .when(follower_text.endswith("M"), 1000000)
    .otherwise(1)
)
follower_number = regexp_replace(follower_text, "[kM]$", "").cast("decimal(20,6)")
cleaned_df = cleaned_df.withColumn(
    "follower_count", (follower_number * follower_scale).cast("int")
)

# Cast the remaining numeric columns to int.
# Note: `downloaded` is cast here but is not part of the final select below.
for column_name in ("downloaded", "index"):
    cleaned_df = cleaned_df.withColumn(column_name, cleaned_df[column_name].cast("int"))

# Keep only the directory path in save_location.
cleaned_df = cleaned_df.withColumn(
    "save_location", regexp_replace("save_location", "Local save in ", "")
)

# Rename index to ind.
cleaned_df = cleaned_df.withColumnRenamed("index", "ind")

# Drop exact duplicate rows.
# NOTE(review): cleaned_df derives from a streaming read; de-duplicating a
# stream without a watermark keeps state for every row seen so far -- confirm
# this is acceptable for the expected run time.
cleaned_df = cleaned_df.dropDuplicates()

# Final column order.
cleaned_df = cleaned_df.select(
    "ind",
    "unique_id",
    "title",
    "description",
    "follower_count",
    "poster_name",
    "tag_list",
    "is_image_or_video",
    "image_src",
    "save_location",
    "category",
)

In [0]:
# Render the cleaned stream in the notebook to check the result of the cleaning step.
display(cleaned_df)

ind,unique_id,title,description,follower_count,poster_name,tag_list,is_image_or_video,image_src,save_location,category
7683,d01db019-c7c4-4888-8ee8-86ae3724438c,"Dear Me, F*ck You - A Raw and Real Open Letter to Myself",When was the last time you wrote yourself a letter and poured out every emotion deep inside of you. Here's my open letter to myself.,228000,Iva A. Ursano Amazing Me Movement,"Motivacional Quotes,Woman Quotes,Great Quotes,Inspirational Quotes,Sassy Quotes,Badass Quotes Women,Quotes For Me,Life Love Quotes,True To Yourself Quotes",image,https://i.pinimg.com/originals/c3/73/3a/c3733a5c87625096142d0bfbb0693a95.jpg,/data/quotes,quotes
809,9af9959d-6b6e-4646-a053-46161409ea84,"Serie: Tarot – Naipe de Ouros – Dinheiro, Materialidade, Realizações","O dinheiro e os bens materiais são reflexos da nossa postura interior e da nossa capacidade de conservar e promover avanços. O perigo dessa energia é o apego, a procrastinação,…",39,Nina Gooden,"Black Love Art,Black Girl Art,Art Girl,Art Beauté,Tarot,Black Art Painting,Black Art Pictures,Goddess Art,Black Goddess",image,https://i.pinimg.com/originals/98/c6/f5/98c6f5b879e9c454b7d972381214c812.jpg,/data/art,art
8477,438c3aa2-b40b-450c-af3e-ec098178d56d,48 Mother Daughter Quotes To Make You Laugh & Cry,"Whether you are searching for inspiration to pull you through or looking to hear some sweet sentiments, you will love these mother daughter quotes!",19000,Rookie Moms,"Citations Top,The Words,Love Quotes For Her,Quotes To Live By,Love Quotes For Kids,Quotes About Little Boys,Family Quotes And Sayings,Mom Sayings,Thoughts",image,https://i.pinimg.com/originals/34/8c/0e/348c0e258a227dbd86ec1e2f53cb3d9a.jpg,/data/quotes,quotes
4724,4b247514-3c7b-4dd5-a58e-29eb4246bbfa,25 Ways To Use Balloons in Your Wedding Décor,"Using balloons in your wedding décor is a fresh and fun way to celebrate. Here, we've rounded up fun ideas you'll love.",371000,BRIDES,"Corporate Event Design,Floating Lanterns,Balloon Arrangements,Love Balloon,Diy Backdrop,Elegant Wedding,Wedding Reception,Party Wedding,Gold Party",image,https://i.pinimg.com/originals/de/dc/38/dedc38e36d4a4ed0282d5a2e854af7ae.png,/data/event-planning,event-planning
1735,4a0a5961-2b56-4e28-a991-8b260a07ecdb,Pinecone Christmas Trees,"Pinecone Christmas Tree Craft - a festive DIY your kids will enjoy! Make adorable Christmas trees with pinecones, paint and colorful pom poms!",85000,Made to be a Momma,"Christmas Tree Crafts,Colorful Christmas Tree,Pinecone Crafts Kids,Pine Cone Christmas Tree,Preschool Christmas Crafts,Christmas Trees For Kids,Christmas Tree Painting,Christmas Activities,Painted Christmas Ornaments",image,https://i.pinimg.com/originals/a3/f4/ad/a3f4ade8016b2e1663e5fa341c17fd18.jpg,/data/christmas,christmas
1919,30b08615-11a5-415e-99c9-4bac9a3596fd,Modern Farmhouse Christmas Decor Inspiration,"Christmas is fast approaching and if you are anything like me, decorating your home for the holidays can be stressful and I love modern farmhouse Christmas decor. I look at Pint…",45000,Hudson Farmhouse,"Elegant Christmas Centerpieces,Decoration Christmas,Farmhouse Christmas Decor,Christmas Kitchen,Noel Christmas,Country Christmas,White Christmas,Christmas Ideas,Christmas Design",image,https://i.pinimg.com/originals/dd/11/69/dd1169d62a5d87b840bd77f722e419e8.jpg,/data/christmas,christmas
2633,894cb59e-b00c-4dad-89a1-9060ba46b287,Cozy Rustic Farmhouse Christmas Living Room,"Rustic Christmas Trees in Old Crocks for that Farmhouse, Country, Primitive Decorating Style.",30000,Rocky Hedge Farm - Simple Life | Simple Home,"Country Christmas Decorations,Christmas Porch,Farmhouse Christmas Decor,Winter Christmas,Outdoor Decorations,Christmas Cactus,Primitive Christmas Tree,Christmas 2019,Christmas Lights",image,https://i.pinimg.com/originals/65/ae/dd/65aeddeff07ce74a863d333e5463bb9e.jpg,/data/christmas,christmas
9333,4ba47c18-6725-44d2-9b02-cbb4f649429b,Top 79 Filler Tattoo Design Ideas - [2021 Inspiration Guide],Discover smaller sized ink inspiration with the top 79 best filler tattoos. Explore cool empty gap design ideas from traditional to contemporary,800000,Next Luxury,"Neotraditionelles Tattoo,Tatto Old,Body Art Tattoos,Small Tattoos,Sleeve Tattoos,Cool Tattoos,Traditional Heart Tattoos,Traditional Tattoo Filler,Traditional Tattoo Design",image,https://i.pinimg.com/originals/bc/71/39/bc71394b672391d792945a6627ad817b.jpg,/data/tattoos,tattoos
2529,01199c1e-3f4a-4958-9abb-ebc52ea2a5d3,55+ Christmas Gifts for Mom | What to get mom for Christmas | Giftideascorner,Holiday Activities List for the Month of December Leading up to Christmas,8000,Gift Ideas Corner,"25 Days Of Christmas,Noel Christmas,Christmas Vacation,Christmas Countdown,Winter Christmas,Christmas To Do List,Christmas Island,Christmas Ideas For Mom,Christmas Presents",image,https://i.pinimg.com/originals/ee/51/fa/ee51fa8237e93594e66af111f56304e5.jpg,/data/christmas,christmas
8552,0c996198-f6c7-478e-9bd9-491c5fa4362f,35 Amazingly Pretty Flower Tattoos That Are Perfect For The Spring & Summer,Want a new tattoo for the Spring or Summer? Look to these 35 best tattoo ideas to inspire you to get your own version of a pretty flower tattoo or nature tattoo. From watercolor…,942000,YourTango,"Dainty Tattoos,Unique Tattoos,Small Tattoos,Cool Tattoos,Floral Tattoos,Tatoos,Cute Hand Tattoos,Inspiring Tattoos,Elegant Tattoos",image,https://i.pinimg.com/originals/8b/d1/5d/8bd15d66d96308e157ea359ed7c135e5.jpg,/data/tattoos,tattoos


In [0]:
# Alias the cleaned stream under the name used by the write step; no copy or
# transformation happens here.
output_post_df = cleaned_df

In [0]:
# Start a streaming query that appends the cleaned pins to a Delta table,
# tracking progress in the checkpoint folder below.
checkpoint_path = "/tmp/kinesis/_checkpoints/0affea73130b_post_query"
pin_writer = (
    output_post_df.writeStream
    .format("delta")
    .queryName("0affea73130b_post_query")
    .outputMode("append")
    .option("checkpointLocation", checkpoint_path)
)
pin_output = pin_writer.table("0affea73130b_pin_table")

In [0]:
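# Uncomment and run to delete the checkpoint folder (e.g. before restarting the write query from scratch):
# dbutils.fs.rm("/tmp/kinesis/_checkpoints/0affea73130b_post_query", True)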