Assume you're given two tables containing data about Facebook Pages and their respective likes (as in "Like a Facebook Page").

Write a query to return the IDs of the Facebook pages that have zero likes. The output should be sorted in ascending order based on the page IDs.

#pages Table:

Column Name	Type

page_id	integer

page_name	varchar

#pages Example Input:

page_id	page_name

20001	SQL Solutions

20045	Brain Exercises

20701	Tips for Data Analysts

#page_likes Table:

Column Name	Type

user_id	integer

page_id	integer

liked_date	datetime

#page_likes Example Input:

user_id	page_id	liked_date

111	20001	04/08/2022 00:00:00

121	20045	03/12/2022 00:00:00

156	20001	07/25/2022 00:00:00

#Example Output:

page_id

20701

The dataset you are querying against may have different input & output - this is just an example!

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, count

# Start (or reuse) the Spark session used by this notebook
spark = SparkSession.builder.appName("FacebookPages").getOrCreate()

# Column layouts of the two example tables
pages_columns = ["page_id", "page_name"]
page_likes_columns = ["user_id", "page_id", "liked_date"]

# Example rows: (page_id, page_name)
pages_data = [
    (20001, 'SQL Solutions'),
    (20045, 'Brain Exercises'),
    (20701, 'Tips for Data Analysts'),
]

# Example rows: (user_id, page_id, liked_date)
page_likes_data = [
    (111, 20001, '2022-04-08 00:00:00'),
    (121, 20045, '2022-03-12 00:00:00'),
    (156, 20001, '2022-07-25 00:00:00'),
]

# Build the two DataFrames from the sample rows
pages_df = spark.createDataFrame(pages_data, pages_columns)
page_likes_df = spark.createDataFrame(page_likes_data, page_likes_columns)

# Left join keeps every page; a page nobody liked gets NULL like columns
joined_df = pages_df.join(page_likes_df, on="page_id", how="left")

# count("user_id") ignores NULLs, so an unliked page ends up with likes_count = 0
likes_per_page_df = joined_df.groupBy("page_id").agg(
    count("user_id").alias("likes_count")
)
zero_likes_df = likes_per_page_df.where(col("likes_count") == 0)

# Ascending page_id order, as the task statement requires
result_df = zero_likes_df.orderBy("page_id", ascending=True)

# Render the result in the notebook
result_df.display()


page_id,likes_count
20701,0


In [0]:
# Expose both DataFrames to SQL cells under the table names the query uses
for view_name, frame in (("pages", pages_df), ("page_likes", page_likes_df)):
    frame.createOrReplaceTempView(view_name)

In [0]:
%sql
-- Pages that have no row in page_likes, listed in ascending page_id order
SELECT p.page_id
FROM pages AS p
WHERE NOT EXISTS (
    SELECT 1
    FROM page_likes AS pl
    WHERE pl.page_id = p.page_id
)
ORDER BY p.page_id ASC;


page_id
20701
