# Ex-2060 Filtering on nested struct fields

In [1]:
# Import necessary libraries
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, StructType

# Obtain the Spark session for this notebook (an existing one is reused if present)
session_builder = SparkSession.builder.appName("LEGO_Sets")
spark = session_builder.getOrCreate()

# Schema of the nested "Properties" struct; every field is declared nullable
properties_schema = (
    StructType()
    .add("Description", StringType(), True)
    .add("Min_Age", IntegerType(), True)
    .add("Bricks_Count", IntegerType(), True)
    .add("Models_Buildable", IntegerType(), True)
)

# Top-level schema for a LEGO set row; "Properties" is typed with properties_schema
lego_schema = (
    StructType()
    .add("SetNumber", IntegerType(), True)
    .add("SetName", StringType(), True)
    .add("Properties", properties_schema, True)
)

# Fictional LEGO dataset with 20 sets.
# Each catalog row is written flat as
#   (SetNumber, SetName, Description, Min_Age, Bricks_Count, Models_Buildable)
# and the last four values are folded into the nested "Properties" dict below.
_PROPERTY_FIELDS = ("Description", "Min_Age", "Bricks_Count", "Models_Buildable")

_catalog_rows = [
    (2001, "Dungeon Explorers", "Build a dungeon with hidden traps!", 8, 520, 5),
    (2002, "Castle Defense", "Defend the fortress with knights and archers.", 7, 480, 4),
    (2003, "Pirate Treasure Hunt", "Sail the seas to find hidden treasure!", 6, 450, 3),
    (2004, "Intergalactic Battle", "Space warriors fighting for control of the galaxy.", 9, 550, 6),
    (2005, "Mystic Quest", "A magical world filled with enchanted castles.", 8, 510, 5),
    (2006, "Cyber Arena", "Robotic warriors battle in a high-tech arena.", 10, 600, 7),
    (2007, "Wild West Showdown", "Cowboys vs. Outlaws in the Wild West.", 7, 490, 4),
    (2008, "Underwater Kingdom", "A deep-sea world full of mystery.", 6, 460, 3),
    (2009, "Tactical Warzone", "Modern military tactics in a strategic battlefield.", 10, 620, 8),
    (2010, "Fantasy Realm", "A kingdom ruled by dragons and sorcerers.", 9, 530, 5),
    (2011, "Secret Agent Mission", "Spy gadgets and covert operations.", 8, 500, 4),
    (2012, "Jungle Expedition", "Navigate through dense forests and lost temples.", 7, 470, 3),
    (2013, "Medieval Tournament", "Compete in jousting and sword duels.", 8, 510, 5),
    (2014, "Steampunk City", "A Victorian-era world powered by steam machines.", 9, 540, 6),
    (2015, "Space Colony", "Build futuristic space cities on distant planets.", 10, 600, 7),
    (2016, "Zombie Apocalypse", "Survive the undead invasion!", 9, 550, 5),
    (2017, "Gladiator Arena", "Fight for glory in ancient Rome!", 8, 520, 4),
    (2018, "Arctic Exploration", "Discover icy landscapes and frozen treasures.", 7, 480, 3),
    (2019, "Haunted Mansion", "A spooky house filled with ghosts and secrets.", 7, 490, 4),
    (2020, "Cyberpunk Metropolis", "Neon-lit skyscrapers and underground tech battles.", 10, 620, 8),
]

# Rows in the shape used to build the DataFrame: (SetNumber, SetName, {Properties dict})
data = [
    (set_number, set_name, dict(zip(_PROPERTY_FIELDS, property_values)))
    for set_number, set_name, *property_values in _catalog_rows
]

# Build the DataFrame from the in-memory rows, applying the explicit nested schema
df = spark.createDataFrame(data=data, schema=lego_schema)

# Display the DataFrame without truncating long column values
df.show(truncate=False)

+---------+--------------------+-----------------------------------------------------------------+
|SetNumber|SetName             |Properties                                                       |
+---------+--------------------+-----------------------------------------------------------------+
|2001     |Dungeon Explorers   |{Build a dungeon with hidden traps!, 8, 520, 5}                  |
|2002     |Castle Defense      |{Defend the fortress with knights and archers., 7, 480, 4}       |
|2003     |Pirate Treasure Hunt|{Sail the seas to find hidden treasure!, 6, 450, 3}              |
|2004     |Intergalactic Battle|{Space warriors fighting for control of the galaxy., 9, 550, 6}  |
|2005     |Mystic Quest        |{A magical world filled with enchanted castles., 8, 510, 5}      |
|2006     |Cyber Arena         |{Robotic warriors battle in a high-tech arena., 10, 600, 7}      |
|2007     |Wild West Showdown  |{Cowboys vs. Outlaws in the Wild West., 7, 490, 4}               |
|2008     

In [2]:
# Keep sets whose Properties.Min_Age lies in the inclusive range 8..10
min_age = df["Properties"]["Min_Age"]
df_filtered = df.where(min_age.between(8, 10))
df_filtered.show(truncate=False)

+---------+--------------------+-----------------------------------------------------------------+
|SetNumber|SetName             |Properties                                                       |
+---------+--------------------+-----------------------------------------------------------------+
|2001     |Dungeon Explorers   |{Build a dungeon with hidden traps!, 8, 520, 5}                  |
|2004     |Intergalactic Battle|{Space warriors fighting for control of the galaxy., 9, 550, 6}  |
|2005     |Mystic Quest        |{A magical world filled with enchanted castles., 8, 510, 5}      |
|2006     |Cyber Arena         |{Robotic warriors battle in a high-tech arena., 10, 600, 7}      |
|2009     |Tactical Warzone    |{Modern military tactics in a strategic battlefield., 10, 620, 8}|
|2010     |Fantasy Realm       |{A kingdom ruled by dragons and sorcerers., 9, 530, 5}           |
|2011     |Secret Agent Mission|{Spy gadgets and covert operations., 8, 500, 4}                  |
|2013     

In [3]:
# Keep sets whose Properties.Bricks_Count is strictly greater than 500
bricks_count = df["Properties"]["Bricks_Count"]
df_large_sets = df.where(bricks_count > 500)
df_large_sets.show(truncate=False)

+---------+--------------------+-----------------------------------------------------------------+
|SetNumber|SetName             |Properties                                                       |
+---------+--------------------+-----------------------------------------------------------------+
|2001     |Dungeon Explorers   |{Build a dungeon with hidden traps!, 8, 520, 5}                  |
|2004     |Intergalactic Battle|{Space warriors fighting for control of the galaxy., 9, 550, 6}  |
|2005     |Mystic Quest        |{A magical world filled with enchanted castles., 8, 510, 5}      |
|2006     |Cyber Arena         |{Robotic warriors battle in a high-tech arena., 10, 600, 7}      |
|2009     |Tactical Warzone    |{Modern military tactics in a strategic battlefield., 10, 620, 8}|
|2010     |Fantasy Realm       |{A kingdom ruled by dragons and sorcerers., 9, 530, 5}           |
|2013     |Medieval Tournament |{Compete in jousting and sword duels., 8, 510, 5}                |
|2014     

In [4]:
# Keep sets that have more than 500 bricks AND a Min_Age of at most 8
has_many_bricks = df["Properties"]["Bricks_Count"] > 500
is_for_age_8_or_under = df["Properties"]["Min_Age"] <= 8
df_filtered_sets = df.where(has_many_bricks & is_for_age_8_or_under)
df_filtered_sets.show(truncate=False)

+---------+-------------------+-----------------------------------------------------------+
|SetNumber|SetName            |Properties                                                 |
+---------+-------------------+-----------------------------------------------------------+
|2001     |Dungeon Explorers  |{Build a dungeon with hidden traps!, 8, 520, 5}            |
|2005     |Mystic Quest       |{A magical world filled with enchanted castles., 8, 510, 5}|
|2013     |Medieval Tournament|{Compete in jousting and sword duels., 8, 510, 5}          |
|2017     |Gladiator Arena    |{Fight for glory in ancient Rome!, 8, 520, 4}              |
+---------+-------------------+-----------------------------------------------------------+



In [5]:
# Check for sets with more than 7 buildable models (i.e. 8 or more);
# the threshold matches both the filter below and the variable name.
df_more_than_7_models = df.filter(df.Properties.Models_Buildable > 7)
df_more_than_7_models.show(truncate=False)


+---------+--------------------+-----------------------------------------------------------------+
|SetNumber|SetName             |Properties                                                       |
+---------+--------------------+-----------------------------------------------------------------+
|2009     |Tactical Warzone    |{Modern military tactics in a strategic battlefield., 10, 620, 8}|
|2020     |Cyberpunk Metropolis|{Neon-lit skyscrapers and underground tech battles., 10, 620, 8} |
+---------+--------------------+-----------------------------------------------------------------+

