# Sets

## Creating Sets
Sets are unordered collections of unique elements.

In [1]:
# Set elements must be hashable (numbers and strings are fine)
my_set = {1, 2, 3, "s1", "s2"}
print(f"my_set: {my_set}")

# An empty set has to be built with the set() constructor
empty_set = set()
print(f"empty_set: {empty_set}")

# Empty curly braces would create a dictionary, not a set
# empty_dict = {}

my_set: {1, 2, 3, 's1', 's2'}
empty_set: set()


## Set Elements are Unique

In [2]:
# Duplicates in the literal collapse: each value is stored only once
my_set = {1, 1, 2, 2, 3, 3}
print(f"my_set: {my_set}")

# Any iterable can seed a set, for example a tuple
my_tuple = ("a", "b", "c")
set_from_tuple = set(my_tuple)
print(f"set_from_tuple: {set_from_tuple}")

# Handy for de-duplicating a list of values
visitor_id_list = [
    "user123", "user456", "user123",
    "user789", "user456", "user101",
]
unique_visitors_set = set(visitor_id_list)
print(f"unique_visitors_set: {unique_visitors_set}")

my_set: {1, 2, 3}
set_from_tuple: {'a', 'c', 'b'}
unique_visitors_set: {'user123', 'user456', 'user789', 'user101'}


## Sets and Strings

In [3]:
# A string is an iterable of characters, so set() keeps its unique letters
string_set = set("Hello")
print(f"string_set: {string_set}")

# Use a set literal to store the whole string as a single element
string_set = {"Hello"}
print(f"string_set: {string_set}")

string_set: {'H', 'l', 'o', 'e'}
string_set: {'Hello'}


## Aggregate Functions

In [4]:
numbers_set = {3, 7, 2, 9, 5}

# The built-in aggregate functions accept a set like any other iterable
print(f"Number of elements: {len(numbers_set)}")
print(f"Sum of elements: {sum(numbers_set)}")
print(f"Smallest element: {min(numbers_set)}")
print(f"Largest element: {max(numbers_set)}")
print(f"Any truthy values? {any(numbers_set)}")
print(f"All truthy values? {all(numbers_set)}")

Number of elements: 5
Sum of elements: 26
Smallest element: 2
Largest element: 9
Any truthy values? True
All truthy values? True


## Adding New Elements

In [5]:
# A set of fruits
fruits = {"apple", "banana"}

# add() inserts a single element
fruits.add("orange")
print(fruits)

# Elements must be hashable, so adding a list raises TypeError
try:
    fruits.add(["pear", "grape"])  # Lists are mutable and not hashable
except TypeError as err:
    print("Error adding list:", err)

{'orange', 'apple', 'banana'}
Error adding list: unhashable type: 'list'


## Removing Elements with `remove()`

In [6]:
planets = {"earth", "mars", "venus"}

# remove() deletes an element that is present
planets.remove("mars")
print(planets)

# Asking remove() for a missing element raises KeyError
try:
    planets.remove("jupiter")
except KeyError as err:
    print("Error removing planet:", err)

{'venus', 'earth'}
Error removing planet: 'jupiter'


## Removing Elements with `discard()`

In [7]:
tools = {"hammer", "wrench", "screwdriver"}

# discard() removes the element when it is present
tools.discard("wrench")
print(tools)

tools.discard("drill")  # No error even though 'drill' is not in the set
print(tools)

{'hammer', 'screwdriver'}
{'hammer', 'screwdriver'}


## Removing an Arbitrary Element with `pop()`

In [8]:
colors = {"red", "blue", "green"}

# pop() removes an arbitrary element and returns it
removed_color = colors.pop()
print("Removed:", removed_color)
print("Remaining colors:", colors)

# Calling pop() on an empty set raises KeyError
# empty_set = set()
# empty_set.pop()  # Would raise KeyError

Removed: blue
Remaining colors: {'green', 'red'}


## Clearing the Set with `clear()`

In [9]:
gadgets = {"phone", "tablet", "laptop"}

# clear() removes every element in place, leaving an empty set
gadgets.clear()
print(gadgets)

set()


## Adding Multiple Elements with `update()`

In [10]:
# A set of vehicles
vehicles = {"car", "bike"}
print(vehicles)

# update() adds every element of the given iterable; here, a list
vehicles.update(["truck", "scooter"])
print(vehicles)

# The argument can also be another set
vehicles.update({"boat", "plane"})
print(vehicles)

{'bike', 'car'}
{'truck', 'bike', 'scooter', 'car'}
{'bike', 'plane', 'car', 'boat', 'truck', 'scooter'}


## Iterating Over Set Elements

In [11]:
# Set of game levels
levels = {"forest", "desert", "ocean"}

# Iteration order is arbitrary, so do not rely on it
for level in levels:
    print("Loading level:", level)

Loading level: desert
Loading level: forest
Loading level: ocean


## Sets have Fast Membership Checking

Membership checking in sets is fast because sets use a hash table to quickly find elements without searching through all of them.

In [12]:
import time

# Create a big list and a big set holding the same elements
big_list = list(range(1_000_000))
big_set = set(big_list)

# Element that doesn't exist in either container
missing_element = -1

# Time membership check in list.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short durations; time.time() is wall-clock time and can be too coarse for
# an operation that takes only microseconds.
start = time.perf_counter()
missing_element in big_list
end = time.perf_counter()
print(f"List membership took {end - start:.6f} seconds")

# Time membership check in set
start = time.perf_counter()
missing_element in big_set
end = time.perf_counter()
print(f"Set membership took  {end - start:.6f} seconds")

List membership took 0.006959 seconds
Set membership took  0.000033 seconds


## Check if All Elements Are Contained in Another Set with `issubset()`

In [13]:
# Everything currently in the kitchen
ingredients_at_home = {"flour", "sugar", "eggs", "milk"}

# What a batch of pancakes requires
pancake_ingredients = {"flour", "milk"}

print("Are all the pancake ingredients available at my home?")
have_all_ingredients = pancake_ingredients.issubset(ingredients_at_home)
print(have_all_ingredients)

# Operator alternative:
# print(pancake_ingredients <= ingredients_at_home)

Are all the pancake ingredients available at my home?
True


## Check if a Set Contains All Elements of Another Set With `issuperset()`

In [14]:
# Tools I own
my_tools = {"hammer", "wrench", "screwdriver", "pliers"}

# Tools the chair project calls for
chair_tools = {"hammer", "screwdriver"}

print("Do my tools cover everything needed to build the chair?")
tools_are_sufficient = my_tools.issuperset(chair_tools)
print(tools_are_sufficient)

# Operator alternative:
# print(my_tools >= chair_tools)

Do my tools cover everything needed to build the chair?
True


## Proper and Improper Forms of Subsets and Supersets

A set is a proper subset of another set if all its elements are in the other set and the two sets are not equal.<br/>
A set is a proper superset of another set if it contains all elements of the other set and the two sets are not equal.

In [15]:
A = {1, 2, 3}
B = {1, 2}
C = {1, 2, 3}

# Proper subset: every element of B is in A, and B is not equal to A
print(f"Is B a proper subset of A? {B < A}")

# Improper superset: C equals A, so >= holds
print(f"Is C a superset (proper or improper) of A? {C >= A}")

Is B a proper subset of A? True
Is C a superset (proper or improper) of A? True


## Check if Two Sets Have No Elements in Common With `isdisjoint()`

In [16]:
# Set of known allergens
allergens = {"peanuts", "gluten", "soy", "dairy"}

# Ingredients in a chocolate bar
chocolate_bar_ingredients = {"cocoa", "sugar", "dairy", "vanilla"}
# Ingredients in a fruit salad
fruit_salad_ingredients = {"apple", "banana", "grapes", "melon"}

# isdisjoint() is True only when the two sets share no element
chocolate_bar_is_safe = allergens.isdisjoint(chocolate_bar_ingredients)
fruit_salad_is_safe = allergens.isdisjoint(fruit_salad_ingredients)

print("Is the chocolate bar free from allergens?")
print(chocolate_bar_is_safe)

print("Is the fruit salad free from allergens?")
print(fruit_salad_is_safe)

Is the chocolate bar free from allergens?
False
Is the fruit salad free from allergens?
True


# Set Operations

Sets have special methods and operators for performing particular operations.<br>
The methods accept any iterable, such as a list or tuple, as their argument.<br>
Operators only work with other sets or subclasses of sets.<br>

## Applying Union on Sets

In [17]:
# Interests of Group A
group_a_interests = {"hiking", "photography", "traveling", "cooking"}
# Interests of Group B
group_b_interests = {"traveling", "gaming", "cooking", "painting"}

# Union -> every interest that appears in at least one of the groups
print("What are all the interests across both groups?")
combined_interests = group_a_interests.union(group_b_interests)
print(combined_interests)

# Operator alternative:
combined_interests_op = group_a_interests | group_b_interests
print(combined_interests_op)

What are all the interests across both groups?
{'traveling', 'painting', 'photography', 'cooking', 'hiking', 'gaming'}
{'traveling', 'painting', 'photography', 'cooking', 'hiking', 'gaming'}


<center>
  <img src="union_diagram_w.png" style="width: 800px;">
</center>

## Applying Intersection on Sets

In [18]:
# Interests of Group A
group_a_interests = {"hiking", "photography", "traveling", "cooking"}
# Interests of Group B
group_b_interests = {"traveling", "gaming", "cooking", "painting"}

# Intersection -> only the interests found in both groups
print("What interests do both groups have in common?")
shared_interests = group_a_interests.intersection(group_b_interests)
print(shared_interests)

# Operator alternative:
shared_interests_op = group_a_interests & group_b_interests
print(shared_interests_op)

What interests do both groups have in common?
{'traveling', 'cooking'}
{'traveling', 'cooking'}


<center>
  <img src="intersection_diagram_w.png" style="width: 800px;">
</center>

## Applying Difference on Sets

In [19]:
# Interests of Group A
group_a_interests = {"hiking", "photography", "traveling", "cooking"}
# Interests of Group B
group_b_interests = {"traveling", "gaming", "cooking", "painting"}

# Difference A - B -> interests Group A has that Group B lacks
print("What interests are unique to Group A?")
only_in_group_a = group_a_interests.difference(group_b_interests)
print(only_in_group_a)
# Operator alternative:
print(group_a_interests - group_b_interests)

# Difference B - A -> interests Group B has that Group A lacks
print("What interests are unique to Group B?")
only_in_group_b = group_b_interests.difference(group_a_interests)
print(only_in_group_b)
# Operator alternative:
print(group_b_interests - group_a_interests)

What interests are unique to Group A?
{'photography', 'hiking'}
{'photography', 'hiking'}
What interests are unique to Group B?
{'painting', 'gaming'}
{'painting', 'gaming'}


<div style="display: flex; justify-content: center; align-items: center; width: 100%;">
  <img src="difference_a_b_diagram_w.png" style="width: 650px; margin: 0 20px;">
  <img src="difference_b_a_diagram_w.png" style="width: 650px; margin: 0 20px;">
</div>

## Applying Symmetric Difference on Sets

In [20]:
# Interests of Group A
group_a_interests = {"hiking", "photography", "traveling", "cooking"}
# Interests of Group B
group_b_interests = {"traveling", "gaming", "cooking", "painting"}

# Symmetric difference -> interests held by exactly one of the two groups
print("What interests are different between the groups (not shared)?")
unshared_interests = group_a_interests.symmetric_difference(group_b_interests)
print(unshared_interests)

# Operator alternative:
unshared_interests_op = group_a_interests ^ group_b_interests
print(unshared_interests_op)

What interests are different between the groups (not shared)?
{'painting', 'photography', 'hiking', 'gaming'}
{'painting', 'photography', 'hiking', 'gaming'}


<center>
  <img src="symmetric_difference_diagram_w.png" style="width: 800px;">
</center>

# Augmented Assignment Methods and Operators

## Applying Union Update on Set

In [21]:
# Skills I currently have
my_skills = {"Python", "SQL", "HTML"}
print(f"My initial skills: {my_skills}")

# Skills taught by an online course
course_skills = {"Python", "Java", "C++"}
print(f"Skills that I can learn from the course: {course_skills}")

# update() performs an in-place union:
# my_skills ends up holding everything from both sets
my_skills.update(course_skills)
print(f"\nMy skills after taking the course (update): {my_skills}")

# Operator alternative:
# my_skills = my_skills | course_skills
# Or in short:
# my_skills |= course_skills

My initial skills: {'Python', 'HTML', 'SQL'}
Skills that I can learn from the course: {'Python', 'C++', 'Java'}

My skills after taking the course (update): {'C++', 'Python', 'HTML', 'Java', 'SQL'}


![Union Update Skills](skills_union_diagram_w.png)

## Applying Intersection Update on Set

In [22]:
my_skills = {"Python", "SQL", "HTML", "Java", "C++"}
print(f"My skills: {my_skills}")

# Skills a job offer asks for
job_required_skills = {"Python", "SQL", "AWS"}
print(f"Job required skills: {job_required_skills}")

# intersection_update() performs an in-place intersection:
# my_skills keeps only the entries that job_required_skills also contains
my_skills.intersection_update(job_required_skills)
print(f"\nRequired skills from the job that I have (intersection_update): {my_skills}")

# Operator alternative:
# my_skills = my_skills & job_required_skills
# Or in short:
# my_skills &= job_required_skills

My skills: {'Python', 'C++', 'Java', 'HTML', 'SQL'}
Job required skills: {'Python', 'AWS', 'SQL'}

Required skills from the job that I have (intersection_update): {'Python', 'SQL'}


![Intersection Update Skills](skills_intersection_diagram_w.png)

## Applying Difference Update on Set

In [23]:
my_skills = {"Python", "SQL"}
print(f"My skills: {my_skills}")

# Skills that are currently trending in the industry
trending_skills = {"Python", "Rust", "C++", "AWS"}
print(f"Trending skills: {trending_skills}")

# difference_update() performs an in-place difference:
# every entry that trending_skills also contains is dropped from my_skills
my_skills.difference_update(trending_skills)
print(f"\nMy skills which are not trending (difference_update): {my_skills}")

# Operator alternative:
# my_skills = my_skills - trending_skills
# Or in short:
# my_skills -= trending_skills

My skills: {'Python', 'SQL'}
Trending skills: {'Python', 'C++', 'AWS', 'Rust'}

My skills which are not trending (difference_update): {'SQL'}


![Difference Update Skills](skills_difference_diagram_w.png)

## Applying Symmetric Difference Update on Set

In [24]:
# Files currently on my laptop
current_system_files = {"project.docx", "report.pdf", "photo1.jpg", "photo2.jpg"}
print(f"Current system files: {current_system_files}")

# Files saved on my backup hard drive
backup_drive_files = {"project.docx", "report.pdf", "photo1.jpg", "photo3.jpg"}
print(f"Backup drive files: {backup_drive_files}")

# symmetric_difference_update() performs an in-place symmetric difference:
# only files present in exactly one of the two locations remain
current_system_files.symmetric_difference_update(backup_drive_files)
print(f"\nFiles missing from backup or deleted from system (symmetric_difference_update): {current_system_files}")

# Operator alternative:
# current_system_files = current_system_files ^ backup_drive_files
# Or in short:
# current_system_files ^= backup_drive_files

Current system files: {'project.docx', 'report.pdf', 'photo1.jpg', 'photo2.jpg'}
Backup drive files: {'project.docx', 'report.pdf', 'photo1.jpg', 'photo3.jpg'}

Files missing from backup or deleted from system (symmetric_difference_update): {'photo3.jpg', 'photo2.jpg'}


![Symmetric Difference Update Files](files_symmetric_difference_diagram_w.png)

## Frozen Sets

Frozen sets are immutable and therefore hashable.

In [25]:
# Frozenset basics

# Build a frozenset from an iterable
fset = frozenset(["tomato", "banana", "cherry"])

# A frozenset has no add() method, so the call below raises AttributeError
try:
    fset.add("orange")
except AttributeError as err:
    print(f"Error: {err}")
    print("Frozensets are immutable — you cannot add or remove elements.")

Error: 'frozenset' object has no attribute 'add'
Frozensets are immutable — you cannot add or remove elements.


## Prevent Set Modifications

In [26]:
# Permission sets are stored as frozensets so they cannot be modified later
admin_permissions = frozenset(["read", "write", "delete"])
user_permissions = frozenset(["read"])

def can_do(permissions: frozenset, action: str) -> bool:
    """Return True if `action` is contained in `permissions`, else False."""
    return action in permissions

# Example usage:
print(can_do(admin_permissions, "delete"))
print(can_do(user_permissions, "delete"))

# This will raise an error:
# admin_permissions.add("export")  # AttributeError: 'frozenset' object has no attribute 'add'

True
False


## Sets as Dictionary Keys

Dictionary keys have to be hashable. A regular set is mutable and therefore not hashable, so use a frozenset when a set is needed as a key.

In [27]:
# Each recipe name is keyed by the frozenset of ingredients it needs
recipes = {
    frozenset(["flour", "sugar", "eggs"]): "Cake",
    frozenset(["flour", "milk", "eggs"]): "Pancakes",
}

# Look up a recipe by the ingredients on hand;
# the order in which the ingredients are listed does not matter
available = frozenset(["milk", "eggs", "flour"])
matched_recipe = recipes.get(available)
print(matched_recipe)

Pancakes


## Partial Matches Example

In [28]:
# Recipes mapped to the frozenset of ingredients each one needs
recipes = {
    "Cake": frozenset(["flour", "sugar", "eggs"]),
    "Pancakes": frozenset(["flour", "milk", "eggs"]),
    "Omelette": frozenset(["eggs", "milk", "cheese"]),
}

# Ingredients you have at home
available_ingredients = {"flour", "milk"}

# Report every recipe that is at most one ingredient short
for recipe_name, ingredients_needed in recipes.items():
    missing_ingredients = ingredients_needed.difference(available_ingredients)
    if len(missing_ingredients) > 1:
        continue  # too many ingredients missing
    print(f"You can almost make {recipe_name}! Missing: {missing_ingredients}")

You can almost make Pancakes! Missing: frozenset({'eggs'})


## Sets Inside Other Sets

In [29]:
# Frozensets of the harmonized C major scale triads
c_major_triads = {
    frozenset(["C", "E", "G"]),    # C major (I)
    frozenset(["D", "F", "A"]),    # D minor (ii)
    frozenset(["E", "G", "B"]),    # E minor (iii)
    frozenset(["F", "A", "C"]),    # F major (IV)
    frozenset(["G", "B", "D"]),    # G major (V)
    frozenset(["A", "C", "E"]),    # A minor (vi)
    frozenset(["B", "D", "F"]),    # B diminished (vii°)
}

# Notes played by a guitarist (could include repeated notes)
played_notes = ["E", "B", "E", "G", "B", "E"]
# Collapse the repeats by turning the list into a set
unique_played_notes = set(played_notes)  # {"E", "B", "G"}

# Compare as a frozenset, the same type as the stored triads
played_triad = frozenset(unique_played_notes)
if played_triad not in c_major_triads:
    print("Not a triad from the C major scale.")
else:
    print("You played a valid triad from the C major scale!")

You played a valid triad from the C major scale!
