In [58]:
from itertools import combinations

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
# Retina mode
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [2]:
# Two example sets: the integers 1..10 and the even numbers among them
Set_A = set(range(1, 11))
Set_B = set(range(2, 11, 2))

# Union: every element found in Set_A, in Set_B, or in both
# (`|` is the operator form of Set_A.union(Set_B))
Union = Set_A | Set_B
print('Union:', Union)

Union: {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}


In [3]:
# Intersection: only the elements present in both sets
# (`&` is the operator form of Set_A.intersection(Set_B))
Intersection = Set_A & Set_B
print('Intersection:', Intersection)

Intersection: {2, 4, 6, 8, 10}


In [4]:
# Difference: elements of Set_A that do not appear in Set_B
# (`-` is the operator form of Set_A.difference(Set_B))
Difference = Set_A - Set_B
print('Difference:', Difference)

Difference: {1, 3, 5, 7, 9}


In [5]:
# Difference is not symmetric: elements of Set_B that are missing from Set_A
Set_B - Set_A

set()

In [6]:
# Subset test in operator form: is every element of Set_A also in Set_B?
Set_A <= Set_B

False

In [7]:
# Subset test in operator form: is every element of Set_B also in Set_A?
Set_B <= Set_A

True

In [8]:
def is_subset(set_a, set_b):
    """
    Check whether every element of set_a is also an element of set_b.

    set_a: set or frozenset
    set_b: set or frozenset
    ---
    return: True if set_a is a subset of set_b, False otherwise
            (an empty set_a is a subset of any set_b)
    raises: AssertionError if either argument is not a set or frozenset
    """
    assert isinstance(set_a, (set, frozenset)), 'set_a must be a set'
    assert isinstance(set_b, (set, frozenset)), 'set_b must be a set'

    # all() stops at the first element of set_a that is missing from set_b
    return all(element in set_b for element in set_a)

In [9]:
# Hand-written check; should agree with the built-in Set_A.issubset(Set_B)
is_subset(Set_A, Set_B)

False

In [10]:
# Hand-written check; should agree with the built-in Set_B.issubset(Set_A)
is_subset(Set_B, Set_A)

True

# Set and Mutability
In Python, sets require their elements to be hashable. This is because sets rely on hashing to determine membership and handle uniqueness. Hashable objects must meet two criteria:

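- Immutability: The value the object is hashed and compared by must not change after it is created. This is why Python's built-in mutable containers (list, dict, set) are not hashable, while immutable ones (such as str, frozenset, or a tuple of hashable items) are.
- Defined hash: The object must have a hash value, an integer that Python uses to quickly locate and compare the object in a set or dictionary. Objects that compare equal must have the same hash, but a hash is not a unique identifier: two different objects may share the same hash value.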

## Why Lists Cannot Be Used in Sets
Lists are mutable (i.e., they can be changed after they are created). When you try to use a mutable object like a list as an element of a set, Python raises `TypeError: unhashable type: 'list'`. This happens because Python cannot guarantee that the contents of the list won't change; a change would alter the value the list is hashed and compared by, making it unreliable as an element of a set (or as a key in a dictionary).


In [20]:
# An int is hashable, so it can be stored as a set element
set_a = {1}

In [23]:
li = [1, 2, 3, 4, 5]

# A list is unhashable, so it cannot be used as a set element.
# The failure is the point of this cell: catch it and show the message so the
# notebook still runs top to bottom (Restart Kernel -> Run All).
try:
    set_b = {li}
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: unhashable type: 'list'

In [25]:
# frozenset(li) takes the elements of li as they are at construction time
set_c = frozenset(li)
print(set_c)
# Mutating the list afterwards does not touch the frozenset ...
li.append(6)
# ... so this prints the same elements as the first print
print(set_c)

frozenset({1, 2, 3, 4, 5})
frozenset({1, 2, 3, 4, 5})


In [17]:
# List all subsets, one size at a time
original_set = {"R", "G", "B"}

# Index-based enumeration needs an ordered sequence, so snapshot the set as a list
original_list = list(original_set)

# Size 0: the empty subset
subsets = [[]]
print(subsets)

# Size 1: each element on its own
subsets.extend([item] for item in original_list)
print(subsets)

# Size 2: every pair of positions (a, b) with a < b
subsets.extend(
    [original_list[a], original_list[b]]
    for a in range(len(original_list))
    for b in range(a + 1, len(original_list))
)
print(subsets)

# Size 3: every triple of positions (a, b, c) with a < b < c
subsets.extend(
    [original_list[a], original_list[b], original_list[c]]
    for a in range(len(original_list))
    for b in range(a + 1, len(original_list))
    for c in range(b + 1, len(original_list))
)
print(subsets)

[[]]
[[], ['B'], ['R'], ['G']]
[[], ['B'], ['R'], ['G'], ['B', 'R'], ['B', 'G'], ['R', 'G']]
[[], ['B'], ['R'], ['G'], ['B', 'R'], ['B', 'G'], ['R', 'G'], ['B', 'R', 'G']]


In [19]:
# Lists are unhashable, so turn each subset into a frozenset before collecting them in a set
all_subsets = set(map(frozenset, subsets))
print(all_subsets)

{frozenset({'B', 'R', 'G'}), frozenset({'R'}), frozenset({'B', 'G'}), frozenset({'G'}), frozenset({'B', 'R'}), frozenset({'B'}), frozenset({'R', 'G'}), frozenset()}


In [26]:
# Alternative way to list all subsets: combinations(iterable, k) yields every
# size-k combination, so letting k run over 0..n covers the whole power set.
# (`combinations` comes from itertools, imported in the notebook's import cell.)
original_set = {"R", "G", "B"}

subsets = [
    combo
    for size in range(len(original_set) + 1)
    for combo in combinations(original_set, size)
]

# Tuples -> frozensets so the subsets are hashable and order-insensitive
all_subsets = {frozenset(subset) for subset in subsets}
print(all_subsets)

{frozenset({'B', 'R', 'G'}), frozenset({'R'}), frozenset({'B', 'G'}), frozenset({'G'}), frozenset({'B', 'R'}), frozenset({'B'}), frozenset({'R', 'G'}), frozenset()}


In [46]:
# bin(i) returns a '0b'-prefixed string; [2:] drops the prefix and zfill(3) left-pads with zeros to 3 digits
bin(1)[2:].zfill(3), bin(2)[2:].zfill(3), bin(3)[2:].zfill(3)

('001', '010', '011')

In [53]:
# Enumerate via binary representation
original_set = {"R", "G", "B"}
original_list = list(original_set)

# Binary representation of 0 to 2^n - 1 in a Pandas DataFrame: row i holds the
# n-digit bit pattern of i, with one column per element of the set
n = len(original_set)
binary = [list(bin(i)[2:].zfill(n)) for i in range(2**n)]
# The bits are built as '0'/'1' characters, so cast them to integers
binary_df = pd.DataFrame(binary, columns=original_list).astype(int)
print(binary_df)

   B  R  G
0  0  0  0
1  0  0  1
2  0  1  0
3  0  1  1
4  1  0  0
5  1  0  1
6  1  1  0
7  1  1  1


In [55]:
# Cast the 0/1 entries to False/True so each row can serve as a boolean mask
bool_df = binary_df.astype(bool)
bool_df

Unnamed: 0,B,R,G
0,False,False,False
1,False,False,True
2,False,True,False
3,False,True,True
4,True,False,False
5,True,False,True
6,True,True,False
7,True,True,True


In [63]:
# Create subsets based on the boolean DataFrame: each row is a boolean mask
# that picks out the elements belonging to one subset
elements = np.array(original_list)  # built once rather than once per row
subsets = [elements[row] for row in bool_df.values]


In [64]:
# Display the subsets selected by the boolean masks (one array per row of bool_df)
subsets

[array([], dtype='<U1'),
 array(['G'], dtype='<U1'),
 array(['R'], dtype='<U1'),
 array(['R', 'G'], dtype='<U1'),
 array(['B'], dtype='<U1'),
 array(['B', 'G'], dtype='<U1'),
 array(['B', 'R'], dtype='<U1'),
 array(['B', 'R', 'G'], dtype='<U1')]