In [2]:
from itertools import chain, combinations, product

# Define the hierarchy dictionary
hierarchy = {
    "AveBedrms": [],
    "HouseCharacteristics": ["AveRooms", "AveOccup", "HouseAge"],
    "Area": {
        "Location": ["Latitude", "Longitude"],
        "Neighbours": ["MedInc", "Population"]
    },
}

# Dummy columns to simulate real use-case
import pandas as pd
columns = pd.Series(['AveBedrms', 'AveRooms', 'AveOccup', 'HouseAge', 'Latitude', 'Longitude', 'MedInc', 'Population'])

class Node:
    def __init__(self, key):
        self.key = key
        self.child = []
        self.permutations = []

    def __repr__(self):
        return f"({self.key}): {self.child}"

def build_tree_and_generate_permutations(d, root):
    if isinstance(d, dict):
        for key, value in d.items():
            node = Node(key)
            root.child.append(node)
            build_tree_and_generate_permutations(value, node)
    elif isinstance(d, list):
        for item in d:
            node = Node(item)
            root.child.append(node)
    
    generate_permutations(root)

def generate_permutations(node):
    if not node.child:  # Leaf node
        node.permutations = []
        return
    
    children_keys = [child.key for child in node.child]
    node.permutations = {}
    
    for i, child in enumerate(node.child):
        excluded = children_keys[:i] + children_keys[i + 1:]
        generate_permutations(child)
        
        # Generate all unique combinations of permutations for each child
        child.permutations = list(all_subsets(excluded))

def all_subsets(iterable):
    "Return all subsets of a given iterable."
    return chain.from_iterable(combinations(iterable, n) for n in range(len(iterable) + 1))

def generate_paths_and_combinations(node):
    paths = []
    
    def dfs(current_node, current_path):
        current_path.append((current_node.key, current_node.permutations))
        
        if not current_node.child:  # Leaf node
            paths.append(current_path[:])  # Make a copy of current_path
        else:
            for child in current_node.child: 
                dfs(child, current_path)
        
        current_path.pop()  # Backtrack
    
    dfs(node, [])
    
    combinations_list = []

    for path in paths:
        filtered_path = [(key, perms) for key, perms in path if perms]
        if filtered_path:
            node_keys, permutations = zip(*filtered_path)
            path_combinations = list(product(*permutations))
            last_key = node_keys[-1]
            for combination in path_combinations:
                combinations_list.append((last_key, combination))
    
    return combinations_list

def create_masks1(node, columns):
    masks = []
    keys = []

    if not node.child:  # Check if the child list is empty
        mask = columns == node.key
        masks.append(mask)
        keys.append(node.key)
    else:
        # Create mask for current node
        current_node_mask = columns.isin(get_all_leaf_values(node))
        masks.append(current_node_mask)
        keys.append(node.key)
        
        # Recursively create masks for all child nodes
        for subset in node.child:
            child_masks, child_keys = create_masks1(subset, columns)
            masks.extend(child_masks)
            keys.extend(child_keys)

    return masks, keys

# Building the tree and generating permutations
root = Node("Root")
build_tree_and_generate_permutations(hierarchy, root)

# Generating paths and combinations
combinations_list = generate_paths_and_combinations(root)

# Printing the tree with permutations for verification
def print_tree_with_permutations(node, level=0):
    indent = "  " * level
    print(f"{indent}{node.key}")
    if hasattr(node, 'permutations'):
        print(f"{indent}  permutations: {node.permutations}")
    for child in node.child:
        print_tree_with_permutations(child, level + 1)

print_tree_with_permutations(root)

# Output the combinations with the last node key
print("Generated combinations:")
for last_key, combo in combinations_list:
    print(f"Last Key: {last_key}, Combination: {combo}")


Root
  permutations: {}
  AveBedrms
    permutations: [(), ('HouseCharacteristics',), ('Area',), ('HouseCharacteristics', 'Area')]
  HouseCharacteristics
    permutations: [(), ('AveBedrms',), ('Area',), ('AveBedrms', 'Area')]
    AveRooms
      permutations: [(), ('AveOccup',), ('HouseAge',), ('AveOccup', 'HouseAge')]
    AveOccup
      permutations: [(), ('AveRooms',), ('HouseAge',), ('AveRooms', 'HouseAge')]
    HouseAge
      permutations: [(), ('AveRooms',), ('AveOccup',), ('AveRooms', 'AveOccup')]
  Area
    permutations: [(), ('AveBedrms',), ('HouseCharacteristics',), ('AveBedrms', 'HouseCharacteristics')]
    Location
      permutations: [(), ('Neighbours',)]
      Latitude
        permutations: [(), ('Longitude',)]
      Longitude
        permutations: [(), ('Latitude',)]
    Neighbours
      permutations: [(), ('Location',)]
      MedInc
        permutations: [(), ('Population',)]
      Population
        permutations: [(), ('MedInc',)]
Generated combinations:
Last Key: AveBe

In [5]:
from itertools import chain, combinations, product

# Define the hierarchy dictionary
hierarchy = {
    "AveBedrms": [],
    "HouseCharacteristics": ["AveRooms", "AveOccup", "HouseAge"],
    "Area": {
        "Location": ["Latitude", "Longitude"],
        "Neighbours": ["MedInc", "Population"]
    },
}

# Dummy columns to simulate real use-case
import pandas as pd
columns = pd.Series(['AveBedrms', 'AveRooms', 'AveOccup', 'HouseAge', 'Latitude', 'Longitude', 'MedInc', 'Population'])

class Node:
    def __init__(self, key):
        self.key = key
        self.child = []
        self.permutations = []

    def __repr__(self):
        return f"({self.key}): {self.child}"

def build_tree_and_generate_permutations(d, root):
    if isinstance(d, dict):
        for key, value in d.items():
            node = Node(key)
            root.child.append(node)
            build_tree_and_generate_permutations(value, node)
    elif isinstance(d, list):
        for item in d:
            node = Node(item)
            root.child.append(node)
    
    generate_permutations(root)

def generate_permutations(node):
    if not node.child:  # Leaf node
        node.permutations = []
        return
    
    children_keys = [child.key for child in node.child]
    node.permutations = {}
    
    for i, child in enumerate(node.child):
        excluded = children_keys[:i] + children_keys[i + 1:]
        generate_permutations(child)
        
        # Generate all unique combinations of permutations for each child
        child.permutations = list(all_subsets(excluded))

def all_subsets(iterable):
    "Return all subsets of a given iterable."
    return chain.from_iterable(combinations(iterable, n) for n in range(len(iterable) + 1))

def generate_paths_and_combinations(node):
    paths = []
    
    def dfs(current_node, current_path):
        current_path.append((current_node.key, current_node.permutations))
        
        if not current_node.child:  # Leaf node
            paths.append(current_path[:])  # Make a copy of current_path
        else:
            for child in current_node.child: 
                dfs(child, current_path)
        
        current_path.pop()  # Backtrack
    
    dfs(node, [])
    
    combinations_list = []

    for path in paths:
        filtered_path = [(key, perms) for key, perms in path if perms]
        if filtered_path:
            node_keys, permutations = zip(*filtered_path)
            path_combinations = list(product(*permutations))
            last_key = node_keys[-1]
            for combination in path_combinations:
                # Add both the combination with and without the last key
                combinations_list.append((last_key, combination))
                combinations_list.append((last_key, combination + (last_key,)))
    
    return combinations_list

def create_masks1(node, columns):
    masks = []
    keys = []

    if not node.child:  # Check if the child list is empty
        mask = columns == node.key
        masks.append(mask)
        keys.append(node.key)
    else:
        # Create mask for current node
        current_node_mask = columns.isin(get_all_leaf_values(node))
        masks.append(current_node_mask)
        keys.append(node.key)
        
        # Recursively create masks for all child nodes
        for subset in node.child:
            child_masks, child_keys = create_masks1(subset, columns)
            masks.extend(child_masks)
            keys.extend(child_keys)

    return masks, keys

def get_all_leaf_values(node):
    if not node.child:
        return [node.key]
    else:
        leaf_values = []
        for child in node.child:
            leaf_values.extend(get_all_leaf_values(child))
        return leaf_values

# Building the tree and generating permutations
root = Node("Root")
build_tree_and_generate_permutations(hierarchy, root)

# Generating paths and combinations
combinations_list = generate_paths_and_combinations(root)

# Printing the tree with permutations for verification
def print_tree_with_permutations(node, level=0):
    indent = "  " * level
    print(f"{indent}{node.key}")
    if hasattr(node, 'permutations'):
        print(f"{indent}  permutations: {node.permutations}")
    for child in node.child:
        print_tree_with_permutations(child, level + 1)

print_tree_with_permutations(root)

# Output the combinations with the last node key
print("Generated combinations:")
for last_key, combo in combinations_list:
    print(f"Last Key: {last_key}, Combination: {combo}")

# Create masks for the generated combinations
masks, keys = create_masks1(root, columns)

# Display masks
print("Generated masks:")
for key, mask in zip(keys, masks):
    print(f"Key: {key}, Mask: {mask.values}")


Root
  permutations: {}
  AveBedrms
    permutations: [(), ('HouseCharacteristics',), ('Area',), ('HouseCharacteristics', 'Area')]
  HouseCharacteristics
    permutations: [(), ('AveBedrms',), ('Area',), ('AveBedrms', 'Area')]
    AveRooms
      permutations: [(), ('AveOccup',), ('HouseAge',), ('AveOccup', 'HouseAge')]
    AveOccup
      permutations: [(), ('AveRooms',), ('HouseAge',), ('AveRooms', 'HouseAge')]
    HouseAge
      permutations: [(), ('AveRooms',), ('AveOccup',), ('AveRooms', 'AveOccup')]
  Area
    permutations: [(), ('AveBedrms',), ('HouseCharacteristics',), ('AveBedrms', 'HouseCharacteristics')]
    Location
      permutations: [(), ('Neighbours',)]
      Latitude
        permutations: [(), ('Longitude',)]
      Longitude
        permutations: [(), ('Latitude',)]
    Neighbours
      permutations: [(), ('Location',)]
      MedInc
        permutations: [(), ('Population',)]
      Population
        permutations: [(), ('MedInc',)]
Generated combinations:
Last Key: AveBe

In [25]:
import numpy as np
def combine_masks(masks):
    combined_mask = np.logical_or.reduce(masks)
    return combined_mask

def create_combined_masks(combinations, masks_dict):
    combined_masks = []
    for last_key, combination in combinations:
        masks = [masks_dict[key] for key in combination if key in masks_dict]
        if masks:
            combined_mask = combine_masks(masks)
            combined_masks.append((last_key, combined_mask))
    return combined_masks

In [27]:
masks, keys = create_masks1(root, columns)
masks_dict = dict(zip(keys, masks))

create_combined_masks(combinations_list, masks_dict)

[('AveBedrms',
  array([ True, False, False, False, False, False, False, False])),
 ('AveBedrms',
  array([ True, False, False, False, False, False, False, False])),
 ('AveBedrms',
  array([ True, False, False, False, False, False, False, False])),
 ('AveBedrms',
  array([ True, False, False, False, False, False, False, False])),
 ('AveRooms', array([False,  True, False, False, False, False, False, False])),
 ('AveRooms', array([False,  True, False, False, False, False, False, False])),
 ('AveRooms', array([False,  True, False, False, False, False, False, False])),
 ('AveRooms', array([False,  True, False, False, False, False, False, False])),
 ('AveRooms', array([False,  True, False, False, False, False, False, False])),
 ('AveRooms', array([False,  True, False, False, False, False, False, False])),
 ('AveRooms', array([False,  True, False, False, False, False, False, False])),
 ('AveRooms', array([False,  True, False, False, False, False, False, False])),
 ('AveRooms', array([False, 