In [10148]:
import copy
import random
import datetime

class Item:
    """A distinct item, identified by its name and its unit utility.

    The transaction-weighted utility (TWU) is mutable bookkeeping exposed
    through the ``twu`` property; it takes no part in equality or hashing.
    """

    def __init__(self, item: str, utility: int):
        self.item, self.utility = item, utility
        # TWU stays at 0 until it is assigned through the property.
        self._twu = 0

    def _get_twu(self) -> int:
        return self._twu

    def _set_twu(self, value: int) -> None:
        self._twu = value

    twu = property(_get_twu, _set_twu)

    def __repr__(self):
        # Only the name is shown; the TWU is deliberately left out.
        return f"{self.item}"

    def __eq__(self, other):
        # Anything that is not an Item never compares equal.
        if not isinstance(other, Item):
            return False
        return self.item == other.item and self.utility == other.utility

    def __hash__(self):
        # Hash on the same fields as __eq__ so equal items share a bucket.
        identity = (self.item, self.utility)
        return hash(identity)


class TransItem:
    """One occurrence of an item in a transaction: item, quantity, probability."""

    def __init__(self, item: Item, quantity: int, probability: float):
        self.item, self.quantity, self.probability = item, quantity, probability

    def __repr__(self):
        return f"{self.item},{self.quantity},{self.probability}"

    def get_total_probability(self):
        """Return the quantity multiplied by the occurrence probability."""
        weighted = self.quantity * self.probability
        return weighted

def check_order_condition(a: Item, b: Item):
    """Tell whether ``a`` may be placed after ``b`` in the processing order.

    Returns True when ``a`` has a non-positive utility and ``b`` a positive
    one. When the product of both utilities is non-negative, the result is
    ``a.twu >= b.twu``. Otherwise (positive ``a``, negative ``b``) it is False.
    """
    if a.utility <= 0 < b.utility:
        return True
    same_side = a.utility * b.utility >= 0
    return a.twu >= b.twu if same_side else False


def check_order_item_and_set(ik: Item, X: set[Item]) -> bool:
    """Return True when ``ik`` satisfies the order condition against every other item of ``X``.

    Members of ``X`` equal to ``ik`` are skipped; an empty ``X`` yields True.
    """
    return all(member == ik or check_order_condition(ik, member) for member in X)


class Transaction:
    """A transaction: an identifier plus the set of item occurrences it holds."""

    def __init__(self, id: int, trans_items: set[TransItem]):
        self.id, self.trans_items = id, trans_items

    def __repr__(self):
        return f"t{self.id}, {self.trans_items}"

    def contains_item_set(self, item_set: set[Item]) -> bool:
        """Return True when every item of ``item_set`` occurs in this transaction."""
        return item_set.issubset(self.get_items())

    def get_quantity_of_item(self, item: Item) -> int:
        """Return the quantity recorded for ``item``, or 0 when it is absent."""
        matches = (entry.quantity for entry in self.trans_items if entry.item == item)
        return next(matches, 0)

    def get_probability_of_item(self, item: Item) -> float:
        """Return the probability recorded for ``item``, or 0 when it is absent."""
        matches = (entry.probability for entry in self.trans_items if entry.item == item)
        return next(matches, 0)

    def get_items(self) -> set[Item]:
        """Return a freshly built set of the items present in this transaction."""
        return {entry.item for entry in self.trans_items}

    def get_probability_of_item_set(self, item_set: set[Item]) -> float:
        """Return the product of the probabilities of the items of ``item_set``.

        The result is 0.0 when the transaction does not contain the whole set.
        """
        if not self.contains_item_set(item_set):
            return 0.0
        product = 1.0
        for entry in self.trans_items:
            if entry.item in item_set:
                product *= entry.probability
        return product

    def get_positive_utility_of_item_set(self, items: set[Item]):
        """Sum ``utility * quantity`` over the items of ``items`` with positive utility."""
        return sum(
            member.utility * self.get_quantity_of_item(member)
            for member in items
            if member.utility > 0
        )

    def get_negative_utility_of_item_set(self, items: set[Item]):
        """Sum ``utility * quantity`` over the items of ``items`` with negative utility."""
        return sum(
            member.utility * self.get_quantity_of_item(member)
            for member in items
            if member.utility < 0
        )

    def get_utility_of_item_set(self, items: set[Item]):
        """Sum ``utility * quantity`` over every item of ``items``."""
        return sum(member.utility * self.get_quantity_of_item(member) for member in items)

    def get_remaining_utility_of_item_set(self, items: set[Item]):
        """Sum the utility of the positive items that can still extend ``items``.

        An item of the transaction counts when its utility is positive, it is
        not already in ``items`` and it satisfies the order condition against
        all of ``items``.
        """
        return sum(
            candidate.utility * self.get_quantity_of_item(candidate)
            for candidate in self.get_items()
            if candidate.utility > 0
            and candidate not in items
            and check_order_item_and_set(candidate, items)
        )


from collections import namedtuple

# One row of a utility list: transaction id (tid), itemset probability (pro),
# positive utility (pu), negative utility (nu) and remaining utility (ru).
Utilities = namedtuple("Utilities", ["tid", "pro", "pu", "nu", "ru"])


class AbstractList:
    """Utility list of an itemset: the itemset plus one ``Utilities`` row per transaction."""

    def __init__(self, items: set[Item], utility_values: list[Utilities]):
        self.items, self.utility_values = items, utility_values

    def _column_total(self, field):
        # Accumulate left to right, starting from the integer 0.
        total = 0
        for record in self.utility_values:
            total += getattr(record, field)
        return total

    def get_ru(self):
        """Return the sum of the remaining utilities of all rows."""
        return self._column_total("ru")

    def get_pu(self):
        """Return the sum of the positive utilities of all rows."""
        return self._column_total("pu")

    def get_nu(self):
        """Return the sum of the negative utilities of all rows."""
        return self._column_total("nu")

    def get_pro(self):
        """Return the sum of the probabilities of all rows."""
        return self._column_total("pro")

    def __repr__(self):
        """Render the list as a bordered text table (or a placeholder when empty)."""
        rows = self.utility_values
        if not rows:
            return "Empty PNU-List"

        def is_record(row):
            return isinstance(row, (list, tuple))

        def rendered(row, col):
            # Record cells are rounded to 3 decimals; anything else is shown whole.
            return str(round(row[col], 3)) if is_record(row) else str(row)

        titles = ["PRO", "PU", "NU", "RU"]
        num_columns = len(rows[0]) if is_record(rows[0]) else 1
        items_str = "(" + ",".join(str(item) for item in self.items) + ")"

        # The first column is headed by the itemset, the others by the titles.
        headers = [items_str] + titles
        value_widths = [
            max(max(len(rendered(row, col)) for row in rows), len(headers[col]))
            for col in range(num_columns)
        ]
        border = "-" * (sum(value_widths) + 3 * num_columns + 1)

        header_cells = [items_str.rjust(value_widths[0])]
        header_cells += [
            headers[col].center(value_widths[col]) for col in range(1, num_columns)
        ]
        lines = [border, "|" + "".join(f" {cell} |" for cell in header_cells), border]

        for row in rows:
            if is_record(row):
                cells = [
                    str(round(value, 3)).rjust(value_widths[col])
                    for col, value in enumerate(row)
                ]
            else:
                cells = [str(row).rjust(value_widths[0])]
            lines.append("|" + "".join(f" {cell} |" for cell in cells))

        lines.append(border)
        return "\n".join(lines)

In [10149]:
def calculate_positive_utility_of_transaction(trans: Transaction):
    """Return the total utility of the positive-utility items of ``trans``."""
    return sum(
        trans.get_quantity_of_item(member) * member.utility
        for member in trans.get_items()
        if member.utility > 0
    )

In [10150]:
def calculate_transaction_weight_utility(items: set[Item], database: list[Transaction]):
    """Return the TWU of ``items``.

    This is the positive utility of every transaction of ``database`` that
    contains the whole itemset, summed.
    """
    return sum(
        calculate_positive_utility_of_transaction(record)
        for record in database
        if record.contains_item_set(items)
    )

In [10151]:
def calculate_positive_utility_of_item_set_in_trans(
    items: set[Item], trans: Transaction
):
    """Sum ``utility * quantity`` in ``trans`` over the positive-utility items of ``items``."""
    # Same computation as the Transaction method, exposed as a free function.
    return trans.get_positive_utility_of_item_set(items)

In [10152]:
def calculate_negative_utility_of_item_set_in_trans(
    items: set[Item], trans: Transaction
):
    """Sum ``utility * quantity`` in ``trans`` over the negative-utility items of ``items``."""
    # Same computation as the Transaction method, exposed as a free function.
    return trans.get_negative_utility_of_item_set(items)

In [10153]:
def calculate_remaining_utility_of_item_set_in_trans(
    items: set[Item], trans: Transaction
):
    """Return the remaining utility of ``items`` in ``trans``.

    This is the utility of the positive items of the transaction that are
    not in ``items`` and satisfy the order condition against all of it.
    """
    return sum(
        candidate.utility * trans.get_quantity_of_item(candidate)
        for candidate in trans.get_items()
        if candidate.utility > 0
        and check_order_item_and_set(candidate, items)
        and candidate not in items
    )

In [10154]:
def create_eucs_dict(
    arr: list[Item], database: list[Transaction]
) -> dict[frozenset[Item], int]:
    """Map every unordered pair of items of ``arr`` to its TWU in ``database``."""
    eucs_dict = {}
    count = len(arr)
    for left in range(count):
        # Start after `left` so each unordered pair is visited exactly once.
        for right in range(left + 1, count):
            pair = frozenset({arr[left], arr[right]})
            eucs_dict[pair] = calculate_transaction_weight_utility(pair, database)
    return eucs_dict

In [10155]:
def find_tuple_by_trans_id(P: AbstractList, target_trans_id: int) -> Utilities:
    """Return the first row of ``P`` whose ``tid`` equals ``target_trans_id``, else None."""
    matching = (row for row in P.utility_values if row.tid == target_trans_id)
    return next(matching, None)

def construct(P: AbstractList, Px: AbstractList, Py: AbstractList):
    """Build the utility list of ``Px.items | Py.items`` from two extensions of ``P``.

    Only transactions present in both ``Px`` and ``Py`` are kept. When ``P``
    has rows (non-root prefix), its contribution is removed so it is not
    counted twice: probabilities are divided by the prefix probability and
    the prefix utilities are subtracted. The remaining utility always comes
    from ``Py``.

    Args:
        P: utility list of the shared prefix; no rows means the root.
        Px: utility list of the prefix extended by x.
        Py: utility list of the prefix extended by y.

    Returns:
        A new ``AbstractList`` for the union of both itemsets.
    """

    def index_by_tid(pnu_list):
        # One pass per list replaces a linear scan per record. The first row
        # for a tid wins, like the sequential search it replaces.
        index = {}
        for record in pnu_list.utility_values:
            index.setdefault(record.tid, record)
        return index

    y_by_tid = index_by_tid(Py)
    p_by_tid = index_by_tid(P) if P.utility_values else None

    utilities_list: list[Utilities] = list()
    Pxy = AbstractList(Px.items | Py.items, utilities_list)
    for xTuple in Px.utility_values:
        yTuple = y_by_tid.get(xTuple.tid)
        if yTuple is None:
            continue
        if p_by_tid is None:
            utilities_list.append(
                Utilities(
                    xTuple.tid,
                    xTuple.pro * yTuple.pro,
                    xTuple.pu + yTuple.pu,
                    xTuple.nu + yTuple.nu,
                    yTuple.ru,
                )
            )
        else:
            pTuple = p_by_tid.get(xTuple.tid)
            # A zero prefix probability is replaced by 0.01 before dividing,
            # presumably to avoid a ZeroDivisionError -- confirm the constant.
            pro = 0.01 if pTuple.pro == 0 else pTuple.pro
            utilities_list.append(
                Utilities(
                    xTuple.tid,
                    xTuple.pro * yTuple.pro / pro,
                    xTuple.pu + yTuple.pu - pTuple.pu,
                    xTuple.nu + yTuple.nu - pTuple.nu,
                    yTuple.ru,
                )
            )
    return Pxy

In [10156]:
def calculate_utility_of_item_set_in_database(
    items: set[Item], database: list[Transaction]
):
    """Return the utility of ``items`` summed over the transactions that contain it."""
    return sum(
        member.utility * record.get_quantity_of_item(member)
        for record in database
        if record.contains_item_set(items)
        for member in items
    )

In [10157]:
def calculate_probability_of_item_set(items: set[Item], database: list[Transaction]):
    """Return the sum, over all transactions, of the probability of ``items``."""
    accumulated = 0.0
    # Plain left-to-right accumulation; transactions lacking the itemset add 0.0.
    for record in database:
        accumulated = accumulated + record.get_probability_of_item_set(items)
    return accumulated

In [10158]:
def sort_items_by_twu_and_utility(items: list[Item]) -> list[Item]:
    """Return a new list with positive-utility items first, each group by ascending TWU."""
    ordered = list(items)
    ordered.sort(key=lambda entry: (0 if entry.utility > 0 else 1, entry.twu))
    return ordered

DFS - SEARCH PROCEDURE ALGORITHM


In [10159]:
def searching_procedure(
    PList: AbstractList,
    lists: list[AbstractList],
    minUtil,
    minPro,
    database: list[Transaction],
    eucs_dict: dict[frozenset[Item], int],
    output: list[set[Item]],
):
    """Depth-first search over the extensions of the prefix held by ``PList``.

    Every list in ``lists`` extends the prefix by one item. An itemset is
    appended to ``output`` when its rounded probability reaches
    ``minPro * len(database)`` and its utility (pu + nu) reaches ``minUtil``.
    Its extensions are explored only when the probability condition holds
    and pu + ru reaches ``minUtil``.

    Args:
        PList: utility list of the current prefix (no rows at the root).
        lists: utility lists of the one-item extensions, in processing order.
        minUtil: minimum utility threshold.
        minPro: minimum probability ratio, scaled by the database size.
        database: transactions; only its length is used here.
        eucs_dict: TWU of item pairs, used to prune pairs before joining.
        output: list receiving the qualifying itemsets (mutated in place).
    """
    min_expected_support = minPro * len(database)
    prefix_items = PList.items
    for i, XList in enumerate(lists):
        # Every decision below requires the probability condition.
        if round(XList.get_pro(), 3) < min_expected_support:
            continue
        positive_utility = XList.get_pu()
        if positive_utility + XList.get_nu() >= minUtil:
            output.append(XList.items)
        if positive_utility + XList.get_ru() < minUtil:
            continue

        x = XList.items.difference(prefix_items)
        new_lists: list[AbstractList] = list()
        for j in range(i + 1, len(lists)):
            YList: AbstractList = lists[j]
            y = YList.items.difference(prefix_items)
            twu_value = eucs_dict.get(frozenset(x | y))
            # An unknown pair cannot be pruned: only a known TWU below the
            # threshold rules the join out. Comparing None raised TypeError.
            if twu_value is not None and twu_value < minUtil:
                continue
            ZList = construct(PList, XList, YList)
            if round(ZList.get_pro(), 3) >= min_expected_support:
                new_lists.append(ZList)
        searching_procedure(
            XList, new_lists, minUtil, minPro, database, eucs_dict, output
        )

In [10160]:
def preparation_procedure(
    db: list[Transaction], item_list: list[Item], minUtility: int, minPro: float
):
    """Prepare the utility lists and run the depth-first mining procedure.

    Steps:
      1. Compute TWU and summed probability of every single item. Items
         below ``minUtility`` or ``minPro * len(db)`` are discarded; the
         others get their ``twu`` attribute set (the caller's objects are
         mutated).
      2. Work on a deep copy of ``db`` with the discarded items removed from
         every transaction.
      3. Build one utility list per kept item plus the pairwise TWU table.
      4. Call ``searching_procedure`` from an empty prefix.

    Args:
        db: the transaction database (left untouched).
        item_list: the distinct items to consider.
        minUtility: minimum utility threshold.
        minPro: minimum probability ratio, scaled by ``len(db)``.

    Returns:
        The itemsets collected by ``searching_procedure``.
    """
    min_expected_support = minPro * len(db)
    removed_items: set[Item] = set()
    for item in item_list:
        twu = calculate_transaction_weight_utility({item}, db)
        pro = calculate_probability_of_item_set({item}, db)
        if twu < minUtility or round(pro, 3) < min_expected_support:
            removed_items.add(item)
        else:
            item.twu = twu

    # Keep the qualified items, in processing order.
    new_distinct_items = sort_items_by_twu_and_utility(
        [item for item in item_list if item not in removed_items]
    )

    # The TWU values are set before copying so the copied items carry them.
    newDb = copy.deepcopy(db)

    # get_items() returns a fresh set, so discarding from it changed nothing.
    # The stored occurrences must be rebuilt to drop the unqualified items.
    if removed_items:
        for trans in newDb:
            trans.trans_items = {
                trans_item
                for trans_item in trans.trans_items
                if trans_item.item not in removed_items
            }

    # One utility list per kept item, the input of the search procedure.
    lists: list[AbstractList] = list()
    for item in new_distinct_items:
        utility_values_list: list[Utilities] = list()
        single = {item}
        for trans in newDb:
            if trans.contains_item_set(single):
                utility_values_list.append(
                    Utilities(
                        trans.id,
                        trans.get_probability_of_item_set(single),
                        trans.get_positive_utility_of_item_set(single),
                        trans.get_negative_utility_of_item_set(single),
                        trans.get_remaining_utility_of_item_set(single),
                    )
                )
        lists.append(AbstractList(single, utility_values_list))

    # Pairwise TWU table used for pruning.
    eucs_dict: dict[frozenset[Item], int] = create_eucs_dict(new_distinct_items, newDb)

    # The root prefix is an empty itemset; it must be a set, not the dict `{}`.
    root = AbstractList(set(), list())
    output = list()
    searching_procedure(root, lists, minUtility, minPro, newDb, eucs_dict, output)
    return output

In [10161]:
# a = Item("a", 6)
# b = Item("b", 7)
# c = Item("c", 1)
# d = Item("d", -5)
# e = Item("e", 3)

# item_list = [a, b, c, d, e]

# t1_trans_items = {TransItem(b, 3, 0.85), TransItem(c, 1, 1.0), TransItem(d, 2, 0.70)}

# t2_trans_items = {
#     TransItem(a, 1, 1.0),
#     TransItem(b, 1, 0.60),
#     TransItem(c, 3, 0.75),
#     TransItem(e, 1, 0.40),
# }

# t3_trans_items = {
#     TransItem(a, 1, 0.55),
#     TransItem(b, 2, 0.60),
#     TransItem(c, 4, 1.0),
#     TransItem(d, 1, 0.90),
#     TransItem(e, 5, 0.40),
# }

# t4_trans_items = {TransItem(b, 3, 0.90), TransItem(d, 1, 0.45)}

# t5_trans_items = {
#     TransItem(a, 4, 1.0),
#     TransItem(c, 3, 0.85),
#     TransItem(d, 2, 0.70),
#     TransItem(e, 2, 0.45),
# }

# t1 = Transaction(1, t1_trans_items)
# t2 = Transaction(2, t2_trans_items)
# t3 = Transaction(3, t3_trans_items)
# t4 = Transaction(4, t4_trans_items)
# t5 = Transaction(5, t5_trans_items)
# database = [t1, t2, t3, t4, t5]

BASED ON APRIORI - BFS


In [10162]:
def is_phui(
    item_set: set[Item], database: list[Transaction], min_util: int, min_pro: float
):
    """Tell whether ``item_set`` reaches both the utility and the probability thresholds.

    Utility and probability are accumulated over the transactions that
    contain the itemset. The probability is rounded to 3 decimals and
    compared with ``min_pro * len(database)``.
    """
    total_utility = 0
    total_probability = 0
    for record in database:
        if not record.contains_item_set(item_set):
            continue
        positive = record.get_positive_utility_of_item_set(item_set)
        negative = record.get_negative_utility_of_item_set(item_set)
        total_utility += positive + negative
        total_probability += record.get_probability_of_item_set(item_set)
    if total_utility < min_util:
        return False
    return round(total_probability, 3) >= min_pro * len(database)

In [10163]:
def calculate_utilities_of_item_set(
    item_set: set[Item], database: list[Transaction]
) -> tuple[float, int, int, int]:
    """Return ``(probability, utility, remaining utility, TWU)`` of ``item_set``.

    Each figure is accumulated over the transactions containing the itemset.
    """
    probability = 0
    utility = 0
    remaining = 0
    weighted = 0
    for record in database:
        if not record.contains_item_set(item_set):
            continue
        probability += record.get_probability_of_item_set(item_set)
        positive = record.get_positive_utility_of_item_set(item_set)
        negative = record.get_negative_utility_of_item_set(item_set)
        utility += positive + negative
        remaining += record.get_remaining_utility_of_item_set(item_set)
        weighted += calculate_positive_utility_of_transaction(record)
    return probability, utility, remaining, weighted

In [10164]:
def is_htwui(item_set: set[Item], database: list[Transaction], min_util: int, min_prob: float):
    """Tell whether ``item_set`` has a TWU and a rounded probability above the thresholds."""
    probability, _utility, _remaining, weighted = calculate_utilities_of_item_set(
        item_set, database
    )
    if weighted < min_util:
        return False
    return round(probability, 3) >= min_prob * len(database)

In [10165]:
def find_size_one_htwui(
    database: list[Transaction],
    item_list: set[Item],
    min_util: int,
    min_prob: float,
    phui_list: list[set[Item]],
) -> list[set[Item]]:
    """Return the single-item sets whose TWU and probability reach the thresholds.

    Those whose actual utility also reaches ``min_util`` are appended to
    ``phui_list`` as well (mutated in place).
    """
    singles: list[set[Item]] = []
    for candidate in item_list:
        prob, util, _ru, weighted = calculate_utilities_of_item_set({candidate}, database)
        if weighted < min_util or round(prob, 3) < min_prob * len(database):
            continue
        singles.append({candidate})
        if util >= min_util:
            phui_list.append({candidate})
    return singles

In [10166]:
def find_size_k_htwui(
    database: list[Transaction],
    size_k_candidates: list[set[Item]],
    min_util: int,
    min_prob: float,
    phui_list: list[set[Item]]
) -> list[set[Item]]:
    """Return the candidates whose TWU and probability reach the thresholds.

    Those whose actual utility also reaches ``min_util`` are appended to
    ``phui_list`` as well (mutated in place).
    """
    retained: list[set[Item]] = []
    for candidate in size_k_candidates:
        prob, util, _ru, weighted = calculate_utilities_of_item_set(candidate, database)
        if weighted < min_util or round(prob, 3) < min_prob * len(database):
            continue
        retained.append(candidate)
        if util >= min_util:
            phui_list.append(candidate)
    return retained

In [10167]:
def generate_size_k_candidates(
    prev_htwui: list[set[Item]],
    item_list: set[Item],
    database: list[Transaction],
    min_util: int,
    min_prob: float,
) -> list[set[Item]]:
    """Extend every itemset of the previous level by one item.

    Each itemset of ``prev_htwui`` is joined with:
      * every other item occurring in ``prev_htwui`` whose TWU reaches
        ``min_util``;
      * every item of ``item_list`` not occurring in ``prev_htwui`` whose TWU
        reaches ``min_util`` and whose rounded probability reaches
        ``len(database) * min_prob``.

    Args:
        prev_htwui: itemsets retained at the previous level.
        item_list: all distinct items.
        database: the transaction database.
        min_util: minimum utility threshold.
        min_prob: minimum probability ratio.

    Returns:
        The candidate itemsets, without duplicates, in generation order.
    """
    distinct_items = set().union(*prev_htwui)
    outside_items = set(item_list).difference(distinct_items)
    min_expected_support = len(database) * min_prob

    # Single-item figures do not depend on the itemset being extended, so
    # each one is computed over the database at most once.
    stats_cache = {}

    def single_item_stats(item):
        if item not in stats_cache:
            p, _u, _ru, twu = calculate_utilities_of_item_set({item}, database)
            stats_cache[item] = (p, twu)
        return stats_cache[item]

    size_k_candidate: list[set[Item]] = list()
    seen = set()  # frozenset view of the candidates, for constant-time dedupe

    def add_candidate(candidate):
        key = frozenset(candidate)
        if key not in seen:
            seen.add(key)
            size_k_candidate.append(candidate)

    for twui in prev_htwui:
        for item in distinct_items.difference(twui):
            _p, twu = single_item_stats(item)
            if twu >= min_util:
                add_candidate(twui | {item})
        for item in outside_items:
            p, twu = single_item_stats(item)
            if twu >= min_util and round(p, 3) >= min_expected_support:
                add_candidate(twui | {item})
    return size_k_candidate

In [10168]:
def high_utility_mining_apriori(
    database: list[Transaction], item_list: set[Item], min_util: int, min_prob: float
):
    """Level-wise mining: grow candidates one item at a time until a level is empty.

    Returns the itemsets collected in ``phui_list`` by the level filters.
    """
    phui_list: list[set[Item]] = []
    current_level = find_size_one_htwui(
        database, item_list, min_util, min_prob, phui_list
    )
    htwui_list: list[list[set[Item]]] = [current_level]
    # Stop as soon as a level retains nothing.
    while current_level:
        candidates = generate_size_k_candidates(
            current_level, item_list, database, min_util, min_prob
        )
        current_level = find_size_k_htwui(
            database, candidates, min_util, min_prob, phui_list
        )
        htwui_list.append(current_level)
    return phui_list

In [10169]:
class PHUNode:
    """Node of the itemset tree: an itemset, its figures, its parent and its children."""

    def __init__(
        self,
        item_set: set[Item] = None,
        utility: int = 0,
        ru: int = 0,
        prob: float = 0.0,
        children: list["PHUNode"] = None,
        parent: "PHUNode" = None,
    ):
        # None defaults become fresh containers so instances never share one.
        self.item_set = set() if item_set is None else item_set
        self.utility, self.ru, self.prob = utility, ru, prob
        self.children = [] if children is None else children
        self.parent = parent

    def add_child(self, child: "PHUNode") -> None:
        """Append ``child`` to this node's children."""
        self.children.append(child)

    def is_leaf(self) -> bool:
        """Return True when the node has no children."""
        return len(self.children) == 0

    def get_total_utility(self) -> int:
        """Return this node's utility plus the total utility of all its descendants."""
        return self.utility + sum(
            descendant.get_total_utility() for descendant in self.children
        )

    def get_right_sibling(self):
        """Return the siblings listed after this node in its parent's children.

        An empty list is returned for a node without parent, or when the node
        is not found among its parent's children.
        """
        parent = self.parent
        if parent is None:
            return []
        siblings = parent.children
        if not siblings or self not in siblings:
            return []
        # Slicing past the last position naturally yields an empty list.
        return siblings[siblings.index(self) + 1 :]

    def __repr__(self) -> str:
        return f"{self.item_set}"

    def print_children(self, level=0):
        """Print the subtree rooted here, indenting two spaces per depth level."""
        indent = "  " * level
        print(indent + f"{self.item_set}")
        for descendant in self.children:
            descendant.print_children(level + 1)

In [10170]:
def sort_children_by_twu_and_utility(items: list[PHUNode]) -> list[PHUNode]:
    """Return the nodes ordered like ``sort_items_by_twu_and_utility`` orders items.

    Nodes whose (first) item has a positive utility come first; inside each
    group nodes are ordered by ascending TWU. The key reads one item of each
    node's itemset, so it is meant for single-item nodes.

    Args:
        items: nodes to order; each must hold a non-empty itemset.

    Returns:
        A new sorted list; the input list is not modified.
    """
    def sort_key(node: PHUNode) -> tuple:
        item = next(iter(node.item_set))
        # Group on the sign of the utility. Testing the TWU here made the
        # first key component ignore utility altogether.
        return (0 if item.utility > 0 else 1, item.twu)
    return sorted(items, key=sort_key)

In [10171]:
def build_subtree(
    node_x: PHUNode,
    database: list[Transaction],
    min_util: int,
    min_prob: float,
    collection: list[set[Item]],
):
    """Grow the subtree below ``node_x`` by joining it with its right siblings.

    A joined node is attached when both nodes pass ``is_htwui`` and the TWU
    of the items they do not share reaches ``min_util``. It is expanded
    further only when its rounded probability reaches
    ``min_prob * len(database)``. Among those, itemsets whose utility reaches
    ``min_util`` are appended to ``collection``.
    """
    if not is_htwui(node_x.item_set, database, min_util, min_prob):
        return

    expandable: list[PHUNode] = []
    for node_y in node_x.get_right_sibling():
        if not is_htwui(node_y.item_set, database, min_util, min_prob):
            continue
        # Items that belong to exactly one of the two nodes.
        unshared = node_x.item_set ^ node_y.item_set
        # EUCP pruning on the TWU of the unshared items.
        if calculate_transaction_weight_utility(unshared, database) < min_util:
            continue
        joined = node_x.item_set | node_y.item_set
        z_p, z_u, z_ru, _twu = calculate_utilities_of_item_set(joined, database)
        node_z = PHUNode(joined, z_u, z_ru, z_p, [], node_x)
        node_x.add_child(node_z)
        if round(z_p, 3) < min_prob * len(database):
            continue
        expandable.append(node_z)
        if z_u >= min_util:
            collection.append(joined)

    for node in expandable:
        build_subtree(node, database, min_util, min_prob, collection)

In [10172]:
def build_tree(
    database: list[Transaction], item_list: set[Item], min_util: int, min_prob: float
):
    """Build the itemset tree and return the qualifying itemsets.

    A single-item node is created under the root for each item whose rounded
    probability reaches ``min_prob * len(database)`` and whose TWU reaches
    ``min_util``; the item's ``twu`` attribute is set on the way (the
    caller's objects are mutated). The nodes are ordered, then every subtree
    is grown with ``build_subtree``.

    Args:
        database: the transaction database.
        item_list: the distinct items to consider.
        min_util: minimum utility threshold.
        min_prob: minimum probability ratio, scaled by the database size.

    Returns:
        The itemsets whose utility reaches ``min_util``.
    """
    root: PHUNode = PHUNode()
    collection: list[set[Item]] = list()
    min_expected_support = min_prob * len(database)
    for item in item_list:
        p, u, ru, twu = calculate_utilities_of_item_set({item}, database)
        if round(p, 3) >= min_expected_support and twu >= min_util:
            root.add_child(PHUNode({item}, u, ru, p, [], root))
            item.twu = twu
            if u >= min_util:
                collection.append({item})
    # One stable sort once all nodes exist yields the same order as
    # re-sorting after every insertion, without the repeated work.
    root.children = sort_children_by_twu_and_utility(root.children)
    for child in root.children:
        build_subtree(child, database, min_util, min_prob, collection)
    return collection

In [10173]:
# item_list = [Item(f"i{i+1}", random.randint(-10, 10)) for i in range(10)]

# database = list()
# for transaction_id in range(1, 21):
#     trans_items = set()
#     selected_items = random.sample(item_list, random.randint(1, 5))
#     for item in selected_items:
#         quantity = random.randint(1, 10)
#         probability = round(random.uniform(0.1, 1.0), 2)
#         trans_item = TransItem(item, quantity, probability)
#         trans_items.add(trans_item)

#     transaction = Transaction(transaction_id, trans_items)
#     database.append(transaction)

# database

In [10174]:
def find_database_projection(item_set: set[Item], database: list[Transaction]):
    """Return the transactions of ``database`` that contain ``item_set``.

    The result is a new list holding the same transaction objects.
    """
    return [record for record in database if record.contains_item_set(item_set)]

In [10175]:
def calculate_utility_of_item_set_in_trans(items: set[Item], trans: Transaction):
    """Sum ``utility * quantity`` in ``trans`` over every item of ``items``."""
    return sum(member.utility * trans.get_quantity_of_item(member) for member in items)

In [10176]:
def calculate_local_utility(alpha: set[Item], item: Item, database: list[Transaction]):
    """Return the local utility of ``item`` with respect to ``alpha``.

    Over the transactions containing ``alpha`` and ``item``, this sums the
    utility of ``alpha`` and the remaining utility of ``alpha``.
    """
    extended = alpha | {item}
    return sum(
        record.get_utility_of_item_set(alpha)
        + record.get_remaining_utility_of_item_set(alpha)
        for record in database
        if record.contains_item_set(extended)
    )

In [10177]:
def calculate_subtree_utility(
    alpha: set[Item], item: Item, database: list[Transaction], secondary: set[Item]
):
    """Return the subtree utility of ``item`` with respect to ``alpha``.

    The value is 0 when ``item`` does not satisfy the order condition against
    ``alpha``. Otherwise, over the transactions containing ``alpha`` and
    ``item``, it sums the utility of ``alpha``, the utility of ``item`` and
    the remaining utility of their union. ``secondary`` is accepted but not
    used.
    """
    if not check_order_item_and_set(item, alpha):
        return 0

    extended = alpha | {item}
    subtree_utility = 0
    for record in database:
        if not record.contains_item_set(extended):
            continue
        subtree_utility += (
            record.get_utility_of_item_set(alpha)
            + record.get_utility_of_item_set({item})
            + record.get_remaining_utility_of_item_set(set(extended))
        )
    return subtree_utility

In [10178]:
def search_procedure(
    alpha: set[Item],
    alpha_db: list[Transaction],
    alpha_primary: list[Item],
    alpha_secondary: list[Item],
    min_util: int,
    user_prob_threshold: float,
    result: list[set[Item]],
):
    """Recursively extend ``alpha`` with each primary item.

    For every primary item, ``beta = alpha | {item}`` is appended to
    ``result`` (once) when its utility reaches ``min_util`` and its rounded
    probability reaches ``user_prob_threshold``. The database is then
    projected on ``beta``. The secondary items of the next level are those
    whose rounded probability and local utility reach the thresholds; among
    them, those whose subtree utility reaches ``min_util`` are primary.
    """
    for pri_item in alpha_primary:
        beta: set[Item] = alpha | {pri_item}
        beta_prob, beta_util, _ru, _twu = calculate_utilities_of_item_set(beta, alpha_db)
        qualifies = beta_util >= min_util and round(beta_prob, 3) >= user_prob_threshold
        if qualifies and beta not in result:
            result.append(beta)

        beta_dp = find_database_projection(beta, alpha_db)
        beta_primary, beta_secondary = [], []
        for sec_item in alpha_secondary:
            if sec_item == pri_item:
                continue
            local_utility = calculate_local_utility(beta, sec_item, beta_dp)
            subtree_utility = calculate_subtree_utility(
                beta, sec_item, beta_dp, alpha_secondary
            )
            sec_prob, _u, _r, _t = calculate_utilities_of_item_set({sec_item}, beta_dp)
            if round(sec_prob, 3) < user_prob_threshold or local_utility < min_util:
                continue
            beta_secondary.append(sec_item)
            if subtree_utility >= min_util:
                beta_primary.append(sec_item)

        search_procedure(
            beta,
            beta_dp,
            beta_primary,
            beta_secondary,
            min_util,
            user_prob_threshold,
            result,
        )

In [10179]:
def mining(
    database: list[Transaction], item_list: list[Item], min_util: int, min_prob: float
):
    """Mine the qualifying itemsets with the primary/secondary search procedure.

    Steps:
      1. For every item, compute its probability, local utility and subtree
         utility against the empty itemset, and set its ``twu`` attribute
         (the caller's objects are mutated).
      2. Items whose rounded probability reaches ``min_prob * len(database)``
         and whose local utility reaches ``min_util`` are secondary; among
         them, those whose subtree utility reaches ``min_util`` are primary.
      3. Work on a deep copy of the database with every non-secondary item
         removed, then call ``search_procedure``.

    Args:
        database: the transaction database (left untouched).
        item_list: the distinct items to consider.
        min_util: minimum utility threshold.
        min_prob: minimum probability ratio, scaled by the database size.

    Returns:
        The list of qualifying itemsets.
    """
    user_prob_threshold = min_prob * len(database)
    alpha = set()
    primary = list()
    secondary = list()
    for i in item_list:
        p, u, ru, twu = calculate_utilities_of_item_set({i}, database)
        # The order condition used during the search reads Item.twu. Setting
        # it here removes the dependency on an earlier notebook cell.
        i.twu = twu
        lu = calculate_local_utility(alpha, i, database)
        su = calculate_subtree_utility(alpha, i, database, set(secondary))
        if round(p, 3) >= user_prob_threshold and lu >= min_util:
            secondary.append(i)
            if su >= min_util:
                primary.append(i)
    removed = set(item_list).difference(secondary)
    secondary = sort_items_by_twu_and_utility(secondary)

    # Copy after the TWU values are set so the copied items carry them.
    db = copy.deepcopy(database)
    # get_items() returns a fresh set, so discarding from it changed nothing.
    # The stored occurrences must be rebuilt to drop the removed items.
    if removed:
        for trans in db:
            trans.trans_items = {
                trans_item
                for trans_item in trans.trans_items
                if trans_item.item not in removed
            }

    result = list()
    search_procedure(
        alpha, db, primary, secondary, min_util, user_prob_threshold, result
    )
    return result

In [10180]:
def test():
    """Benchmark the PNU-list miner against the EFIM-style miner on random data.

    Two rounds are run. Each generates 40 items with utilities in [-10, 10]
    and 100 transactions of 1 to 20 items, then times
    ``preparation_procedure`` and ``mining`` once on the complete database
    (min utility 150, probability ratio 0.1). It prints the number of
    itemsets found and the elapsed time. No random seed is set, so every
    call works on different data.
    """
    for _round in range(0, 2):
        item_list = [Item(f"i{idx+1}", random.randint(-10, 10)) for idx in range(40)]
        database = list()
        for transaction_id in range(1, 101):
            trans_items = set()
            selected_items = random.sample(item_list, random.randint(1, 20))
            for item in selected_items:
                quantity = random.randint(1, 20)
                probability = round(random.uniform(0.1, 1.0), 2)
                trans_items.add(TransItem(item, quantity, probability))
            database.append(Transaction(transaction_id, trans_items))

        # Time the miners once the database is complete, not after every
        # appended transaction.
        started = datetime.datetime.now()
        a = preparation_procedure(database, item_list, 150, 0.1)
        finished = datetime.datetime.now()
        print("PHUI: " + str(len(a)) + ": " + str(finished - started))

        # build_tree / high_utility_mining_apriori can be timed the same way.
        started = datetime.datetime.now()
        d = mining(database, item_list, 150, 0.1)
        finished = datetime.datetime.now()
        print("EFIM: " + str(len(d)) + ": " + str(finished - started))

In [10181]:
import random

# Random demo data: 10 items named i1..i10 with integer utilities in [-10, 10].
# NOTE(review): no random seed is set, so every run produces different data.
item_list = [
    Item(
        f"i{i+1}",
        random.randint(-10, 10),
    )
    for i in range(10)
]
# 20 transactions with ids 1..20, each holding 5 to 10 distinct items.
database = list()
for transaction_id in range(1, 21):
    trans_items = set()
    selected_items = random.sample(item_list, random.randint(5, 10))
    for item in selected_items:
        # Quantity in [1, 10]; probability in [0.1, 1.0] rounded to 2 decimals.
        quantity = random.randint(1, 10)
        probability = round(random.uniform(0.1, 1.0), 2)
        trans_item = TransItem(item, quantity, probability)
        trans_items.add(trans_item)
    transaction = Transaction(transaction_id, trans_items)
    database.append(transaction)
# Store each item's TWU over the whole database on the item itself; the order
# condition (check_order_condition) reads Item.twu.
for i in item_list:
    i.twu = calculate_transaction_weight_utility({i}, database)

In [10182]:
# Run both miners with the same thresholds and compare their results.
min_util = 150
threshold = 0.02
rs1 = preparation_procedure(database, item_list, min_util, threshold)
print(rs1)

rs2 = mining(database, item_list, min_util, threshold)
print(rs2)

# Compare the itemsets themselves, not just how many there are: two result
# lists of equal length can still hold different itemsets.
rs1_sets = {frozenset(item_set) for item_set in rs1}
rs2_sets = {frozenset(item_set) for item_set in rs2}

if rs1_sets != rs2_sets:
    # Print the probability threshold actually used above.
    print(threshold * len(database))
    print("Only in rs1:")
    for i in rs1:
        if frozenset(i) not in rs2_sets:
            print(str(i) + ": " + str(calculate_utilities_of_item_set(i, database)))
    print("Only in rs2:")
    for i in rs2:
        if frozenset(i) not in rs1_sets:
            print(str(i) + ": " + str(calculate_utilities_of_item_set(i, database)))
else:
    print("The result is the same")

[{i4}, {i4, i7}, {i4, i10, i7}, {i4, i7, i3, i10}, {i1, i4, i7, i3, i10}, {i8, i4, i7, i3, i10}, {i4, i7, i2, i10}, {i8, i4, i7, i2, i10}, {i4, i5, i7, i10}, {i4, i7, i9, i10}, {i4, i8, i7, i10}, {i4, i7, i3}, {i4, i7, i3, i2}, {i8, i4, i3, i7, i2}, {i4, i5, i7, i3}, {i8, i5, i4, i7, i3}, {i4, i9, i7, i3}, {i4, i1, i7, i3}, {i4, i6, i7, i3}, {i4, i8, i7, i3}, {i4, i7, i2}, {i4, i5, i7, i2}, {i4, i9, i7, i2}, {i4, i1, i7, i2}, {i4, i6, i7, i2}, {i4, i8, i7, i2}, {i4, i5, i7}, {i4, i7, i9}, {i4, i8, i7}, {i4, i10}, {i4, i10, i3}, {i4, i3, i10, i2}, {i9, i4, i3, i2, i10}, {i1, i4, i3, i2, i10}, {i6, i4, i3, i2, i10}, {i8, i4, i3, i2, i10}, {i4, i5, i3, i10}, {i8, i5, i4, i3, i10}, {i4, i9, i3, i10}, {i4, i8, i3, i10}, {i4, i10, i2}, {i4, i5, i2, i10}, {i4, i10, i5}, {i4, i3}, {i4, i2, i3}, {i4, i3, i5, i2}, {i4, i3, i9, i2}, {i4, i1, i3, i2}, {i4, i6, i3, i2}, {i4, i8, i3, i2}, {i4, i5, i3}, {i4, i9, i5, i3}, {i4, i8, i5, i3}, {i4, i9, i3}, {i4, i8, i3}, {i4, i2}, {i4, i5, i2}, {i4, i5}, 