In [90]:
from collections import namedtuple
import copy

Utilities = namedtuple("Utilities", ["tid", "pu", "nu", "ru"])

class Item:
    def __init__(self, item: str, utility: int):
        self.item = item
        self.utility = utility
        self._twu = 0

    @property
    def twu(self) -> int:
        return self._twu

    @twu.setter
    def twu(self, value: int) -> None:
        self._twu = value

    def __repr__(self):
        # return f"({self.item},{self.twu})"
        return f"{self.item}"

    def __eq__(self, other):
        if isinstance(other, Item):
            return self.item == other.item and self.utility == other.utility
        return False

    def __hash__(self):
        return hash((self.item, self.utility))


class Transaction:
    def __init__(self, id: int, items_quantities: dict):
        if any(q <= 0 for q in items_quantities.values()):
            raise ValueError(f"Quantities in trans{id} must be positive integers.")
        self.id = id
        self.items_quantities = items_quantities

    def __repr__(self):
        return f"(tid = {self.id}, frequencies = {self.items_quantities})"

class AbstractList:
    def __init__(self, items: set[Item], utility_values: list[Utilities]):
        self.items = items
        self.utility_values = utility_values

    def get_ru(self):
        ru = 0
        for i in self.utility_values:
            ru += i.ru
        return ru

    def get_pu(self):
        pu = 0
        for i in self.utility_values:
            pu += i.pu
        return pu


class PNUList(AbstractList):
    def __init__(self, items: set[Item], utility_values: list[Utilities]):
        super().__init__(items, utility_values)

    def __repr__(self):
        if not self.utility_values:
            return "Empty PNU-List"

        # Column titles
        titles = ["PU", "NU", "RU"]

        # Get the number of columns from the first utility value
        if isinstance(self.utility_values[0], (list, tuple)):
            num_columns = len(self.utility_values[0])
        else:
            num_columns = 1

        # Create combined items string
        items_str = ",".join(str(item) for item in self.items)
        items_str = "(" + items_str + ")"

        # Calculate column widths based on utility values and titles
        value_widths = []
        for i in range(num_columns):
            max_width = max(
                len(str(row[i])) if isinstance(row, (list, tuple)) else len(str(row))
                for row in self.utility_values
            )
            # Consider width of titles and combined items
            if i == 0:
                max_width = max(max_width, len(items_str))
            else:
                max_width = max(max_width, len(titles[i - 1]))
            value_widths.append(max_width)

        # Build the table string
        result = []

        # Add border
        total_width = sum(value_widths) + 3 * num_columns + 1
        result.append("-" * total_width)

        # Add single row with all items and titles
        row = "|"
        row += f" {items_str.rjust(value_widths[0])} |"
        for i in range(1, num_columns):
            row += f" {titles[i-1].center(value_widths[i])} |"
        result.append(row)

        # Add separator
        result.append("-" * total_width)

        # Add utility values
        for utility in self.utility_values:
            row = "|"
            if isinstance(utility, (list, tuple)):
                for i, value in enumerate(utility):
                    row += f" {str(value).rjust(value_widths[i])} |"
            else:
                row += f" {str(utility).rjust(value_widths[0])} |"
            result.append(row)

        # Add bottom border
        result.append("-" * total_width)
        return "\n".join(result)

# Subclass MList inheriting from AbstractList
class MList(AbstractList):

    def __init__(
        self,
        items: set[Item],
        true_items: set[Item],
        prefix: PNUList,
        utility_values: list[Utilities],
        ru: int,
        pu: int,
    ):
        super().__init__(items, utility_values)
        self.true_items = true_items
        self.prefix = prefix 
        self.ru = ru
        self.pu = pu

    def __repr__(self):
        return (f"MList(items={repr(self.items)}, "
                f"true_items={repr(self.true_items)}, "
                f"utility_values={repr(self.utility_values)}, "
                f"ru={self.ru}, pu={self.pu})")

## Table 1  
**Database**

| Tid  | Item        | Quantity  |
|------|-------------|-----------|
| T₁   | a b c d     | 5 2 1 2   |
| T₂   | a c d g     | 1 1 1 3   |
| T₃   | a c f       | 1 1 1     |
| T₄   | a f g       | 1 4 2     |
| T₅   | a g         | 1 2       |
| T₆   | b c d e     | 3 2 3 1   |
| T₇   | c e         | 6 4       |
| T₈   | e f         | 1 3       |

---
## Table 2  
**Unit Utility**

| Item   | a | b | c  | d  | e  | f  | g  |
|--------|---|---|----|----|----|----|----|
| Utility| 3 | 6 | -3 | 12 | -5 | -2 | -1 |


In [91]:
a = Item("A", 3)
b = Item("B", 6)
c = Item("C", -3)
d = Item("D", 12)
e = Item("E", -5)
f = Item("F", -2)
g = Item("G", -1)

trans1 = Transaction(1, dict(zip([a, b, c, d], [5, 2, 1, 2])))
trans2 = Transaction(2, dict(zip([a, c, d, g], [1, 1, 1, 3])))
trans3 = Transaction(3, dict(zip([a, c, f], [1, 1, 1])))
trans4 = Transaction(4, dict(zip([a, f, g], [1, 4, 2])))
trans5 = Transaction(5, dict(zip([a, g], [1, 2])))
trans6 = Transaction(6, dict(zip([b, c, d, e], [3, 2, 3, 1])))
trans7 = Transaction(7, dict(zip([c, e], [6, 4])))
trans8 = Transaction(8, dict(zip([e, f], [1, 3])))

items = [a, b, c, d, e, f, g]
database = [trans1, trans2, trans3, trans4, trans5, trans6, trans7, trans8]
new_database = copy.deepcopy(database)
database

[(tid = 1, frequencies = {A: 5, B: 2, C: 1, D: 2}),
 (tid = 2, frequencies = {A: 1, C: 1, D: 1, G: 3}),
 (tid = 3, frequencies = {A: 1, C: 1, F: 1}),
 (tid = 4, frequencies = {A: 1, F: 4, G: 2}),
 (tid = 5, frequencies = {A: 1, G: 2}),
 (tid = 6, frequencies = {B: 3, C: 2, D: 3, E: 1}),
 (tid = 7, frequencies = {C: 6, E: 4}),
 (tid = 8, frequencies = {E: 1, F: 3})]

Definition 1

In [92]:
def find_all_transactions_contained_item_set(
    item_set: set[Item], db: list[Transaction]
):
    transactionsList: list[Transaction] = list()
    for trans in db:
        if item_set.issubset(trans.items_quantities.keys()):
            transactionsList.append(trans)
    return transactionsList

In [93]:
def find_max_min_avg_periodic_of_item_set(item_set: set[Item], db: list[Transaction]):
    contained_transactions = find_all_transactions_contained_item_set(item_set, db)
    trans_ids: list[int] = [trans.id for trans in contained_transactions]
    m = len(contained_transactions)
    ps: list[int] = list()
    for i in range(0, m + 1):
        if(i == 0):
            ps.append(trans_ids[0]);
        elif(i == m):
            ps.append(len(db) - trans_ids[m - 1])
        else:
            ps.append(trans_ids[i] - trans_ids[i - 1])
    max_per, min_per = ps[0], ps[0]
    avg_per = len(db) / len(ps)
    for i in ps:
        if(i > max_per):
            max_per = i
        if(i < min_per):
            min_per = i
    return max_per, min_per, avg_per

<center>
Definition 4: The positive utility of X in Transaction T as:
<br></br>
PU(X,T) = Σ<sub>i<sub>k</sub>∈X,P(i<sub>k</sub>)>0</sub>(P(i<sub>k</sub>) * q<sub>k</sub>)
</center>

In [94]:
def calculate_positive_utility_of_item_set_in_trans(items: set[Item], trans: Transaction):
    pu = 0
    if(items.issubset(trans.items_quantities.keys())):
        for item in items:
            utility = item.utility 
            if(utility > 0):
                quantity = trans.items_quantities.get(item);
                if(quantity != None):
                    pu += item.utility * quantity
    return pu;

<center>
Definition 4: The positive utility of X in Database D are:
<br></br>
PU(X) = Σ<sub>(X⊆T<sub>j</sub>,T<sub>j</sub>∈D)</sub>PU(X,T<sub>j</sub>)
</center>

In [95]:
def calculate_positive_utility_of_item_set_in_database(items: set[Item], database: list[Transaction]):
    pu = 0
    for trans in database:
        pu += calculate_positive_utility_of_item_set_in_trans(items, trans)
    return pu

<center> Definition 4: The negative utility of X in Transaction T as:
<br></br>
NU(X,T) = Σ<sub>i<sub>k</sub>∈X,P(i<sub>k</sub>)<0</sub>(P(i<sub>k</sub>) * q<sub>k</sub>)
</center>



In [96]:
def calculate_negative_utility_of_item_set_in_trans(items: set[Item], trans: Transaction):
    nu = 0
    if(items.issubset(trans.items_quantities.keys())):
        for item in items:
            utility = item.utility 
            if(utility < 0):
                quantity = trans.items_quantities.get(item);
                if(quantity != None):
                    nu += item.utility * quantity
    return nu;

<center>
Definition 4: The negative utility of X in Database D are:
<br></br>
NU(X) = Σ<sub>(X⊆T<sub>j</sub>,T<sub>j</sub>∈D)</sub>NU(*X,T<sub>j</sub>)
</center>

In [97]:
def calculate_negative_utility_of_item_set_in_database(items: set[Item], database: list[Transaction]):
    nu = 0
    for trans in database:
        nu += calculate_negative_utility_of_item_set_in_trans(items, trans)
    return nu

In [98]:
def calculate_positive_utility_of_transaction(trans: Transaction):
    pu = 0;
    items : set[Item] = trans.items_quantities.keys()
    for item in items:
        if(item.utility > 0):
            pu += trans.items_quantities.get(item) * item.utility
    return pu;

<center>
Definition 6 (Transaction Weighted Utility TWU in N-database):
<br></br>
TWU(X) = Σ<sub>X⊆T<sub>j</sub>,T<sub>j</sub>∈D</sub>PU(T<sub>j</sub>)
</center>

In [99]:
def calculate_transaction_weight_utility(items: set[Item], database: list[Transaction]):
    twu = 0
    for trans in database:
        trans_items = trans.items_quantities.keys()
        if(items.issubset(trans_items)):
            twu += calculate_positive_utility_of_transaction(trans)
    return twu

In [100]:
# a > b
def check_order_condition(a: Item, b: Item):
    if(a.utility < 0 and b.utility > 0):
        return True
    elif(a.utility * b.utility > 0):
        return a.twu > b.twu
    return False

In [101]:
def check_order_item_and_set(ik: Item, X: set[Item]) -> bool:
    for i in X:
        if i != ik and check_order_condition(ik, i) == False:
            return False
    return True

<center>
Definition 8: The remaining utility of 𝑋 in T<sub>j</sub>
<br></br>
RU(X) = Σ<sub>X⊆T<sub>j</sub>,T<sub>j</sub>∈D</sub>RU(X, T<sub>j</sub>)</center>

In [102]:
def calculate_remaining_utility_of_item_set_in_trans(
    items: set[Item], trans: Transaction
):
    ru = 0
    trans_items: set[Item] = trans.items_quantities.keys()
    for item in trans_items:
        if item.utility > 0:
            if check_order_item_and_set(item, items) == True:
                if item not in items:
                    ru += item.utility * trans.items_quantities.get(item)
    return ru

In [103]:
for i in items:
    i.twu = calculate_transaction_weight_utility({i}, database)

In [104]:
def sort_items_by_twu_and_utility(items: list[Item]) -> list[Item]:
    def sort_key(item: Item) -> tuple:
        return (0 if item.utility > 0 else 1, item.twu)
    return sorted(items, key=sort_key)

In [105]:
items = sort_items_by_twu_and_utility(items)

In [106]:
def calculate_remaining_utility_of_item_set_in_database(items: set[Item], database: list[Transaction]):
    ru = 0
    for trans in database:
        if(items.issubset(trans.items_quantities.keys())):
            ru += calculate_remaining_utility_of_item_set_in_trans(items, trans)
    return ru

In [107]:
def calculate_utility_of_item_set_in_database(
    items: set[Item], database: list[Transaction]
):
    u = 0
    for trans in database:
        if(items.issubset(trans.items_quantities.keys())):
            for item in items:
                u += item.utility * trans.items_quantities.get(item)
    return u

In [108]:
from itertools import chain

def gather_item_set_from_database(database: list[Transaction]) -> set[Item]:
    return set(chain.from_iterable(trans.items_quantities.keys() for trans in database))

In [109]:
def create_eucs_dict(arr: list[Item], database: list[Transaction]) -> dict[frozenset[Item], int]:
    n = len(arr)
    eucs_dict = {}
    for i in range(n):
        for j in range(n):
            if i < j:
                item_pair = frozenset({arr[i], arr[j]})
                twu_value = calculate_transaction_weight_utility(item_pair, database)
                eucs_dict[item_pair] = twu_value
    return eucs_dict

In [110]:
def find_tuple_by_trans_id(P: PNUList, target_trans_id: int) -> Utilities:
    utilities_list: list[Utilities] = P.utility_values
    for iTuple in utilities_list:
        if iTuple.tid == target_trans_id:
            return iTuple
    return None

In [111]:
def construct(P: AbstractList, Px: AbstractList, Py: AbstractList):
    # print("start construct function")
    # print(Px)
    # print(Py)
    x = Px.items
    y = Py.items
    xy = x | y
    utilities_list: list[Utilities] = list()
    Pxy = AbstractList(xy, utilities_list)
    utilities_list_of_px: list[Utilities] = Px.utility_values
    for xTuple in utilities_list_of_px:
        yTuple: Utilities = find_tuple_by_trans_id(Py, xTuple.tid)
        if yTuple is not None:
            if P.utility_values:
                pTuple: Utilities = find_tuple_by_trans_id(P, xTuple.tid)
                xyTuple: Utilities = Utilities(
                    xTuple.tid,
                    xTuple.pu + yTuple.pu - pTuple.pu,
                    xTuple.nu + yTuple.nu - pTuple.nu,
                    yTuple.ru,
                )
                utilities_list.append(xyTuple)
            else:
                xyTuple: Utilities = Utilities(
                    xTuple.tid,
                    xTuple.pu + yTuple.pu,
                    xTuple.nu + yTuple.nu,
                    yTuple.ru,
                )
                utilities_list.append(xyTuple)
    return Pxy

In [112]:
#Y > X
def check_order_of_sets(Y: set[Item], X: set[Item]):
    for y in Y:
        if y not in X and check_order_item_and_set(y, X) == False:
            return False
    return True

In [113]:
def generate_mlist(X: PNUList, Y: PNUList, P: PNUList, du: int):
    z = X.items | Y.items
    mlist = MList(z, Y.items, P, Y.utility_values, du, 0)
    return mlist

In [114]:
def searching_procedure(
    PList: PNUList,
    lists: list[PNUList],
    minUtil,
    minPer,
    maxPer,
    minAvg,
    maxAvg,
    database: list[Transaction],
    eucs_dict: dict[frozenset[Item], int],
    output: list[set[Item]],
):
    for i in range(0, len(lists) - 1):
        XList: PNUList = lists[i]
        # print("X: " + str(XList.items))
        utility = calculate_utility_of_item_set_in_database(set(XList.items), database)
        remaining_utility = XList.get_ru()
        max_per, min_per, avg_per = find_max_min_avg_periodic_of_item_set(
            set(XList.items), database
        )
       
        if (
            utility >= minUtil
            and min_per >= minPer
            and max_per <= maxPer
            and avg_per >= minAvg
            and avg_per <= maxAvg
        ):
            output.append(XList.items)
        if (
            remaining_utility + utility >= minUtil
            and max_per <= maxPer
            and avg_per >= minAvg
            and avg_per <= maxAvg
        ):
            new_lists: list[PNUList] = list()
            for j in range(i + 1, len(lists)):
                YList: PNUList = lists[j]
                if True:
                    x = XList.items.difference(PList.items)
                    y = YList.items.difference(PList.items)
                    key = frozenset(x | y)
                    twu_value = eucs_dict.get(key)
                    if twu_value >= minUtil:
                        ZList = construct(PList, XList, YList)
                        new_lists.append(ZList)
                        print(
                            str(XList.items)
                            + " U "
                            + str(YList.items)
                            + " = "
                            + str(ZList.items)
                        )
            print()
            searching_procedure(
                XList,
                new_lists,
                minUtil,
                minPer,
                maxPer,
                minAvg,
                maxAvg,
                database,
                eucs_dict,
                output,
            )

In [115]:
def calculate_dynamic_upper_bound(
    Y: AbstractList, X: AbstractList, database: list[Transaction]
):
    x = X.items - Y.items
    return (
        Y.get_ru() + Y.get_pu() + calculate_utility_of_item_set_in_database(x, database)
    )

In [116]:
def searching_procedure_plus(
    PList: AbstractList,
    lists: list[AbstractList],
    minUtil,
    minPer,
    maxPer,
    minAvg,
    maxAvg,
    db: list[Transaction],
    eucs_dict: dict[frozenset[Item], int],
    output: list[set[Item]],
):
    for i in range(0, len(lists) - 1):
        XList: AbstractList = lists[i]
        utility = calculate_utility_of_item_set_in_database(set(XList.items), db)
        remaining_utility = XList.get_ru()
        max_per, min_per, avg_per = find_max_min_avg_periodic_of_item_set(
            set(XList.items), db
        )
        if (
            utility >= minUtil
            and min_per >= minPer
            and max_per <= maxPer
            and avg_per >= minAvg
            and avg_per <= maxAvg
        ):
            output.append(XList.items)
        if (
            remaining_utility + utility >= minUtil
            and max_per <= maxPer
            and avg_per >= minAvg
            and avg_per <= maxAvg
        ):
            new_lists: list[AbstractList] = list()
            for j in range(i + 1, len(lists)):
                YList: AbstractList = lists[j]
                x = XList.items.difference(PList.items)
                y = YList.items.difference(PList.items)
                key = frozenset(x | y)
                twu_value = eucs_dict.get(key)
                if twu_value >= minUtil:
                    du = calculate_dynamic_upper_bound(YList, XList, db)
                    if du >= minUtil:
                        if isinstance(YList, MList):
                            ZList = construct(YList.prefix, XList, YList)
                            print(
                                str(XList.items)
                                + " U "
                                + str(YList.items)
                                + " = "
                                + str(ZList.items)
                            )
                            new_lists.append(ZList)
                        else:
                            ZList = construct(PList, XList, YList)
                            print(
                                str(XList.items)
                                + " U "
                                + str(YList.items)
                                + " = "
                                + str(ZList.items)
                            )
                            new_lists.append(ZList)
                    else:
                        if isinstance(YList, MList):
                            ZMlist = generate_mlist(XList, YList, None, du)
                            new_lists.append(ZMlist)
                        else:
                            ZMlist = generate_mlist(XList, YList, PList, du)
                        new_lists.append(ZMlist)
            print()
            searching_procedure_plus(
                XList,
                new_lists,
                minUtil,
                minPer,
                maxPer,
                minAvg,
                maxAvg,
                db,
                eucs_dict,
                output,
            )

In [117]:
def preparation_procedure(
    db: list[Transaction],
    item_list: list[Item],
    minUtility,
    minPer,
    maxPer,
    minAvg,
    maxAvg,
    isPlus: bool
):
    removed_list: list[Item] = list()
    # Check TWU & Periodic condition
    for item in item_list:
        item_max_per, item_min_per, item_avg_per = (
            find_max_min_avg_periodic_of_item_set({item}, db)
        )
        twu = calculate_transaction_weight_utility({item}, db)
        if (
            twu < minUtility
            or item_min_per < minPer
            or item_max_per > maxPer
            or item_avg_per < minAvg
            or item_avg_per > maxAvg
        ):
            removed_list.append(item)
        else:
            item.twu = twu

    # Remove unqualified item
    new_distinct_items = [item for item in item_list if item not in removed_list]
    newDb = copy.deepcopy(db)

    # Remove unqualified item from transaction
    for trans in newDb:
        for item in removed_list:
            trans.items_quantities.pop(item, None)
        if len(trans.items_quantities.keys()) == 0:
            newDb.remove(trans)

    # Create list[PNUList], prepare for algorithm 2
    lists: list[AbstractList] = list()

    for item in new_distinct_items:
        utility_values_list: list[tuple] = list()
        pnu_list = AbstractList({item}, utility_values_list)
        for trans in newDb:
            if {item}.issubset(trans.items_quantities.keys()):
                pu = calculate_positive_utility_of_item_set_in_trans({item}, trans)
                nu = calculate_negative_utility_of_item_set_in_trans({item}, trans)
                ru = calculate_remaining_utility_of_item_set_in_trans({item}, trans)
                utility_values: Utilities = Utilities(trans.id, pu, nu, ru)
                utility_values_list.append(utility_values)
        lists.append(pnu_list)

    # Create EUCS
    eucs_dict: dict[frozenset[Item], int] = create_eucs_dict(
        new_distinct_items, database
    )

    root = AbstractList({}, list())
    # Call algorithm 2
    output = []
    if(isPlus == True):
        searching_procedure_plus(
            root,
            lists,
            minUtility,
            minPer,
            maxPer,
            minAvg,
            maxAvg,
            newDb,
            eucs_dict,
            output,
        )
    else:
        searching_procedure(
            root,
            lists,
            minUtility,
            minPer,
            maxPer,
            minAvg,
            maxAvg,
            newDb,
            eucs_dict,
            output,
        )
    return output

In [118]:
preparation_procedure(database, items, 30, 1, 5, 1, 3, True)

{A} U {B} = {A, B}
{A} U {D} = {A, D}

{A, D} U {A, C} = {C, A, D}

{B} U {D} = {B, D}
{B} U {C} = {C, B}

{B, D} U {C, B} = {C, B, D}

{D} U {C} = {C, D}



[{A, D}, {B}, {B, D}, {D}]

In [119]:
preparation_procedure(database, items, 30, 1, 5, 1, 3, False)

{A} U {B} = {A, B}
{A} U {D} = {A, D}
{A} U {C} = {A, C}

{A, D} U {A, C} = {C, A, D}

{B} U {D} = {B, D}
{B} U {C} = {C, B}

{B, D} U {C, B} = {C, B, D}

{D} U {C} = {C, D}



[{A, D}, {B}, {B, D}, {D}]