In [1]:
from collections import namedtuple

Utilities = namedtuple('Utilities', ['tid', 'pu', 'nu', 'ru'])

class Item:
    def __init__(self, item: str, utility: int):
        self.item = item
        self.utility = utility
        self._twu = 0

    @property
    def twu(self) -> int:
        return self._twu
    
    @twu.setter
    def twu(self, value: int) -> None:
        
        self._twu = value

    def __repr__(self):
        # return f"({self.item},{self.twu})"
        return f"{self.item}"

class Transaction:
    def __init__(self, id: int, items_quantities: dict):
        if any(q <= 0 for q in items_quantities.values()):
            raise ValueError(f"Quantities in trans{id} must be positive integers.")
        self.id = id
        self.items_quantities = items_quantities

    def __repr__(self):
        return f"(tid = {self.id}, frequencies = {self.items_quantities})"

class PNUList:
    def __init__(self, items: set[Item], utility_values: list[Utilities]):
        self.items = items
        self.utility_values = utility_values

    def add_item(self, item):
        self.items.add(item)

    def __repr__(self):
        if not self.utility_values:
            return "Empty PNU-List"

        # Column titles
        titles = ["PU", "NU", "RU"]

        # Get the number of columns from the first utility value
        if isinstance(self.utility_values[0], (list, tuple)):
            num_columns = len(self.utility_values[0])
        else:
            num_columns = 1

        # Create combined items string
        items_str = ",".join(str(item) for item in self.items)
        items_str = "(" + items_str + ")"
        
        # Calculate column widths based on utility values and titles
        value_widths = []
        for i in range(num_columns):
            max_width = max(
                len(str(row[i])) if isinstance(row, (list, tuple)) else len(str(row))
                for row in self.utility_values
            )
            # Consider width of titles and combined items
            if i == 0:
                max_width = max(max_width, len(items_str))
            else:
                max_width = max(max_width, len(titles[i - 1]))
            value_widths.append(max_width)

        # Build the table string
        result = []

        # Add border
        total_width = sum(value_widths) + 3 * num_columns + 1
        result.append("-" * total_width)

        # Add single row with all items and titles
        row = "|"
        row += f" {items_str.rjust(value_widths[0])} |"
        for i in range(1, num_columns):
            row += f" {titles[i-1].center(value_widths[i])} |"
        result.append(row)

        # Add separator
        result.append("-" * total_width)

        # Add utility values
        for utility in self.utility_values:
            row = "|"
            if isinstance(utility, (list, tuple)):
                for i, value in enumerate(utility):
                    row += f" {str(value).rjust(value_widths[i])} |"
            else:
                row += f" {str(utility).rjust(value_widths[0])} |"
            result.append(row)

        # Add bottom border
        result.append("-" * total_width)

        return "\n".join(result)

## Table 1  
**Database**

| Tid  | Item        | Quantity  |
|------|-------------|-----------|
| T₁   | a b c d     | 5 2 1 2   |
| T₂   | a c d g     | 1 1 1 3   |
| T₃   | a c f       | 1 1 1     |
| T₄   | a f g       | 1 4 2     |
| T₅   | a g         | 1 2       |
| T₆   | b c d e     | 3 2 3 1   |
| T₇   | c e         | 6 4       |
| T₈   | e f         | 1 3       |

---
## Table 2  
**Unit Utility**

| Item   | a | b | c  | d  | e  | f  | g  |
|--------|---|---|----|----|----|----|----|
| Utility| 3 | 6 | -3 | 12 | -5 | -2 | -1 |


In [2]:
a = Item("A", 3)
b = Item("B", 6)
c = Item("C", -3)
d = Item("D", 12)
e = Item("E", -5)
f = Item("F", -2)
g = Item("G", -1)

trans1 = Transaction(1, dict(zip([a, b, c, d], [5, 2, 1, 2])))
trans2 = Transaction(2, dict(zip([a, c, d, g], [1, 1, 1, 3])))
trans3 = Transaction(3, dict(zip([a, c, f], [1, 1, 1])))
trans4 = Transaction(4, dict(zip([a, f, g], [1, 4, 2])))
trans5 = Transaction(5, dict(zip([a, g], [1, 2])))
trans6 = Transaction(6, dict(zip([b, c, d, e], [3, 2, 3, 1])))
trans7 = Transaction(7, dict(zip([c, e], [6, 4])))
trans8 = Transaction(8, dict(zip([e, f], [1, 3])))

items = [a, b, c, d, e, f, g]
database = [trans1, trans2, trans3, trans4, trans5, trans6, trans7, trans8]

database

[(tid = 1, frequencies = {A: 5, B: 2, C: 1, D: 2}),
 (tid = 2, frequencies = {A: 1, C: 1, D: 1, G: 3}),
 (tid = 3, frequencies = {A: 1, C: 1, F: 1}),
 (tid = 4, frequencies = {A: 1, F: 4, G: 2}),
 (tid = 5, frequencies = {A: 1, G: 2}),
 (tid = 6, frequencies = {B: 3, C: 2, D: 3, E: 1}),
 (tid = 7, frequencies = {C: 6, E: 4}),
 (tid = 8, frequencies = {E: 1, F: 3})]

Definition 1

In [3]:
def find_all_transactions_contained_item_set(item_set: set[Item], database: list[Transaction]):
    transactionsList: list[Transaction] = []
    for trans in database:
        if item_set.issubset(trans.items_quantities.keys()):
            transactionsList.append(trans)
    return transactionsList

In [4]:
def find_max_min_avg_periodic_of_item_set(item_set: set[Item], database: list[Transaction]):
    contained_transactions = find_all_transactions_contained_item_set(item_set, database)
    trans_ids: list[int] = [trans.id for trans in contained_transactions]
    m = len(contained_transactions)
    ps: list[int] = []
    for i in range(0, m + 1):
        if(i == 0):
            ps.append(trans_ids[0]);
        elif(i == m):
            ps.append(len(database) - trans_ids[m - 1])
        else:
            ps.append(trans_ids[i] - trans_ids[i - 1])
    max_per, min_per = ps[0], ps[0]
    avg_per = len(database) / len(ps)
    for i in ps:
        if(i > max_per):
            max_per = i
        if(i < min_per):
            min_per = i
    return max_per, min_per, avg_per

<center>
Definition 4: The positive utility of X in Transaction T as:
<br></br>
PU(X,T) = Σ<sub>i<sub>k</sub>∈X,P(i<sub>k</sub>)>0</sub>(P(i<sub>k</sub>) * q<sub>k</sub>)
</center>

In [5]:
def calculate_positive_utility_of_item_set_in_trans(items: set[Item], trans: Transaction):
    pu = 0
    if(items.issubset(trans.items_quantities.keys())):
        for item in items:
            utility = item.utility 
            if(utility > 0):
                quantity = trans.items_quantities.get(item);
                if(quantity != None):
                    pu += item.utility * quantity
    return pu;

<center>
Definition 4: The positive utility of X in Database D are:
<br></br>
PU(X) = Σ<sub>(X⊆T<sub>j</sub>,T<sub>j</sub>∈D)</sub>PU(X,T<sub>j</sub>)
</center>

In [6]:
def calculate_positive_utility_of_item_set_in_database(items: set[Item], database: list[Transaction]):
    pu = 0
    for trans in database:
        pu += calculate_positive_utility_of_item_set_in_trans(items, trans)
    return pu

<center> Definition 4: The negative utility of X in Transaction T as:
<br></br>
NU(X,T) = Σ<sub>i<sub>k</sub>∈X,P(i<sub>k</sub>)<0</sub>(P(i<sub>k</sub>) * q<sub>k</sub>)
</center>



In [7]:
def calculate_negative_utility_of_item_set_in_trans(items: set[Item], trans: Transaction):
    nu = 0
    if(items.issubset(trans.items_quantities.keys())):
        for item in items:
            utility = item.utility 
            if(utility < 0):
                quantity = trans.items_quantities.get(item);
                if(quantity != None):
                    nu += item.utility * quantity
    return nu;

<center>
Definition 4: The negative utility of X in Database D are:
<br></br>
NU(X) = Σ<sub>(X⊆T<sub>j</sub>,T<sub>j</sub>∈D)</sub>NU(*X,T<sub>j</sub>)
</center>

In [8]:
def calculate_negative_utility_of_item_set_in_database(items: set[Item], database: list[Transaction]):
    nu = 0
    for trans in database:
        nu += calculate_negative_utility_of_item_set_in_trans(items, trans)
    return nu

In [9]:
def calculate_positive_utility_of_transaction(trans: Transaction):
    pu = 0;
    items : set[Item] = trans.items_quantities.keys()
    for item in items:
        # if(item.utility > 0):
        if True:
            pu += trans.items_quantities.get(item) * item.utility
    return pu;

<center>
Definition 6 (Transaction Weighted Utility TWU in N-database):
<br></br>
TWU(X) = Σ<sub>X⊆T<sub>j</sub>,T<sub>j</sub>∈D</sub>PU(T<sub>j</sub>)
</center>

In [10]:
def calculate_transaction_weight_utility(items: set[Item], database: list[Transaction]):
    twu = 0
    for trans in database:
        trans_items = trans.items_quantities.keys()
        if(items.issubset(trans_items)):
            twu += calculate_positive_utility_of_transaction(trans)
    return twu

In [11]:
# a > b
def check_order_condition(a: Item, b: Item):
    if(a.utility < 0 and b.utility > 0):
        return True
    elif(a.utility * b.utility > 0):
        return a.twu > b.twu
    return False

In [12]:
def check_order_item_and_set(ik: Item, X: set[Item]) -> bool:
    for i in X:
        if i != ik and check_order_condition(ik, i) == False:
            return False
    return True

<center>
Definition 8: The remaining utility of 𝑋 in T<sub>j</sub>
<br></br>
RU(X) = Σ<sub>X⊆T<sub>j</sub>,T<sub>j</sub>∈D</sub>RU(X, T<sub>j</sub>)</center>

In [13]:
def calculate_remaining_utility_of_item_set_in_trans(
    items: set[Item], trans: Transaction
):
    ru = 0
    trans_items: set[Item] = trans.items_quantities.keys()
    if items.issubset(trans_items):
        for item in trans_items:
            if item.utility > 0:
                if check_order_item_and_set(item, items) == True:
                    if item not in items:
                        ru += item.utility * trans.items_quantities.get(item)
    return ru

In [14]:
for i in items:
    i.twu = calculate_transaction_weight_utility({i}, database)

In [15]:
def sort_items_by_twu_and_utility(items: list[Item]) -> list[Item]:
    def sort_key(item: Item) -> tuple:
        return (0 if item.utility > 0 else 1, item.twu)
    return sorted(items, key=sort_key)

In [16]:
sort_items_by_twu_and_utility(items)

[A, B, D, F, E, G, C]

In [17]:
def calculate_remaining_utility_of_item_set_in_database(items: set[Item], database: list[Transaction]):
    ru = 0
    for trans in database:
        if(items.issubset(trans.items_quantities.keys())):
            ru += calculate_remaining_utility_of_item_set_in_trans(items, trans)
    return ru

In [18]:
def calculate_utility_of_item_set_in_database(
    items: set[Item], database: list[Transaction]
):
    u = 0
    for trans in database:
        if(items.issubset(trans.items_quantities.keys())):
            for item in items:
                u += item.utility * trans.items_quantities.get(item)
    return u

In [19]:
from itertools import chain

def gather_item_set_from_database(database: list[Transaction]) -> set[Item]:
    return set(chain.from_iterable(trans.items_quantities.keys() for trans in database))

In [20]:
def create_eucs_dict(arr: list[Item], database: list[Transaction]) -> dict[frozenset[Item], int]:
    n = len(arr)
    eucs_dict = {}
    for i in range(n):
        for j in range(n):
            if i < j:
                item_pair = frozenset({arr[i], arr[j]})
                twu_value = calculate_transaction_weight_utility(item_pair, database)
                eucs_dict[item_pair] = twu_value
    return eucs_dict

In [21]:
def find_tuple_by_trans_id(P: PNUList, target_trans_id: int) -> Utilities:
    utilities_list: list[Utilities] = P.utility_values
    for iTuple in utilities_list:
        if iTuple.tid == target_trans_id:
            return iTuple
    return None

In [22]:
def construct(P: PNUList, Px: PNUList, Py: PNUList):
    # print("start construct function")
    # print(Px)
    # print(Py)
    x = Px.items
    y = Py.items
    xy = x | y
    utilities_list: list[Utilities] = list()
    Pxy = PNUList(xy, utilities_list)
    utilities_list_of_px: list[Utilities] = Px.utility_values
    for xTuple in utilities_list_of_px:
        yTuple: Utilities = find_tuple_by_trans_id(Py, xTuple.tid)
        if yTuple is not None:
            if P.utility_values:
                pTuple: Utilities = find_tuple_by_trans_id(P, xTuple.tid)
                xyTuple: Utilities = Utilities(
                    xTuple.tid,
                    xTuple.pu + yTuple.pu - pTuple.pu,
                    xTuple.nu + yTuple.nu - pTuple.nu,
                    yTuple.ru,
                )
                utilities_list.append(xyTuple)
            else:
                # print(yTuple.pu)
                xyTuple: Utilities = Utilities(
                    xTuple.tid,
                    xTuple.pu + yTuple.pu,
                    xTuple.nu + yTuple.nu,
                    yTuple.ru,
                )
                utilities_list.append(xyTuple)
    # print(Pxy)
    # print("end construct function")
    return Pxy

In [23]:
#Y > X
def check_order_of_sets(Y: set[Item], X: set[Item]):
    for y in Y:
        if check_order_item_and_set(y, X) == False:
            return False
    return True

In [24]:
def searching_procedure(
    PList: PNUList,
    lists: list[PNUList],
    minUtil,
    minPer,
    maxPer,
    minAvg,
    maxAvg,
    database: list[Transaction],
    eucs_dict: dict[frozenset[Item], int],
    output: list[set[Item]],
):
    print("---start---")
    print("Prefix: " + str(PList.items))
    s = ""
    for i in lists:
        s += str(i.items) + " "
    print("Lists: " + str(s))

    for i in range(0, len(lists) - 1):
        XList: PNUList = lists[i]
        print("X: " + str(XList.items))
        utility = calculate_utility_of_item_set_in_database(set(XList.items), database)
        remaining_utility = calculate_remaining_utility_of_item_set_in_database(
            set(XList.items), database
        )
        min_per, max_per, avg_per = find_max_min_avg_periodic_of_item_set(
            set(XList.items), database
        )
        if (
            utility >= minUtil
            and min_per >= minPer
            and max_per <= maxPer
            and avg_per >= minAvg
            and avg_per <= maxAvg
        ):
            output.append(XList.items)
        if remaining_utility + utility >= minUtil:
            new_lists: list[PNUList] = list()
            for j in range(i + 1, len(lists)):
                YList: PNUList = lists[j]
                print("Y: " + str(YList.items))
                print(
                    str(YList.items)
                    + " > "
                    + str(XList.items)
                    + " ? "
                    + str(check_order_of_sets(YList.items, XList.items))
                )
                if check_order_of_sets(YList.items, XList.items):
                # if True:
                    x = XList.items.difference(PList.items)
                    y = YList.items.difference(PList.items)
                    key = frozenset(x | y)
                    twu_value = eucs_dict.get(key)
                    if twu_value >= minUtil:
                        ZList = construct(PList, XList, YList)
                        new_lists.append(ZList)
                        print("generate: " + str(ZList.items))
            if len(new_lists) == 0:
                print("Nothing generated")
                return
            print()
            searching_procedure(
                XList,
                new_lists,
                minUtil,
                minPer,
                maxPer,
                minAvg,
                maxAvg,
                database,
                eucs_dict,
                output,
            )

In [None]:
def preparation_procedure(
    database: list[Transaction],
    all_distinct_items: list[Item],
    minUtility,
    minPer,
    maxPer,
    minAvg,
    maxAvg,
):
    removed_list: list[Item] = list()
    # Check TWU & Periodic condition
    for item in all_distinct_items:
        item_max_per, item_min_per, item_avg_per = (
            find_max_min_avg_periodic_of_item_set({item}, database)
        )
        twu = calculate_transaction_weight_utility({item}, database)
        if (
            twu < minUtility
            or item_min_per < minPer
            or item_max_per > maxPer
            or item_avg_per < minAvg
            or item_avg_per > maxAvg
        ):
            removed_list.append(item)
        else:
            item.twu = twu

    # Remove unqualified item
    all_distinct_items = [
        item for item in all_distinct_items if item not in removed_list
    ]
    print(all_distinct_items)
    # Remove unqualified item from transaction
    for trans in database:
        for item in removed_list:
            trans.items_quantities.pop(item, None)
        if len(trans.items_quantities.keys()) == 0:
            database.remove(trans)

    # Create list[PNUList], prepare for algorithm 2
    lists: list[PNUList] = list()
    for item in all_distinct_items:
        utility_values_list: list[tuple] = list()
        pnu_list = PNUList({item}, utility_values_list)
        for trans in database:
            if {item}.issubset(trans.items_quantities.keys()):
                pu = calculate_positive_utility_of_item_set_in_trans({item}, trans)
                nu = calculate_negative_utility_of_item_set_in_trans({item}, trans)
                ru = calculate_remaining_utility_of_item_set_in_trans({item}, trans)
                utility_values: Utilities = Utilities(trans.id, pu, nu, ru)
                utility_values_list.append(utility_values)
        lists.append(pnu_list)

    # Create EUCS
    eucs_dict: dict[frozenset[Item], int] = create_eucs_dict(
        all_distinct_items, database
    )

    temp = PNUList({}, list())
    # Call algorithm 2
    output = []
    searching_procedure(
        temp,
        lists,
        minUtility,
        minPer,
        maxPer,
        minAvg,
        maxAvg,
        database,
        eucs_dict,
        output,
    )
    return output
preparation_procedure(database, items, 30, 1, 5, 1, 3)

[A, B, C, D, E, F, G]
[A, B, C, D]
---start---
Prefix: {}
Lists: {A} {B} {C} {D} 
X: {A}
Y: {B}
{B} > {A} ? True
generate: {B, A}
Y: {C}
{C} > {A} ? True
generate: {C, A}
Y: {D}
{D} > {A} ? True
generate: {D, A}

---start---
Prefix: {A}
Lists: {B, A} {C, A} {D, A} 
X: {B, A}
Y: {C, A}
{C, A} > {B, A} ? False
Y: {D, A}
{D, A} > {B, A} ? False
Nothing generated
X: {B}
Y: {C}
{C} > {B} ? True
generate: {B, C}
Y: {D}
{D} > {B} ? True
generate: {B, D}

---start---
Prefix: {B}
Lists: {B, C} {B, D} 
X: {B, C}
X: {C}


[{B}]

In [26]:
print(calculate_transaction_weight_utility({a, b}, database))
print(calculate_transaction_weight_utility({a, d}, database))
print(calculate_transaction_weight_utility({a, c}, database))

print(calculate_transaction_weight_utility({b, d}, database))
print(calculate_transaction_weight_utility({b, c}, database))

48
60
60
96
96
