In [159]:
from collections import namedtuple

Utilities = namedtuple("Utilities", ["tid", "pu", "nu", "ru"])

class Item:
    def __init__(self, item: str, utility: int):
        self.item = item
        self.utility = utility
        self._twu = 0

    @property
    def twu(self) -> int:
        return self._twu

    @twu.setter
    def twu(self, value: int) -> None:
        self._twu = value

    def __repr__(self):
        # return f"({self.item},{self.twu})"
        return f"{self.item}"

    def __eq__(self, other):
        if isinstance(other, Item):
            return self.item == other.item and self.utility == other.utility
        return False

    def __hash__(self):
        return hash((self.item, self.utility))

class Transaction:
    def __init__(self, id: int, items_quantities: dict):
        if any(q <= 0 for q in items_quantities.values()):
            raise ValueError(f"Quantities in trans{id} must be positive integers.")
        self.id = id
        self.items_quantities = items_quantities

    def __repr__(self):
        return f"(tid = {self.id}, frequencies = {self.items_quantities})"

In [160]:
a = Item("a", 5)
b = Item("b", 2)
c = Item("c", 1)
d = Item("d", 2)
e = Item("e", 3)
f = Item("f", 1)
g = Item("g", 1)

trans1 = Transaction(1, dict(zip([a, c, d], [1, 1, 1])))
trans2 = Transaction(2, dict(zip([a, c, e, g], [2, 6, 2, 5])))
trans3 = Transaction(3, dict(zip([a, b, c, d, e, f], [1, 2, 1, 6, 1, 5])))
trans4 = Transaction(4, dict(zip([b, c, d, e], [4, 3, 3, 1])))
trans5 = Transaction(5, dict(zip([b, c, e, g], [2, 2, 1, 2])))

items = {a, b, c, d, e, f, g}
database = [trans1, trans2, trans3, trans4, trans5]

In [161]:
def check_order_condition(a: Item, b: Item):
    if a.utility < 0 and b.utility > 0:
        return True
    elif a.utility * b.utility > 0:
        return a.twu > b.twu
    return False

def check_order_item_and_set(ik: Item, X: set[Item]) -> bool:
    for i in X:
        if i != ik and check_order_condition(ik, i) == False:
            return False
    return True

def calculate_positive_utility_of_transaction(trans: Transaction):
    pu = 0;
    items : set[Item] = trans.items_quantities.keys()
    for item in items:
        if(item.utility > 0):
            pu += trans.items_quantities.get(item) * item.utility
    return pu;

def calculate_transaction_weight_utility(items: set[Item], database: list[Transaction]):
    twu = 0
    for trans in database:
        trans_items = trans.items_quantities.keys()
        if(items.issubset(trans_items)):
            twu += calculate_positive_utility_of_transaction(trans)
    return twu

def calculate_remaining_utility_of_item_set_in_trans(
    items: set[Item], trans: Transaction
):
    ru = 0
    trans_items: set[Item] = trans.items_quantities.keys()
    for item in trans_items:
        if item.utility > 0:
            if check_order_item_and_set(item, items) == True:
                if item not in items:
                    ru += item.utility * trans.items_quantities.get(item)
    return ru

def calculate_utility_of_item_set_in_trans(
    items: set[Item], trans: Transaction
):
    u = 0
    for item in items:
        quantity = trans.items_quantities.get(item)
        if quantity != None:
            u += item.utility * quantity
    return u

In [162]:
def calculate_local_utility(alpha: set[Item], item: Item, database: list[Transaction]):
    lu = 0
    for trans in database:
        if (alpha | {item}).issubset(trans.items_quantities.keys()):
            lu += calculate_utility_of_item_set_in_trans(alpha, trans) +  calculate_remaining_utility_of_item_set_in_trans(alpha, trans)
    return lu

In [163]:
def calculate_utility(items: set[Item], db: list[Transaction]):
    u = 0
    for trans in db:
        if items.issubset(trans.items_quantities.keys()):
            for i in items:
                if trans.items_quantities.get(i) != None:
                    u += i.utility * trans.items_quantities.get(i)
    return u

In [164]:
for i in items:
    i.twu = calculate_transaction_weight_utility({i}, database)
    print(str(i) + ": " + str(i.twu))

def sort_items_by_twu_and_utility(items: list[Item]) -> list[Item]:
    def sort_key(item: Item) -> tuple:
        return (0 if item.utility > 0 else 1, item.twu)

    return sorted(items, key=sort_key)


b: 61
f: 30
d: 58
e: 88
a: 65
g: 38
c: 96


In [165]:
def find_extended_item_set(all_items: set[Item], alpha: set[Item]) -> set[Item]:
    e_list = set()
    no_alpha = all_items.difference(alpha)
    for item in no_alpha:
        if check_order_item_and_set(item, alpha):
            e_list.add(item)
    return e_list

In [166]:
def calculate_subtree_utility(
    alpha: set[Item], item: Item, database: list[Transaction], secondary: set[Item]
):
    su = 0
    for trans in database:
        if (alpha | {item}).issubset(trans.items_quantities.keys()):
            u = 0
            temp: set[Item] = set(trans.items_quantities.keys())
            for i in temp:
                if check_order_item_and_set(i, set(alpha | {item})):
                    u += i.utility * trans.items_quantities[i]
            su += (
                calculate_utility_of_item_set_in_trans(alpha, trans)
                + calculate_utility_of_item_set_in_trans({item}, trans)
                + calculate_remaining_utility_of_item_set_in_trans({item}, trans)
            )
    return su

In [167]:
import copy

In [168]:
def find_database_projection(item_set: set[Item], database: list[Transaction]):
    db_projection = list()
    for trans in database:
        if item_set.issubset(trans.items_quantities.keys()):
            new_trans = Transaction(trans.id, dict())
            for item in trans.items_quantities.keys():
                if True or item not in item_set:
                    if check_order_item_and_set(item, item_set):
                        if trans.items_quantities.get(item) != None:
                            new_trans.items_quantities[item] = trans.items_quantities.get(item)
            if new_trans.items_quantities.keys():
                db_projection.append(new_trans)
    return db_projection

In [169]:
for trans in database:
    print(trans)

(tid = 1, frequencies = {a: 1, c: 1, d: 1})
(tid = 2, frequencies = {a: 2, c: 6, e: 2, g: 5})
(tid = 3, frequencies = {a: 1, b: 2, c: 1, d: 6, e: 1, f: 5})
(tid = 4, frequencies = {b: 4, c: 3, d: 3, e: 1})
(tid = 5, frequencies = {b: 2, c: 2, e: 1, g: 2})


In [170]:
def search_procedure(
    alpha: set[Item],
    alpha_db: list[Transaction],
    alpha_primary: set[Item],
    alpha_secondary: set[Item],
    min_util: int,
    result: list[set[Item]],
):
    print("alpha: " + str(alpha))
    for pri_item in alpha_primary:
        beta: set[Item] = alpha | {pri_item}
        print("Beta 1: " + str(beta))
        if calculate_utility(beta, alpha_db) >= min_util:
            if beta not in result:
                result.append(beta)
        beta_dp = find_database_projection(beta, alpha_db)

        print("BETA DP: ")
        for trans in beta_dp:
            print(trans)

        beta_secondary = set()
        beta_primary = set()
        for sec_item in alpha_secondary:
            if sec_item != pri_item:
                lu = calculate_local_utility(beta, sec_item, beta_dp)
                su = calculate_subtree_utility(beta, sec_item, beta_dp, alpha_secondary)
                print(
                    "second_item: " + str(sec_item) + ", " + str(lu) + " - " + str(su)
                )
                if lu >= min_util:
                    beta_secondary.add(sec_item)
                if su >= min_util:
                    beta_primary.add(sec_item)
        # print("BSecondary: " + str(beta_secondary))
        # print("BPrimary: " + str(beta_primary))
        print("Beta 2: " + str(beta))
        print("call recursive")
        search_procedure(beta, beta_dp, beta_primary, beta_secondary, min_util, result)

In [171]:
def mining(database: list[Transaction], item_list: set[Item], min_util: int):
    db = copy.deepcopy(database)
    alpha = set()
    secondary = {
        i for i in item_list if calculate_local_utility(alpha, i, db) >= min_util
    }
    secondary = sort_items_by_twu_and_utility(secondary)
    removed = item_list.difference(secondary)
    for trans in db:
        for item in removed:
            trans.items_quantities.pop(item, None)
    primary = {
        i
        for i in secondary
        if calculate_subtree_utility(alpha, i, db, set(secondary)) >= min_util
    }

    result = list()
    search_procedure(alpha, db, primary, secondary, min_util, result)
    return result

In [172]:
print(calculate_subtree_utility({b}, f, database, set()))

34


In [173]:
sort_items_by_twu_and_utility(items)

[f, g, d, b, a, e, c]

In [174]:
for item_set in mining(database, set(items), 28):
    print(str(item_set) + ": " + str(calculate_utility(item_set, database)))

alpha: set()
Beta 1: {b}
BETA DP: 
(tid = 3, frequencies = {a: 1, b: 2, c: 1, e: 1})
(tid = 4, frequencies = {b: 4, c: 3, e: 1})
(tid = 5, frequencies = {b: 2, c: 2, e: 1})
second_item: f, 0 - 0
second_item: g, 0 - 0
second_item: d, 0 - 0
second_item: a, 13 - 13
second_item: e, 36 - 31
second_item: c, 36 - 22
Beta 2: {b}
call recursive
alpha: {b}
Beta 1: {e, b}
BETA DP: 
(tid = 3, frequencies = {c: 1, e: 1})
(tid = 4, frequencies = {c: 3, e: 1})
(tid = 5, frequencies = {c: 2, e: 1})
second_item: c, 0 - 0
Beta 2: {e, b}
call recursive
alpha: {e, b}
Beta 1: {f}
BETA DP: 
(tid = 3, frequencies = {a: 1, b: 2, c: 1, d: 6, e: 1, f: 5})
second_item: g, 0 - 0
second_item: d, 30 - 30
second_item: b, 30 - 18
second_item: a, 30 - 14
second_item: e, 30 - 9
second_item: c, 30 - 6
Beta 2: {f}
call recursive
alpha: {f}
Beta 1: {f, d}
BETA DP: 
(tid = 3, frequencies = {a: 1, b: 2, c: 1, d: 6, e: 1})
second_item: b, 0 - 0
second_item: e, 0 - 0
second_item: a, 0 - 0
second_item: c, 0 - 0
Beta 2: {f, d}
