In [1]:
import numpy as np
import csv

# Zadanie 1

## Apriori

In [2]:
def support(X, T):
    """Return the fraction of rows of ``T`` that contain every item of ``X``.

    Parameters
    ----------
    X : set of int
        Column indices of ``T`` that make up the itemset.
    T : numpy.ndarray
        2-D matrix with one row per transaction and one column per item.
        A row counts as a match when the sum of its entries in the columns
        of ``X`` equals ``len(X)``; for a 0/1 matrix that means the row
        contains every item of ``X``.

    Returns
    -------
    numpy.float64
        Share of matching rows. An empty ``X`` matches every row (1.0) and
        a matrix without rows yields 0.0 instead of dividing by zero.
    """
    (N, M) = T.shape
    if N == 0:
        return np.float64(0.0)

    # Explicit integer dtype so an empty itemset still gives a valid index array.
    columns = np.fromiter(X, dtype=np.intp, count=len(X))
    indicator = np.zeros(M)
    np.put(indicator, columns, 1)

    # Row-wise count of the selected columns; a full count means "all present".
    matches = np.dot(T, indicator) == len(X)
    # mean() keeps the numpy scalar return type of the original sum(D) / N.
    return matches.mean()

In [3]:
def is_frequent(cand, T, min_supp):
    """Tell whether itemset ``cand`` reaches the support threshold ``min_supp`` in ``T``."""
    cand_support = support(cand, T)
    return cand_support >= min_supp

In [4]:
def frequent_objects(T, min_supp):
    """List the items whose occurrence count divided by ``len(T)`` is at least ``min_supp``.

    Every occurrence of an item inside a transaction is counted. The result
    keeps the order in which items were first encountered.
    """
    total = len(T)

    occurrences = {}
    for transaction in T:
        for item in transaction:
            occurrences[item] = occurrences.get(item, 0) + 1

    frequent = []
    for item, seen in occurrences.items():
        if seen / total >= min_supp:
            frequent.append(item)
    return frequent

In [5]:
def reduce_transactions(T, F):
    """Encode transactions as a ``len(T) x len(F)`` float matrix of zeros and ones.

    Entry ``[i, j]`` is 1 when ``F[j]`` is contained in the i-th transaction,
    otherwise it stays 0.
    """
    n_rows, n_cols = len(T), len(F)
    matrix = np.zeros((n_rows, n_cols))
    for row, transaction in enumerate(T):
        for col, item in enumerate(F):
            if item in transaction:
                matrix[row, col] = 1
    return matrix

In [6]:
def frequent_sets(T, F, min_supp=0.4):
    """Enumerate itemsets level by level, keeping those accepted by ``is_frequent``.

    Parameters
    ----------
    T : numpy.ndarray
        Transaction matrix handed unchanged to ``is_frequent``.
    F : sequence
        Only its length is used: the first level consists of the singletons
        ``{0}, ..., {len(F) - 1}``, which are added to the result without
        being re-checked.
    min_supp : float
        Threshold handed to ``is_frequent`` for every candidate.

    Returns
    -------
    list of set
        Itemsets (sets of column indices) ordered by size: all singletons,
        then the accepted pairs, and so on.
    """
    level = [{i} for i in range(len(F))]
    res = []
    while level:
        res += level
        candidates = []
        # Frozenset keys give constant-time duplicate detection; the original
        # scanned the candidate list for every pair.
        seen = set()
        for i in range(len(level)):
            s1 = level[i]
            for j in range(i + 1, len(level)):
                union = s1 | level[j]
                # All sets of one level have the same size, so a union that is
                # one element larger means the two sets differ in exactly one item.
                if len(union) != len(s1) + 1:
                    continue
                key = frozenset(union)
                if key in seen:
                    continue
                seen.add(key)
                candidates.append(union)
        level = [cand for cand in candidates if is_frequent(cand, T, min_supp)]
    return res

In [7]:
def is_confident(A, B, T, min_conf):
    """Tell whether the rule ``A -> B`` reaches the confidence ``min_conf`` in ``T``.

    The confidence is ``support(A | B, T) / support(A, T)``. A rule whose
    antecedent has zero support is reported as not confident instead of
    dividing by zero.
    """
    antecedent_support = support(A, T)
    if antecedent_support == 0:
        return False
    return support(A | B, T) / antecedent_support >= min_conf

In [8]:
def assoc_rules(S, T, min_conf=0.75):
    """Derive antecedent/consequent pairs from the itemsets in ``S``.

    Starting from ``(itemset, set())`` for every itemset, each round moves one
    item from the antecedent to the consequent and keeps the pairs accepted by
    ``is_confident``; kept pairs seed the next round.

    Parameters
    ----------
    S : iterable of set
        Itemsets (sets of column indices of ``T``). They are never modified.
    T : numpy.ndarray
        Transaction matrix handed unchanged to ``is_confident``.
    min_conf : float
        Threshold handed to ``is_confident``.

    Returns
    -------
    list of (set, set)
        All pairs ``(A, B)`` produced, including the seeds with an empty
        ``B`` and any accepted pair whose ``A`` became empty. Each distinct
        pair appears once per round.
    """
    res = []
    level = [(s, set()) for s in S]
    while level:
        res += level
        next_level = []
        # The same (A, B) can be reached by moving items in a different order;
        # remember what was already examined so it is neither re-tested nor duplicated.
        seen = set()
        for (A, B) in level:
            # Snapshot the antecedent and build new sets rather than removing
            # and re-adding items on the very set being iterated.
            for x in tuple(A):
                new_A = A - {x}
                new_B = B | {x}
                key = (frozenset(new_A), frozenset(new_B))
                if key in seen:
                    continue
                seen.add(key)
                if is_confident(new_A, new_B, T, min_conf):
                    next_level.append((new_A, new_B))
        level = next_level
    return res

In [9]:
def clean_rules(R, F, ids_to_names):
    """Translate index-based rules into name-based rules.

    Pairs with an empty antecedent or an empty consequent are dropped. Every
    remaining index ``i`` is looked up as ``ids_to_names[F[i]]``.
    """
    def to_names(indices):
        return {ids_to_names[F[i]] for i in indices}

    return [
        (to_names(A), to_names(B))
        for (A, B) in R
        if len(A) != 0 and len(B) != 0
    ]

## Simple example

In [270]:
# Read one transaction per CSV row.
all_transactions = []

# newline='' is the mode the csv module documents for files handed to csv.reader.
with open('Lista06Dane0/exmpl.csv', newline='') as file:
    csv_reader = csv.reader(file, delimiter=',')
    for row in csv_reader:
        # Append to the list initialised above (the original appended to the
        # misspelled, never-initialised name `all_transations`).
        all_transactions.append(row)

# Sorting makes the name <-> id mapping identical on every run; a bare set of
# strings has no stable iteration order between interpreter sessions.
products_names = sorted(set(item for sublist in all_transactions for item in sublist))
names_to_ids = {name: idx for idx, name in enumerate(products_names)}
ids_to_names = dict(enumerate(products_names))
# Each transaction becomes the set of integer ids of its items.
T = [{names_to_ids[item] for item in tr} for tr in all_transactions]

In [271]:
# Keep the items whose relative frequency is at least 0.4, encode the
# transactions as a 0/1 matrix over those items, then enumerate the itemsets
# that pass the same 0.4 support threshold.
F = frequent_objects(T, 0.4)
T_red= reduce_transactions(T, F)
S = frequent_sets(T_red, F, 0.4)

In [272]:
# Derive rules with confidence >= 0.75 from the itemsets, then map column
# indices back to item names (clean_rules drops pairs with an empty side).
R = assoc_rules(S, T_red, 0.75)
R_clean = clean_rules(R, F, ids_to_names)
print(R_clean)

[({'lettuce'}, {'tomatoes'})]


# Zadanie 2

In [273]:
# Read one transaction per line of the space-separated file.
all_transactions = []

# newline='' is the mode the csv module documents for files handed to csv.reader.
with open('Lista06Dane1/retail.dat', newline='') as file:
    csv_reader = csv.reader(file, delimiter=' ')
    for row in csv_reader:
        # Append to the list initialised above (the original appended to the
        # misspelled, never-initialised name `all_transations`).
        # Dropping empty fields removes the '' produced by a trailing space,
        # like the original row[:-1], but does not lose a real item when a
        # line has no trailing space.
        # NOTE(review): assumes empty fields are never meaningful items - confirm.
        all_transactions.append([item for item in row if item])

# Sorting makes the name <-> id mapping identical on every run; a bare set of
# strings has no stable iteration order between interpreter sessions.
products_names = sorted(set(item for sublist in all_transactions for item in sublist))
names_to_ids = {name: idx for idx, name in enumerate(products_names)}
ids_to_names = dict(enumerate(products_names))
# Each transaction becomes the set of integer ids of its items.
T = [{names_to_ids[item] for item in tr} for tr in all_transactions]

In [274]:
# Keep the items whose relative frequency is at least 0.05, encode the
# transactions as a 0/1 matrix over those items, then enumerate the itemsets
# that pass the same 0.05 support threshold.
F = frequent_objects(T, 0.05)
T_red= reduce_transactions(T, F)
S = frequent_sets(T_red, F, 0.05)

In [275]:
# Derive rules with confidence >= 0.75 from the itemsets, then map column
# indices back to item names (clean_rules drops pairs with an empty side).
R = assoc_rules(S, T_red, 0.75)
R_clean = clean_rules(R, F, ids_to_names)
print(R_clean)

[({'41'}, {'39'}), ({'38', '48'}, {'39'}), ({'41', '48'}, {'39'})]


# Zadanie 3

In [276]:
# Read one transaction per line of the space-separated file.
all_transactions = []

# newline='' is the mode the csv module documents for files handed to csv.reader.
with open('Lista06Dane2/kosarak.dat', newline='') as file:
    csv_reader = csv.reader(file, delimiter=' ')
    for row in csv_reader:
        # Append to the list initialised above (the original appended to the
        # misspelled, never-initialised name `all_transations`).
        # Dropping empty fields removes the '' produced by a trailing space,
        # like the original row[:-1], but does not lose a real item when a
        # line has no trailing space.
        # NOTE(review): assumes empty fields are never meaningful items - confirm.
        all_transactions.append([item for item in row if item])

# Sorting makes the name <-> id mapping identical on every run; a bare set of
# strings has no stable iteration order between interpreter sessions.
products_names = sorted(set(item for sublist in all_transactions for item in sublist))
names_to_ids = {name: idx for idx, name in enumerate(products_names)}
ids_to_names = dict(enumerate(products_names))
# Each transaction becomes the set of integer ids of its items.
T = [{names_to_ids[item] for item in tr} for tr in all_transactions]

In [277]:
# Keep the items whose relative frequency is at least 0.05, encode the
# transactions as a 0/1 matrix over those items, then enumerate the itemsets
# that pass the same 0.05 support threshold.
F = frequent_objects(T, 0.05)
T_red= reduce_transactions(T, F)
S = frequent_sets(T_red, F, 0.05)

In [279]:
# Derive rules with confidence >= 0.75 from the itemsets, then map column
# indices back to item names (clean_rules drops pairs with an empty side).
R = assoc_rules(S, T_red, 0.75)
R_clean = clean_rules(R, F, ids_to_names)
print(R_clean)

[({'11'}, {'6'}), ({'27'}, {'6'}), ({'7'}, {'6'}), ({'218'}, {'6'}), ({'11', '1'}, {'6'}), ({'3', '11'}, {'6'}), ({'7', '11'}, {'6'}), ({'7', '6'}, {'11'}), ({'11', '218'}, {'6'}), ({'6', '218'}, {'11'})]


# Zadanie 4

## D01

In [84]:
all_transactions = {}

# Read the file as raw bytes and skip its first line.
# NOTE(review): the first line is presumably a header - confirm.
with open('Lista06Dane3/D01', 'rb') as file:
    content = file.readlines()[1:]

for x in content:
    # Decode the bytes instead of calling str() on them: str(b'...') returns
    # the "b'...'" repr, which glues b' onto the first field and ' onto the
    # last one. latin-1 maps every byte to a character, so decoding cannot fail.
    line = x.strip().decode('latin-1').split(';')
    if line == ['']:
        # Blank line (e.g. at the end of the file): nothing to parse.
        continue
    customer_id = int(line[1])
    product_id = int(line[5])
    # Group product ids per customer; one customer is one transaction.
    all_transactions.setdefault(customer_id, []).append(product_id)

# Sorting makes the product <-> id mapping independent of set iteration order.
products_names = sorted(set(item for sublist in all_transactions.values() for item in sublist))
names_to_ids = {name: idx for idx, name in enumerate(products_names)}
ids_to_names = dict(enumerate(products_names))
# Each transaction becomes the set of integer ids of its products.
T = [{names_to_ids[item] for item in tr} for tr in all_transactions.values()]

In [85]:
# Keep the items whose relative frequency is at least 0.01, encode the
# transactions as a 0/1 matrix over those items, then enumerate the itemsets
# that pass the same 0.01 support threshold.
# NOTE(review): the recorded output of this cell is a KeyboardInterrupt, so
# the run shown here did not finish.
F = frequent_objects(T, 0.01)
T_red= reduce_transactions(T, F)
S = frequent_sets(T_red, F, 0.01)

KeyboardInterrupt: 

In [83]:
# Derive rules with confidence >= 0.75 from the itemsets, then map column
# indices back to product ids (clean_rules drops pairs with an empty side).
# NOTE(review): this cell's execution counter (83) is lower than those of the
# two cells above it (84, 85), so the printed rules may stem from earlier state.
R = assoc_rules(S, T_red, 0.75)
R_clean = clean_rules(R, F, ids_to_names)
print(R_clean)

[({4710011409056}, {4710011401128}), ({4710011401135}, {4710011401128})]


## D02

In [None]:
all_transactions = {}

# Read the file as raw bytes and skip its first line.
# NOTE(review): the first line is presumably a header - confirm.
with open('Lista06Dane3/D02', 'rb') as file:
    content = file.readlines()[1:]

for x in content:
    # Decode the bytes instead of calling str() on them: str(b'...') returns
    # the "b'...'" repr, which glues b' onto the first field and ' onto the
    # last one. latin-1 maps every byte to a character, so decoding cannot fail.
    line = x.strip().decode('latin-1').split(';')
    if line == ['']:
        # Blank line (e.g. at the end of the file): nothing to parse.
        continue
    customer_id = int(line[1])
    product_id = int(line[5])
    # Group product ids per customer; one customer is one transaction.
    all_transactions.setdefault(customer_id, []).append(product_id)

# Sorting makes the product <-> id mapping independent of set iteration order.
products_names = sorted(set(item for sublist in all_transactions.values() for item in sublist))
names_to_ids = {name: idx for idx, name in enumerate(products_names)}
ids_to_names = dict(enumerate(products_names))
# Each transaction becomes the set of integer ids of its products.
T = [{names_to_ids[item] for item in tr} for tr in all_transactions.values()]

In [86]:
# Keep the items whose relative frequency is at least 0.01, encode the
# transactions as a 0/1 matrix over those items, then enumerate the itemsets
# that pass the same 0.01 support threshold.
# NOTE(review): the recorded output of this cell is a KeyboardInterrupt, so
# the run shown here did not finish.
F = frequent_objects(T, 0.01)
T_red= reduce_transactions(T, F)
S = frequent_sets(T_red, F, 0.01)

KeyboardInterrupt: 

In [None]:
# Derive rules with confidence >= 0.75 from the itemsets, then map column
# indices back to product ids (clean_rules drops pairs with an empty side).
R = assoc_rules(S, T_red, 0.75)
R_clean = clean_rules(R, F, ids_to_names)
print(R_clean)

## D11

In [None]:
all_transactions = {}

# Read the file as raw bytes and skip its first line.
# NOTE(review): the first line is presumably a header - confirm.
with open('Lista06Dane3/D11', 'rb') as file:
    content = file.readlines()[1:]

for x in content:
    # Decode the bytes instead of calling str() on them: str(b'...') returns
    # the "b'...'" repr, which glues b' onto the first field and ' onto the
    # last one. latin-1 maps every byte to a character, so decoding cannot fail.
    line = x.strip().decode('latin-1').split(';')
    if line == ['']:
        # Blank line (e.g. at the end of the file): nothing to parse.
        continue
    customer_id = int(line[1])
    product_id = int(line[5])
    # Group product ids per customer; one customer is one transaction.
    all_transactions.setdefault(customer_id, []).append(product_id)

# Sorting makes the product <-> id mapping independent of set iteration order.
products_names = sorted(set(item for sublist in all_transactions.values() for item in sublist))
names_to_ids = {name: idx for idx, name in enumerate(products_names)}
ids_to_names = dict(enumerate(products_names))
# Each transaction becomes the set of integer ids of its products.
T = [{names_to_ids[item] for item in tr} for tr in all_transactions.values()]

In [None]:
# Keep the items whose relative frequency is at least 0.01, encode the
# transactions as a 0/1 matrix over those items, then enumerate the itemsets
# that pass the same 0.01 support threshold.
F = frequent_objects(T, 0.01)
T_red= reduce_transactions(T, F)
S = frequent_sets(T_red, F, 0.01)

In [None]:
# Derive rules with confidence >= 0.75 from the itemsets, then map column
# indices back to product ids (clean_rules drops pairs with an empty side).
R = assoc_rules(S, T_red, 0.75)
R_clean = clean_rules(R, F, ids_to_names)
print(R_clean)

## D12

In [None]:
all_transactions = {}

# Read the file as raw bytes and skip its first line.
# NOTE(review): the first line is presumably a header - confirm.
with open('Lista06Dane3/D12', 'rb') as file:
    content = file.readlines()[1:]

for x in content:
    # Decode the bytes instead of calling str() on them: str(b'...') returns
    # the "b'...'" repr, which glues b' onto the first field and ' onto the
    # last one. latin-1 maps every byte to a character, so decoding cannot fail.
    line = x.strip().decode('latin-1').split(';')
    if line == ['']:
        # Blank line (e.g. at the end of the file): nothing to parse.
        continue
    customer_id = int(line[1])
    product_id = int(line[5])
    # Group product ids per customer; one customer is one transaction.
    all_transactions.setdefault(customer_id, []).append(product_id)

# Sorting makes the product <-> id mapping independent of set iteration order.
products_names = sorted(set(item for sublist in all_transactions.values() for item in sublist))
names_to_ids = {name: idx for idx, name in enumerate(products_names)}
ids_to_names = dict(enumerate(products_names))
# Each transaction becomes the set of integer ids of its products.
T = [{names_to_ids[item] for item in tr} for tr in all_transactions.values()]

In [None]:
# Keep the items whose relative frequency is at least 0.01, encode the
# transactions as a 0/1 matrix over those items, then enumerate the itemsets
# that pass the same 0.01 support threshold.
F = frequent_objects(T, 0.01)
T_red= reduce_transactions(T, F)
S = frequent_sets(T_red, F, 0.01)

In [None]:
# Derive rules with confidence >= 0.75 from the itemsets, then map column
# indices back to product ids (clean_rules drops pairs with an empty side).
R = assoc_rules(S, T_red, 0.75)
R_clean = clean_rules(R, F, ids_to_names)
print(R_clean)