In [8]:
import numpy as np

dataset_raw = [
	'ECG_Monitor BP_Monitor Insulin Antibiotics',
	'ECG_Monitor Ventilator Insulin',
	'ECG_Monitor BP_Monitor Ventilator Antibiotics',
	'BP_Monitor Insulin Antibiotics',
	'ECG_Monitor Ventilator Insulin Antibiotics'
]

item_dict = set()

supp_thresh = 0.4
conf_thresh = 0.85

def parse_dataset(dataset_raw : list[str]) -> list[set] :
	dataset = []
	for line in dataset_raw :
		items = line.split()
		transaction = set()
		for item in items :
			item_dict.add(item)
			transaction.add(item)
		dataset.append(transaction)
	return dataset

dataset = parse_dataset(dataset_raw)

print(dataset)
print(item_dict)


[{'BP_Monitor', 'ECG_Monitor', 'Insulin', 'Antibiotics'}, {'ECG_Monitor', 'Ventilator', 'Insulin'}, {'BP_Monitor', 'ECG_Monitor', 'Ventilator', 'Antibiotics'}, {'BP_Monitor', 'Antibiotics', 'Insulin'}, {'ECG_Monitor', 'Ventilator', 'Insulin', 'Antibiotics'}]
{'Antibiotics', 'Ventilator', 'Insulin', 'BP_Monitor', 'ECG_Monitor'}


In [9]:

def calc_supp(itemset : set) -> float :
	count = 0
	for transaction in dataset :
		if itemset.issubset(transaction) :
			count += 1
	return count / len(dataset)

def calc_conf(antecedent : set, consequent : set) -> float :
	union_set = antecedent.union(consequent)
	supp_antecedent = calc_supp(antecedent)
	supp_union = calc_supp(union_set)
	if supp_antecedent == 0 :
		return 0
	return supp_union / supp_antecedent

# Q1

In [10]:
freq_itemsets : dict[int, list[set]] = dict()

def search_itemset(cur_itemset : set) :
	for item in item_dict :
		if item in cur_itemset :
			continue
		new_itemset = cur_itemset.union({item})
		supp = calc_supp(new_itemset)
		if supp >= supp_thresh :
			size = len(new_itemset)
			if size not in freq_itemsets :
				freq_itemsets[size] = []
			if new_itemset in freq_itemsets[size] :
				continue
			freq_itemsets[size].append(new_itemset)
			search_itemset(new_itemset)

search_itemset(set())

print("Frequent 1-itemsets:")
for itemset in freq_itemsets.get(1, []) :
	print('\t', *itemset, ' Supp: ', calc_supp(itemset))
print("Frequent 2-itemsets:")
for itemset in freq_itemsets.get(2, []) :
	print('\t', *itemset, ' Supp:', calc_supp(itemset))

Frequent 1-itemsets:
	 Antibiotics  Supp:  0.8
	 Ventilator  Supp:  0.6
	 Insulin  Supp:  0.8
	 BP_Monitor  Supp:  0.6
	 ECG_Monitor  Supp:  0.8
Frequent 2-itemsets:
	 Antibiotics Ventilator  Supp: 0.4
	 Antibiotics Insulin  Supp: 0.6
	 BP_Monitor Antibiotics  Supp: 0.6
	 ECG_Monitor Antibiotics  Supp: 0.6
	 Ventilator Insulin  Supp: 0.4
	 ECG_Monitor Ventilator  Supp: 0.6
	 BP_Monitor Insulin  Supp: 0.4
	 ECG_Monitor Insulin  Supp: 0.6
	 BP_Monitor ECG_Monitor  Supp: 0.4


# Q2
get all association rules

In [11]:
for size, itemsets in freq_itemsets.items() :
	if size < 2 :
		continue
	for itemset in itemsets :
		for item in itemset :
			antecedent = itemset.difference({item})
			consequent = {item}
			conf = calc_conf(antecedent, consequent)
			if conf >= conf_thresh :
				print(f"{{{', '.join(antecedent)}}} -> {{{', '.join(consequent)}}}")

{BP_Monitor} -> {Antibiotics}
{Ventilator} -> {ECG_Monitor}
{Antibiotics, Ventilator} -> {ECG_Monitor}
{BP_Monitor, Insulin} -> {Antibiotics}
{BP_Monitor, ECG_Monitor} -> {Antibiotics}
{Ventilator, Insulin} -> {ECG_Monitor}
