In [1]:
import numpy as np

dataset_raw = [
	'ECG_Monitor BP_Monitor Insulin Antibiotics',
	'ECG_Monitor Ventilator Insulin',
	'ECG_Monitor BP_Monitor Ventilator Antibiotics',
	'BP_Monitor Insulin Antibiotics',
	'ECG_Monitor Ventilator Insulin Antibiotics'
]

item_dict = set()

supp_thresh = 0.4
conf_thresh = 0.85

def parse_dataset(dataset_raw : list[str]) -> list[set] :
	dataset = []
	for line in dataset_raw :
		items = line.split()
		transaction = set()
		for item in items :
			item_dict.add(item)
			transaction.add(item)
		dataset.append(transaction)
	return dataset

dataset = parse_dataset(dataset_raw)

print(dataset)
print(item_dict)


[{'Insulin', 'ECG_Monitor', 'BP_Monitor', 'Antibiotics'}, {'Insulin', 'ECG_Monitor', 'Ventilator'}, {'ECG_Monitor', 'Ventilator', 'BP_Monitor', 'Antibiotics'}, {'Insulin', 'BP_Monitor', 'Antibiotics'}, {'Insulin', 'ECG_Monitor', 'Ventilator', 'Antibiotics'}]
{'Insulin', 'ECG_Monitor', 'Ventilator', 'Antibiotics', 'BP_Monitor'}


In [3]:

def calc_supp(itemset : set) -> float :
	count = 0
	for transaction in dataset :
		if itemset.issubset(transaction) :
			count += 1
	return count / len(dataset)

def calc_conf(antecedent : set, consequent : set) -> float :
	union_set = antecedent.union(consequent)
	supp_antecedent = calc_supp(antecedent)
	supp_union = calc_supp(union_set)
	if supp_antecedent == 0 :
		return 0
	return supp_union / supp_antecedent

# Q1

In [6]:
freq_itemsets : dict[int, list[set]] = dict()

def search_itemset(cur_itemset : set) :
	for item in item_dict :
		if item in cur_itemset :
			continue
		new_itemset = cur_itemset.union({item})
		supp = calc_supp(new_itemset)
		if supp >= supp_thresh :
			size = len(new_itemset)
			if size not in freq_itemsets :
				freq_itemsets[size] = []
			freq_itemsets[size].append(new_itemset)
			search_itemset(new_itemset)

search_itemset(set())

print("Frequent 1-itemsets:")
for itemset in freq_itemsets.get(1, []) :
	print('\t', *itemset)
print("Frequent 2-itemsets:")
for itemset in freq_itemsets.get(2, []) :
	print('\t', *itemset)

Frequent 1-itemsets:
	 Insulin
	 ECG_Monitor
	 Ventilator
	 Antibiotics
	 BP_Monitor
Frequent 2-itemsets:
	 Insulin ECG_Monitor
	 Insulin Ventilator
	 Insulin Antibiotics
	 Insulin BP_Monitor
	 Insulin ECG_Monitor
	 ECG_Monitor Ventilator
	 ECG_Monitor Antibiotics
	 ECG_Monitor BP_Monitor
	 Insulin Ventilator
	 ECG_Monitor Ventilator
	 Ventilator Antibiotics
	 Insulin Antibiotics
	 ECG_Monitor Antibiotics
	 Ventilator Antibiotics
	 BP_Monitor Antibiotics
	 Insulin BP_Monitor
	 ECG_Monitor BP_Monitor
	 BP_Monitor Antibiotics


# Q2
get all association rules

In [9]:
for size, itemsets in freq_itemsets.items() :
	if size < 2 :
		continue
	for itemset in itemsets :
		for item in itemset :
			antecedent = itemset.difference({item})
			consequent = {item}
			conf = calc_conf(antecedent, consequent)
			if conf >= conf_thresh :
				print(f"{{{', '.join(antecedent)}}} -> {{{', '.join(consequent)}}}")

{Ventilator} -> {ECG_Monitor}
{Ventilator} -> {ECG_Monitor}
{BP_Monitor} -> {Antibiotics}
{BP_Monitor} -> {Antibiotics}
{Insulin, Ventilator} -> {ECG_Monitor}
{Insulin, Ventilator} -> {ECG_Monitor}
{Insulin, BP_Monitor} -> {Antibiotics}
{Insulin, BP_Monitor} -> {Antibiotics}
{Insulin, Ventilator} -> {ECG_Monitor}
{Insulin, Ventilator} -> {ECG_Monitor}
{Ventilator, Antibiotics} -> {ECG_Monitor}
{Ventilator, Antibiotics} -> {ECG_Monitor}
{ECG_Monitor, BP_Monitor} -> {Antibiotics}
{ECG_Monitor, BP_Monitor} -> {Antibiotics}
{Insulin, Ventilator} -> {ECG_Monitor}
{Insulin, Ventilator} -> {ECG_Monitor}
{Ventilator, Antibiotics} -> {ECG_Monitor}
{Ventilator, Antibiotics} -> {ECG_Monitor}
{Insulin, BP_Monitor} -> {Antibiotics}
{Ventilator, Antibiotics} -> {ECG_Monitor}
{ECG_Monitor, BP_Monitor} -> {Antibiotics}
{Ventilator, Antibiotics} -> {ECG_Monitor}
{Insulin, BP_Monitor} -> {Antibiotics}
{ECG_Monitor, BP_Monitor} -> {Antibiotics}
{Insulin, BP_Monitor} -> {Antibiotics}
{ECG_Monitor, BP_Moni