In [1]:
import pandas as pd
import numpy as np

from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, fpgrowth

from itertools import chain, combinations

import matplotlib.pyplot as plt

# Задание 1

In [2]:
# Toy transaction database for task 1: every string is one basket and each
# character of the string is a single item.
input_data = [
    list(basket)
    for basket in (
        "ABCD",
        "ACDF",
        "ACDEG",
        "ABDF",
        "BCG",
        "DFG",
        "ABG",
        "CDFG",
    )
]

# One-hot encode the baskets: one boolean column per distinct item,
# one row per basket.
te = TransactionEncoder()
trasaction_data = te.fit_transform(input_data)
data_frame = pd.DataFrame(data=trasaction_data, columns=te.columns_)
data_frame

Unnamed: 0,A,B,C,D,E,F,G
0,True,True,True,True,False,False,False
1,True,False,True,True,False,True,False
2,True,False,True,True,True,False,True
3,True,True,False,True,False,True,False
4,False,True,True,False,False,False,True
5,False,False,False,True,False,True,True
6,True,True,False,False,False,False,True
7,False,False,True,True,False,True,True


In [3]:
# Frequent itemsets found by Apriori with a minimum support of 3/8,
# ordered from the most to the least frequent.
apriori_data = (
    apriori(data_frame, min_support=3 / 8, use_colnames=True)
    .sort_values(by="support", ascending=False)
)
apriori_data

Unnamed: 0,support,itemsets
3,0.75,(D)
0,0.625,(A)
2,0.625,(C)
5,0.625,(G)
1,0.5,(B)
4,0.5,(F)
8,0.5,"(A, D)"
9,0.5,"(D, C)"
11,0.5,"(D, F)"
6,0.375,"(A, B)"


In [4]:
# Frequent itemsets found by FP-Growth with a minimum support of 2/8,
# ordered from the most to the least frequent.
# NOTE(review): the Apriori cell uses min_support=3/8 while this one uses 2/8,
# so the two result tables are not directly comparable — confirm this is intended.
fpg_data = (
    fpgrowth(data_frame, min_support=2 / 8, use_colnames=True)
    .sort_values(by="support", ascending=False)
)
fpg_data

Unnamed: 0,support,itemsets
0,0.75,(D)
2,0.625,(A)
5,0.625,(G)
1,0.625,(C)
3,0.5,(B)
4,0.5,(F)
6,0.5,"(D, C)"
18,0.5,"(D, F)"
9,0.5,"(A, D)"
12,0.375,"(A, C, D)"


# Задание 2

In [5]:
# Transaction database for task 2: eight transactions, each a list of
# integer item ids (the ids used here range from 1 to 11).
input_data_2 = [
    [2, 3, 6, 7],
    [1, 3, 4, 8, 11],
    [3, 9, 11],
    [1, 5, 6, 7],
    [1, 3, 8, 10, 11],
    [3, 5, 7, 9, 11],
    [4, 6, 8, 10, 11],
    [1, 3, 5, 8, 11],
]


## Subtask A

In [6]:
# Distinct items that occur in at least one transaction.
unique_items = {item for basket in input_data_2 for item in basket}

# Every non-empty subset of the n distinct items is a candidate itemset,
# which gives 2**n - 1 candidates in total.
search_space_size = 2 ** len(unique_items) - 1
search_space_size

2047

## Subtask B

In [8]:
# Item taxonomy stored as a child -> parent map; a parent of 0 marks an item
# that sits at the top of the hierarchy.
#
#   1        14            6            15
#           /  \                      /  |  \
#         12    5                    7   13  11
#        / | \                          / | \
#       2  3  4                        8  9  10
taxonomy = {
    # top-level items
    1: 0, 14: 0, 6: 0, 15: 0,
    # second level: children of 14, then children of 15
    12: 14, 5: 14,
    7: 15, 13: 15, 11: 15,
    # third level: children of 12, then children of 13
    2: 12, 3: 12, 4: 12,
    8: 13, 9: 13, 10: 13,
}

In [18]:
# Extend every transaction with all taxonomy ancestors of its items, so that
# higher-level items are present in the data alongside the original ones.
new_data = []
for transaction in input_data_2:
    extended_items = set()
    for leaf in transaction:
        extended_items.add(leaf)
        # Climb the child -> parent links; a falsy parent (0) ends the walk.
        ancestor = taxonomy[leaf]
        while ancestor:
            extended_items.add(ancestor)
            ancestor = taxonomy[ancestor]
    new_data.append(list(extended_items))

# One-hot encode the extended transactions.
te_2 = TransactionEncoder()
trasaction_data_2 = te_2.fit_transform(new_data)
data_frame_2 = pd.DataFrame(data=trasaction_data_2, columns=te_2.columns_)

# Frequent itemsets over the extended transactions with a minimum support
# of 7/8, ordered from the most to the least frequent.
apriori_data_2 = (
    apriori(data_frame_2, min_support=7 / 8, use_colnames=True)
    .sort_values(by="support", ascending=False)
)
apriori_data_2

[2, 3, 6, 7, 12, 14, 15]
[1, 3, 4, 8, 11, 12, 13, 14, 15]
[3, 9, 11, 12, 13, 14, 15]
[1, 5, 6, 7, 14, 15]
[1, 3, 8, 10, 11, 12, 13, 14, 15]
[3, 5, 7, 9, 11, 12, 13, 14, 15]
[4, 6, 8, 10, 11, 12, 13, 14, 15]
[1, 3, 5, 8, 11, 12, 13, 14, 15]


Unnamed: 0,support,itemsets
1,1.0,(14)
2,1.0,(15)
5,1.0,"(14, 15)"
0,0.875,(12)
3,0.875,"(12, 14)"
4,0.875,"(12, 15)"
6,0.875,"(12, 14, 15)"
