# Code

### Attribute Information
     0. classes (A):                   edible=e, poisonous=p
     1. cap-shape (B):                 bell=b,conical=c,convex=x,flat=f,
                                       knobbed=k,sunken=s
     2. cap-surface (C):               fibrous=f,grooves=g,scaly=y,smooth=s
     3. cap-color (D):                 brown=n,buff=b,cinnamon=c,gray=g,green=r,
                                       pink=p,purple=u,red=e,white=w,yellow=y
     4. bruises? (E):                  bruises=t,no=f
     5. odor (F):                      almond=a,anise=l,creosote=c,fishy=y,foul=f,
                                       musty=m,none=n,pungent=p,spicy=s
     6. gill-attachment (G):           attached=a,descending=d,free=f,notched=n
     7. gill-spacing (H):              close=c,crowded=w,distant=d
     8. gill-size (I):                 broad=b,narrow=n
     9. gill-color (J):                black=k,brown=n,buff=b,chocolate=h,gray=g,
                                       green=r,orange=o,pink=p,purple=u,red=e,
                                       white=w,yellow=y
    10. stalk-shape (K):               enlarging=e,tapering=t
    11. stalk-root (L):                bulbous=b,club=c,cup=u,equal=e,
                                       rhizomorphs=z,rooted=r,missing=?
    12. stalk-surface-above-ring (M):  fibrous=f,scaly=y,silky=k,smooth=s
    13. stalk-surface-below-ring (N):  fibrous=f,scaly=y,silky=k,smooth=s
    14. stalk-color-above-ring (O):    brown=n,buff=b,cinnamon=c,gray=g,orange=o,
                                       pink=p,red=e,white=w,yellow=y
    15. stalk-color-below-ring (P):    brown=n,buff=b,cinnamon=c,gray=g,orange=o,
                                       pink=p,red=e,white=w,yellow=y
    16. veil-type (Q):                 partial=p,universal=u
    17. veil-color (R):                brown=n,orange=o,white=w,yellow=y
    18. ring-number (S):               none=n,one=o,two=t
    19. ring-type (T):                 cobwebby=c,evanescent=e,flaring=f,large=l,
                                       none=n,pendant=p,sheathing=s,zone=z
    20. spore-print-color (U):         black=k,brown=n,buff=b,chocolate=h,green=r,
                                       orange=o,purple=u,white=w,yellow=y
    21. population (V):                abundant=a,clustered=c,numerous=n,
                                       scattered=s,several=v,solitary=y
    22. habitat (W):                   grasses=g,leaves=l,meadows=m,paths=p,
                                       urban=u,waste=w,woods=d


## Download Module

In [None]:
import wget
import os

def download(target_name = 'mushroom.data'):
    """
    Download the mushroom dataset from UCI
    :param target_name: target path name
    """
    if not os.path.exists(target_name):
        url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data'
        wget.download(url, target_name)

download()

## Core Module: Apriori Implementation

In [146]:
class Apriori:

    def __init__(self, dataset, minsup):
        """
        Initialize the class Apriori, including data cleaning and preprocessing
        :param dataset: The mushroom dataset in ndarray
        :param minsup: Minimal support
        """
        del_column = []
        for (i,r) in enumerate(dataset):
            if '?' in r:
                del_column.append(i)
            # Add column numbers (in capital letter) to distinguish the same text in different columns
            for j in range(len(r)):
                dataset[i][j] = '%c'%(65+j) + dataset[i][j]
        
        # Delete the line with the missing data
        self.dataset = np.delete(dataset, del_column, 0)
        self.minsup = minsup
        print('Data after preprocessing and cleaning:\n', self.dataset)
        

    def __prune(self, data_set, Ck):
        """
        Pruning from the candidate itemset
        :param data_set: The mushroom dataset composed of multiple sets
        :param Ck: Candidate itemset of size k
        :return: Frequent itemset of size k and its support list
        """
        count = dict()
        for column in data_set:
            for item in Ck:
                if item.issubset(column):
                    if not item in count:
                        count[item] = 1
                    else:
                        count[item] += 1
        Fk = dict()
        n = len(data_set)
        for key in count:
            if count[key]/n >= self.minsup:
                Fk[key] = count[key]/n
        return Fk

    def __join(self, Fk):
        """
        Self-joining from the frequent itemset
        :param Fk: Frequent itemset of size k
        :return: Candidate itemset of size k
        """
        Ck = []
        n, m = len(Fk), len(list(Fk[0]))
        for i in range(n):
            for j in range(i+1, n):
                Fi, Fj = sorted(list(Fk[i])[:m-1]), sorted(list(Fk[j])[:m-1])
                if Fi == Fj:
                    Ck.append(Fk[i]|Fk[j])
        return Ck

    def main(self):
        """
        Main function 
        :return: Frequent itemset and its support list at each level
        """
        C1 = []
        for item in self.dataset.flat:
            if not [item] in C1:
                C1.append([item]) 
        F = []
        data_set = list(map(set, self.dataset))
        C1 = list(map(frozenset, sorted(C1)))
        F1 = self.__prune(data_set, C1)
        F.append(F1)
        k = 2
        while(len(F[k-2])>0):
            Ck = self.__join(list(map(frozenset,F[k-2].keys())))
            F.append(self.__prune(data_set, Ck))
            k += 1
      
        return F


## Case 1 (minimal support = 0.5)

### Read Data from the File

In [249]:

import numpy as np

dataset_path = 'mushroom.data'
dataset = np.genfromtxt(dataset_path, delimiter=",", dtype='<U2')  
dataset

array([['p', 'x', 's', ..., 'k', 's', 'u'],
       ['e', 'x', 's', ..., 'n', 'n', 'g'],
       ['e', 'b', 's', ..., 'n', 'n', 'm'],
       ...,
       ['e', 'f', 's', ..., 'b', 'c', 'l'],
       ['p', 'k', 'y', ..., 'w', 'v', 'l'],
       ['e', 'x', 's', ..., 'o', 'c', 'l']], dtype='<U2')

### Modifying Data

In [250]:
apriori = Apriori(dataset, 0.5)

Data after preprocessing and cleaning:
 [['Ap' 'Bx' 'Cs' ... 'Uk' 'Vs' 'Wu']
 ['Ae' 'Bx' 'Cs' ... 'Un' 'Vn' 'Wg']
 ['Ae' 'Bb' 'Cs' ... 'Un' 'Vn' 'Wm']
 ...
 ['Ae' 'Bx' 'Cy' ... 'Uw' 'Vy' 'Wp']
 ['Ap' 'Bx' 'Cy' ... 'Uw' 'Vc' 'Wd']
 ['Ap' 'Bf' 'Cy' ... 'Uw' 'Vc' 'Wd']]


### Result

In [251]:
F5 = apriori.main()

for i in range(len(F5)):
    if len(F5[i]) <=0 :
        break
    print(f'size = {i+1}:\n\n')
    for x in F5[i].items():
        print(f'Itemset: {x[0]} sup: {x[1]}')
    print('\n')

}) sup: 0.5088589652728561
Itemset: frozenset({'Rw', 'Gf', 'Ib', 'Qp'}) sup: 0.8720765414599575
Itemset: frozenset({'Gf', 'Ib', 'Qp', 'So'}) sup: 0.8476257973068746
Itemset: frozenset({'Tp', 'Gf', 'Ib', 'Qp'}) sup: 0.5031892274982283
Itemset: frozenset({'Gf', 'Ae', 'Ib', 'Qp'}) sup: 0.5754783841247342
Itemset: frozenset({'Rw', 'Gf', 'Ib', 'So'}) sup: 0.8476257973068746
Itemset: frozenset({'Tp', 'Rw', 'Gf', 'Ib'}) sup: 0.5031892274982283
Itemset: frozenset({'Rw', 'Gf', 'Ae', 'Ib'}) sup: 0.5754783841247342
Itemset: frozenset({'Gf', 'Ae', 'Ib', 'So'}) sup: 0.5669737774627923
Itemset: frozenset({'Qp', 'Rw', 'Ib', 'So'}) sup: 0.8476257973068746
Itemset: frozenset({'Qp', 'Rw', 'Tp', 'Ib'}) sup: 0.5031892274982283
Itemset: frozenset({'Qp', 'Rw', 'Ae', 'Ib'}) sup: 0.5754783841247342
Itemset: frozenset({'Qp', 'Ae', 'Ib', 'So'}) sup: 0.5669737774627923
Itemset: frozenset({'Rw', 'Ae', 'Ib', 'So'}) sup: 0.5669737774627923
Itemset: frozenset({'Kt', 'Rw', 'Gf', 'Qp'}) sup: 0.5102763997165131
Itemset

## Case 2 (minimal support = 0.7)

### Read Data from the File

In [165]:

import numpy as np

dataset_path = 'mushroom.data'
dataset = np.genfromtxt(dataset_path, delimiter=",", dtype='<U2')  
dataset

array([['p', 'x', 's', ..., 'k', 's', 'u'],
       ['e', 'x', 's', ..., 'n', 'n', 'g'],
       ['e', 'b', 's', ..., 'n', 'n', 'm'],
       ...,
       ['e', 'f', 's', ..., 'b', 'c', 'l'],
       ['p', 'k', 'y', ..., 'w', 'v', 'l'],
       ['e', 'x', 's', ..., 'o', 'c', 'l']], dtype='<U2')

### Modifying Data

In [166]:
apriori = Apriori(dataset, 0.7)

Data after preprocessing and cleaning:
 [['Ap' 'Bx' 'Cs' ... 'Uk' 'Vs' 'Wu']
 ['Ae' 'Bx' 'Cs' ... 'Un' 'Vn' 'Wg']
 ['Ae' 'Bb' 'Cs' ... 'Un' 'Vn' 'Wm']
 ...
 ['Ae' 'Bx' 'Cy' ... 'Uw' 'Vy' 'Wp']
 ['Ap' 'Bx' 'Cy' ... 'Uw' 'Vc' 'Wd']
 ['Ap' 'Bf' 'Cy' ... 'Uw' 'Vc' 'Wd']]


### Result

In [167]:
F7 = apriori.main()

for i in range(len(F7)):
    if len(F7[i]) <=0 :
        break
    print(f'size = {i+1}:\n\n')
    for x in F7[i].items():
        print(f'Itemset: {x[0]} sup: {x[1]}')
    print('\n')

size = 1:


Itemset: frozenset({'Gf'}) sup: 0.9968107725017717
Itemset: frozenset({'Hc'}) sup: 0.8185683912119065
Itemset: frozenset({'Qp'}) sup: 1.0
Itemset: frozenset({'Rw'}) sup: 0.9985825655563431
Itemset: frozenset({'So'}) sup: 0.9723600283486888
Itemset: frozenset({'Ib'}) sup: 0.8752657689581856


size = 2:


Itemset: frozenset({'Hc', 'Gf'}) sup: 0.8153791637136782
Itemset: frozenset({'Qp', 'Gf'}) sup: 0.9968107725017717
Itemset: frozenset({'Rw', 'Gf'}) sup: 0.9953933380581148
Itemset: frozenset({'So', 'Gf'}) sup: 0.9723600283486888
Itemset: frozenset({'Hc', 'Qp'}) sup: 0.8185683912119065
Itemset: frozenset({'Hc', 'Rw'}) sup: 0.8185683912119065
Itemset: frozenset({'Hc', 'So'}) sup: 0.7909284195605953
Itemset: frozenset({'Qp', 'Rw'}) sup: 0.9985825655563431
Itemset: frozenset({'Qp', 'So'}) sup: 0.9723600283486888
Itemset: frozenset({'Rw', 'So'}) sup: 0.9709425939050319
Itemset: frozenset({'Ib', 'Gf'}) sup: 0.8720765414599575
Itemset: frozenset({'Hc', 'Ib'}) sup: 0.7391920623671155

## Case 3 (minimal support = 0.9)

### Read Data from the File

In [168]:

import numpy as np

dataset_path = 'mushroom.data'
dataset = np.genfromtxt(dataset_path, delimiter=",", dtype='<U2')  
dataset

array([['p', 'x', 's', ..., 'k', 's', 'u'],
       ['e', 'x', 's', ..., 'n', 'n', 'g'],
       ['e', 'b', 's', ..., 'n', 'n', 'm'],
       ...,
       ['e', 'f', 's', ..., 'b', 'c', 'l'],
       ['p', 'k', 'y', ..., 'w', 'v', 'l'],
       ['e', 'x', 's', ..., 'o', 'c', 'l']], dtype='<U2')

### Modifying Data

In [169]:
apriori = Apriori(dataset, 0.9)

Data after preprocessing and cleaning:
 [['Ap' 'Bx' 'Cs' ... 'Uk' 'Vs' 'Wu']
 ['Ae' 'Bx' 'Cs' ... 'Un' 'Vn' 'Wg']
 ['Ae' 'Bb' 'Cs' ... 'Un' 'Vn' 'Wm']
 ...
 ['Ae' 'Bx' 'Cy' ... 'Uw' 'Vy' 'Wp']
 ['Ap' 'Bx' 'Cy' ... 'Uw' 'Vc' 'Wd']
 ['Ap' 'Bf' 'Cy' ... 'Uw' 'Vc' 'Wd']]


### Result

In [171]:
F9 = apriori.main()

for i in range(len(F9)):
    if len(F9[i]) <=0 :
        break
    print(f'size = {i+1}:\n\n')
    for x in F9[i].items():
        print(f'Itemset: {x[0]} sup: {x[1]}')
    print('\n')

size = 1:


Itemset: frozenset({'Gf'}) sup: 0.9968107725017717
Itemset: frozenset({'Qp'}) sup: 1.0
Itemset: frozenset({'Rw'}) sup: 0.9985825655563431
Itemset: frozenset({'So'}) sup: 0.9723600283486888


size = 2:


Itemset: frozenset({'Qp', 'Gf'}) sup: 0.9968107725017717
Itemset: frozenset({'Rw', 'Gf'}) sup: 0.9953933380581148
Itemset: frozenset({'So', 'Gf'}) sup: 0.9723600283486888
Itemset: frozenset({'Qp', 'Rw'}) sup: 0.9985825655563431
Itemset: frozenset({'Qp', 'So'}) sup: 0.9723600283486888
Itemset: frozenset({'Rw', 'So'}) sup: 0.9709425939050319


size = 3:


Itemset: frozenset({'Qp', 'Rw', 'Gf'}) sup: 0.9953933380581148
Itemset: frozenset({'Qp', 'So', 'Gf'}) sup: 0.9723600283486888
Itemset: frozenset({'Rw', 'So', 'Gf'}) sup: 0.9709425939050319
Itemset: frozenset({'So', 'Qp', 'Rw'}) sup: 0.9709425939050319


size = 4:


Itemset: frozenset({'Rw', 'Gf', 'Qp', 'So'}) sup: 0.9709425939050319




## Data Analysis on Eatable Mushrooms (support=0.5)

### Extracting frequent itemsets containing edible attributes



In [262]:
import numpy as np


edible
for r in F5:
    for c in r.items():
        if 'Ae' in c[0]:
            edible.append(c)

edible

[(frozenset({'Ae'}), 0.6180014174344437),
 (frozenset({'Ae', 'Gf'}), 0.6180014174344437),
 (frozenset({'Ae', 'Ms'}), 0.5428773919206237),
 (frozenset({'Ae', 'Ns'}), 0.5088589652728561),
 (frozenset({'Ae', 'Qp'}), 0.6180014174344437),
 (frozenset({'Ae', 'Rw'}), 0.6180014174344437),
 (frozenset({'Ae', 'So'}), 0.6094968107725017),
 (frozenset({'Ae', 'Ib'}), 0.5754783841247342),
 (frozenset({'Ae', 'Gf', 'Ms'}), 0.5428773919206237),
 (frozenset({'Ae', 'Gf', 'Ns'}), 0.5088589652728561),
 (frozenset({'Ae', 'Gf', 'Qp'}), 0.6180014174344437),
 (frozenset({'Ae', 'Gf', 'Rw'}), 0.6180014174344437),
 (frozenset({'Ae', 'Gf', 'So'}), 0.6094968107725017),
 (frozenset({'Ae', 'Ms', 'Qp'}), 0.5428773919206237),
 (frozenset({'Ae', 'Ms', 'Rw'}), 0.5428773919206237),
 (frozenset({'Ae', 'Ms', 'So'}), 0.5372076541459957),
 (frozenset({'Ae', 'Ns', 'Qp'}), 0.5088589652728561),
 (frozenset({'Ae', 'Ns', 'Rw'}), 0.5088589652728561),
 (frozenset({'Ae', 'Ns', 'So'}), 0.5031892274982283),
 (frozenset({'Ae', 'Qp', 'Rw

In [271]:
t30_edible = sorted(edible, key=lambda x: x[1], reverse=True)[:30]


t30_edible


[(frozenset({'Ae'}), 0.6180014174344437),
 (frozenset({'Ae', 'Gf'}), 0.6180014174344437),
 (frozenset({'Ae', 'Qp'}), 0.6180014174344437),
 (frozenset({'Ae', 'Rw'}), 0.6180014174344437),
 (frozenset({'Ae', 'Gf', 'Qp'}), 0.6180014174344437),
 (frozenset({'Ae', 'Gf', 'Rw'}), 0.6180014174344437),
 (frozenset({'Ae', 'Qp', 'Rw'}), 0.6180014174344437),
 (frozenset({'Ae', 'Gf', 'Qp', 'Rw'}), 0.6180014174344437),
 (frozenset({'Ae'}), 0.6180014174344437),
 (frozenset({'Ae', 'Gf'}), 0.6180014174344437),
 (frozenset({'Ae', 'Qp'}), 0.6180014174344437),
 (frozenset({'Ae', 'Rw'}), 0.6180014174344437),
 (frozenset({'Ae', 'Gf', 'Qp'}), 0.6180014174344437),
 (frozenset({'Ae', 'Gf', 'Rw'}), 0.6180014174344437),
 (frozenset({'Ae', 'Qp', 'Rw'}), 0.6180014174344437),
 (frozenset({'Ae', 'Gf', 'Qp', 'Rw'}), 0.6180014174344437),
 (frozenset({'Ae', 'So'}), 0.6094968107725017),
 (frozenset({'Ae', 'Gf', 'So'}), 0.6094968107725017),
 (frozenset({'Ae', 'Qp', 'So'}), 0.6094968107725017),
 (frozenset({'Ae', 'Rw', 'So

In [272]:
benchmark = 0.6180014174344437

for r in t30_edible:
    print(f'Itemset: {r[0]} Confidence: {r[1]/benchmark}\n')


Itemset: frozenset({'Ae'}) Confidence: 1.0

Itemset: frozenset({'Ae', 'Gf'}) Confidence: 1.0

Itemset: frozenset({'Qp', 'Ae'}) Confidence: 1.0

Itemset: frozenset({'Rw', 'Ae'}) Confidence: 1.0

Itemset: frozenset({'Ae', 'Qp', 'Gf'}) Confidence: 1.0

Itemset: frozenset({'Ae', 'Rw', 'Gf'}) Confidence: 1.0

Itemset: frozenset({'Qp', 'Rw', 'Ae'}) Confidence: 1.0

Itemset: frozenset({'Rw', 'Gf', 'Ae', 'Qp'}) Confidence: 1.0

Itemset: frozenset({'Ae'}) Confidence: 1.0

Itemset: frozenset({'Ae', 'Gf'}) Confidence: 1.0

Itemset: frozenset({'Qp', 'Ae'}) Confidence: 1.0

Itemset: frozenset({'Rw', 'Ae'}) Confidence: 1.0

Itemset: frozenset({'Ae', 'Qp', 'Gf'}) Confidence: 1.0

Itemset: frozenset({'Ae', 'Rw', 'Gf'}) Confidence: 1.0

Itemset: frozenset({'Qp', 'Rw', 'Ae'}) Confidence: 1.0

Itemset: frozenset({'Rw', 'Gf', 'Ae', 'Qp'}) Confidence: 1.0

Itemset: frozenset({'So', 'Ae'}) Confidence: 0.9862385321100916

Itemset: frozenset({'Ae', 'So', 'Gf'}) Confidence: 0.9862385321100916

Itemset: frozens