# Apriori

Date: 24/06/2022

Aim: To find the association rules of the given datasets using the Apriori algorithm.

All subsets of a frequent itemset must themselves be frequent (the Apriori property).
Equivalently, if an itemset is infrequent, all of its supersets are infrequent too.

* Step-1: Determine the support of itemsets in the transactional database, and select the minimum support and confidence.

* Step-2: Keep all itemsets in the transactions whose support is higher than the selected minimum support.

* Step-3: From these frequent itemsets, find all the rules whose confidence is higher than the minimum confidence threshold.

* Step-4: Sort the rules in decreasing order of lift.



In [None]:
import pandas as pd
import numpy as np

In [None]:
pip install apyori

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting apyori
  Downloading apyori-1.1.2.tar.gz (8.6 kB)
Building wheels for collected packages: apyori
  Building wheel for apyori (setup.py) ... done
  Created wheel for apyori: filename=apyori-1.1.2-py3-none-any.whl size=5974 sha256=38a0faea3a655c508a32fa87d83bd775a87ad42eb6cc7da806a106b51561f684
  Stored in directory: /root/.cache/pip/wheels/cb/f6/e1/57973c631d27efd1a2f375bd6a83b2a616c4021f24aab84080
Successfully built apyori
Installing collected packages: apyori
Successfully installed apyori-1.1.2


In [None]:
from apyori import apriori

In [None]:
# Load the market-basket data.  header=None keeps the first line of the file
# as data (row 0) instead of consuming it as column names, so columns are
# labelled 0..N-1.
# NOTE(review): the path assumes Google Drive is already mounted at
# /content/drive; no mount step is visible in this notebook -- confirm.
df= pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Marketbasket.csv.csv", header=None)

In [None]:
# Display the loaded frame to inspect the raw basket layout.
df

Unnamed: 0,0,1,2,3,4,5
0,Wine,Chips,Bread,Butter,Milk,Apple
1,Wine,,Bread,Butter,Milk,
2,,,Bread,Butter,Milk,
3,,Chips,,,,Apple
4,Wine,Chips,Bread,Butter,Milk,Apple
5,Wine,Chips,,,Milk,
6,Wine,Chips,Bread,Butter,,Apple
7,Wine,Chips,,,Milk,
8,Wine,,Bread,,,Apple
9,Wine,,Bread,Butter,Milk,


Data preprocessing

In [None]:
# Turn every row of the frame into one transaction: the list of item names
# present in that basket.
#
# Empty cells are read by pandas as NaN.  They are skipped here because
# str(NaN) == 'nan' would otherwise be passed to Apriori as if it were a real
# product and could show up inside itemsets and rules.
#
# Iterating over df.values (instead of fixed range(0, 22) / range(0, 6)
# bounds) makes the cell follow the actual size of the CSV.
records = [
    [str(item) for item in row if pd.notna(item)]
    for row in df.values
]

In [None]:
# Inspect the list of transactions built from the frame.
records

[['Wine', 'Chips', 'Bread', 'Butter', 'Milk', 'Apple'],
 ['Wine', 'nan', 'Bread', 'Butter', 'Milk', 'nan'],
 ['nan', 'nan', 'Bread', 'Butter', 'Milk', 'nan'],
 ['nan', 'Chips', 'nan', 'nan', 'nan', 'Apple'],
 ['Wine', 'Chips', 'Bread', 'Butter', 'Milk', 'Apple'],
 ['Wine', 'Chips', 'nan', 'nan', 'Milk', 'nan'],
 ['Wine', 'Chips', 'Bread', 'Butter', 'nan', 'Apple'],
 ['Wine', 'Chips', 'nan', 'nan', 'Milk', 'nan'],
 ['Wine', 'nan', 'Bread', 'nan', 'nan', 'Apple'],
 ['Wine', 'nan', 'Bread', 'Butter', 'Milk', 'nan'],
 ['nan', 'Chips', 'Bread', 'Butter', 'nan', 'Apple'],
 ['Wine', 'nan', 'nan', 'Butter', 'Milk', 'Apple'],
 ['Wine', 'Chips', 'Bread', 'Butter', 'Milk', 'nan'],
 ['Wine', 'nan', 'Bread', 'nan', 'Milk', 'Apple'],
 ['Wine', 'nan', 'Bread', 'Butter', 'Milk', 'Apple'],
 ['Wine', 'Chips', 'Bread', 'Butter', 'Milk', 'Apple'],
 ['nan', 'Chips', 'Bread', 'Butter', 'Milk', 'Apple'],
 ['nan', 'Chips', 'nan', 'Butter', 'Milk', 'Apple'],
 ['Wine', 'Chips', 'Bread', 'Butter', 'Milk', 'Apple

Applying Apriori

In [None]:
# Mine itemsets/rules with support >= 0.50, confidence >= 0.7 and lift >= 1.2.
#
# apyori's apriori() only reads the keywords min_support, min_confidence,
# min_lift and max_length.  The `min_length=2` keyword that used to be passed
# here is not one of them and was silently ignored, so it has been removed to
# avoid suggesting a filter that never ran.  If a minimum itemset size is
# required, filter the result explicitly on len(record.items).
#
# apriori() yields its records lazily; list() materialises them so that they
# can be counted and printed more than once.
assn_rule_1 = apriori(records, min_support=0.50, min_confidence=0.7, min_lift=1.2)
assn_results = list(assn_rule_1)

In [None]:
# Number of records currently held in assn_results.
print(len(assn_results))


1


In [None]:
# Print the raw contents of assn_results.
print(assn_results)


[RelationRecord(items=frozenset({'Butter', 'Milk', 'Bread'}), support=0.5, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Butter'}), items_add=frozenset({'Bread', 'Milk'}), confidence=0.7333333333333334, lift=1.241025641025641), OrderedStatistic(items_base=frozenset({'Milk', 'Bread'}), items_add=frozenset({'Butter'}), confidence=0.8461538461538461, lift=1.241025641025641)])]


In [None]:
# Second run with a looser support (>= 0.40) and a stricter confidence
# (>= 0.8); lift threshold stays at 1.2.
#
# The previous `min_length=2` keyword is not an argument apyori's apriori()
# reads (it understands min_support, min_confidence, min_lift and max_length),
# so it had no effect and has been removed.  Filter on len(record.items)
# afterwards if a minimum itemset size is needed.
#
# Note that assn_results is overwritten with the outcome of this run.
assn_rule_2 = apriori(records, min_support=0.40, min_confidence=0.8, min_lift=1.2)
assn_results = list(assn_rule_2)

In [None]:
# Number of records currently held in assn_results.
print(len(assn_results))

5


In [None]:
# Print the raw contents of assn_results.
print(assn_results)

[RelationRecord(items=frozenset({'Apple', 'Wine', 'Bread'}), support=0.45454545454545453, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Apple', 'Wine'}), items_add=frozenset({'Bread'}), confidence=0.9090909090909091, lift=1.25)]), RelationRecord(items=frozenset({'Apple', 'Butter', 'Milk'}), support=0.4090909090909091, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Apple', 'Milk'}), items_add=frozenset({'Butter'}), confidence=0.8181818181818182, lift=1.2000000000000002)]), RelationRecord(items=frozenset({'Butter', 'Milk', 'Bread'}), support=0.5, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Milk', 'Bread'}), items_add=frozenset({'Butter'}), confidence=0.8461538461538461, lift=1.241025641025641)]), RelationRecord(items=frozenset({'Butter', 'Wine', 'Bread'}), support=0.45454545454545453, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Butter', 'Wine'}), items_add=frozenset({'Bread'}), confidence=0.9090909090909091, lift=1.25)]), Relatio

Example 2

In [None]:
# Load the second example dataset.  header=None keeps the first line of the
# file as data (row 0) instead of consuming it as column names.
# This rebinds `df`, so the frame loaded earlier is no longer reachable
# under that name.
# NOTE(review): the path assumes Google Drive is already mounted at
# /content/drive; no mount step is visible in this notebook -- confirm.
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/dataset.csv", header=None)

In [None]:
# Display the loaded frame to inspect the raw basket layout.
df

Unnamed: 0,0,1,2
0,Hot dogs,Buns,Ketchup
1,Hot dogs,Buns,
2,Hot dogs,Coke,Chips
3,Chips,Ketchup,
4,Hot dogs,Coke,Chips


Data Preprocessing

In [None]:
# Build one transaction per row of the frame.
#
# Every column is used: the earlier range(0, 2) stopped after the second
# column, so items stored in the third column never reached Apriori.
# Empty cells (NaN) are skipped so that the string 'nan' is not treated as a
# product, and the row/column bounds now follow the frame instead of being
# hard-coded.  A plain nested comprehension replaces the list comprehension
# that was only being used for its append() side effect.
records = [
    [str(item) for item in row if pd.notna(item)]
    for row in df.values
]

print(records)

[['Hot dogs', 'Buns'], ['Hot dogs', 'Buns'], ['Hot dogs', 'Coke'], ['Chips', 'Ketchup'], ['Hot dogs', 'Coke']]


Applying Apriori


In [None]:
# Mine itemsets/rules with support >= 0.2, confidence >= 0.5 and lift >= 1.2.
#
# apyori's apriori() only reads min_support, min_confidence, min_lift and
# max_length.  The `min_length=3` keyword that used to be passed here is not
# recognised and was silently ignored -- it never restricted the result to
# itemsets of three or more items -- so it has been removed.  To enforce a
# minimum size, filter explicitly, e.g.
#     [r for r in assn_results if len(r.items) >= 3]
#
# apriori() yields its records lazily; list() materialises them.
assn_rule_1 = apriori(records, min_support=0.2, min_confidence=0.5, min_lift=1.2)
assn_results = list(assn_rule_1)

In [None]:
# Print the raw contents of assn_results.
print(assn_results)

[RelationRecord(items=frozenset({'Hot dogs', 'Buns'}), support=0.4, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Buns'}), items_add=frozenset({'Hot dogs'}), confidence=1.0, lift=1.25), OrderedStatistic(items_base=frozenset({'Hot dogs'}), items_add=frozenset({'Buns'}), confidence=0.5, lift=1.25)]), RelationRecord(items=frozenset({'Ketchup', 'Chips'}), support=0.2, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Chips'}), items_add=frozenset({'Ketchup'}), confidence=1.0, lift=5.0), OrderedStatistic(items_base=frozenset({'Ketchup'}), items_add=frozenset({'Chips'}), confidence=1.0, lift=5.0)]), RelationRecord(items=frozenset({'Coke', 'Hot dogs'}), support=0.4, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Coke'}), items_add=frozenset({'Hot dogs'}), confidence=1.0, lift=1.25), OrderedStatistic(items_base=frozenset({'Hot dogs'}), items_add=frozenset({'Coke'}), confidence=0.5, lift=1.25)])]


In [None]:
# Second run with a higher support threshold (>= 0.4); confidence >= 0.5 and
# lift >= 1.2 are unchanged.
#
# The previous `min_length=2` keyword is not an argument apyori's apriori()
# reads (it understands min_support, min_confidence, min_lift and max_length),
# so it had no effect and has been removed.
assn_rule_2 = apriori(records, min_support=0.4, min_confidence=0.5, min_lift=1.2)
results = list(assn_rule_2)
# Leave the list as the cell's last expression so the notebook displays it.
results

[RelationRecord(items=frozenset({'Hot dogs', 'Buns'}), support=0.4, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Buns'}), items_add=frozenset({'Hot dogs'}), confidence=1.0, lift=1.25), OrderedStatistic(items_base=frozenset({'Hot dogs'}), items_add=frozenset({'Buns'}), confidence=0.5, lift=1.25)]),
 RelationRecord(items=frozenset({'Coke', 'Hot dogs'}), support=0.4, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Coke'}), items_add=frozenset({'Hot dogs'}), confidence=1.0, lift=1.25), OrderedStatistic(items_base=frozenset({'Hot dogs'}), items_add=frozenset({'Coke'}), confidence=0.5, lift=1.25)])]