# Association Analysis Assignment 
# Sanjay Gupta
# Date: 20-August-2021

# Importing Libraries

In [1]:
# Install the libraries below if they are not already installed
# !pip install apriori
# !pip install mlxtend

# Load the Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from mlxtend.frequent_patterns import apriori, association_rules, fpgrowth, fpmax
from mlxtend.preprocessing import TransactionEncoder

# Open the Assignment Question in a Separate Window

In [2]:
# Requires Pillow (uncomment the next line to install it)
#!pip install pillow

# Fetch the assignment question image and open it with PIL
import urllib.request
from PIL import Image

question_url = "https://raw.githubusercontent.com/sanjaygupta1963/Pythoncoding/main/Assignment_1.jpg"
# urlretrieve returns (local_filename, headers); keep only the local path
image_path = urllib.request.urlretrieve(question_url, "sample.png")[0]

# Load the downloaded file and hand it to the image viewer
img = Image.open(image_path)
img.show()

# Load the Dataset

In [3]:
# Raw GitHub location of the assignment CSV
url = 'https://raw.githubusercontent.com/sanjaygupta1963/Pythoncoding/main/Assignment_1.csv'

# Read the CSV at that location straight into a DataFrame
df = pd.read_csv(filepath_or_buffer=url)

In [4]:
# Summarise the frame: number of rows, column names, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11 entries, 0 to 10
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype
---  ------          --------------  -----
 0   Transaction_ID  11 non-null     int64
 1   Item_1          11 non-null     int64
 2   Item_2          11 non-null     int64
 3   Item_3          11 non-null     int64
 4   Item_4          11 non-null     int64
 5   Item_5          11 non-null     int64
dtypes: int64(6)
memory usage: 656.0 bytes


In [5]:
# Preview the first rows of the loaded data
df.head()

Unnamed: 0,Transaction_ID,Item_1,Item_2,Item_3,Item_4,Item_5
0,1,1,1,1,1,1
1,2,0,1,0,1,0
2,3,1,1,1,0,1
3,4,0,1,0,0,0
4,5,0,1,1,1,1


In [6]:
# List the column labels of the dataframe
df.columns

Index(['Transaction_ID', 'Item_1', 'Item_2', 'Item_3', 'Item_4', 'Item_5'], dtype='object')

# Applying the Apriori Algorithm

In [7]:
# Drop the Transaction_ID column so that only the one-hot item columns remain.
# A non in-place drop with errors="ignore" keeps this cell safe to re-run:
# the second run no longer raises KeyError for the already-removed column.
df = df.drop(columns=["Transaction_ID"], errors="ignore")

# Apply the Apriori algorithm (itemsets present in at least 50% of transactions).
# The 0/1 integer frame is cast to bool only for the call, because mlxtend
# deprecates non-bool input; df itself stays integer-typed for later cells.
frq_items = apriori(df.astype(bool), min_support=0.5, use_colnames=True)
frq_items

Unnamed: 0,support,itemsets
0,0.636364,(Item_1)
1,0.818182,(Item_2)
2,0.636364,(Item_3)
3,0.636364,(Item_5)
4,0.545455,"(Item_1, Item_5)"
5,0.545455,"(Item_2, Item_3)"
6,0.545455,"(Item_2, Item_5)"
7,0.545455,"(Item_3, Item_5)"


In [8]:
# Get frequent itemsets from a DataFrame with FP-Growth.
# The one-hot frame is cast to bool because mlxtend deprecates non-bool input.
# With use_colnames=False the itemsets are reported as column positions
# rather than column names.
fpgrowth(df.astype(bool), min_support=0.5, use_colnames=False, max_len=None, verbose=0)

Unnamed: 0,support,itemsets
0,0.818182,(1)
1,0.636364,(4)
2,0.636364,(2)
3,0.636364,(0)
4,0.545455,"(1, 4)"
5,0.545455,"(2, 4)"
6,0.545455,"(1, 2)"
7,0.545455,"(0, 4)"


In [9]:
# Get maximal frequent itemsets from a DataFrame with FP-Max.
# The one-hot frame is cast to bool because mlxtend deprecates non-bool input.
# With use_colnames=False the itemsets are reported as column positions
# rather than column names.
fpmax(df.astype(bool), min_support=0.5, use_colnames=False, max_len=None, verbose=0)

Unnamed: 0,support,itemsets
0,0.545455,"(0, 4)"
1,0.545455,"(1, 2)"
2,0.545455,"(2, 4)"
3,0.545455,"(1, 4)"


In [10]:
# Derive association rules from the frequent itemsets (confidence >= 0.1)
# and list them from highest to lowest confidence
rules = (
    association_rules(frq_items, metric="confidence", min_threshold=0.1)
    .sort_values(by="confidence", ascending=False)
)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Item_1),(Item_5),0.636364,0.636364,0.545455,0.857143,1.346939,0.140496,2.545455
1,(Item_5),(Item_1),0.636364,0.636364,0.545455,0.857143,1.346939,0.140496,2.545455
3,(Item_3),(Item_2),0.636364,0.818182,0.545455,0.857143,1.047619,0.024793,1.272727
5,(Item_5),(Item_2),0.636364,0.818182,0.545455,0.857143,1.047619,0.024793,1.272727
6,(Item_3),(Item_5),0.636364,0.636364,0.545455,0.857143,1.346939,0.140496,2.545455
7,(Item_5),(Item_3),0.636364,0.636364,0.545455,0.857143,1.346939,0.140496,2.545455
2,(Item_2),(Item_3),0.818182,0.636364,0.545455,0.666667,1.047619,0.024793,1.090909
4,(Item_2),(Item_5),0.818182,0.636364,0.545455,0.666667,1.047619,0.024793,1.090909


In [11]:
# For Association Rule (Item_1,Item_2==>>Item_3), the dataset will change.
# Build df12 as a NEW frame: a plain `df12 = df` only creates a second name for
# the same object, so the replacements below would silently overwrite df too.
# NOTE(review): forcing Item_1 and Item_2 to 1 in every row makes their joint
# support 1.0, so the confidence of {Item_1, Item_2} -> X computed from df12
# simply equals support(X) -- confirm this is what the assignment asks for.
df12 = df.assign(
    Item_1=df['Item_1'].replace(0, 1),
    Item_2=df['Item_2'].replace(0, 1),
)
df12

Unnamed: 0,Item_1,Item_2,Item_3,Item_4,Item_5
0,1,1,1,1,1
1,1,1,0,1,0
2,1,1,1,0,1
3,1,1,0,0,0
4,1,1,1,1,1
5,1,1,0,0,0
6,1,1,0,1,1
7,1,1,1,0,1
8,1,1,1,0,1
9,1,1,1,0,0


In [12]:
# Apply the Apriori algorithm to the modified frame (min support 50%).
# The 0/1 integer frame is cast to bool for the call because mlxtend
# deprecates non-bool input; supports and itemsets are unaffected.
frq_items = apriori(df12.astype(bool), min_support=0.5, use_colnames=True)
frq_items

Unnamed: 0,support,itemsets
0,1.0,(Item_1)
1,1.0,(Item_2)
2,0.636364,(Item_3)
3,0.636364,(Item_5)
4,1.0,"(Item_2, Item_1)"
5,0.636364,"(Item_3, Item_1)"
6,0.636364,"(Item_1, Item_5)"
7,0.636364,"(Item_2, Item_3)"
8,0.636364,"(Item_2, Item_5)"
9,0.545455,"(Item_3, Item_5)"


In [13]:
# Derive association rules from the frequent itemsets (confidence >= 0.1)
# and list them from highest to lowest confidence
rules = (
    association_rules(frq_items, metric="confidence", min_threshold=0.1)
    .sort_values(by="confidence", ascending=False)
)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Item_2),(Item_1),1.0,1.0,1.0,1.0,1.0,0.0,inf
12,"(Item_2, Item_3)",(Item_1),0.636364,1.0,0.636364,1.0,1.0,0.0,inf
44,"(Item_3, Item_5)","(Item_2, Item_1)",0.545455,1.0,0.545455,1.0,1.0,0.0,inf
39,"(Item_3, Item_1, Item_5)",(Item_2),0.545455,1.0,0.545455,1.0,1.0,0.0,inf
37,"(Item_2, Item_3, Item_5)",(Item_1),0.545455,1.0,0.545455,1.0,1.0,0.0,inf
32,"(Item_3, Item_5)",(Item_2),0.545455,1.0,0.545455,1.0,1.0,0.0,inf
1,(Item_1),(Item_2),1.0,1.0,1.0,1.0,1.0,0.0,inf
23,(Item_5),"(Item_2, Item_1)",0.636364,1.0,0.636364,1.0,1.0,0.0,inf
20,"(Item_1, Item_5)",(Item_2),0.636364,1.0,0.636364,1.0,1.0,0.0,inf
19,"(Item_2, Item_5)",(Item_1),0.636364,1.0,0.636364,1.0,1.0,0.0,inf


In [14]:
# Keep only the rules whose antecedent is exactly the itemset named in the question
wanted_antecedent = {'Item_2','Item_1'}
antecedent_matches = rules['antecedents'] == wanted_antecedent
rules[antecedent_matches]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
18,"(Item_2, Item_1)",(Item_5),1.0,0.636364,0.636364,0.636364,1.0,0.0,1.0
13,"(Item_2, Item_1)",(Item_3),1.0,0.636364,0.636364,0.636364,1.0,0.0,1.0
41,"(Item_2, Item_1)","(Item_3, Item_5)",1.0,0.545455,0.545455,0.545455,1.0,0.0,1.0


In [15]:
# Reload the dataset from the GitHub URL and drop the Transaction_ID column,
# so this cell starts from the unmodified item columns.
df = pd.read_csv(url).drop(columns=["Transaction_ID"])

# For Association Rule (Item_3,Item_5==>>Item_2), the dataset will change.
# Build df35 as a NEW frame: a plain `df35 = df` only creates a second name for
# the same object, so the replacements below would silently overwrite df too.
# NOTE(review): forcing Item_3 and Item_5 to 1 in every row makes their joint
# support 1.0, so the confidence of {Item_3, Item_5} -> X computed from df35
# simply equals support(X) -- confirm this is what the assignment asks for.
df35 = df.assign(
    Item_3=df['Item_3'].replace(0, 1),
    Item_5=df['Item_5'].replace(0, 1),
)
df35


Unnamed: 0,Item_1,Item_2,Item_3,Item_4,Item_5
0,1,1,1,1,1
1,0,1,1,1,1
2,1,1,1,0,1
3,0,1,1,0,1
4,0,1,1,1,1
5,1,0,1,0,1
6,1,1,1,1,1
7,1,0,1,0,1
8,1,1,1,0,1
9,0,1,1,0,1


In [16]:
# Apply the Apriori algorithm to the modified frame (min support 50%).
# The 0/1 integer frame is cast to bool for the call because mlxtend
# deprecates non-bool input; supports and itemsets are unaffected.
frq_items = apriori(df35.astype(bool), min_support=0.5, use_colnames=True)
frq_items

Unnamed: 0,support,itemsets
0,0.636364,(Item_1)
1,0.818182,(Item_2)
2,1.0,(Item_3)
3,1.0,(Item_5)
4,0.636364,"(Item_3, Item_1)"
5,0.636364,"(Item_1, Item_5)"
6,0.818182,"(Item_2, Item_3)"
7,0.818182,"(Item_2, Item_5)"
8,1.0,"(Item_3, Item_5)"
9,0.636364,"(Item_3, Item_1, Item_5)"


In [17]:
# Derive association rules from the frequent itemsets (confidence >= 0.1)
# and list them from highest to lowest confidence
rules = (
    association_rules(frq_items, metric="confidence", min_threshold=0.1)
    .sort_values(by="confidence", ascending=False)
)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
8,(Item_3),(Item_5),1.0,1.0,1.0,1.0,1.0,0.0,inf
17,"(Item_2, Item_5)",(Item_3),0.818182,1.0,0.818182,1.0,1.0,0.0,inf
14,(Item_1),"(Item_3, Item_5)",0.636364,1.0,0.636364,1.0,1.0,0.0,inf
19,(Item_2),"(Item_3, Item_5)",0.818182,1.0,0.818182,1.0,1.0,0.0,inf
12,"(Item_1, Item_5)",(Item_3),0.636364,1.0,0.636364,1.0,1.0,0.0,inf
1,(Item_1),(Item_3),0.636364,1.0,0.636364,1.0,1.0,0.0,inf
10,"(Item_3, Item_1)",(Item_5),0.636364,1.0,0.636364,1.0,1.0,0.0,inf
9,(Item_5),(Item_3),1.0,1.0,1.0,1.0,1.0,0.0,inf
16,"(Item_2, Item_3)",(Item_5),0.818182,1.0,0.818182,1.0,1.0,0.0,inf
6,(Item_2),(Item_5),0.818182,1.0,0.818182,1.0,1.0,0.0,inf


In [18]:
# Keep only the rules whose antecedent is exactly the itemset named in the question
wanted_antecedent = {'Item_3','Item_5'}
antecedent_matches = rules['antecedents'] == wanted_antecedent
rules[antecedent_matches]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
18,"(Item_3, Item_5)",(Item_2),1.0,0.818182,0.818182,0.818182,1.0,0.0,1.0
11,"(Item_3, Item_5)",(Item_1),1.0,0.636364,0.636364,0.636364,1.0,0.0,1.0
