In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
!pip install mlxtend



In [3]:
from mlxtend.frequent_patterns import apriori,association_rules
from mlxtend.preprocessing import TransactionEncoder

In [4]:
movies=pd.read_csv("my_movies.csv")

In [5]:
movies

Unnamed: 0,V1,V2,V3,V4,V5,Sixth Sense,Gladiator,LOTR1,Harry Potter1,Patriot,LOTR2,Harry Potter2,LOTR,Braveheart,Green Mile
0,Sixth Sense,LOTR1,Harry Potter1,Green Mile,LOTR2,1,0,1,1,0,1,0,0,0,1
1,Gladiator,Patriot,Braveheart,,,0,1,0,0,1,0,0,0,1,0
2,LOTR1,LOTR2,,,,0,0,1,0,0,1,0,0,0,0
3,Gladiator,Patriot,Sixth Sense,,,1,1,0,0,1,0,0,0,0,0
4,Gladiator,Patriot,Sixth Sense,,,1,1,0,0,1,0,0,0,0,0
5,Gladiator,Patriot,Sixth Sense,,,1,1,0,0,1,0,0,0,0,0
6,Harry Potter1,Harry Potter2,,,,0,0,0,1,0,0,1,0,0,0
7,Gladiator,Patriot,,,,0,1,0,0,1,0,0,0,0,0
8,Gladiator,Patriot,Sixth Sense,,,1,1,0,0,1,0,0,0,0,0
9,Sixth Sense,LOTR,Gladiator,Green Mile,,1,1,0,0,0,0,0,1,0,1


## Pre-Processing
### As the data is not in transaction formation, We are using Transaction Encoder

In [6]:
movies1=pd.get_dummies(movies)
movies1.head()

Unnamed: 0,Sixth Sense,Gladiator,LOTR1,Harry Potter1,Patriot,LOTR2,Harry Potter2,LOTR,Braveheart,Green Mile,...,V2_LOTR,V2_LOTR1,V2_LOTR2,V2_Patriot,V3_Braveheart,V3_Gladiator,V3_Harry Potter1,V3_Sixth Sense,V4_Green Mile,V5_LOTR2
0,1,0,1,1,0,1,0,0,0,1,...,0,1,0,0,0,0,1,0,1,1
1,0,1,0,0,1,0,0,0,1,0,...,0,0,0,1,1,0,0,0,0,0
2,0,0,1,0,0,1,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
3,1,1,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,1,0,0
4,1,1,0,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,1,0,0


## Apriori Algorithm

In [7]:
frequent_itemsets = apriori(movies1, min_support=0.1, use_colnames=True)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.6,(Sixth Sense)
1,0.7,(Gladiator)
2,0.2,(LOTR1)
3,0.2,(Harry Potter1)
4,0.6,(Patriot)
...,...,...
1392,0.1,"(Harry Potter1, V1_Sixth Sense, V2_LOTR1, V5_L..."
1393,0.1,"(V1_Sixth Sense, V2_LOTR1, V5_LOTR2, Sixth Sen..."
1394,0.1,"(Harry Potter1, V1_Sixth Sense, V2_LOTR1, V5_L..."
1395,0.1,"(Harry Potter1, V1_Sixth Sense, V2_LOTR1, V5_L..."


In [8]:
rules = association_rules(frequent_itemsets,metric="lift",min_threshold=0.7)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Gladiator),(Sixth Sense),0.7,0.6,0.5,0.714286,1.190476,0.08,1.40
1,(Sixth Sense),(Gladiator),0.6,0.7,0.5,0.833333,1.190476,0.08,1.80
2,(LOTR1),(Sixth Sense),0.2,0.6,0.1,0.500000,0.833333,-0.02,0.80
3,(Sixth Sense),(LOTR1),0.6,0.2,0.1,0.166667,0.833333,-0.02,0.96
4,(Harry Potter1),(Sixth Sense),0.2,0.6,0.1,0.500000,0.833333,-0.02,0.80
...,...,...,...,...,...,...,...,...,...
64247,(V3_Harry Potter1),"(Harry Potter1, V1_Sixth Sense, V2_LOTR1, V5_L...",0.1,0.1,0.1,1.000000,10.000000,0.09,inf
64248,(V4_Green Mile),"(Harry Potter1, V1_Sixth Sense, V2_LOTR1, V5_L...",0.2,0.1,0.1,0.500000,5.000000,0.08,1.80
64249,(LOTR2),"(Harry Potter1, V1_Sixth Sense, V2_LOTR1, V5_L...",0.2,0.1,0.1,0.500000,5.000000,0.08,1.80
64250,(Green Mile),"(Harry Potter1, V1_Sixth Sense, V2_LOTR1, V5_L...",0.2,0.1,0.1,0.500000,5.000000,0.08,1.80


### An leverage value of 0 indicates independence. Range will be[-1,1]
####  A high conviction value means that the consequent is highly depending on the antecedent and range[0 inf]

In [9]:
rules.sort_values('lift',ascending = False)[0:20]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
32126,"(Harry Potter1, V3_Harry Potter1, Green Mile, ...","(LOTR1, V5_LOTR2, Sixth Sense)",0.1,0.1,0.1,1.0,10.0,0.09,inf
33438,"(LOTR1, LOTR2, Sixth Sense)","(Harry Potter1, V4_Green Mile, V2_LOTR1, V5_LO...",0.1,0.1,0.1,1.0,10.0,0.09,inf
33416,"(Harry Potter1, LOTR1, Sixth Sense)","(V4_Green Mile, V2_LOTR1, V5_LOTR2, LOTR2)",0.1,0.1,0.1,1.0,10.0,0.09,inf
33417,"(Harry Potter1, V4_Green Mile, LOTR2)","(LOTR1, V2_LOTR1, V5_LOTR2, Sixth Sense)",0.1,0.1,0.1,1.0,10.0,0.09,inf
33418,"(Harry Potter1, V4_Green Mile, LOTR1)","(LOTR2, V2_LOTR1, V5_LOTR2, Sixth Sense)",0.1,0.1,0.1,1.0,10.0,0.09,inf
33419,"(Harry Potter1, LOTR1, LOTR2)","(V4_Green Mile, V2_LOTR1, V5_LOTR2, Sixth Sense)",0.1,0.1,0.1,1.0,10.0,0.09,inf
33420,"(V2_LOTR1, V5_LOTR2, Sixth Sense)","(Harry Potter1, LOTR1, V4_Green Mile, LOTR2)",0.1,0.1,0.1,1.0,10.0,0.09,inf
33421,"(V4_Green Mile, V2_LOTR1, V5_LOTR2)","(Harry Potter1, LOTR1, LOTR2, Sixth Sense)",0.1,0.1,0.1,1.0,10.0,0.09,inf
33422,"(V2_LOTR1, V5_LOTR2, LOTR2)","(Harry Potter1, V4_Green Mile, LOTR1, Sixth Se...",0.1,0.1,0.1,1.0,10.0,0.09,inf
33423,"(LOTR1, V2_LOTR1, V5_LOTR2)","(Harry Potter1, V4_Green Mile, LOTR2, Sixth Se...",0.1,0.1,0.1,1.0,10.0,0.09,inf


In [10]:
rules[rules.lift>1]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Gladiator),(Sixth Sense),0.7,0.6,0.5,0.714286,1.190476,0.08,1.4
1,(Sixth Sense),(Gladiator),0.6,0.7,0.5,0.833333,1.190476,0.08,1.8
6,(Patriot),(Sixth Sense),0.6,0.6,0.4,0.666667,1.111111,0.04,1.2
7,(Sixth Sense),(Patriot),0.6,0.6,0.4,0.666667,1.111111,0.04,1.2
10,(LOTR),(Sixth Sense),0.1,0.6,0.1,1.000000,1.666667,0.04,inf
...,...,...,...,...,...,...,...,...,...
64247,(V3_Harry Potter1),"(Harry Potter1, V1_Sixth Sense, V2_LOTR1, V5_L...",0.1,0.1,0.1,1.000000,10.000000,0.09,inf
64248,(V4_Green Mile),"(Harry Potter1, V1_Sixth Sense, V2_LOTR1, V5_L...",0.2,0.1,0.1,0.500000,5.000000,0.08,1.8
64249,(LOTR2),"(Harry Potter1, V1_Sixth Sense, V2_LOTR1, V5_L...",0.2,0.1,0.1,0.500000,5.000000,0.08,1.8
64250,(Green Mile),"(Harry Potter1, V1_Sixth Sense, V2_LOTR1, V5_L...",0.2,0.1,0.1,0.500000,5.000000,0.08,1.8


# For Book Dataset

In [11]:
book=pd.read_csv("book.csv")
book

Unnamed: 0,ChildBks,YouthBks,CookBks,DoItYBks,RefBks,ArtBks,GeogBks,ItalCook,ItalAtlas,ItalArt,Florence
0,0,1,0,1,0,0,1,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0
3,1,1,1,0,1,0,1,0,0,0,0
4,0,0,1,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
1995,0,0,1,0,0,1,1,1,0,1,1
1996,0,0,0,0,0,0,0,0,0,0,0
1997,0,0,0,0,0,0,0,0,0,0,0
1998,0,0,1,0,0,0,0,0,0,0,0


## Pre-Processing
#### As the data is not in transaction formation, We are using transaction Encoder

In [12]:
book1=pd.get_dummies(book)
book1.head()

Unnamed: 0,ChildBks,YouthBks,CookBks,DoItYBks,RefBks,ArtBks,GeogBks,ItalCook,ItalAtlas,ItalArt,Florence
0,0,1,0,1,0,0,1,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0
3,1,1,1,0,1,0,1,0,0,0,0
4,0,0,1,0,0,0,1,0,0,0,0


## Apriori Algorithm

In [13]:
frequent_itemsets = apriori(book1, min_support=0.1, use_colnames=True)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.423,(ChildBks)
1,0.2475,(YouthBks)
2,0.431,(CookBks)
3,0.282,(DoItYBks)
4,0.2145,(RefBks)
5,0.241,(ArtBks)
6,0.276,(GeogBks)
7,0.1135,(ItalCook)
8,0.1085,(Florence)
9,0.165,"(ChildBks, YouthBks)"


In [14]:
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=0.7)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(ChildBks),(YouthBks),0.4230,0.2475,0.1650,0.390071,1.576044,0.060308,1.233750
1,(YouthBks),(ChildBks),0.2475,0.4230,0.1650,0.666667,1.576044,0.060308,1.731000
2,(CookBks),(ChildBks),0.4310,0.4230,0.2560,0.593968,1.404179,0.073687,1.421069
3,(ChildBks),(CookBks),0.4230,0.4310,0.2560,0.605201,1.404179,0.073687,1.441240
4,(ChildBks),(DoItYBks),0.4230,0.2820,0.1840,0.434988,1.542511,0.064714,1.270770
...,...,...,...,...,...,...,...,...,...
95,"(CookBks, GeogBks)",(ArtBks),0.1925,0.2410,0.1035,0.537662,2.230964,0.057107,1.641657
96,"(ArtBks, GeogBks)",(CookBks),0.1275,0.4310,0.1035,0.811765,1.883445,0.048547,3.022812
97,(CookBks),"(ArtBks, GeogBks)",0.4310,0.1275,0.1035,0.240139,1.883445,0.048547,1.148237
98,(ArtBks),"(CookBks, GeogBks)",0.2410,0.1925,0.1035,0.429461,2.230964,0.057107,1.415327


### An leverage value of 0 indicates independence. Range will be[-1,1]
####  A high conviction value means that the consequent is highly depending on the antecedent and range[0 inf]

In [15]:
rules.sort_values('lift',ascending = False)[0:20]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
28,(CookBks),(ItalCook),0.431,0.1135,0.1135,0.263341,2.320186,0.064582,1.203406
29,(ItalCook),(CookBks),0.1135,0.431,0.1135,1.0,2.320186,0.064582,inf
76,"(ChildBks, ArtBks)",(GeogBks),0.1625,0.276,0.102,0.627692,2.274247,0.05715,1.944628
81,(GeogBks),"(ChildBks, ArtBks)",0.276,0.1625,0.102,0.369565,2.274247,0.05715,1.328448
87,(ArtBks),"(CookBks, DoItYBks)",0.241,0.1875,0.1015,0.421162,2.246196,0.056313,1.403674
82,"(CookBks, DoItYBks)",(ArtBks),0.1875,0.241,0.1015,0.541333,2.246196,0.056313,1.654797
99,(GeogBks),"(CookBks, ArtBks)",0.276,0.167,0.1035,0.375,2.245509,0.057408,1.3328
94,"(CookBks, ArtBks)",(GeogBks),0.167,0.276,0.1035,0.61976,2.245509,0.057408,1.904063
95,"(CookBks, GeogBks)",(ArtBks),0.1925,0.241,0.1035,0.537662,2.230964,0.057107,1.641657
98,(ArtBks),"(CookBks, GeogBks)",0.241,0.1925,0.1035,0.429461,2.230964,0.057107,1.415327


In [16]:
rules[rules.lift>1]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(ChildBks),(YouthBks),0.4230,0.2475,0.1650,0.390071,1.576044,0.060308,1.233750
1,(YouthBks),(ChildBks),0.2475,0.4230,0.1650,0.666667,1.576044,0.060308,1.731000
2,(CookBks),(ChildBks),0.4310,0.4230,0.2560,0.593968,1.404179,0.073687,1.421069
3,(ChildBks),(CookBks),0.4230,0.4310,0.2560,0.605201,1.404179,0.073687,1.441240
4,(ChildBks),(DoItYBks),0.4230,0.2820,0.1840,0.434988,1.542511,0.064714,1.270770
...,...,...,...,...,...,...,...,...,...
95,"(CookBks, GeogBks)",(ArtBks),0.1925,0.2410,0.1035,0.537662,2.230964,0.057107,1.641657
96,"(ArtBks, GeogBks)",(CookBks),0.1275,0.4310,0.1035,0.811765,1.883445,0.048547,3.022812
97,(CookBks),"(ArtBks, GeogBks)",0.4310,0.1275,0.1035,0.240139,1.883445,0.048547,1.148237
98,(ArtBks),"(CookBks, GeogBks)",0.2410,0.1925,0.1035,0.429461,2.230964,0.057107,1.415327
