## Task 2.1 - Unsupervised Learning - Association Rule Mining - Binary Classification

In this task we will **use Association Rule Mining to find associations between the features and the target Adoption.**

In [1]:
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import warnings
# from apyori import apriori
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from sklearn import preprocessing
import category_encoders as ce

## Read the preprocessed dataset

In [2]:
PetFinder_dataset = pd.read_csv("PetFinder_dataset_pp.csv")

In [3]:
PetFinder_dataset.shape

(12987, 28)

In [4]:
PetFinder_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12987 entries, 0 to 12986
Data columns (total 28 columns):
Type                    12987 non-null object
Age                     12987 non-null int64
Gender                  12987 non-null object
MaturitySize            12987 non-null object
FurLength               12987 non-null object
Vaccinated              12987 non-null object
Dewormed                12987 non-null object
Sterilized              12987 non-null object
Health                  12987 non-null object
Quantity                12987 non-null int64
Fee                     12987 non-null int64
State                   12987 non-null object
PhotoAmt                12987 non-null float64
Hasname                 12987 non-null int64
Breed                   12987 non-null object
Color                   12987 non-null object
Polarity                12987 non-null float64
Subjectivity            12987 non-null float64
DescWords               12987 non-null int64
AgeBin             

In [5]:
PetFinder_dataset.columns

Index(['Type', 'Age', 'Gender', 'MaturitySize', 'FurLength', 'Vaccinated',
       'Dewormed', 'Sterilized', 'Health', 'Quantity', 'Fee', 'State',
       'PhotoAmt', 'Hasname', 'Breed', 'Color', 'Polarity', 'Subjectivity',
       'DescWords', 'AgeBin', 'FeeBin', 'PhotoAmtBin', 'PolarityBin',
       'SubjectivityBin', 'DescwordsBin', 'Adopted', 'AdoptionSpeed',
       'InitialAdoptionSpeed'],
      dtype='object')

In [6]:
PetFinder_dataset.isnull().sum()

Type                    0
Age                     0
Gender                  0
MaturitySize            0
FurLength               0
Vaccinated              0
Dewormed                0
Sterilized              0
Health                  0
Quantity                0
Fee                     0
State                   0
PhotoAmt                0
Hasname                 0
Breed                   0
Color                   0
Polarity                0
Subjectivity            0
DescWords               0
AgeBin                  0
FeeBin                  0
PhotoAmtBin             0
PolarityBin             0
SubjectivityBin         0
DescwordsBin            0
Adopted                 0
AdoptionSpeed           0
InitialAdoptionSpeed    0
dtype: int64

In [7]:
PetFinder_dataset.shape
one_hot = pd.get_dummies(PetFinder_dataset)
one_hot

Unnamed: 0,Age,Quantity,Fee,PhotoAmt,Hasname,Polarity,Subjectivity,DescWords,Adopted,InitialAdoptionSpeed,...,"DescwordsBin_(25.0, 34.0]","DescwordsBin_(34.0, 44.0]","DescwordsBin_(44.0, 56.0]","DescwordsBin_(56.0, 71.0]","DescwordsBin_(71.0, 93.0]","DescwordsBin_(9.0, 17.0]","DescwordsBin_(93.0, 135.0]",AdoptionSpeed_delayed adoption,AdoptionSpeed_instant adoption,AdoptionSpeed_very late adoption
0,3,1,100,1.0,1,0.130357,0.407143,69,True,2,...,0,0,0,1,0,0,0,0,1,0
1,1,1,0,2.0,0,0.100000,0.400000,23,True,0,...,0,0,0,0,0,0,0,0,1,0
2,1,1,0,7.0,1,0.261905,0.542857,69,True,3,...,0,0,0,1,0,0,0,1,0,0
3,4,1,150,8.0,1,0.341146,0.437500,25,True,2,...,0,0,0,0,0,0,0,0,1,0
4,1,1,0,3.0,1,0.400000,0.743750,81,True,2,...,0,0,0,0,1,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12982,1,1,0,1.0,1,0.150000,0.358333,81,True,3,...,0,0,0,0,1,0,0,1,0,0
12983,6,1,0,1.0,1,0.000000,0.000000,1,True,0,...,0,0,0,0,0,0,0,0,1,0
12984,60,2,0,3.0,1,0.166667,0.287500,31,False,4,...,1,0,0,0,0,0,0,0,0,1
12985,9,1,0,3.0,1,0.490000,0.650000,18,False,4,...,0,0,0,0,0,0,0,0,0,1


# Binary Classification Problem
Only categorical features will be used in the association rule mining

In [8]:
one_hot = pd.get_dummies(PetFinder_dataset.drop(['Age','Quantity','Fee','PhotoAmt','Polarity','Subjectivity','DescWords','AdoptionSpeed','InitialAdoptionSpeed'],axis=1))

In [9]:
one_hot_Adopted=one_hot[one_hot['Adopted']].drop(['Adopted'],axis=1)
one_hot_NAdopted=one_hot[~one_hot['Adopted']].drop(['Adopted'],axis=1)


## Finding frequent itemsets in the adopted pets

In [10]:
frq_itmsets_A=apriori(one_hot_Adopted,min_support=0.50,use_colnames=True)
frq_itmsets_A['length']= frq_itmsets_A['itemsets'].apply(lambda x: len(x))
frq_itmsets_A['itemsetsT']= frq_itmsets_A['itemsets'].apply(lambda x: ','.join(list(x))).astype("unicode")
print(frq_itmsets_A[frq_itmsets_A['length']>3]['itemsets'].values)

[frozenset({'Hasname', 'MaturitySize_Medium', 'Health_Healthy', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'Sterilized_No', 'Hasname', 'Health_Healthy', 'FeeBin_(-0.1, 0.0]'})]


## Finding frequent itemsets in the not adopted pets

In [11]:
frq_itmsets_NA=apriori(one_hot_NAdopted,min_support=0.50,use_colnames=True)
frq_itmsets_NA['length']= frq_itmsets_NA['itemsets'].apply(lambda x: len(x))
frq_itmsets_NA['itemsetsT']= frq_itmsets_NA['itemsets'].apply(lambda x: ','.join(list(x))).astype("unicode")
print(frq_itmsets_NA[frq_itmsets_NA['length']>3]['itemsets'].values)

[frozenset({'Hasname', 'MaturitySize_Medium', 'Health_Healthy', 'FeeBin_(-0.1, 0.0]'})]


## Finding association rules in the dataset

In [12]:
frq_itmsets=apriori(one_hot,min_support=0.25,use_colnames=True)
rules=association_rules(frq_itmsets,metric="confidence",min_threshold=0.8)
rules['length']= rules['antecedents'].apply(lambda x: len(x))
rules['consequentsT']= rules['consequents'].apply(lambda x: ','.join(list(x))).astype("unicode")
print(rules[(rules['length']>3) & (rules['consequentsT'].eq('Adopted'))]['antecedents'].values)

[frozenset({'Sterilized_No', 'Hasname', 'MaturitySize_Medium', 'Health_Healthy'})
 frozenset({'Sterilized_No', 'Hasname', 'MaturitySize_Medium', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'Hasname', 'MaturitySize_Medium', 'Health_Healthy', 'AgeBin_(-0.1, 3.0]'})
 frozenset({'Sterilized_No', 'Health_Healthy', 'Hasname', 'Vaccinated_No'})
 frozenset({'Sterilized_No', 'Dewormed_Yes', 'Hasname', 'Health_Healthy'})
 frozenset({'Sterilized_No', 'State_Selangor', 'Hasname', 'Health_Healthy'})
 frozenset({'Sterilized_No', 'Hasname', 'Health_Healthy', 'AgeBin_(-0.1, 3.0]'})
 frozenset({'Sterilized_No', 'Hasname', 'Health_Healthy', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'Sterilized_No', 'Hasname', 'AgeBin_(-0.1, 3.0]', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'Hasname', 'Health_Healthy', 'AgeBin_(-0.1, 3.0]', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'Sterilized_No', 'MaturitySize_Medium', 'Health_Healthy', 'AgeBin_(-0.1, 3.0]'})
 frozenset({'Sterilized_No', 'MaturitySize_Medium', 'Health_Healthy', 'FeeBin_(-0.1, 0.0]'}

In [13]:
rules[(rules['length']>3) & (rules['consequentsT'].eq('Adopted'))]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,length,consequentsT
1369,"(Sterilized_No, Hasname, MaturitySize_Medium, ...",(Adopted),0.412874,0.729884,0.33418,0.809399,1.108943,0.03283,1.417185,4,Adopted
1373,"(Sterilized_No, Hasname, MaturitySize_Medium, ...",(Adopted),0.35651,0.729884,0.289289,0.811447,1.111748,0.029078,1.432575,4,Adopted
1383,"(Hasname, MaturitySize_Medium, Health_Healthy,...",(Adopted),0.309771,0.729884,0.265573,0.85732,1.174599,0.039476,1.893167,4,Adopted
1401,"(Sterilized_No, Health_Healthy, Hasname, Vacci...",(Adopted),0.339955,0.729884,0.27797,0.817667,1.12027,0.029842,1.481445,4,Adopted
1420,"(Sterilized_No, Dewormed_Yes, Hasname, Health_...",(Adopted),0.326942,0.729884,0.264495,0.808997,1.108391,0.025865,1.414197,4,Adopted
1428,"(Sterilized_No, State_Selangor, Hasname, Healt...",(Adopted),0.3465,0.729884,0.286132,0.825778,1.131383,0.033227,1.550412,4,Adopted
1433,"(Sterilized_No, Hasname, Health_Healthy, AgeBi...",(Adopted),0.373835,0.729884,0.325017,0.869413,1.191166,0.052161,2.068477,4,Adopted
1441,"(Sterilized_No, Hasname, Health_Healthy, FeeBi...",(Adopted),0.480095,0.729884,0.388157,0.8085,1.107711,0.037744,1.410532,4,Adopted
1447,"(Sterilized_No, Hasname, AgeBin_(-0.1, 3.0], F...",(Adopted),0.328097,0.729884,0.285131,0.869045,1.190662,0.045658,2.062662,4,Adopted
1461,"(Hasname, Health_Healthy, AgeBin_(-0.1, 3.0], ...",(Adopted),0.364441,0.729884,0.310541,0.852102,1.167449,0.044541,1.826372,4,Adopted


# Multiclass Classification Problem
Only categorical features will be used in the association rule mining

In [14]:
one_hot = pd.get_dummies(PetFinder_dataset.drop(['Age','Quantity','Fee','PhotoAmt','Polarity','Subjectivity','DescWords','Adopted','InitialAdoptionSpeed'],axis=1))

In [15]:
one_hot_IA=one_hot[one_hot['AdoptionSpeed_instant adoption']==1].drop(['AdoptionSpeed_instant adoption','AdoptionSpeed_delayed adoption','AdoptionSpeed_very late adoption'],axis=1)
one_hot_DA=one_hot[one_hot['AdoptionSpeed_delayed adoption']==1].drop(['AdoptionSpeed_instant adoption','AdoptionSpeed_delayed adoption','AdoptionSpeed_very late adoption'],axis=1)
one_hot_LA=one_hot[one_hot['AdoptionSpeed_very late adoption']==1].drop(['AdoptionSpeed_instant adoption','AdoptionSpeed_delayed adoption','AdoptionSpeed_very late adoption'],axis=1)

In [16]:
one_hot

Unnamed: 0,Hasname,Type_Cat,Type_Dog,Gender_Female,Gender_Male,Gender_Mixed,MaturitySize_Extra Large,MaturitySize_Large,MaturitySize_Medium,MaturitySize_Small,...,"DescwordsBin_(25.0, 34.0]","DescwordsBin_(34.0, 44.0]","DescwordsBin_(44.0, 56.0]","DescwordsBin_(56.0, 71.0]","DescwordsBin_(71.0, 93.0]","DescwordsBin_(9.0, 17.0]","DescwordsBin_(93.0, 135.0]",AdoptionSpeed_delayed adoption,AdoptionSpeed_instant adoption,AdoptionSpeed_very late adoption
0,1,1,0,0,1,0,0,0,0,1,...,0,0,0,1,0,0,0,0,1,0
1,0,1,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,1,0
2,1,0,1,0,1,0,0,0,1,0,...,0,0,0,1,0,0,0,1,0,0
3,1,0,1,1,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,1,0
4,1,0,1,0,1,0,0,0,1,0,...,0,0,0,0,1,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12982,1,1,0,1,0,0,0,0,1,0,...,0,0,0,0,1,0,0,1,0,0
12983,1,0,1,1,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,1,0
12984,1,1,0,0,0,1,0,0,1,0,...,1,0,0,0,0,0,0,0,0,1
12985,1,1,0,1,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,1


## Finding frequent itemsets in instantly adopted pets

In [17]:
frq_itmsets_IA=apriori(one_hot_IA,min_support=0.50,use_colnames=True)
frq_itmsets_IA['length']= frq_itmsets_IA['itemsets'].apply(lambda x: len(x))
frq_itmsets_IA['itemsetsT']= frq_itmsets_IA['itemsets'].apply(lambda x: ','.join(list(x))).astype("unicode")
print(frq_itmsets_IA[frq_itmsets_IA['length']>2]['itemsets'].values)

[frozenset({'Hasname', 'MaturitySize_Medium', 'Health_Healthy'})
 frozenset({'Hasname', 'MaturitySize_Medium', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'Dewormed_Yes', 'Hasname', 'Health_Healthy'})
 frozenset({'Sterilized_No', 'Hasname', 'Health_Healthy'})
 frozenset({'Sterilized_No', 'Hasname', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'State_Selangor', 'Hasname', 'Health_Healthy'})
 frozenset({'Hasname', 'Health_Healthy', 'AgeBin_(-0.1, 3.0]'})
 frozenset({'Hasname', 'Health_Healthy', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'Sterilized_No', 'MaturitySize_Medium', 'Health_Healthy'})
 frozenset({'MaturitySize_Medium', 'Health_Healthy', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'Sterilized_No', 'Health_Healthy', 'AgeBin_(-0.1, 3.0]'})
 frozenset({'Sterilized_No', 'Health_Healthy', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'Sterilized_No', 'Hasname', 'Health_Healthy', 'FeeBin_(-0.1, 0.0]'})]


## Finding frequent itemsets in delayed adoption pets

In [18]:
frq_itmsets_DA=apriori(one_hot_DA,min_support=0.50,use_colnames=True)
frq_itmsets_DA['length']= frq_itmsets_DA['itemsets'].apply(lambda x: len(x))
frq_itmsets_DA['itemsetsT']= frq_itmsets_DA['itemsets'].apply(lambda x: ','.join(list(x))).astype("unicode")
print(frq_itmsets_DA[frq_itmsets_DA['length']>2]['itemsets'].values)

[frozenset({'Hasname', 'Health_Healthy', 'Type_Dog'})
 frozenset({'Hasname', 'Gender_Female', 'Health_Healthy'})
 frozenset({'Hasname', 'MaturitySize_Medium', 'Health_Healthy'})
 frozenset({'Hasname', 'MaturitySize_Medium', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'FurLength_Short', 'Hasname', 'Health_Healthy'})
 frozenset({'Dewormed_Yes', 'Hasname', 'Health_Healthy'})
 frozenset({'Dewormed_Yes', 'Hasname', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'Sterilized_No', 'Hasname', 'Health_Healthy'})
 frozenset({'Sterilized_No', 'Hasname', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'State_Selangor', 'Hasname', 'Health_Healthy'})
 frozenset({'Health_Healthy', 'Hasname', 'Breed_Mixed Breed'})
 frozenset({'Hasname', 'Health_Healthy', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'Health_Healthy', 'Type_Dog', 'Breed_Mixed Breed'})
 frozenset({'Type_Dog', 'Health_Healthy', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'Sterilized_No', 'MaturitySize_Medium', 'Health_Healthy'})
 frozenset({'MaturitySize_Medium', 'Health_Healthy', 'FeeBin_(-

## Finding frequent itemsets in the not adopted pets

In [19]:
frq_itmsets_LA=apriori(one_hot_LA,min_support=0.50,use_colnames=True)
frq_itmsets_LA['length']= frq_itmsets_LA['itemsets'].apply(lambda x: len(x))
frq_itmsets_LA['itemsetsT']= frq_itmsets_LA['itemsets'].apply(lambda x: ','.join(list(x))).astype("unicode")
print(frq_itmsets_LA[frq_itmsets_LA['length']>2]['itemsets'].values)

[frozenset({'Hasname', 'Health_Healthy', 'Type_Dog'})
 frozenset({'Hasname', 'MaturitySize_Medium', 'Health_Healthy'})
 frozenset({'Hasname', 'MaturitySize_Medium', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'FurLength_Short', 'Hasname', 'Health_Healthy'})
 frozenset({'Dewormed_Yes', 'Hasname', 'Health_Healthy'})
 frozenset({'Health_Healthy', 'Hasname', 'Breed_Mixed Breed'})
 frozenset({'Hasname', 'Health_Healthy', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'Hasname', 'Health_Healthy', 'PhotoAmtBin_(0.99, 3.99]'})
 frozenset({'Health_Healthy', 'Type_Dog', 'Breed_Mixed Breed'})
 frozenset({'MaturitySize_Medium', 'Health_Healthy', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'Hasname', 'MaturitySize_Medium', 'Health_Healthy', 'FeeBin_(-0.1, 0.0]'})]


## Finding association rules in the dataset

In [20]:
frq_itmsets=apriori(one_hot,min_support=0.1,use_colnames=True)
rules=association_rules(frq_itmsets,metric="confidence",min_threshold=0.15)
rules['length']= rules['antecedents'].apply(lambda x: len(x))
rules['consequentsT']= rules['consequents'].apply(lambda x: ','.join(list(x))).astype("unicode")
rules['length']= rules['antecedents'].apply(lambda x: len(x))

Instantly adopted pets

In [21]:
print(rules[(rules['length']>6) & (rules['confidence']>0.6) & (rules['consequentsT'].eq('AdoptionSpeed_instant adoption'))]['antecedents'].values)

[frozenset({'Sterilized_No', 'FeeBin_(-0.1, 0.0]', 'Hasname', 'Vaccinated_No', 'Health_Healthy', 'MaturitySize_Medium', 'AgeBin_(-0.1, 3.0]'})]


In [22]:
rules[(rules['length']>6) & (rules['confidence']>0.6) & (rules['consequentsT'].eq('AdoptionSpeed_instant adoption'))]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,length,consequentsT
298324,"(Sterilized_No, FeeBin_(-0.1, 0.0], Hasname, V...",(AdoptionSpeed_instant adoption),0.154616,0.509741,0.101948,0.659363,1.293526,0.023134,1.439241,7,AdoptionSpeed_instant adoption


Delayed adoption pets

In [23]:
print(rules[(rules['length']>3) & (rules['confidence']>0.1) & (rules['consequentsT'].eq('AdoptionSpeed_delayed adoption'))]['antecedents'].values)

[frozenset({'Breed_Mixed Breed', 'Hasname', 'Health_Healthy', 'Type_Dog'})
 frozenset({'Hasname', 'Health_Healthy', 'Type_Dog', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'Sterilized_No', 'Hasname', 'MaturitySize_Medium', 'Health_Healthy'})
 frozenset({'Hasname', 'MaturitySize_Medium', 'Health_Healthy', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'FurLength_Short', 'Hasname', 'Health_Healthy', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'Dewormed_Yes', 'Hasname', 'Health_Healthy', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'Sterilized_No', 'Hasname', 'Health_Healthy', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'State_Selangor', 'Hasname', 'Health_Healthy', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'Breed_Mixed Breed', 'Hasname', 'Health_Healthy', 'FeeBin_(-0.1, 0.0]'})
 frozenset({'Breed_Mixed Breed', 'Type_Dog', 'Health_Healthy', 'FeeBin_(-0.1, 0.0]'})]


In [24]:
rules[(rules['length']>3) & (rules['consequentsT'].str.contains('AdoptionSpeed_delayed adoption'))]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,length,consequentsT
65383,"(Breed_Mixed Breed, Hasname, Health_Healthy, T...",(AdoptionSpeed_delayed adoption),0.41272,0.220143,0.10472,0.253731,1.152574,0.013863,1.045008,4,AdoptionSpeed_delayed adoption
65720,"(Hasname, Health_Healthy, Type_Dog, FeeBin_(-0...",(AdoptionSpeed_delayed adoption),0.409948,0.220143,0.102333,0.249624,1.133918,0.012086,1.039288,4,AdoptionSpeed_delayed adoption
80360,"(Sterilized_No, Hasname, MaturitySize_Medium, ...",(AdoptionSpeed_delayed adoption),0.412874,0.220143,0.101101,0.244871,1.112327,0.01021,1.032747,4,AdoptionSpeed_delayed adoption
82137,"(Hasname, MaturitySize_Medium, Health_Healthy,...",(AdoptionSpeed_delayed adoption),0.518057,0.220143,0.124355,0.240042,1.090388,0.010308,1.026184,4,AdoptionSpeed_delayed adoption
87639,"(FurLength_Short, Hasname, Health_Healthy, Fee...",(AdoptionSpeed_delayed adoption),0.439054,0.220143,0.104566,0.238162,1.08185,0.007911,1.023652,4,AdoptionSpeed_delayed adoption
94349,"(Dewormed_Yes, Hasname, Health_Healthy, FeeBin...",(AdoptionSpeed_delayed adoption),0.438977,0.220143,0.107415,0.244694,1.111521,0.010777,1.032504,4,AdoptionSpeed_delayed adoption
95694,"(Sterilized_No, Hasname, Health_Healthy, FeeBi...",(AdoptionSpeed_delayed adoption),0.480095,0.220143,0.111804,0.232879,1.057852,0.006114,1.016602,4,AdoptionSpeed_delayed adoption
97448,"(State_Selangor, Hasname, Health_Healthy, FeeB...",(AdoptionSpeed_delayed adoption),0.426503,0.220143,0.102179,0.239574,1.088264,0.008287,1.025552,4,AdoptionSpeed_delayed adoption
97944,"(Breed_Mixed Breed, Hasname, Health_Healthy, F...",(AdoptionSpeed_delayed adoption),0.40271,0.220143,0.100331,0.24914,1.131716,0.011677,1.038618,4,AdoptionSpeed_delayed adoption
114995,"(Breed_Mixed Breed, Type_Dog, Health_Healthy, ...",(AdoptionSpeed_delayed adoption),0.398552,0.220143,0.10087,0.253091,1.149666,0.013131,1.044112,4,AdoptionSpeed_delayed adoption


Very late adoption pets

In [25]:
print(rules[(rules['length']>4) & (rules['confidence']>0.1) & (rules['consequentsT'].eq('AdoptionSpeed_very late adoption'))]['antecedents'].values)

[frozenset({'Hasname', 'Health_Healthy', 'Breed_Mixed Breed', 'Type_Dog', 'MaturitySize_Medium'})
 frozenset({'FeeBin_(-0.1, 0.0]', 'Hasname', 'Health_Healthy', 'Breed_Mixed Breed', 'Type_Dog'})]


In [26]:
rules[(rules['length']>4) & (rules['consequentsT'].eq('AdoptionSpeed_very late adoption'))]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,length,consequentsT
157349,"(Hasname, Health_Healthy, Breed_Mixed Breed, T...",(AdoptionSpeed_very late adoption),0.340263,0.270116,0.101255,0.297579,1.101669,0.009344,1.039097,5,AdoptionSpeed_very late adoption
167442,"(FeeBin_(-0.1, 0.0], Hasname, Health_Healthy, ...",(AdoptionSpeed_very late adoption),0.35959,0.270116,0.107261,0.298287,1.104291,0.01013,1.040146,5,AdoptionSpeed_very late adoption


## 2.3 Association Rules - Results and Discussion 

In both Binary classification and Multiclass classification taks, we followed two distinct approaches:
- Find frequent itemsets within uniformely classified subsets of the overall dataset
- Find frequent itemsets and association rules within the overall dataset
For the binary classification problem, the minimum support for the frequent itemsets was set to 50% and we filtered in just the frequent itemsets with minimum length 3.
As for the association rule mining, we have only considered frequent itemsets with support greater than 25% as the bassis for the generating association rules. For the rules themselves we have set a minimum confidence 0.8 na drule length 3.
The association rule mining for the binary classification gave us two rules with length 5. Those are the most significant rules.

As for the Multiclass problem, we had to lower the bar in terms of minimum support for the frequent itemsets as well as for the minimum confidence to find rules.

...