In [1]:
# Uncomment to install the apyori dependency into the kernel's environment:
#%pip install apyori

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, LabelEncoder 
from apyori import apriori

In [3]:
# Read csv data
# The path is relative, so "Census.csv" must sit in the notebook's working directory.
census = pd.read_csv("Census.csv")
# Work on a copy so the label replacement further down leaves the raw frame `census` untouched.
census1 = census.copy()
# Preview the first rows; at this point the columns still hold the raw codes.
census1.head()

Unnamed: 0,Person ID,Region,Residence Type,Family Composition,Population Base,Sex,Age,Marital Status,Student,Country of Birth,Health,Ethnic Group,Religion,Economic Activity,Occupation,Industry,Hours worked per week,No of hours,Approximated Social Grade
0,7394816,E12000001,H,2,1,2,6,2,2,1,2,1,2,5,8,2,-9,,4
1,7394832,E12000001,H,3,1,2,1,1,2,1,2,1,1,-9,-9,-9,-9,,-9
2,7394719,E12000001,H,2,1,1,7,2,2,1,1,1,2,5,8,2,-9,,2
3,7394840,E12000001,H,1,1,2,6,4,2,1,3,1,2,5,9,5,-9,,4
4,7394711,E12000001,H,2,1,1,1,1,1,1,1,1,1,-9,-9,-9,-9,,-9


In [4]:
# Lookup tables translating the coded census values into readable labels.
# Region codes are strings, so they can be spelled as keyword arguments.
region_mapping = dict(
    E12000001='North East',
    E12000002='North West',
    E12000003='Yorkshire and the Humber',
    E12000004='East Midlands',
    E12000005='West Midlands',
    E12000006='East of England',
    E12000007='London',
    E12000008='South East',
    E12000009='South West',
    W92000004='Wales',
)

# The remaining codes are consecutive integers starting at 1, so each table is
# built by numbering an ordered list of labels; -9 is added explicitly where
# the original table carried a 'No code required' entry.
sex_mapping = dict(enumerate(['Male', 'Female'], start=1))

age_mapping = dict(enumerate(
    [
        '0 to 15',
        '16 to 24',
        '25 to 34',
        '35 to 44',
        '45 to 54',
        '55 to 64',
        '65 to 74',
        '75 and over',
    ],
    start=1,
))

religion_mapping = {
    **dict(enumerate(
        [
            'No religion',
            'Christian',
            'Buddhist',
            'Hindu',
            'Jewish',
            'Muslim',
            'Sikh',
            'Other religion',
            'Not stated',
        ],
        start=1,
    )),
    -9: 'No code required',
}

eg_mapping = {
    **dict(enumerate(
        [
            'White',
            'Mixed',
            'Asian and Asian British',
            'Black or Black British',
            'Chinese or Other ethnic group',
        ],
        start=1,
    )),
    -9: 'No code required',
}

health_mapping = {
    **dict(enumerate(
        [
            'Very good health',
            'Good health',
            'Fair health',
            'Bad health',
            'Very bad health',
        ],
        start=1,
    )),
    -9: 'No code required',
}

# Column -> lookup table, applied in the same order as before.
column_labels = {
    'Ethnic Group': eg_mapping,
    'Religion': religion_mapping,
    'Age': age_mapping,
    'Sex': sex_mapping,
    'Region': region_mapping,
    'Health': health_mapping,
}
for column, labels in column_labels.items():
    # replace() leaves any value that is not a key of the table unchanged.
    census1[column] = census1[column].replace(labels)

In [5]:
# Preview the frame again to check that the six replaced columns now show labels instead of codes.
census1.head()

Unnamed: 0,Person ID,Region,Residence Type,Family Composition,Population Base,Sex,Age,Marital Status,Student,Country of Birth,Health,Ethnic Group,Religion,Economic Activity,Occupation,Industry,Hours worked per week,No of hours,Approximated Social Grade
0,7394816,North East,H,2,1,Female,55 to 64,2,2,1,Good health,White,Christian,5,8,2,-9,,4
1,7394832,North East,H,3,1,Female,0 to 15,1,2,1,Good health,White,No religion,-9,-9,-9,-9,,-9
2,7394719,North East,H,2,1,Male,65 to 74,2,2,1,Very good health,White,Christian,5,8,2,-9,,2
3,7394840,North East,H,1,1,Female,55 to 64,4,2,1,Fair health,White,Christian,5,9,5,-9,,4
4,7394711,North East,H,2,1,Male,0 to 15,1,1,1,Very good health,White,No religion,-9,-9,-9,-9,,-9


In [6]:
# List the column names available for the analysis.
census1.keys()

Index(['Person ID', 'Region', 'Residence Type', 'Family Composition',
       'Population Base', 'Sex', 'Age', 'Marital Status', 'Student',
       'Country of Birth', 'Health', 'Ethnic Group', 'Religion',
       'Economic Activity', 'Occupation', 'Industry', 'Hours worked per week',
       'No of hours', 'Approximated Social Grade'],
      dtype='object')

In [7]:
# Select the columns for analysis
selected_columns = ['Sex', 'Ethnic Group', 'Age', 'Region', 'Religion', 'Health']

# Create a list of transactions: one list of string items per census row.
# itertuples(index=False, name=None) yields plain tuples, avoiding the
# per-row Series that iterrows() constructs, which is very slow on a large
# frame. Each value is still passed through str(), as before.
transactions = [
    [str(value) for value in row]
    for row in census1[selected_columns].itertuples(index=False, name=None)
]

In [8]:
# Show the first five transactions to check their shape: six string items per row.
print(transactions[:5])

[['Female', 'White', '55 to 64', 'North East', 'Christian', 'Good health'], ['Female', 'White', '0 to 15', 'North East', 'No religion', 'Good health'], ['Male', 'White', '65 to 74', 'North East', 'Christian', 'Very good health'], ['Female', 'White', '55 to 64', 'North East', 'Christian', 'Fair health'], ['Male', 'White', '0 to 15', 'North East', 'No religion', 'Very good health']]


In [9]:
# Thresholds handed to Apriori: minimum support, confidence and lift.
min_support, min_confidence, min_lift = 0.1, 0.1, 1.0

# Run the Apriori algorithm over the transactions with those thresholds.
association_rules = apriori(
    transactions,
    min_support=min_support,
    min_confidence=min_confidence,
    min_lift=min_lift,
)

# Collect the results into a list so they can be indexed and iterated repeatedly.
association_results = [record for record in association_rules]


In [10]:
# Inspect the first five raw records; each carries items, support and a list of ordered_statistics.
print(association_results[:5])

[RelationRecord(items=frozenset({'0 to 15'}), support=0.1875100923228139, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'0 to 15'}), confidence=0.1875100923228139, lift=1.0)]), RelationRecord(items=frozenset({'16 to 24'}), support=0.1277512549584021, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'16 to 24'}), confidence=0.1277512549584021, lift=1.0)]), RelationRecord(items=frozenset({'25 to 34'}), support=0.13330291009934356, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'25 to 34'}), confidence=0.13330291009934356, lift=1.0)]), RelationRecord(items=frozenset({'35 to 44'}), support=0.1380296275494085, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'35 to 44'}), confidence=0.1380296275494085, lift=1.0)]), RelationRecord(items=frozenset({'45 to 54'}), support=0.13583037876926318, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_a

In [11]:
# Flatten the Apriori output into one row per rule.
# Each record in association_results holds an itemset with its support plus a
# list of ordered_statistics; every ordered statistic splits the itemset into
# items_base (antecedent) and items_add (consequent) with a confidence and lift.
rule_columns = ['Items', 'Antecedent', 'Consequent', 'Support', 'Confidence', 'Lift']
rule_rows = [
    {
        # The frozensets are converted to plain sets, as in the original frame.
        'Items': set(record.items),
        'Antecedent': set(stat.items_base),
        'Consequent': set(stat.items_add),
        'Support': record.support,
        'Confidence': stat.confidence,
        'Lift': stat.lift,
    }
    for record in association_results
    for stat in record.ordered_statistics
]

# Passing columns= keeps the expected schema even when no rule was found.
df = pd.DataFrame(rule_rows, columns=rule_columns)

# Sort the resulting dataframe by Lift, strongest rules first.
df = df.sort_values(by='Lift', ascending=False)


In [12]:
# Renumber the rows 0..n-1 after the sort, discarding the pre-sort index.
df = df.reset_index(drop=True)

In [13]:
# Show the top rows of the rule table, which was sorted by Lift in descending order.
df.head()

Unnamed: 0,Items,Antecedent,Consequent,Support,Confidence,Lift
0,"{0 to 15, Very good health, White}",{Very good health},"{0 to 15, White}",0.11815,0.254047,1.731926
1,"{0 to 15, Very good health, White}","{0 to 15, White}",{Very good health},0.11815,0.805473,1.731926
2,"{0 to 15, Very good health}",{Very good health},{0 to 15},0.146698,0.315431,1.682207
3,"{0 to 15, Very good health}",{0 to 15},{Very good health},0.146698,0.78235,1.682207
4,"{0 to 15, Very good health, White}","{Very good health, White}",{0 to 15},0.11815,0.302167,1.611472


In [14]:
# (rows, columns) of the rule table: one row per ordered statistic.
df.shape

(184, 6)

In [15]:
# Keep only the rules whose lift is strictly greater than 1; rows with lift == 1 are dropped.
lift_above_one = df["Lift"] > 1
above_1 = df[lift_above_one]

In [16]:
# (rows, columns) remaining after the lift > 1 filter.
above_1.shape

(122, 6)

In [17]:
# Show up to 20 of the highest-lift rules; row order is inherited from the sorted `df`.
above_1.head(20)

Unnamed: 0,Items,Antecedent,Consequent,Support,Confidence,Lift
0,"{0 to 15, Very good health, White}",{Very good health},"{0 to 15, White}",0.11815,0.254047,1.731926
1,"{0 to 15, Very good health, White}","{0 to 15, White}",{Very good health},0.11815,0.805473,1.731926
2,"{0 to 15, Very good health}",{Very good health},{0 to 15},0.146698,0.315431,1.682207
3,"{0 to 15, Very good health}",{0 to 15},{Very good health},0.146698,0.78235,1.682207
4,"{0 to 15, Very good health, White}","{Very good health, White}",{0 to 15},0.11815,0.302167,1.611472
5,"{0 to 15, Very good health, White}",{0 to 15},"{Very good health, White}",0.11815,0.630101,1.611472
6,"{No religion, Very good health, White}",{No religion},"{Very good health, White}",0.123976,0.498623,1.275219
7,"{No religion, Very good health, White}","{Very good health, White}",{No religion},0.123976,0.317066,1.275219
8,"{No religion, White, Male}",{No religion},"{White, Male}",0.127679,0.513518,1.233549
9,"{No religion, White, Male}","{White, Male}",{No religion},0.127679,0.306705,1.233549


In [18]:
# From the lift > 1 rules, keep those with confidence strictly above 0.8.
confidence_above_80 = above_1['Confidence'] > 0.8
confidence_80 = above_1[confidence_above_80]

In [19]:
# Show up to 20 of the high-confidence rules; the original row labels from `df` are kept.
confidence_80.head(20)

Unnamed: 0,Items,Antecedent,Consequent,Support,Confidence,Lift
1,"{0 to 15, Very good health, White}","{0 to 15, White}",{Very good health},0.11815,0.805473,1.731926
29,"{Good health, Christian, White}","{Good health, Christian}",{White},0.188447,0.934788,1.101575
31,"{No religion, White, Male}","{No religion, Male}",{White},0.127679,0.934436,1.10116
32,"{Good health, Christian, White, Female}","{Good health, Christian, Female}",{White},0.102341,0.933257,1.09977
34,"{No religion, White}",{No religion},{White},0.231781,0.93221,1.098537
37,"{55 to 64, White}",{55 to 64},{White},0.10727,0.930724,1.096786
38,"{No religion, White, Female}","{No religion, Female}",{White},0.104102,0.929494,1.095336
41,"{No religion, Very good health, White}","{No religion, Very good health}",{White},0.123976,0.928894,1.094629
45,"{Christian, White, Female}","{Christian, Female}",{White},0.291552,0.927113,1.092531
46,"{Christian, White}",{Christian},{White},0.542453,0.92676,1.092115
