Association Rule Mining - Analysis: Medium Value Customers - Female - Age group 45-54

@author: Kessor Chao

Used guides: https://stackabuse.com/association-rule-mining-via-apriori-algorithm-in-python/ 

Read-me: ReadMe_ARM.txt

Last edited: December 2022

In [1]:
# importing necessary libraries

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from apyori import apriori
from collections import defaultdict

In [None]:
# Load the transaction lines (first 2,000,000 rows of the ';'-separated file)
# and the per-customer RFM table, then join them on the shared KeyNo column.

left = pd.read_csv('KeyData.csv', sep=';', nrows=2000000)
# Cast after reading so that missing values become the literal string 'nan'.
left = left.astype({"Item": str, "Real Brand": str})

right = pd.read_csv('RFM.csv')

store_data = left.merge(right, on="KeyNo")

In [3]:
# Cleaning data: keep only the rows that belong to the analysed segment.

# Brand labels that are removed from the data; 'nan' is the string that
# missing brands turn into when the column is cast to str at load time.
excluded_brands = ['XXXX - Service Center', 'nan', 'XXXXXX-PACKAGE']

segment_mask = (
    ~store_data['Real Brand'].isin(excluded_brands)
    & store_data['Keycard Age'].between(45, 54)  # inclusive on both ends
    & (store_data['Keycard Gender'] == 'F')
    & (store_data['Customer_segment'] == 'Medium Value Customer')
)
store_data = store_data.loc[segment_mask]

In [None]:
# View data after cleaning: display the first rows of the filtered frame
# (DataFrame.head() shows five rows by default) as a quick sanity check.

store_data.head()

In [5]:
# Group data by KeyNo and Date.
# NOTE: from here on `store_data` is a pandas GroupBy object, not a DataFrame.
# Presumably each (KeyNo, Date) pair represents one shopping basket of a
# customer - TODO confirm against the data dictionary.

store_data = store_data.groupby(['KeyNo','Date'])

In [None]:
# View data after grouping: for every (KeyNo, Date) group, show the number of
# non-null values in each remaining column.

store_data.count()

In [7]:
# Setting values that we want to associate: the column names for which
# association rules are mined, one independent analysis per column.

keys = ['Real Brand','Department']

In [8]:
# Creating lists based on values: for every column in `keys`, collect the
# values of each group into one list, giving a list of transactions
# (a list of lists) in the shape apriori() expects.

aggregates = {k: store_data[k].agg(list).tolist() for k in keys}

In [19]:
# Creating associations with apriori and setting minimum values.
#
# apyori's apriori() only understands min_support, min_confidence, min_lift
# and max_length; a `min_length` keyword is silently ignored. The minimum
# itemset size is therefore enforced explicitly on the returned records.

association_results = {}
for k in keys:
    rules = apriori(aggregates[k], min_support=0.0015, min_confidence=0.2, min_lift=3)
    # Keep only itemsets with at least two items (a rule needs two sides).
    records = [rule for rule in rules if len(rule.items) >= 2]
    # Naming the columns keeps the frame's layout stable even when no itemset
    # meets the thresholds and `records` is empty.
    association_results[k] = pd.DataFrame(
        records, columns=['items', 'support', 'ordered_statistics']
    )

In [22]:
# Finding association results and adding them to the dataset.
#
# Each row of association_results[k] is one frequent itemset together with
# the list of rules (ordered_statistics) derived from it. As before, only the
# first rule of every itemset is reported. Its two sides are read from that
# rule itself (items_base -> items_add) rather than from the unordered
# `items` frozenset, so that Ascendant/Consequent always match the reported
# Confidence and Lift and no item is dropped for itemsets larger than two.

ars = {}
for k in keys:
    ars[k] = association_results[k].copy(deep=True)

    if ars[k].empty:
        # No itemset met the thresholds: produce an empty table with the
        # usual columns instead of failing on the missing source columns.
        ars[k] = pd.DataFrame(
            columns=['support', 'Ascendant', 'Consequent', 'Confidence', 'Lift']
        )
        continue

    # Each rule is (items_base, items_add, confidence, lift).
    first_rules = [stats[0] for stats in ars[k]['ordered_statistics']]

    # Sides with several items are joined into one comma-separated string;
    # sorting makes the output deterministic.
    ars[k]['Ascendant'] = [', '.join(sorted(map(str, rule[0]))) for rule in first_rules]
    ars[k]['Consequent'] = [', '.join(sorted(map(str, rule[1]))) for rule in first_rules]
    ars[k]['Confidence'] = [rule[2] for rule in first_rules]
    ars[k]['Lift'] = [rule[3] for rule in first_rules]

    ars[k].drop(['items', 'ordered_statistics'], inplace=True, axis=1)

In [23]:
# Store data into csv files based on values: one file per analysed column,
# written to the working directory with the DataFrame index included.

for k in keys:
    output_path = f'ARM_MediumValue-45-54{k}.csv'
    result_table = ars[k]
    result_table.to_csv(output_path)