In [11]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [12]:
# Toy customer table, written one row per customer. The product_* columns are
# 0/1 indicators and 'purchased' is the 0/1 label scored at the end.
# NOTE(review): the income of 650000 (age 60) is out of step with the
# neighbouring values -- possibly a typo for 65000; confirm. The income column
# is not used by the cells shown here.
column_names = ['age', 'income', 'product_1', 'product_2', 'product_3', 'purchased']
customer_rows = [
    (25, 30000, 1, 1, 0, 1),
    (30, 35000, 1, 0, 1, 0),
    (35, 40000, 0, 1, 1, 1),
    (40, 45000, 1, 0, 1, 1),
    (45, 50000, 0, 1, 1, 0),
    (50, 55000, 1, 0, 0, 1),
    (55, 60000, 1, 1, 1, 1),
    (60, 650000, 1, 1, 1, 0),
    (65, 70000, 0, 1, 0, 1),
]
data = pd.DataFrame(customer_rows, columns=column_names)

print(data)

   age  income  product_1  product_2  product_3  purchased
0   25   30000          1          1          0          1
1   30   35000          1          0          1          0
2   35   40000          0          1          1          1
3   40   45000          1          0          1          1
4   45   50000          0          1          1          0
5   50   55000          1          0          0          1
6   55   60000          1          1          1          1
7   60  650000          1          1          1          0
8   65   70000          0          1          0          1


In [13]:
# Select the product indicator columns for frequent pattern mining.
# mlxtend's apriori expects a one-hot DataFrame of booleans; passing 0/1
# integers is deprecated there, so cast the indicators to bool up front.
product_data = data[['product_1', 'product_2', 'product_3']].astype(bool)

In [14]:
# Apply the Apriori algorithm to the product columns to find itemsets whose
# support is at least 0.4.
frequent_itemsets = apriori(
    product_data,
    min_support=0.4,
    use_colnames=True,  # label itemsets with column names instead of indices
)

# Display frequent itemsets (heading, then the table on the next line)
print('Frequent Itemsets:', frequent_itemsets, sep='\n')

Frequent Itemsets:
    support                itemsets
0  0.666667             (product_1)
1  0.666667             (product_2)
2  0.666667             (product_3)
3  0.444444  (product_1, product_3)
4  0.444444  (product_2, product_3)




In [15]:
# Derive association rules from the frequent itemsets, filtering on the
# 'lift' metric with a minimum threshold of 1.0.
rules = association_rules(
    frequent_itemsets,
    metric='lift',
    min_threshold=1.0,
)

# Display the rules (heading, then the table on the next line)
print('Association Rules:', rules, sep='\n')

Association Rules:
   antecedents  consequents  antecedent support  ...  jaccard  certainty  kulczynski
0  (product_1)  (product_3)            0.666667  ...      0.5        0.0    0.666667
1  (product_3)  (product_1)            0.666667  ...      0.5        0.0    0.666667
2  (product_2)  (product_3)            0.666667  ...      0.5        0.0    0.666667
3  (product_3)  (product_2)            0.666667  ...      0.5        0.0    0.666667

[4 rows x 14 columns]


In [16]:
def classify_transaction(transaction, rules):
    """Predict the 'purchased' label for one transaction from association rules.

    Parameters
    ----------
    transaction : collection of items
        The items present in the transaction.
    rules : pandas.DataFrame
        Rule table with 'antecedents' and 'consequents' columns, each cell
        holding an iterable of items.

    Returns
    -------
    int
        1 if at least one rule has both its antecedents and its consequents
        fully contained in the transaction, otherwise 0.
    """
    def rule_is_satisfied(rule):
        # A rule counts only when both of its sides appear in the transaction.
        lhs_items = set(rule['antecedents'])
        rhs_items = set(rule['consequents'])
        return lhs_items.issubset(transaction) and rhs_items.issubset(transaction)

    # any() stops at the first satisfied rule, like an early return would.
    return int(any(rule_is_satisfied(rule) for _, rule in rules.iterrows()))

# Apply classification on each customer (row) based on products bought.
# A transaction must be the set of product *names* the customer bought (the
# columns whose indicator is 1), because the rules' antecedents/consequents
# are sets of column names. Taking the raw cell values instead would yield
# {0, 1}, which no rule can ever match, so every prediction would be 0.
product_columns = ['product_1', 'product_2', 'product_3']
predictions = []
for _, row in data.iterrows():
    transaction = {column for column in product_columns if row[column] == 1}
    predictions.append(classify_transaction(transaction, rules))

# Evaluate accuracy against the 'purchased' label. The rules were mined from
# these same rows, so this is not a held-out estimate.
accuracy = accuracy_score(data['purchased'], predictions)
print("Classification Accuracy:", accuracy)


Classification Accuracy: 0.3333333333333333
