                Single support

In [1]:
# Step 1: Import Required Libraries
import numpy as np
import pandas as pd
from apyori import apriori

In [2]:
# Define the dataset
dataset = [
    ['Student', 'Teach', 'School', 'Education'],
    ['Student', 'School', 'Education'],
    ['Teach', 'School', 'City', 'Game', 'Education'],
    ['Baseball', 'Basketball', 'Sport'],
    ['Basketball', 'Player', 'Spectator', 'Sport'],
    ['Baseball', 'Coach', 'Game', 'Team', 'Sport'],
    ['Basketball', 'Team', 'City', 'Game', 'Sport']
]

In [3]:
# Define class labels
class_labels = {'Education', 'Sport'}

In [4]:
# Run Apriori algorithm
rules = apriori(dataset, min_support=0.2, min_confidence=0.6, min_lift=1, min_length=2)

In [5]:
# Display results
results = list(rules)

for rule in results:
    items = set(rule.items)
    for ordered_stat in rule.ordered_statistics:
        base = set(ordered_stat.items_base)
        add = set(ordered_stat.items_add)

        # Ensure the RHS contains only one item and it is a class label
        if len(add) == 1 and next(iter(add)) in class_labels:
            print(f"Rule: {base} → {add}")
            print(f"Support: {rule.support:.2f}")
            print(f"Confidence: {ordered_stat.confidence:.2f}")
            print(f"Lift: {ordered_stat.lift:.2f}")
            print("-" * 40)

Rule: {'Baseball'} → {'Sport'}
Support: 0.29
Confidence: 1.00
Lift: 1.75
----------------------------------------
Rule: {'Basketball'} → {'Sport'}
Support: 0.43
Confidence: 1.00
Lift: 1.75
----------------------------------------
Rule: {'School'} → {'Education'}
Support: 0.43
Confidence: 1.00
Lift: 2.33
----------------------------------------
Rule: {'Student'} → {'Education'}
Support: 0.29
Confidence: 1.00
Lift: 2.33
----------------------------------------
Rule: {'Teach'} → {'Education'}
Support: 0.29
Confidence: 1.00
Lift: 2.33
----------------------------------------
Rule: {'Game'} → {'Sport'}
Support: 0.29
Confidence: 0.67
Lift: 1.17
----------------------------------------
Rule: {'Team'} → {'Sport'}
Support: 0.29
Confidence: 1.00
Lift: 1.75
----------------------------------------
Rule: {'Student', 'School'} → {'Education'}
Support: 0.29
Confidence: 1.00
Lift: 2.33
----------------------------------------
Rule: {'Teach', 'School'} → {'Education'}
Support: 0.29
Confidence: 1.00
Li

            Multi Support

In [6]:
# Step 1: Load Dataset
data = pd.read_csv("document_data.csv")

In [7]:
# Step 2: Transform Data - Convert documents into transaction-like format with class labels
transactions = []
class_transactions = {}  # Store transactions by class
all_items = set()

for index, row in data.iterrows():
    items = row['Terms'].split(", ")  # Convert terms to list
    items.append(row['Class'])  # Append class label to transaction
    transactions.append(items)
    all_items.update(items)

    # Store transactions separately for each class
    doc_class = row['Class']
    if doc_class not in class_transactions:
        class_transactions[doc_class] = []
    class_transactions[doc_class].append(items)

In [8]:
# Step 3: Manually Assign Minimum Support for Each Class
class_min_support = {
    'Education': 0.3,  # Minimum Support for Education
    'Sport': 0.25      # Minimum Support for Sport
}

print("Assigned Minimum Support for Each Class:", class_min_support)

Assigned Minimum Support for Each Class: {'Education': 0.3, 'Sport': 0.25}


In [9]:
# Step 4: Generate Class Association Rules for each class separately
filtered_rules = []

for doc_class, class_data in class_transactions.items():
    min_support = class_min_support.get(doc_class, 0.2)  # Default to 0.2 if not specified

    rules = apriori(class_data, min_support=min_support, min_confidence=0.6, min_lift=1.2, min_length=2)
    results = list(rules)

    # Store rules along with class info
    for item in results:
        filtered_rules.append((doc_class, item))

In [10]:
# Step 5: Display Results (Class Association Rules)
for doc_class, item in filtered_rules:
    pair = item[0]  # Extract itemset
    items = [x for x in pair if x != doc_class]  # Exclude class from antecedents
    
    if doc_class in pair:  # Ensure the class is in the rule
        print(f"Class: {doc_class}")
        print(f"Rule: {items} → {doc_class}")  # Predicting the class
        print(f"Support: {round(item[1], 4)}")
        print(f"Confidence: {round(item[2][0][2], 4)}")  # Confidence of predicting the class
        print(f"Lift: {round(item[2][0][3], 4)}\n")


Class: Education
Rule: ['Game', 'Teach'] → Education
Support: 0.3333
Confidence: 1.0
Lift: 1.5

Class: Education
Rule: ['Game', 'Teach', 'School'] → Education
Support: 0.3333
Confidence: 1.0
Lift: 1.5

Class: Sport
Rule: ['Baseball', 'Coach'] → Sport
Support: 0.25
Confidence: 1.0
Lift: 2.0

Class: Sport
Rule: ['Baseball', 'Game'] → Sport
Support: 0.25
Confidence: 1.0
Lift: 2.0

Class: Sport
Rule: ['City', 'Basketball'] → Sport
Support: 0.25
Confidence: 1.0
Lift: 1.3333

Class: Sport
Rule: ['Basketball', 'Player'] → Sport
Support: 0.25
Confidence: 1.0
Lift: 1.3333

Class: Sport
Rule: ['Team', 'Basketball'] → Sport
Support: 0.25
Confidence: 1.0
Lift: 1.3333

Class: Sport
Rule: ['City', 'Team'] → Sport
Support: 0.25
Confidence: 1.0
Lift: 4.0

Class: Sport
Rule: ['Coach', 'Game'] → Sport
Support: 0.25
Confidence: 1.0
Lift: 4.0

Class: Sport
Rule: ['Baseball', 'Game', 'Coach'] → Sport
Support: 0.25
Confidence: 1.0
Lift: 4.0

Class: Sport
Rule: ['City', 'Basketball', 'Team'] → Sport
Support: