<a href="https://colab.research.google.com/github/nxbnv/DMDW-5th-sem/blob/main/6thlab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# Create a DataFrame from the provided dataset
data = {
    'Transaction_ID': ['T100', 'T200', 'T300', 'T400', 'T500'],
    'Items_bought': [
        {'M', 'O', 'N', 'K', 'E', 'Y'},
        {'D', 'O', 'N', 'K', 'E', 'Y'},
        {'M', 'A', 'K', 'E'},
        {'M', 'U', 'C', 'K', 'Y'},
        {'C', 'O', 'O', 'K', 'I', 'E'}
    ]
}

df = pd.DataFrame(data)

# Convert the 'Items_bought' column to a list of itemsets
df['Items_bought'] = df['Items_bought'].apply(list)

# Perform one-hot encoding to prepare the data for Apriori
oht = pd.get_dummies(df['Items_bought'].explode()).groupby(level=0).max()

# Initialize step counters
step = 1

# Step 1: Scan dataset for count of each item and itemset (C1)
print(f"Step {step}: Count of each item (C1):")
c1 = oht.sum()
print(c1)

# Increment step counter
step += 1

# Step 2: Find out frequent one-itemset from C1 according to support count (L1)
min_support = 0.6
print(f"\nStep {step}: Frequent one-itemset (L1) with support >= {min_support}:")
l1 = c1[c1 >= min_support]
print(l1)

# Increment step counter
step += 1

# Step 3: Generate two-itemsets from L1 and find their frequency counts (C2)
print(f"\nStep {step}: Generating two-itemsets (C2):")
c2 = apriori(oht, min_support=min_support, use_colnames=True)
print(c2)

# Increment step counter
step += 1

# Step 4: From C2, generate L2 which is the list of frequent two-itemsets
print(f"\nStep {step}: Frequent two-itemsets (L2) with support >= {min_support}:")
l2 = c2[c2['support'] >= min_support]
print(l2)

# Increment step counter
step += 1

# Step 5: Generate C3 from L2 (candidates for three-itemset)
print(f"\nStep {step}: Generating candidates for three-itemsets (C3):")
c3 = apriori(oht, min_support=min_support, use_colnames=True)
print(c3)

# Increment step counter
step += 1

# Step 6: Stop when not possible
print(f"\nStep {step}: Stopping as there are no candidates for three-itemsets.")

# Output the final frequent itemsets (L2)
print("\nFinal Frequent Itemsets (L2):")
print(l2)

# Set the minimum confidence threshold
min_confidence = 0.8

# Find strong association rules
association_rules_df = association_rules(l2, metric="confidence", min_threshold=min_confidence)

# Print the strong association rules
print("\nStrong Association Rules with Confidence >= 80%:")
print(association_rules_df)


Step 1: Count of each item (C1):
A    1
C    2
D    1
E    4
I    1
K    5
M    3
N    2
O    3
U    1
Y    3
dtype: int64

Step 2: Frequent one-itemset (L1) with support >= 0.6:
A    1
C    2
D    1
E    4
I    1
K    5
M    3
N    2
O    3
U    1
Y    3
dtype: int64

Step 3: Generating two-itemsets (C2):
    support   itemsets
0       0.8        (E)
1       1.0        (K)
2       0.6        (M)
3       0.6        (O)
4       0.6        (Y)
5       0.8     (K, E)
6       0.6     (E, O)
7       0.6     (K, M)
8       0.6     (K, O)
9       0.6     (Y, K)
10      0.6  (K, E, O)

Step 4: Frequent two-itemsets (L2) with support >= 0.6:
    support   itemsets
0       0.8        (E)
1       1.0        (K)
2       0.6        (M)
3       0.6        (O)
4       0.6        (Y)
5       0.8     (K, E)
6       0.6     (E, O)
7       0.6     (K, M)
8       0.6     (K, O)
9       0.6     (Y, K)
10      0.6  (K, E, O)

Step 5: Generating candidates for three-itemsets (C3):
    support   itemsets
0   

