<a href="https://colab.research.google.com/github/mo-ibrahim22/Data-Mining/blob/main/Apriori_DM_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Import libraries**

In [None]:
#Import libraries
import pandas as pd
from itertools import combinations, chain


# **Function to generate combinations of items for a given length**

In [None]:
#Function to generate combinations of items for a given length
def generate_combinations(items, length):
    """Return all `length`-sized combinations of `items` as a list of tuples.

    The tuples come from `itertools.combinations`, so they follow the
    iteration order of `items`.
    """
    return [*combinations(items, length)]


# **Function to calculate support for a given itemset**

In [None]:
#Function to calculate support for a given itemset
def calculate_support(data, itemset):
    """Count the transactions in `data` that contain every item of `itemset`.

    Returns the absolute support (an integer count), not a fraction.
    An empty `itemset` is contained in every transaction.
    """
    matches = 0
    for transaction in data:
        for item in itemset:
            if item not in transaction:
                # One missing item is enough to rule this transaction out.
                break
        else:
            matches += 1
    return matches


# **Function to generate frequent itemsets for a given length**

In [None]:
#Function to generate frequent itemsets for a given length
def generate_frequent_itemsets(data, items, min_support, length):
    """Return the frequent itemsets of exactly `length` items.

    Every `length`-sized combination of `items` is scored with
    `calculate_support`; those whose support count is at least
    `min_support` are kept.

    Returns:
        dict mapping each frequent itemset (as a frozenset) to its support.
    """
    scored_candidates = (
        (frozenset(candidate), calculate_support(data, candidate))
        for candidate in generate_combinations(items, length)
    )
    return {
        candidate: count
        for candidate, count in scored_candidates
        if count >= min_support
    }


# **Function to generate all possible association rules for a given itemset**

In [None]:
#Function to generate all possible association rules for a given itemset
def generate_all_possible_rules(itemset):
    """Return every rule `antecedent => consequent` that splits `itemset`.

    Each non-empty proper subset of `itemset` is used as an antecedent and
    the remaining items form the consequent. Rules are ordered by antecedent
    size, smallest first.

    Returns:
        list of (antecedent, consequent) pairs, both frozensets. Empty when
        `itemset` has fewer than two items.
    """
    antecedents = chain.from_iterable(
        combinations(itemset, size) for size in range(1, len(itemset))
    )
    return [
        (frozenset(lhs), frozenset(itemset).difference(lhs))
        for lhs in antecedents
    ]


# **Function to compute the confidence of each rule and categorize it as "strong" or "weak"**

In [None]:
#Function to filter rules based on confidence and categorize as "strong" or "weak"
def filter_and_categorize_rules(rules, frequent_itemsets, min_confidence):
    """Compute the confidence of each rule and label it "strong" or "weak".

    Confidence is support(antecedent and consequent) / support(antecedent),
    looked up in `frequent_itemsets`. Despite the name, no rule is dropped:
    every input rule appears in the result with its label.

    Returns:
        list of (antecedent, consequent, confidence, category) tuples, where
        antecedent and consequent are plain sets.
    """
    def _label(lhs, rhs):
        joint_support = frequent_itemsets[lhs.union(rhs)]
        rule_confidence = joint_support / frequent_itemsets[lhs]
        if rule_confidence >= min_confidence:
            verdict = "strong"
        else:
            verdict = "weak"
        return set(lhs), set(rhs), rule_confidence, verdict

    return [_label(lhs, rhs) for lhs, rhs in rules]


# **Function to calculate lift and categorize based on lift value**

In [None]:
#Function to calculate lift and categorize based on lift value
def calculate_and_categorize_lift(frequent_itemsets, antecedent, consequent, num_transactions=None):
    """Compute the lift of the rule `antecedent => consequent` and label it.

    Lift is defined on relative supports (fractions of all transactions):

        lift = P(A and C) / (P(A) * P(C))

    With absolute support counts this equals
    ``count(A and C) * N / (count(A) * count(C))`` where N is the total
    number of transactions.

    Args:
        frequent_itemsets: mapping from frozenset itemsets to their support.
            It must contain `antecedent`, `consequent` and their union.
        antecedent: frozenset of items on the left-hand side of the rule.
        consequent: frozenset of items on the right-hand side of the rule.
        num_transactions: total number of transactions the supports were
            counted over. Pass it whenever `frequent_itemsets` holds absolute
            counts. When left as None the supports are used as given, which
            is only a true lift if they are already relative supports; with
            raw counts the result is too small by a factor of N.

    Returns:
        (lift, category) where category is "dependent, + correlated" for
        lift > 1, "dependent, - correlated" for lift < 1 and "Independent"
        for lift equal to 1 (within floating-point tolerance).

    Raises:
        KeyError: if one of the three itemsets is missing from
            `frequent_itemsets`.
        ZeroDivisionError: if the antecedent or consequent support is 0.
    """
    import math

    support_antecedent = frequent_itemsets[antecedent]
    support_consequent = frequent_itemsets[consequent]
    support_both = frequent_itemsets[antecedent.union(consequent)]

    support_product = support_antecedent * support_consequent
    if num_transactions is None:
        lift = support_both / support_product
    else:
        # Multiply before dividing so that integer counts stay exact and an
        # independent rule yields exactly 1.0.
        lift = (support_both * num_transactions) / support_product

    # Relative supports are floats, so compare against 1 with a tolerance
    # instead of relying on exact equality.
    if math.isclose(lift, 1.0):
        category = "Independent"
    elif lift > 1:
        category = "dependent, + correlated"
    else:
        category = "dependent, - correlated"

    return lift, category


# **Read the Excel file into a DataFrame**

In [None]:
# Read the Excel file into a DataFrame and turn every row into a transaction
file_path = '/content/H_Format.xlsx'
data_horizontal_format = pd.read_excel(file_path, sheet_name='Sheet1')
# The second column of each row is read as a comma-separated item list.
# Strip the whitespace around each item and skip empty tokens, so a cell such
# as "A, B" produces {'A', 'B'} instead of {'A', ' B'}.
data = [
    {token.strip() for token in str(row[1]).split(',') if token.strip()}
    for row in data_horizontal_format.itertuples(index=False)
]
# Every distinct item that occurs in at least one transaction
items = {item for transaction in data for item in transaction}

# **Display the transactions extracted from the DataFrame**

In [None]:
# Show the transactions extracted from the DataFrame (one set of items per row)
print(data)

[{'E', 'N', 'M', 'K', 'Y', 'O'}, {'E', 'D', 'N', 'K', 'Y', 'O'}, {'A', 'K', 'E', 'M'}, {'M', 'C', 'K', 'U', 'Y'}, {'E', 'C', 'K', 'I', 'O'}]


# **Extract unique items from transactions**

In [None]:
# Collect every distinct item that occurs in any transaction
items = {item for transaction in data for item in transaction}
print(items)

{'E', 'D', 'M', 'N', 'C', 'K', 'U', 'A', 'Y', 'I', 'O'}


# **Set the support and confidence thresholds**

In [None]:
# Ask the user for both thresholds.
# min_support is compared against absolute support counts,
# min_confidence against the confidence ratio of a rule.
min_support = float(input("min_support"))
min_confidence = float(input("min_confidence"))


min_support3
min_confidence.5


# **Generate frequent itemsets for different lengths**

In [None]:
# Generate frequent itemsets level by level (itemset sizes 1, 2, 3, ...)
frequent_itemsets = {}
candidate_items = items
for length in range(1, len(items) + 1):
    level_itemsets = generate_frequent_itemsets(data, candidate_items, min_support, length)
    # Downward closure: every subset of a frequent itemset is frequent, so
    # once a size yields nothing, no larger size can yield anything either.
    if not level_itemsets:
        break
    frequent_itemsets.update(level_itemsets)
    # For the same reason, an item that is in no frequent itemset of this
    # size cannot be part of a larger one; drop it from the candidates to
    # avoid enumerating combinations that can never be frequent.
    candidate_items = set().union(*level_itemsets)



# **Display frequent itemsets and their support counts**

In [None]:
# Print the frequent itemsets with their support counts, grouped by size
print("\nSection 3: Frequent Itemsets")
previous_level = None  # size of the itemset printed last

for itemset, support in frequent_itemsets.items():
    level = len(itemset)

    # A change in size starts a new group, so emit its header first
    if previous_level != level:
        previous_level = level
        print(f"\nLevel {level}:")

    print(f"Itemset: {set(itemset)}, Support: {support}")



Section 3: Frequent Itemsets

Level 1:
Itemset: {'E'}, Support: 4
Itemset: {'M'}, Support: 3
Itemset: {'K'}, Support: 5
Itemset: {'Y'}, Support: 3
Itemset: {'O'}, Support: 3

Level 2:
Itemset: {'E', 'K'}, Support: 4
Itemset: {'E', 'O'}, Support: 3
Itemset: {'K', 'M'}, Support: 3
Itemset: {'K', 'Y'}, Support: 3
Itemset: {'K', 'O'}, Support: 3

Level 3:
Itemset: {'E', 'K', 'O'}, Support: 3


# **Generate all possible association rules for frequent itemsets**

In [None]:
# Build the candidate rules of every frequent itemset into one flat list
all_possible_rules = list(
    chain.from_iterable(
        generate_all_possible_rules(itemset) for itemset in frequent_itemsets
    )
)


# **Display all possible association rules**

In [None]:
# Print every candidate rule, one per line, under the section header
rule_lines = [
    f"Rule: {set(antecedent)} => {set(consequent)}"
    for antecedent, consequent in all_possible_rules
]
print("\nSection 4: All Possible Association Rules", *rule_lines, sep="\n")



Section 4: All Possible Association Rules
Rule: {'E'} => {'K'}
Rule: {'K'} => {'E'}
Rule: {'E'} => {'O'}
Rule: {'O'} => {'E'}
Rule: {'K'} => {'M'}
Rule: {'M'} => {'K'}
Rule: {'K'} => {'Y'}
Rule: {'Y'} => {'K'}
Rule: {'K'} => {'O'}
Rule: {'O'} => {'K'}
Rule: {'E'} => {'K', 'O'}
Rule: {'K'} => {'E', 'O'}
Rule: {'O'} => {'E', 'K'}
Rule: {'E', 'K'} => {'O'}
Rule: {'E', 'O'} => {'K'}
Rule: {'K', 'O'} => {'E'}


# **Filter and categorize rules based on confidence**

In [None]:
# Label every candidate rule as strong or weak by its confidence, then print them
categorized_rules = filter_and_categorize_rules(
    all_possible_rules, frequent_itemsets, min_confidence
)
confidence_lines = [
    f"Rule: {set(antecedent)} => {set(consequent)}, Confidence: {confidence}, Category: {category}"
    for antecedent, consequent, confidence, category in categorized_rules
]
print("\nSection 5: Categorized Rules based on Confidence", *confidence_lines, sep="\n")



Section 5: Categorized Rules based on Confidence
Rule: {'E'} => {'K'}, Confidence: 1.0, Category: strong
Rule: {'K'} => {'E'}, Confidence: 0.8, Category: strong
Rule: {'E'} => {'O'}, Confidence: 0.75, Category: strong
Rule: {'O'} => {'E'}, Confidence: 1.0, Category: strong
Rule: {'K'} => {'M'}, Confidence: 0.6, Category: strong
Rule: {'M'} => {'K'}, Confidence: 1.0, Category: strong
Rule: {'K'} => {'Y'}, Confidence: 0.6, Category: strong
Rule: {'Y'} => {'K'}, Confidence: 1.0, Category: strong
Rule: {'K'} => {'O'}, Confidence: 0.6, Category: strong
Rule: {'O'} => {'K'}, Confidence: 1.0, Category: strong
Rule: {'E'} => {'K', 'O'}, Confidence: 0.75, Category: strong
Rule: {'K'} => {'E', 'O'}, Confidence: 0.6, Category: strong
Rule: {'O'} => {'E', 'K'}, Confidence: 1.0, Category: strong
Rule: {'E', 'K'} => {'O'}, Confidence: 0.75, Category: strong
Rule: {'E', 'O'} => {'K'}, Confidence: 1.0, Category: strong
Rule: {'K', 'O'} => {'E'}, Confidence: 1.0, Category: strong


# **Calculate and categorize lift for Unique Association Rules**

In [None]:
# Calculate and categorize lift once per unordered rule (A => B and B => A share one lift)
print("\nSection 6: Categorized Lift for Unique Association Rules")

# Lift is defined on relative supports (fractions of all transactions), while
# frequent_itemsets stores absolute counts. Convert the counts first;
# otherwise every lift is too small by a factor of len(data) and every rule
# is reported as negatively correlated.
total_transactions = len(data)
relative_support = {
    itemset: support / total_transactions
    for itemset, support in frequent_itemsets.items()
}

printed_rules = set()  # rules already reported, stored in both directions

for antecedent, consequent in all_possible_rules:
    # Skip a rule whose reverse (or the rule itself) has already been printed
    if (antecedent, consequent) in printed_rules or (consequent, antecedent) in printed_rules:
        continue

    lift, category = calculate_and_categorize_lift(relative_support, antecedent, consequent)
    print(f"Rule: {set(antecedent)} => {set(consequent)}, Lift: {lift}, Category: {category}")

    # Remember both directions so the mirrored rule is not printed again
    printed_rules.add((antecedent, consequent))
    printed_rules.add((consequent, antecedent))



Section 6: Categorized Lift for Unique Association Rules
Rule: {'E'} => {'K'}, Lift: 0.2, Category: dependent, - correlated
Rule: {'E'} => {'O'}, Lift: 0.25, Category: dependent, - correlated
Rule: {'K'} => {'M'}, Lift: 0.2, Category: dependent, - correlated
Rule: {'K'} => {'Y'}, Lift: 0.2, Category: dependent, - correlated
Rule: {'K'} => {'O'}, Lift: 0.2, Category: dependent, - correlated
Rule: {'E'} => {'K', 'O'}, Lift: 0.25, Category: dependent, - correlated
Rule: {'K'} => {'E', 'O'}, Lift: 0.2, Category: dependent, - correlated
Rule: {'O'} => {'E', 'K'}, Lift: 0.25, Category: dependent, - correlated


# **Display frequent itemsets and their support counts as a styled table**

In [None]:
# Display frequent itemsets and their support counts as a styled table
def display_table_with_gradient(df):
    """Return `df` as a Styler with a 'viridis' background gradient (axis=0)."""
    return df.style.background_gradient(cmap='viridis', axis=0)


# One row per frequent itemset: the itemset and its support count
df_frequent_itemsets = pd.DataFrame(list(frequent_itemsets.items()), columns=['Itemset', 'Support'])
# Render each frozenset as "a, b, c". Frozensets have no defined order, so
# sort the items to get the same text on every run.
df_frequent_itemsets['Itemset'] = df_frequent_itemsets['Itemset'].apply(
    lambda x: ', '.join(sorted(map(str, x)))
)

# Last expression of the cell, so the notebook renders the styled table
display_table_with_gradient(df_frequent_itemsets)


Unnamed: 0,Itemset,Support
0,E,4
1,M,3
2,K,5
3,Y,3
4,O,3
5,"E, K",4
6,"E, O",3
7,"K, M",3
8,"K, Y",3
9,"K, O",3
