<a href="https://colab.research.google.com/github/mo-ibrahim22/Data-Mining/blob/main/ECLAT_DM_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Block 1: Import necessary libraries
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import combinations

In [None]:
# Block 2: Load the raw transaction table from the Excel workbook
excel_path = "/content/V_Format.xlsx"
data_format = pd.read_excel(excel_path)

In [None]:
# Block 3: Normalise the input into vertical format.
#
# The result is `result_data`, a DataFrame with one row per item:
#   'Item name'    - the item label
#   'Transactions' - the set of transaction IDs (TIDs) that contain the item
#
# Two input layouts are recognised:
#   * horizontal: first column is "TID", and an 'items' column holds a
#     comma-separated item list per transaction;
#   * vertical:   an 'itemset' column and a 'TID_set' column holding a
#     comma-separated TID list per item.


def _split_cell(cell):
    """Split a comma-separated cell into stripped, non-empty tokens.

    Missing cells (NaN/None) yield an empty list so they do not turn into a
    phantom item or transaction called 'nan'.
    """
    if pd.isna(cell):
        return []
    return [token.strip() for token in str(cell).split(',') if token.strip()]


if data_format.columns[0] == "TID":
    # Horizontal format. Accumulate TID sets in a plain dict (insertion order
    # keeps items in first-seen order) instead of growing a DataFrame row by
    # row. This also covers transactions that hold a single item (no comma)
    # and leaves `data_format` untouched, so the cell can be re-run safely.
    tidsets_by_item = {}
    for tid_cell, items_cell in zip(data_format['TID'], data_format['items']):
        tids = _split_cell(tid_cell)
        for item in _split_cell(items_cell):
            tidsets_by_item.setdefault(item, set()).update(tids)

    records = [
        {'Item name': item, 'Transactions': tids}
        for item, tids in tidsets_by_item.items()
    ]
else:
    print("The data is already in vertical format")

    # Vertical format: one input row per item, TIDs are comma-separated.
    records = [
        {'Item name': item, 'Transactions': set(_split_cell(tid_cell))}
        for item, tid_cell in zip(data_format['itemset'], data_format['TID_set'])
    ]

# Build the frame once; `columns=` keeps both columns even when there are no rows.
result_data = pd.DataFrame(records, columns=['Item name', 'Transactions'])



The data is already in vertical format


In [None]:
# Block 4: Print the result_data DataFrame
# Plain-text dump of the vertical-format table built in Block 3
# (columns 'Item name' and 'Transactions').
print(result_data)


  Item name                                Transactions
0        I1        {T900, T100, T800, T400, T700, T500}
1        I2  {T900, T100, T600, T400, T800, T200, T300}
2        I3        {T900, T800, T600, T700, T500, T300}
3        I4                                {T200, T400}
4        I5                                {T800, T100}


In [None]:
# Block 5: One-hot encode every item's TID set into a boolean table
# (one row per item, one column per transaction ID).
te = TransactionEncoder()
tid_sets = result_data['Transactions']
binary_table = te.fit(tid_sets).transform(tid_sets)
binary_df = pd.DataFrame(binary_table, columns=te.columns_)

# Put the item label in front of the boolean columns
binary_df.insert(0, 'Item name', result_data['Item name'])

# Show the encoded table
print(binary_df)


  Item name   T100   T200   T300   T400   T500   T600   T700   T800   T900
0        I1   True  False  False   True   True  False   True   True   True
1        I2   True   True   True   True  False   True  False   True   True
2        I3  False  False   True  False   True   True   True   True   True
3        I4  False   True  False   True  False  False  False  False  False
4        I5   True  False  False  False  False  False  False   True  False


In [None]:
# Block 6: Ask the user for the mining thresholds.
# min_support is compared against absolute transaction counts (e.g. 3);
# min_confidence is compared against a ratio (e.g. 0.5).
raw_min_support = input("min_support")
min_support = float(raw_min_support)

raw_min_confidence = input("min_confidence")
min_confidence = float(raw_min_confidence)

min_support2
min_confidence.6


In [None]:
# Block 7: Start with an empty item -> support mapping (filled in by Block 8)
item_support = dict()


In [None]:
# Block 8: The support of a single item is the size of its TID set.
# Walk the two columns in lockstep and record one count per item.
item_support.update(
    (name, len(tids))
    for name, tids in zip(result_data['Item name'], result_data['Transactions'])
)

# Report the support of every item
print("Item Support:")
for name, count in item_support.items():
    print(f"{name}: {count}")


Item Support:
I1: 6
I2: 7
I3: 6
I4: 2
I5: 2


In [None]:
# Block 9: Start with an empty list of frequent item sets (filled in by Block 10)
frequent_item_sets = list()


In [None]:
# Block 10: Find frequent item sets with support >= min_support
#
# The support of an item set is the size of the intersection of its members'
# TID sets. Candidates are generated level by level (1 item, 2 items, ...),
# in the same order as before, so the printed results are unchanged.
#
# Two changes keep the search from enumerating all 2^n combinations:
#   * each item's TID set is looked up once instead of filtering the
#     DataFrame for every item of every candidate;
#   * support is anti-monotone (a superset can never have more support than
#     its subsets), so an item that appears in no frequent set at one level
#     is dropped from later levels, and the search stops at the first level
#     that yields nothing.

# Item -> TID set. setdefault keeps the first row if an item name repeats,
# matching the previous `.values[0]` lookup.
tidsets = {}
for name, tids in zip(result_data['Item name'], result_data['Transactions']):
    tidsets.setdefault(name, set(tids))

candidate_items = list(result_data['Item name'])

for level in range(1, len(candidate_items) + 1):
    items_still_frequent = set()

    for item_set in combinations(candidate_items, level):
        item_set_transactions = set.intersection(*(tidsets[item] for item in item_set))
        support = len(item_set_transactions)

        # Check if the support is greater than or equal to min_support
        if support >= min_support:
            frequent_item_sets.append({
                'Item Set': set(item_set),  # Represent all item sets as sets
                'Support': support
            })
            items_still_frequent.update(item_set)

    # Nothing frequent at this level means nothing frequent at any larger level
    if not items_still_frequent:
        break

    # Filtering (not rebuilding) the list preserves the original item order
    candidate_items = [item for item in candidate_items if item in items_still_frequent]


In [None]:
# Block 11: Print every frequent item set with its support, grouped under a
# "Level k" header, where k is the number of items in the set.
print("Frequent Item Sets:")
current_level = None

for entry in frequent_item_sets:
    members = entry['Item Set']
    size = len(members)

    # A change in size marks the start of a new level
    if size != current_level:
        current_level = size
        print(f"\nLevel {size}:")

    print(f"Item Set: {members}, Support: {entry['Support']}")


Frequent Item Sets:

Level 1:
Item Set: {'I1'}, Support: 6
Item Set: {'I2'}, Support: 7
Item Set: {'I3'}, Support: 6
Item Set: {'I4'}, Support: 2
Item Set: {'I5'}, Support: 2

Level 2:
Item Set: {'I2', 'I1'}, Support: 4
Item Set: {'I3', 'I1'}, Support: 4
Item Set: {'I1', 'I5'}, Support: 2
Item Set: {'I3', 'I2'}, Support: 4
Item Set: {'I2', 'I4'}, Support: 2
Item Set: {'I2', 'I5'}, Support: 2

Level 3:
Item Set: {'I3', 'I2', 'I1'}, Support: 2
Item Set: {'I2', 'I1', 'I5'}, Support: 2


In [None]:
# Block 12: Generate all possible association rules.
# Every frequent item set is split into each non-empty proper subset (the
# antecedent) and its complement (the consequent). The metric fields are left
# as None here and filled in by later cells.
all_association_rules = []
for entry in frequent_item_sets:
    members = entry['Item Set']

    # Antecedent sizes 1 .. len(members) - 1; single-item sets produce no rules
    for size in range(1, len(members)):
        for lhs in map(set, combinations(members, size)):
            all_association_rules.append({
                'Antecedent': lhs,
                'Consequent': set(members) - lhs,
                'Support': entry['Support'],
                'Confidence': None,
                'Rule Type': None,
                'Lift': None,
                'Lift Type': None
            })

In [None]:
# Block 13: Calculate confidence for all possible association rules.
# confidence(A -> B) = support(A and B) / support(A); a rule is "strong" when
# its confidence reaches min_confidence, otherwise "weak".
for rule in all_association_rules:
    lhs = rule['Antecedent']

    # Support of the antecedent, taken from the first matching frequent item set
    lhs_support = next(
        entry['Support']
        for entry in frequent_item_sets
        if entry['Item Set'] == lhs
    )

    confidence = rule['Support'] / lhs_support
    rule['Confidence'] = confidence
    rule['Rule Type'] = "strong" if confidence >= min_confidence else "weak"


In [None]:
# Block 14: Calculate lift for all possible association rules
#
# lift(A -> B) = P(A and B) / (P(A) * P(B))
#
# The supports stored in this notebook are absolute transaction counts, not
# probabilities. With N transactions in total:
#
#   lift = (count_AB / N) / ((count_A / N) * (count_B / N))
#        = count_AB * N / (count_A * count_B)
#
# Dividing the raw counts without the factor N (as was done previously)
# shrinks every lift by 1/N, so every rule looked negatively correlated.

# N = number of distinct transaction IDs seen across all items.
# NOTE(review): a transaction that contains no item at all cannot appear in
# the vertical format and is therefore not counted - confirm this is acceptable.
total_transactions = len(set().union(*result_data['Transactions']))

for rule in all_association_rules:
    antecedent_set = rule['Antecedent']
    consequent_set = rule['Consequent']

    # Find the support for the antecedent and consequent in frequent_item_sets
    antecedent_support = next(item_set['Support'] for item_set in frequent_item_sets if item_set['Item Set'] == antecedent_set)
    consequent_support = next(item_set['Support'] for item_set in frequent_item_sets if item_set['Item Set'] == consequent_set)

    # Observed joint count scaled by N versus the product of the two counts
    observed = rule['Support'] * total_transactions
    expected = antecedent_support * consequent_support
    rule['Lift'] = observed / expected

    # Classify on the integer counts so lift == 1 is detected exactly,
    # without relying on floating-point equality.
    if observed > expected:
        rule['Lift Type'] = "dependent, + correlated"
    elif observed < expected:
        rule['Lift Type'] = "dependent, - correlated"
    else:
        rule['Lift Type'] = "Independent"


In [None]:
# Block 15: List every generated rule as "antecedent -> consequent"
print("\nAll Possible Association Rules:")
for rule in all_association_rules:
    lhs, rhs = rule['Antecedent'], rule['Consequent']
    print(f"{lhs} -> {rhs}")



All Possible Association Rules:
{'I2'} -> {'I1'}
{'I1'} -> {'I2'}
{'I3'} -> {'I1'}
{'I1'} -> {'I3'}
{'I1'} -> {'I5'}
{'I5'} -> {'I1'}
{'I3'} -> {'I2'}
{'I2'} -> {'I3'}
{'I2'} -> {'I4'}
{'I4'} -> {'I2'}
{'I2'} -> {'I5'}
{'I5'} -> {'I2'}
{'I3'} -> {'I2', 'I1'}
{'I2'} -> {'I3', 'I1'}
{'I1'} -> {'I3', 'I2'}
{'I3', 'I2'} -> {'I1'}
{'I3', 'I1'} -> {'I2'}
{'I2', 'I1'} -> {'I3'}
{'I2'} -> {'I1', 'I5'}
{'I1'} -> {'I2', 'I5'}
{'I5'} -> {'I2', 'I1'}
{'I2', 'I1'} -> {'I5'}
{'I2', 'I5'} -> {'I1'}
{'I1', 'I5'} -> {'I2'}


In [None]:
# Block 16: List every rule together with its confidence and strong/weak label
print("\nAll Possible Association Rules with Confidence Types:")
for rule in all_association_rules:
    arrow = f"{rule['Antecedent']} -> {rule['Consequent']}"
    metrics = f"Confidence={rule['Confidence']}, Type={rule['Rule Type']}"
    print(f"{arrow}: {metrics}")



All Possible Association Rules with Confidence Types:
{'I2'} -> {'I1'}: Confidence=0.5714285714285714, Type=weak
{'I1'} -> {'I2'}: Confidence=0.6666666666666666, Type=strong
{'I3'} -> {'I1'}: Confidence=0.6666666666666666, Type=strong
{'I1'} -> {'I3'}: Confidence=0.6666666666666666, Type=strong
{'I1'} -> {'I5'}: Confidence=0.3333333333333333, Type=weak
{'I5'} -> {'I1'}: Confidence=1.0, Type=strong
{'I3'} -> {'I2'}: Confidence=0.6666666666666666, Type=strong
{'I2'} -> {'I3'}: Confidence=0.5714285714285714, Type=weak
{'I2'} -> {'I4'}: Confidence=0.2857142857142857, Type=weak
{'I4'} -> {'I2'}: Confidence=1.0, Type=strong
{'I2'} -> {'I5'}: Confidence=0.2857142857142857, Type=weak
{'I5'} -> {'I2'}: Confidence=1.0, Type=strong
{'I3'} -> {'I2', 'I1'}: Confidence=0.3333333333333333, Type=weak
{'I2'} -> {'I3', 'I1'}: Confidence=0.2857142857142857, Type=weak
{'I1'} -> {'I3', 'I2'}: Confidence=0.3333333333333333, Type=weak
{'I3', 'I2'} -> {'I1'}: Confidence=0.5, Type=weak
{'I3', 'I1'} -> {'I2'}:

In [None]:
# Block 17: Keep only the rules that Block 13 labelled "strong"
strong_rules = list(
    filter(lambda candidate: candidate['Rule Type'] == 'strong', all_association_rules)
)


In [None]:
# Block 18: List the strong rules with their confidence
print("\nStrong Association Rules:")
for rule in strong_rules:
    arrow = f"{rule['Antecedent']} -> {rule['Consequent']}"
    metrics = f"Confidence={rule['Confidence']}, Type={rule['Rule Type']}"
    print(f"{arrow}: {metrics}")



Strong Association Rules:
{'I1'} -> {'I2'}: Confidence=0.6666666666666666, Type=strong
{'I3'} -> {'I1'}: Confidence=0.6666666666666666, Type=strong
{'I1'} -> {'I3'}: Confidence=0.6666666666666666, Type=strong
{'I5'} -> {'I1'}: Confidence=1.0, Type=strong
{'I3'} -> {'I2'}: Confidence=0.6666666666666666, Type=strong
{'I4'} -> {'I2'}: Confidence=1.0, Type=strong
{'I5'} -> {'I2'}: Confidence=1.0, Type=strong
{'I5'} -> {'I2', 'I1'}: Confidence=1.0, Type=strong
{'I2', 'I5'} -> {'I1'}: Confidence=1.0, Type=strong
{'I1', 'I5'} -> {'I2'}: Confidence=1.0, Type=strong


In [None]:
# Block 19: Print unique association rules with lift type.
# A rule is skipped when the same rule, or its mirror image (antecedent and
# consequent swapped), has already been printed with the same lift type.
print("\nUnique Association Rules with Lift Types:")
unique_rules = set()

for rule in all_association_rules:
    lhs = set(rule['Antecedent'])
    rhs = set(rule['Consequent'])
    lift_type = rule['Lift Type']

    # frozensets are hashable, so the triple can be stored in a set
    forward = (frozenset(lhs), frozenset(rhs), lift_type)
    mirrored = (forward[1], forward[0], lift_type)

    if forward in unique_rules or mirrored in unique_rules:
        continue

    unique_rules.add(forward)
    print(f"{set(lhs)} -> {set(rhs)}: Lift={rule['Lift']}, Lift Type={lift_type}")



Unique Association Rules with Lift Types:
{'I2'} -> {'I1'}: Lift=0.09523809523809523, Lift Type=dependent, - correlated
{'I3'} -> {'I1'}: Lift=0.1111111111111111, Lift Type=dependent, - correlated
{'I1'} -> {'I5'}: Lift=0.16666666666666666, Lift Type=dependent, - correlated
{'I3'} -> {'I2'}: Lift=0.09523809523809523, Lift Type=dependent, - correlated
{'I2'} -> {'I4'}: Lift=0.14285714285714285, Lift Type=dependent, - correlated
{'I2'} -> {'I5'}: Lift=0.14285714285714285, Lift Type=dependent, - correlated
{'I3'} -> {'I2', 'I1'}: Lift=0.08333333333333333, Lift Type=dependent, - correlated
{'I2'} -> {'I3', 'I1'}: Lift=0.07142857142857142, Lift Type=dependent, - correlated
{'I1'} -> {'I3', 'I2'}: Lift=0.08333333333333333, Lift Type=dependent, - correlated
{'I2'} -> {'I1', 'I5'}: Lift=0.14285714285714285, Lift Type=dependent, - correlated
{'I1'} -> {'I2', 'I5'}: Lift=0.16666666666666666, Lift Type=dependent, - correlated
{'I5'} -> {'I2', 'I1'}: Lift=0.25, Lift Type=dependent, - correlated


In [None]:
# Block 19: Convert Frequent Item Sets to DataFrame for display
#
# Each entry of frequent_item_sets is a dict with the keys 'Item Set' and
# 'Support', so the whole list can be handed to the DataFrame constructor in
# one call. This replaces growing the frame with pd.concat inside a loop,
# which copies the frame on every iteration and concatenates onto an empty
# frame. `columns=` fixes the column order and keeps both columns when the
# list is empty.
frequent_item_sets_df = pd.DataFrame(frequent_item_sets, columns=['Item Set', 'Support'])


In [None]:
# Block 20: Display Frequent Item Sets
def highlight_max(s):
    """Return one CSS string per element of `s`, marking the maximum value(s).

    Elements equal to `s.max()` get 'background-color: lightblue'; all other
    elements get an empty string (no styling).
    """
    peak = s.max()
    styles = []
    for is_peak in (s == peak):
        styles.append('background-color: lightblue' if is_peak else '')
    return styles


# Shade the 'Support' column with a blue gradient, then mark column maxima.
# NOTE(review): apply() is called without `subset`, so highlight_max runs on
# every column, including 'Item Set' - confirm that is intended.
support_gradient = frequent_item_sets_df.style.background_gradient(subset=['Support'], cmap='Blues')
styled_df = support_gradient.apply(highlight_max)
styled_df


Unnamed: 0,Item Set,Support
0,{'I1'},6
1,{'I2'},7
2,{'I3'},6
3,{'I4'},2
4,{'I5'},2
5,"{'I2', 'I1'}",4
6,"{'I3', 'I1'}",4
7,"{'I1', 'I5'}",2
8,"{'I3', 'I2'}",4
9,"{'I2', 'I4'}",2
