In [7]:
from collections import Counter
from itertools import chain
import pandas as pd

class AprioriAlgorithm:
    """Mine frequent itemsets from in-memory transactions with a level-wise Apriori search.

    Support is an absolute number of transactions (not a fraction). Every
    itemset reported by ``generate_frequent_itemsets`` is a ``frozenset``,
    including the single-item ones, so callers can treat all results uniformly.
    """

    def __init__(self, data, min_support=3):
        """Store the transactions and the support threshold.

        Args:
            data: Iterable of transactions; each transaction is an iterable of
                hashable items. Duplicates inside one transaction are collapsed.
            min_support: Minimum number of transactions an itemset must occur in
                to count as frequent. Defaults to 3 (the previously fixed value).
        """
        self.transactions = [set(transaction) for transaction in data]
        self.min_support = min_support

    def get_frequent_items(self):
        """Return the set of individual items found in at least ``min_support`` transactions."""
        item_counts = Counter(chain.from_iterable(self.transactions))
        return {item for item, count in item_counts.items() if count >= self.min_support}

    @staticmethod
    def _as_itemset(entry):
        """Coerce a bare item or a collection of items into a frozenset itemset."""
        if isinstance(entry, (set, frozenset, list, tuple)):
            return frozenset(entry)
        # Strings and other scalars are one item, never a collection of characters.
        return frozenset([entry])

    def generate_candidates(self, prev_frequent_items, length):
        """Build candidate itemsets of size ``length`` from the previous level.

        Args:
            prev_frequent_items: Iterable of bare items and/or itemsets.
            length: Required size of each candidate.

        Returns:
            List of distinct frozensets, each the union of two previous-level
            itemsets and containing exactly ``length`` items.
        """
        itemsets = [self._as_itemset(entry) for entry in prev_frequent_items]
        candidates = []
        seen = set()  # constant-time duplicate check instead of scanning the list
        for position, left in enumerate(itemsets):
            # Unordered pairs are enough: union is symmetric and a set joined
            # with itself can never grow to the next size.
            for right in itemsets[position + 1:]:
                union_set = left | right
                if len(union_set) == length and union_set not in seen:
                    seen.add(union_set)
                    candidates.append(union_set)
        return candidates

    def generate_frequent_itemsets(self):
        """Return every frequent itemset, smallest sizes first.

        Returns:
            List of frozensets. Order within one size level is unspecified.
        """
        frequent_itemsets = []
        k = 1
        # Wrap single items so every level holds the same type.
        current_level = {frozenset([item]) for item in self.get_frequent_items()}

        while current_level:
            frequent_itemsets.extend(current_level)
            k += 1
            candidates = self.generate_candidates(current_level, k)
            candidate_counts = Counter(
                candidate
                for transaction in self.transactions
                for candidate in candidates
                if candidate <= transaction
            )
            current_level = {
                itemset
                for itemset, count in candidate_counts.items()
                if count >= self.min_support
            }

        return frequent_itemsets

def convert_excel(input_file, output_file):
    """Load a transaction workbook and return it in horizontal format.

    Horizontal format has one row per transaction id with a comma-separated
    ``items`` string. If the sheet already has both ``TiD`` and ``items``
    columns it is returned as read and nothing is written. Otherwise the sheet
    is treated as vertical: each row pairs an ``items`` cell with a
    comma-separated list of transaction ids, and the rows are regrouped by id.

    Args:
        input_file: Path of the Excel file to read.
        output_file: Path the regrouped sheet is written to (vertical input only).

    Returns:
        The original DataFrame for horizontal input, otherwise the regrouped
        DataFrame with one row per transaction id.
    """
    df = pd.read_excel(input_file)

    # Already horizontal: hand the frame back untouched.
    if 'TiD' in df.columns and 'items' in df.columns:
        return df

    tid_column = 'TiD' if 'TiD' in df.columns else 'TiD_items'
    items_column = 'items'

    # Maps transaction id -> items in first-seen order (dicts keep insertion order).
    tid_dict = {}
    for tid_cell, item_cell in zip(df[tid_column], df[items_column]):
        if pd.isna(tid_cell) or pd.isna(item_cell):
            continue  # a blank cell would otherwise turn into the literal text "nan"

        # Split on commas so a multi-character item stays one item; calling
        # list() on the cell would break it into single characters.
        items = [token.strip() for token in str(item_cell).split(',') if token.strip()]

        for tid in str(tid_cell).split(','):
            tid = tid.strip()
            if tid:
                tid_dict.setdefault(tid, []).extend(items)

    output_df = pd.DataFrame(list(tid_dict.items()), columns=[tid_column, items_column])

    # dict.fromkeys de-duplicates while keeping a stable, reproducible order.
    output_df[items_column] = output_df[items_column].apply(lambda x: ','.join(dict.fromkeys(x)))

    output_df.to_excel(output_file, index=False)

    return output_df

# Demo run: convert the sample workbook, then mine it for frequent itemsets.
input_file, output_file = '/content/sample_data/Horizontal_Format (1) (1).xlsx', 'output_file.xlsx'
modified_data = convert_excel(input_file, output_file)

# Each "items" cell is a comma-separated string; split it into one list per transaction.
apriori = AprioriAlgorithm([cell.split(',') for cell in modified_data['items']])
frequent_itemsets = apriori.generate_frequent_itemsets()

print("Frequent Itemsets:")
for itemset in frequent_itemsets:
    # Wrap in frozenset so every result is shown in set notation.
    print(frozenset(itemset))


Frequent Itemsets:
frozenset({'Y'})
frozenset({'E'})
frozenset({'O'})
frozenset({'K'})
frozenset({'M'})
frozenset({'K', 'Y'})
frozenset({'O', 'E'})
frozenset({'K', 'E'})
frozenset({'K', 'M'})
frozenset({'O', 'K'})
frozenset({'O', 'K', 'E'})


In [13]:
from collections import Counter
from itertools import chain
import pandas as pd

class AprioriAlgorithm:
    """Frequent-itemset miner using the level-wise Apriori search.

    Support is counted in transactions (an absolute count, not a ratio). All
    itemsets returned by ``generate_frequent_itemsets`` are frozensets, so a
    single item named "milk" is reported as ``frozenset({'milk'})`` and is
    never mistaken for a collection of characters.
    """

    def __init__(self, data, min_support=3):
        """Keep the transactions as sets and remember the support threshold.

        Args:
            data: Iterable of transactions, each an iterable of hashable items.
            min_support: Smallest number of containing transactions for an
                itemset to be frequent. Defaults to 3, matching the old constant.
        """
        self.transactions = [set(transaction) for transaction in data]
        self.min_support = min_support

    def get_frequent_items(self):
        """Return the individual items whose transaction count reaches ``min_support``."""
        item_counts = Counter(chain.from_iterable(self.transactions))
        return {item for item, count in item_counts.items() if count >= self.min_support}

    @staticmethod
    def _as_itemset(entry):
        """Turn a bare item or a collection of items into a frozenset."""
        if isinstance(entry, (set, frozenset, list, tuple)):
            return frozenset(entry)
        # A string or any other scalar is a single item.
        return frozenset([entry])

    def generate_candidates(self, prev_frequent_items, length):
        """Join previous-level itemsets pairwise into candidates of size ``length``.

        Args:
            prev_frequent_items: Iterable of bare items and/or itemsets.
            length: Number of items every candidate must contain.

        Returns:
            List of distinct frozensets of exactly ``length`` items.
        """
        itemsets = [self._as_itemset(entry) for entry in prev_frequent_items]
        candidates = []
        seen = set()  # hash lookup replaces a linear scan of the result list
        for position, left in enumerate(itemsets):
            # Each unordered pair once; joining a set with itself adds nothing.
            for right in itemsets[position + 1:]:
                union_set = left | right
                if len(union_set) == length and union_set not in seen:
                    seen.add(union_set)
                    candidates.append(union_set)
        return candidates

    def generate_frequent_itemsets(self):
        """Collect all frequent itemsets, level by level.

        Returns:
            List of frozensets ordered by size; order inside a level is unspecified.
        """
        frequent_itemsets = []
        k = 1
        # Normalise level 1 to frozensets so later levels and callers see one type.
        current_level = {frozenset([item]) for item in self.get_frequent_items()}

        while current_level:
            frequent_itemsets.extend(current_level)
            k += 1
            candidates = self.generate_candidates(current_level, k)
            candidate_counts = Counter(
                candidate
                for transaction in self.transactions
                for candidate in candidates
                if candidate <= transaction
            )
            current_level = {
                itemset
                for itemset, count in candidate_counts.items()
                if count >= self.min_support
            }

        return frequent_itemsets

def convert_excel(input_file, output_file):
    """Read a transaction workbook and return it with one row per transaction.

    A sheet that already has both ``TiD`` and ``items`` columns is considered
    horizontal and is returned unchanged without writing anything. Any other
    sheet is treated as vertical: each row holds an ``items`` cell and a
    comma-separated list of transaction ids, which are regrouped by id.

    Args:
        input_file: Path of the Excel file to read.
        output_file: Destination path for the regrouped sheet (vertical input only).

    Returns:
        The DataFrame as read (horizontal input) or the regrouped DataFrame.
    """
    df = pd.read_excel(input_file)

    # Nothing to convert when the sheet is already horizontal.
    if 'TiD' in df.columns and 'items' in df.columns:
        return df

    tid_column = 'TiD' if 'TiD' in df.columns else 'TiD_items'
    items_column = 'items'

    # transaction id -> list of items, in first-seen order
    tid_dict = {}
    for tid_cell, item_cell in zip(df[tid_column], df[items_column]):
        if pd.isna(tid_cell) or pd.isna(item_cell):
            continue  # skip blank cells so they do not become the text "nan"

        # Comma-split keeps multi-character item names whole; list() on the
        # cell would have produced one entry per character.
        items = [token.strip() for token in str(item_cell).split(',') if token.strip()]

        for tid in str(tid_cell).split(','):
            tid = tid.strip()
            if tid:
                tid_dict.setdefault(tid, []).extend(items)

    output_df = pd.DataFrame(list(tid_dict.items()), columns=[tid_column, items_column])

    # Ordered de-duplication gives the same output on every run.
    output_df[items_column] = output_df[items_column].apply(lambda x: ','.join(dict.fromkeys(x)))

    output_df.to_excel(output_file, index=False)

    return output_df

# Demo run: convert the sample workbook, then mine it for frequent itemsets.
input_file, output_file = '/content/sample_data/Horizontal_Format.xlsx', 'output_file.xlsx'
modified_data = convert_excel(input_file, output_file)

# Each "items" cell is a comma-separated string; split it into one list per transaction.
apriori = AprioriAlgorithm([cell.split(',') for cell in modified_data['items']])
frequent_itemsets = apriori.generate_frequent_itemsets()

print("Frequent Itemsets:")
for itemset in frequent_itemsets:
    # Show every result as a plain set literal.
    print(set(itemset))


Frequent Itemsets:
{'k'}
{'o'}
{'y'}
{'e'}
{'m'}
{'k', 'y'}
{'k', 'e'}
{'o', 'e'}
{'k', 'o'}
{'k', 'm'}
{'k', 'e', 'o'}


In [17]:
from collections import Counter
from itertools import chain, combinations
import pandas as pd

class AprioriAlgorithm:
    """Apriori frequent-itemset miner with confidence/lift association rules.

    Support is an absolute transaction count. Frequent itemsets are computed
    once in ``__init__`` and stored in ``self.frequent_itemsets`` as frozensets
    (single items included), so rule generation never sees a bare string.
    """

    def __init__(self, data, min_support=3, min_confidence=0.5):
        """Normalise the transactions and mine the frequent itemsets.

        Args:
            data: Iterable of transactions. Each one is either a
                comma-separated string or any iterable of hashable items
                (list, tuple, set, frozenset, ...).
            min_support: Minimum number of containing transactions for an
                itemset to be frequent.
            min_confidence: Minimum confidence for a rule to be reported by
                ``generate_association_rules``.
        """
        self.transactions = [self._to_transaction(transaction) for transaction in data]
        self.min_support = min_support
        self.min_confidence = min_confidence
        self.frequent_itemsets = self.generate_frequent_itemsets()

    @staticmethod
    def _to_transaction(transaction):
        """Convert one raw transaction (string or iterable) into a set of items."""
        if isinstance(transaction, str):
            # Tolerate "a, b" style spacing and stray empty tokens.
            return {token.strip() for token in transaction.split(',') if token.strip()}
        return set(transaction)

    @staticmethod
    def _as_itemset(entry):
        """Turn a bare item or a collection of items into a frozenset."""
        if isinstance(entry, (set, frozenset, list, tuple)):
            return frozenset(entry)
        return frozenset([entry])

    def get_frequent_items(self):
        """Return the individual items found in at least ``min_support`` transactions."""
        item_counts = Counter(chain.from_iterable(self.transactions))
        return {item for item, count in item_counts.items() if count >= self.min_support}

    def generate_candidates(self, prev_frequent_items, length):
        """Join previous-level itemsets pairwise into distinct candidates of size ``length``.

        Args:
            prev_frequent_items: Iterable of bare items and/or itemsets.
            length: Number of items every candidate must contain.

        Returns:
            List of distinct frozensets of exactly ``length`` items.
        """
        itemsets = [self._as_itemset(entry) for entry in prev_frequent_items]
        candidates = []
        seen = set()  # constant-time duplicate check
        for left, right in combinations(itemsets, 2):
            union_set = left | right
            if len(union_set) == length and union_set not in seen:
                seen.add(union_set)
                candidates.append(union_set)
        return candidates

    def generate_frequent_itemsets(self):
        """Return all frequent itemsets as frozensets, smallest sizes first."""
        frequent_itemsets = []
        k = 1
        # Level 1 is wrapped so every level holds frozensets.
        current_level = {frozenset([item]) for item in self.get_frequent_items()}

        while current_level:
            frequent_itemsets.extend(current_level)
            k += 1
            candidates = self.generate_candidates(current_level, k)
            candidate_counts = Counter(
                candidate
                for transaction in self.transactions
                for candidate in candidates
                if candidate <= transaction
            )
            current_level = {
                itemset
                for itemset, count in candidate_counts.items()
                if count >= self.min_support
            }

        return frequent_itemsets

    def calculate_support(self, itemset):
        """Return how many transactions contain every item of ``itemset`` (a set or frozenset)."""
        return sum(1 for transaction in self.transactions if itemset.issubset(transaction))

    def calculate_confidence(self, antecedent, consequent):
        """Return support(antecedent ∪ consequent) / support(antecedent).

        Returns 0 when the antecedent never occurs, to avoid dividing by zero.
        """
        antecedent_support = self.calculate_support(antecedent)
        if antecedent_support == 0:
            return 0

        rule_support = self.calculate_support(antecedent.union(consequent))
        return rule_support / antecedent_support

    def calculate_lift(self, rule):
        """Return the lift of ``rule``, an ``(antecedent, consequent)`` pair.

        Lift is N * support(A ∪ C) / (support(A) * support(C)) with N the
        number of transactions. Returns 0 when the two sides never co-occur.
        """
        antecedent, consequent = rule
        rule_support = self.calculate_support(antecedent.union(consequent))

        # If the sides never co-occur the lift is 0; this also covers the only
        # case where either individual support (the denominator) could be 0.
        if rule_support == 0:
            return 0

        antecedent_support = self.calculate_support(antecedent)
        consequent_support = self.calculate_support(consequent)
        return (len(self.transactions) * rule_support) / (antecedent_support * consequent_support)

    def generate_association_rules(self):
        """Return the rules whose confidence reaches ``min_confidence``.

        Every frequent itemset with two or more items is split into each
        non-empty proper antecedent and its complementary consequent.

        Returns:
            List of ``(antecedent, consequent, confidence, lift)`` tuples with
            both sides as frozensets.
        """
        rules = []
        for itemset in self.frequent_itemsets:
            if len(itemset) < 2:
                continue
            for antecedent_size in range(1, len(itemset)):
                for combo in combinations(itemset, antecedent_size):
                    antecedent = frozenset(combo)
                    consequent = itemset - antecedent

                    confidence = self.calculate_confidence(antecedent, consequent)
                    if confidence >= self.min_confidence:
                        lift = self.calculate_lift((antecedent, consequent))
                        rules.append((antecedent, consequent, confidence, lift))

        return rules

# Demo run: load the sample workbook in horizontal form.
input_file, output_file = '/content/sample_data/Horizontal_Format.xlsx', 'output_file.xlsx'
modified_data = convert_excel(input_file, output_file)

# The class splits the comma-separated "items" strings itself.
raw_item_strings = modified_data['items'].tolist()
apriori = AprioriAlgorithm(raw_item_strings)

# Frequent itemsets were mined in the constructor; show them as plain sets.
print("Frequent Itemsets:")
for itemset in apriori.frequent_itemsets:
    print(set(itemset))

# Derive the association rules and print one line per rule.
association_rules = apriori.generate_association_rules()
print("\nAssociation Rules:")
for antecedent, consequent, confidence, lift in association_rules:
    print(f"{set(antecedent)} -> {set(consequent)} (Confidence: {confidence:.2f}, Lift: {lift:.2f})")


Frequent Itemsets:
{'k'}
{'o'}
{'y'}
{'e'}
{'m'}
{'k', 'y'}
{'k', 'e'}
{'o', 'e'}
{'k', 'o'}
{'k', 'm'}
{'k', 'e', 'o'}

Association Rules:
{'y'} -> {'k'} (Confidence: 1.00, Lift: 1.00)
{'k'} -> {'y'} (Confidence: 0.60, Lift: 1.00)
{'e'} -> {'k'} (Confidence: 1.00, Lift: 1.00)
{'k'} -> {'e'} (Confidence: 0.80, Lift: 1.00)
{'o'} -> {'e'} (Confidence: 1.00, Lift: 1.25)
{'e'} -> {'o'} (Confidence: 0.75, Lift: 1.25)
{'o'} -> {'k'} (Confidence: 1.00, Lift: 1.00)
{'k'} -> {'o'} (Confidence: 0.60, Lift: 1.00)
{'m'} -> {'k'} (Confidence: 1.00, Lift: 1.00)
{'k'} -> {'m'} (Confidence: 0.60, Lift: 1.00)
{'o'} -> {'k', 'e'} (Confidence: 1.00, Lift: 1.25)
{'e'} -> {'k', 'o'} (Confidence: 0.75, Lift: 1.25)
{'k'} -> {'e', 'o'} (Confidence: 0.60, Lift: 1.00)
{'e', 'o'} -> {'k'} (Confidence: 1.00, Lift: 1.00)
{'k', 'o'} -> {'e'} (Confidence: 1.00, Lift: 1.25)
{'k', 'e'} -> {'o'} (Confidence: 0.75, Lift: 1.25)


In [18]:
from collections import Counter
from itertools import chain, combinations
import pandas as pd

class AprioriAlgorithm:
    """Apriori miner that labels each association rule's confidence and lift direction.

    Support is an absolute transaction count. Frequent itemsets are computed
    once in ``__init__`` and stored in ``self.frequent_itemsets`` as frozensets
    (single items included).
    """

    def __init__(self, data, min_support=3, min_confidence=0.5):
        """Normalise the transactions and mine the frequent itemsets.

        Args:
            data: Iterable of transactions. Each one is either a
                comma-separated string or any iterable of hashable items.
            min_support: Minimum number of containing transactions for an
                itemset to be frequent.
            min_confidence: Confidence threshold separating a "Positive"
                confidence label from a "Negative" one.
        """
        self.transactions = [self._to_transaction(transaction) for transaction in data]
        self.min_support = min_support
        self.min_confidence = min_confidence
        self.frequent_itemsets = self.generate_frequent_itemsets()

    @staticmethod
    def _to_transaction(transaction):
        """Convert one raw transaction (string or iterable) into a set of items."""
        if isinstance(transaction, str):
            # Tolerate "a, b" style spacing and stray empty tokens.
            return {token.strip() for token in transaction.split(',') if token.strip()}
        return set(transaction)

    @staticmethod
    def _as_itemset(entry):
        """Turn a bare item or a collection of items into a frozenset."""
        if isinstance(entry, (set, frozenset, list, tuple)):
            return frozenset(entry)
        return frozenset([entry])

    def get_frequent_items(self):
        """Return the individual items found in at least ``min_support`` transactions."""
        item_counts = Counter(chain.from_iterable(self.transactions))
        return {item for item, count in item_counts.items() if count >= self.min_support}

    def generate_candidates(self, prev_frequent_items, length):
        """Join previous-level itemsets pairwise into distinct candidates of size ``length``.

        Args:
            prev_frequent_items: Iterable of bare items and/or itemsets.
            length: Number of items every candidate must contain.

        Returns:
            List of distinct frozensets of exactly ``length`` items.
        """
        itemsets = [self._as_itemset(entry) for entry in prev_frequent_items]
        candidates = []
        seen = set()  # constant-time duplicate check
        for left, right in combinations(itemsets, 2):
            union_set = left | right
            if len(union_set) == length and union_set not in seen:
                seen.add(union_set)
                candidates.append(union_set)
        return candidates

    def generate_frequent_itemsets(self):
        """Return all frequent itemsets as frozensets, smallest sizes first."""
        frequent_itemsets = []
        k = 1
        # Level 1 is wrapped so every level holds frozensets.
        current_level = {frozenset([item]) for item in self.get_frequent_items()}

        while current_level:
            frequent_itemsets.extend(current_level)
            k += 1
            candidates = self.generate_candidates(current_level, k)
            candidate_counts = Counter(
                candidate
                for transaction in self.transactions
                for candidate in candidates
                if candidate <= transaction
            )
            current_level = {
                itemset
                for itemset, count in candidate_counts.items()
                if count >= self.min_support
            }

        return frequent_itemsets

    def calculate_support(self, itemset):
        """Return how many transactions contain every item of ``itemset`` (a set or frozenset)."""
        return sum(1 for transaction in self.transactions if itemset.issubset(transaction))

    def calculate_confidence(self, antecedent, consequent):
        """Return ``(confidence, direction)`` for the rule antecedent -> consequent.

        Confidence is support(antecedent ∪ consequent) / support(antecedent).
        The direction is "Positive" when the confidence reaches
        ``min_confidence`` and "Negative" otherwise. ``(0, None)`` is returned
        when the antecedent never occurs.
        """
        antecedent_support = self.calculate_support(antecedent)
        if antecedent_support == 0:
            return 0, None  # avoid division by zero

        rule_support = self.calculate_support(antecedent.union(consequent))
        confidence = rule_support / antecedent_support
        # A ratio of counts is never negative, so comparing with 0 could not
        # yield "Negative"; the configured threshold is the meaningful cut-off.
        direction = "Positive" if confidence >= self.min_confidence else "Negative"
        return confidence, direction

    def calculate_lift(self, rule):
        """Return ``(lift, direction)`` for ``rule``, an ``(antecedent, consequent)`` pair.

        Lift is N * support(A ∪ C) / (support(A) * support(C)) with N the
        number of transactions. The direction is "Positive" for lift >= 1 and
        "Negative" below 1. ``(0, None)`` is returned when the two sides never
        co-occur.
        """
        antecedent, consequent = rule
        rule_support = self.calculate_support(antecedent.union(consequent))

        # If the sides never co-occur the lift is 0; this also covers the only
        # case where either individual support (the denominator) could be 0.
        if rule_support == 0:
            return 0, None

        antecedent_support = self.calculate_support(antecedent)
        consequent_support = self.calculate_support(consequent)
        lift = (len(self.transactions) * rule_support) / (antecedent_support * consequent_support)
        # NOTE(review): a lift of exactly 1 indicates statistical independence
        # but is labelled "Positive" here; confirm that is the intended reading.
        direction = "Positive" if lift >= 1 else "Negative"
        return lift, direction

    def generate_association_rules(self):
        """Return every rule derivable from the frequent itemsets, with labels.

        No confidence filter is applied; ``min_confidence`` only decides the
        confidence label.

        Returns:
            List of ``(antecedent, consequent, confidence,
            confidence_direction, lift, lift_direction)`` tuples with both
            sides as frozensets.
        """
        rules = []
        for itemset in self.frequent_itemsets:
            if len(itemset) < 2:
                continue
            for antecedent_size in range(1, len(itemset)):
                for combo in combinations(itemset, antecedent_size):
                    antecedent = frozenset(combo)
                    consequent = itemset - antecedent

                    confidence, confidence_direction = self.calculate_confidence(antecedent, consequent)
                    lift, lift_direction = self.calculate_lift((antecedent, consequent))

                    rules.append((antecedent, consequent, confidence, confidence_direction, lift, lift_direction))

        return rules

# Demo run: load the sample workbook in horizontal form.
input_file, output_file = '/content/sample_data/Horizontal_Format.xlsx', 'output_file.xlsx'
modified_data = convert_excel(input_file, output_file)

# The class splits the comma-separated "items" strings itself.
raw_item_strings = modified_data['items'].tolist()
apriori = AprioriAlgorithm(raw_item_strings)

# Frequent itemsets were mined in the constructor; show them as plain sets.
print("Frequent Itemsets:")
for itemset in apriori.frequent_itemsets:
    print(set(itemset))

# Derive the association rules and print one labelled line per rule.
association_rules = apriori.generate_association_rules()
print("\nAssociation Rules:")
for antecedent, consequent, confidence, confidence_direction, lift, lift_direction in association_rules:
    print(f"{set(antecedent)} -> {set(consequent)} (Confidence: {confidence:.2f} ({confidence_direction}), Lift: {lift:.2f} ({lift_direction}))")


Frequent Itemsets:
{'k'}
{'o'}
{'y'}
{'e'}
{'m'}
{'k', 'y'}
{'k', 'e'}
{'o', 'e'}
{'k', 'o'}
{'k', 'm'}
{'k', 'e', 'o'}

Association Rules:
{'y'} -> {'k'} (Confidence: 1.00 (Positive), Lift: 1.00 (Positive))
{'k'} -> {'y'} (Confidence: 0.60 (Positive), Lift: 1.00 (Positive))
{'e'} -> {'k'} (Confidence: 1.00 (Positive), Lift: 1.00 (Positive))
{'k'} -> {'e'} (Confidence: 0.80 (Positive), Lift: 1.00 (Positive))
{'o'} -> {'e'} (Confidence: 1.00 (Positive), Lift: 1.25 (Positive))
{'e'} -> {'o'} (Confidence: 0.75 (Positive), Lift: 1.25 (Positive))
{'o'} -> {'k'} (Confidence: 1.00 (Positive), Lift: 1.00 (Positive))
{'k'} -> {'o'} (Confidence: 0.60 (Positive), Lift: 1.00 (Positive))
{'m'} -> {'k'} (Confidence: 1.00 (Positive), Lift: 1.00 (Positive))
{'k'} -> {'m'} (Confidence: 0.60 (Positive), Lift: 1.00 (Positive))
{'o'} -> {'k', 'e'} (Confidence: 1.00 (Positive), Lift: 1.25 (Positive))
{'e'} -> {'k', 'o'} (Confidence: 0.75 (Positive), Lift: 1.25 (Positive))
{'k'} -> {'e', 'o'} (Confidence: 0

In [21]:
from collections import Counter
from itertools import chain, combinations
import pandas as pd

class AprioriAlgorithm:
    """Apriori miner that labels each rule's confidence, lift and dependency.

    Support is an absolute transaction count. Frequent itemsets are computed
    once in ``__init__`` and stored in ``self.frequent_itemsets`` as frozensets
    (single items included).
    """

    def __init__(self, data, min_support=3, min_confidence=0.5):
        """Normalise the transactions and mine the frequent itemsets.

        Args:
            data: Iterable of transactions. Each one is either a
                comma-separated string or any iterable of hashable items.
            min_support: Minimum number of containing transactions for an
                itemset to be frequent.
            min_confidence: Confidence threshold separating a "Positive"
                confidence label from a "Negative" one.
        """
        self.transactions = [self._to_transaction(transaction) for transaction in data]
        self.min_support = min_support
        self.min_confidence = min_confidence
        self.frequent_itemsets = self.generate_frequent_itemsets()

    @staticmethod
    def _to_transaction(transaction):
        """Convert one raw transaction (string or iterable) into a set of items."""
        if isinstance(transaction, str):
            # Tolerate "a, b" style spacing and stray empty tokens.
            return {token.strip() for token in transaction.split(',') if token.strip()}
        return set(transaction)

    @staticmethod
    def _as_itemset(entry):
        """Turn a bare item or a collection of items into a frozenset."""
        if isinstance(entry, (set, frozenset, list, tuple)):
            return frozenset(entry)
        return frozenset([entry])

    def get_frequent_items(self):
        """Return the individual items found in at least ``min_support`` transactions."""
        item_counts = Counter(chain.from_iterable(self.transactions))
        return {item for item, count in item_counts.items() if count >= self.min_support}

    def generate_candidates(self, prev_frequent_items, length):
        """Join previous-level itemsets pairwise into distinct candidates of size ``length``.

        Args:
            prev_frequent_items: Iterable of bare items and/or itemsets.
            length: Number of items every candidate must contain.

        Returns:
            List of distinct frozensets of exactly ``length`` items.
        """
        itemsets = [self._as_itemset(entry) for entry in prev_frequent_items]
        candidates = []
        seen = set()  # constant-time duplicate check
        for left, right in combinations(itemsets, 2):
            union_set = left | right
            if len(union_set) == length and union_set not in seen:
                seen.add(union_set)
                candidates.append(union_set)
        return candidates

    def generate_frequent_itemsets(self):
        """Return all frequent itemsets as frozensets, smallest sizes first."""
        frequent_itemsets = []
        k = 1
        # Level 1 is wrapped so every level holds frozensets.
        current_level = {frozenset([item]) for item in self.get_frequent_items()}

        while current_level:
            frequent_itemsets.extend(current_level)
            k += 1
            candidates = self.generate_candidates(current_level, k)
            candidate_counts = Counter(
                candidate
                for transaction in self.transactions
                for candidate in candidates
                if candidate <= transaction
            )
            current_level = {
                itemset
                for itemset, count in candidate_counts.items()
                if count >= self.min_support
            }

        return frequent_itemsets

    def calculate_support(self, itemset):
        """Return how many transactions contain every item of ``itemset`` (a set or frozenset)."""
        return sum(1 for transaction in self.transactions if itemset.issubset(transaction))

    def calculate_confidence(self, antecedent, consequent):
        """Return ``(confidence, direction)`` for the rule antecedent -> consequent.

        Confidence is support(antecedent ∪ consequent) / support(antecedent).
        The direction is "Positive" when the confidence reaches
        ``min_confidence`` and "Negative" otherwise. ``(0, None)`` is returned
        when the antecedent never occurs.
        """
        antecedent_support = self.calculate_support(antecedent)
        if antecedent_support == 0:
            return 0, None  # avoid division by zero

        rule_support = self.calculate_support(antecedent.union(consequent))
        confidence = rule_support / antecedent_support
        # A ratio of counts is never negative, so comparing with 0 could not
        # yield "Negative"; the configured threshold is the meaningful cut-off.
        direction = "Positive" if confidence >= self.min_confidence else "Negative"
        return confidence, direction

    def calculate_lift(self, rule):
        """Return ``(lift, direction)`` for ``rule``, an ``(antecedent, consequent)`` pair.

        Lift is N * support(A ∪ C) / (support(A) * support(C)) with N the
        number of transactions. The direction is "Positive" for lift >= 1 and
        "Negative" below 1. ``(0, None)`` is returned when the two sides never
        co-occur.
        """
        antecedent, consequent = rule
        rule_support = self.calculate_support(antecedent.union(consequent))

        # If the sides never co-occur the lift is 0; this also covers the only
        # case where either individual support (the denominator) could be 0.
        if rule_support == 0:
            return 0, None

        antecedent_support = self.calculate_support(antecedent)
        consequent_support = self.calculate_support(consequent)
        lift = (len(self.transactions) * rule_support) / (antecedent_support * consequent_support)
        # NOTE(review): a lift of exactly 1 indicates statistical independence
        # but is labelled "Positive" here; confirm that is the intended reading.
        direction = "Positive" if lift >= 1 else "Negative"
        return lift, direction

    def determine_dependency(self, confidence_direction, lift_direction):
        """Combine the two direction labels into a dependency label.

        Returns "Dependent" when both labels agree, "Independent" when they
        disagree, and "No Correlation" when either label is anything other
        than "Positive" or "Negative" (for example ``None``).
        """
        known = ("Positive", "Negative")
        if confidence_direction not in known or lift_direction not in known:
            return "No Correlation"
        return "Dependent" if confidence_direction == lift_direction else "Independent"

    def generate_association_rules(self):
        """Return every rule derivable from the frequent itemsets, with labels.

        No confidence filter is applied; ``min_confidence`` only decides the
        confidence label.

        Returns:
            List of ``(antecedent, consequent, confidence,
            confidence_direction, lift, lift_direction, dependency)`` tuples
            with both sides as frozensets.
        """
        rules = []
        for itemset in self.frequent_itemsets:
            if len(itemset) < 2:
                continue
            for antecedent_size in range(1, len(itemset)):
                for combo in combinations(itemset, antecedent_size):
                    antecedent = frozenset(combo)
                    consequent = itemset - antecedent

                    confidence, confidence_direction = self.calculate_confidence(antecedent, consequent)
                    lift, lift_direction = self.calculate_lift((antecedent, consequent))
                    dependency = self.determine_dependency(confidence_direction, lift_direction)

                    rules.append((antecedent, consequent, confidence, confidence_direction, lift, lift_direction, dependency))

        return rules

# Demo run: load the sample workbook in horizontal form.
input_file, output_file = '/content/sample_data/Horizontal_Format.xlsx', 'output_file.xlsx'
modified_data = convert_excel(input_file, output_file)

# The class splits the comma-separated "items" strings itself.
raw_item_strings = modified_data['items'].tolist()
apriori = AprioriAlgorithm(raw_item_strings)

# Frequent itemsets were mined in the constructor; show them as plain sets.
print("Frequent Itemsets:")
for itemset in apriori.frequent_itemsets:
    print(set(itemset))

# Derive the association rules and print one labelled line per rule.
association_rules = apriori.generate_association_rules()
print("\nAssociation Rules:")
for antecedent, consequent, confidence, confidence_direction, lift, lift_direction, dependency in association_rules:
    print(f"{set(antecedent)} -> {set(consequent)} (Confidence: {confidence:.2f} ({confidence_direction}), Lift: {lift:.2f} ({lift_direction}), Dependency: {dependency})")


Frequent Itemsets:
{'k'}
{'o'}
{'y'}
{'e'}
{'m'}
{'k', 'y'}
{'k', 'e'}
{'o', 'e'}
{'k', 'o'}
{'k', 'm'}
{'k', 'e', 'o'}

Association Rules:
{'y'} -> {'k'} (Confidence: 1.00 (Positive), Lift: 1.00 (Positive), Dependency: Dependent)
{'k'} -> {'y'} (Confidence: 0.60 (Positive), Lift: 1.00 (Positive), Dependency: Dependent)
{'e'} -> {'k'} (Confidence: 1.00 (Positive), Lift: 1.00 (Positive), Dependency: Dependent)
{'k'} -> {'e'} (Confidence: 0.80 (Positive), Lift: 1.00 (Positive), Dependency: Dependent)
{'o'} -> {'e'} (Confidence: 1.00 (Positive), Lift: 1.25 (Positive), Dependency: Dependent)
{'e'} -> {'o'} (Confidence: 0.75 (Positive), Lift: 1.25 (Positive), Dependency: Dependent)
{'o'} -> {'k'} (Confidence: 1.00 (Positive), Lift: 1.00 (Positive), Dependency: Dependent)
{'k'} -> {'o'} (Confidence: 0.60 (Positive), Lift: 1.00 (Positive), Dependency: Dependent)
{'m'} -> {'k'} (Confidence: 1.00 (Positive), Lift: 1.00 (Positive), Dependency: Dependent)
{'k'} -> {'m'} (Confidence: 0.60 (Positiv