<a href="https://colab.research.google.com/github/swapnil14g/dmdw/blob/main/Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import itertools
from collections import Counter

def openfile(filename):
    with open(filename, 'r') as file:
        data = file.readlines()
    l=[]
    for row in data:
        l.append(row.strip().split())
    return l

def apriori(dataset, min_support):
    """
    Implements the Apriori algorithm to find frequent itemsets.

    Args:
        dataset: A list of transactions (lists of items).
        min_support: Minimum support threshold for an itemset to be considered frequent.

    Returns:
        A list of frequent itemsets.
    """

    frequent_itemsets = []
    C1 = Counter(item for transaction in dataset for item in transaction)
    L1 = [item for item, count in C1.items() if count >= min_support]
    frequent_itemsets.append(L1)

    k = 2
    while L1:
        Ck = []
        for i in range(len(L1) - 1):
            for j in range(i + 1, len(L1)):
                candidate = L1[i] + L1[j]
                subsets = [candidate[i:j] for i in range(k - 1)]
                if all(subset in frequent_itemsets[k - 2] for subset in subsets):
                    Ck.append(candidate)

        Lk = [item for item in Ck if count_support(item, dataset) >= min_support]
        frequent_itemsets.append(Lk)
        L1 = Lk
        k += 1

    return frequent_itemsets

def count_support(itemset, dataset):
    """
    Calculates the support of an itemset in the dataset.

    Args:
        itemset: A list of items.
        dataset: A list of transactions (lists of items).

    Returns:
        The support of the itemset.
    """

    count = 0
    for transaction in dataset:
        if set(itemset).issubset(set(transaction)):
            count += 1
    return count / len(dataset)

def create_mapping(frequent_itemsets):
    """
    Creates a mapping from frequent itemsets to unique identifiers.

    Args:
        frequent_itemsets: A list of frequent itemsets.

    Returns:
        A dictionary mapping frequent itemsets to unique identifiers.
    """

    mapping = {}
    for i, itemset in enumerate(frequent_itemsets):
        # Convert the itemset (list) to a tuple to make it hashable
        mapping[tuple(itemset)] = chr(65 + i)
    return mapping

def compress_dataset(dataset, frequent_itemsets, mapping):
    """
    Compresses the dataset using the given frequent itemsets and mapping.

    Args:
        dataset: A list of transactions (lists of items).
        frequent_itemsets: A list of frequent itemsets.
        mapping: A dictionary mapping frequent itemsets to unique identifiers.

    Returns:
        A list of compressed transactions.
    """

    compressed_dataset = []
    for transaction in dataset:
        compressed_transaction = []
        for itemset in frequent_itemsets:
            if set(itemset).issubset(set(transaction)):
                # Convert the itemset to a tuple before looking it up in the mapping
                compressed_transaction.append(mapping[tuple(itemset)])
        compressed_dataset.append(compressed_transaction)
    return compressed_dataset

def decompress_dataset(compressed_dataset, mapping):
    """
    Decompresses the compressed dataset using the given mapping.

    Args:
        compressed_dataset: A list of compressed transactions.
        mapping: A dictionary mapping frequent itemsets to unique identifiers.

    Returns:
        A list of original transactions.
    """

    original_dataset = []
    for compressed_transaction in compressed_dataset:
        original_transaction = []
        for identifier in compressed_transaction:
            if identifier in mapping:  # Check if identifier exists in mapping
                original_transaction.extend(mapping[identifier])
            else:
                # Handle missing identifier (e.g., log a warning or raise an exception)
                print(f"Warning: Identifier '{identifier}' not found in mapping.")
        original_dataset.append(original_transaction)
    return original_dataset

dataset = openfile('/content/drive/MyDrive/Colab Notebooks/D_small.dat')

min_support = 0.5
frequent_itemsets = apriori(dataset, min_support)
mapping = create_mapping(frequent_itemsets)
compressed_dataset = compress_dataset(dataset, frequent_itemsets, mapping)
decompressed_dataset = decompress_dataset(compressed_dataset, mapping)

print("Frequent itemsets:", frequent_itemsets)
print("Mapping:", mapping)
print("Compressed dataset:", compressed_dataset)
print("Decompressed dataset:", decompressed_dataset)

Frequent itemsets: [['1', '3', '5', '7', '9', '11', '13', '15', '17', '19', '21', '23', '25', '27', '29', '31', '34', '36', '38', '40', '42', '44', '46', '48', '50', '52', '54', '56', '58', '60', '62', '64', '66', '68', '70', '72', '74', '12', '16', '20', '47', '51', '63', '24', '65', '43', '32', '73', '4', '33', '39', '71', '69', '10', '18', '14', '8', '49', '55', '6', '37', '28', '26', '75', '57', '45', '22', '2', '67', '35', '53', '41', '61', '30', '59'], ['17', '35', '37', '39', '57', '59'], []]
Mapping: {('1', '3', '5', '7', '9', '11', '13', '15', '17', '19', '21', '23', '25', '27', '29', '31', '34', '36', '38', '40', '42', '44', '46', '48', '50', '52', '54', '56', '58', '60', '62', '64', '66', '68', '70', '72', '74', '12', '16', '20', '47', '51', '63', '24', '65', '43', '32', '73', '4', '33', '39', '71', '69', '10', '18', '14', '8', '49', '55', '6', '37', '28', '26', '75', '57', '45', '22', '2', '67', '35', '53', '41', '61', '30', '59'): 'A', ('17', '35', '37', '39', '57', '59'):