In [1]:
from itertools import combinations
from collections import defaultdict
import json
import csv

class ECLAT:
    """Mine frequent itemsets and association rules from transactions.

    Every item is mapped to the ids of the transactions that contain it (a
    vertical layout); the support of an itemset is the number of transaction
    ids shared by all of its items.  Frequent itemsets are grown level by
    level: frequent k-itemsets are joined into (k + 1)-candidates, which are
    kept only when they are frequent themselves.

    ``min_support`` is an absolute number of transactions when it is 1 or
    more, and a fraction of the number of transactions when it lies strictly
    between 0 and 1.  An itemset that occurs in no transaction is never
    considered frequent.  ``None`` items are ignored.
    """

    def __init__(self, min_support):
        self.min_support = min_support
        # item -> ascending ids of the transactions that contain it
        self.itemsets = defaultdict(list)
        # itemset size -> list of frequent itemsets (frozensets) of that size
        self.freq_itemsets = {}
        # (antecedent, consequent) splits of every frequent itemset of size >= 2
        self.associations = []
        # item -> frozenset of transaction ids, used for support counting
        self._tidsets = {}
        self._n_transactions = 0

    def fit(self, transactions):
        """Index *transactions* and mine frequent itemsets and associations.

        *transactions* is an iterable of iterables of hashable items.  Results
        of a previous call are discarded, so refitting does not accumulate
        duplicates.
        """
        self.transactions = transactions
        self.itemsets = defaultdict(list)
        self.freq_itemsets = {}
        self.associations = []
        self._create_itemsets()
        self._find_frequent_itemsets()
        self._generate_associations()

    def _create_itemsets(self):
        """Build the item -> transaction-id index."""
        total = 0
        for tid, transaction in enumerate(self.transactions):
            total += 1
            for item in transaction:
                if item is None:
                    continue
                tids = self.itemsets[item]
                # An item repeated inside one transaction is counted once.
                if not tids or tids[-1] != tid:
                    tids.append(tid)
        self._n_transactions = total
        self._tidsets = {
            item: frozenset(tids) for item, tids in self.itemsets.items()
        }

    def _is_frequent(self, count):
        """Return True when a support *count* satisfies ``min_support``."""
        if count <= 0:
            return False
        if 0 < self.min_support < 1:
            # Compare as a ratio so thresholds such as 0.7 match 7 of 10
            # exactly instead of failing on 0.7 * 10 rounding upwards.
            return count / self._n_transactions >= self.min_support
        return count >= self.min_support

    def _find_frequent_itemsets(self):
        """Fill ``freq_itemsets`` with the frequent itemsets of every size."""
        level = self._prune([frozenset([item]) for item in self.itemsets])
        k = 1
        while level:
            self.freq_itemsets[k] = level
            k += 1
            level = self._prune(self._join_sets(level, k))

    def _join_sets(self, itemsets, k):
        """Return the distinct size-*k* candidates built from *itemsets*.

        *itemsets* are the frequent itemsets of size ``k - 1``.  A candidate
        is kept only if every one of its ``k - 1`` sized subsets is in
        *itemsets*, because a superset of an infrequent set cannot be
        frequent.
        """
        previous = [frozenset(itemset) for itemset in itemsets]
        known = set(previous)
        seen = set()
        joined_sets = []
        for left, right in combinations(previous, 2):
            candidate = left | right
            if len(candidate) != k or candidate in seen:
                continue
            seen.add(candidate)
            if all(candidate - {item} in known for item in candidate):
                joined_sets.append(candidate)
        return joined_sets

    def _prune(self, itemsets):
        """Keep only the itemsets whose support satisfies ``min_support``."""
        return [
            itemset
            for itemset in itemsets
            if self._is_frequent(self._calculate_support(itemset))
        ]

    def _calculate_support(self, itemset):
        """Return the number of transactions containing every item of *itemset*.

        An empty itemset is contained in every transaction.
        """
        shared = None
        for item in itemset:
            tids = self._tidsets.get(item)
            if not tids:
                return 0
            shared = tids if shared is None else shared & tids
            if not shared:
                return 0
        return self._n_transactions if shared is None else len(shared)

    def get_frequent_itemsets(self):
        """Return the mapping itemset size -> list of frequent itemsets."""
        return self.freq_itemsets

    def generate_association_rules(self, min_confidence):
        """Return ``(antecedent, consequent, confidence)`` rules.

        Rules are derived from every frequent itemset of size 2 or more, have
        a single-item consequent, and are kept when their confidence is at
        least *min_confidence*.
        """
        rules = []
        for itemset_length, itemsets in self.freq_itemsets.items():
            if itemset_length < 2:
                continue
            for itemset in itemsets:
                self._generate_rules_from_itemset(itemset, rules, min_confidence)
        return rules

    def _generate_rules_from_itemset(self, itemset, rules, min_confidence):
        """Append to *rules* the confident rules obtained from *itemset*."""
        itemset = frozenset(itemset)
        for subset in combinations(itemset, len(itemset) - 1):
            antecedent = frozenset(subset)
            consequent = itemset - antecedent
            confidence = self._calculate_confidence(antecedent, consequent)
            if confidence >= min_confidence:
                rules.append((antecedent, consequent, confidence))

    def _calculate_confidence(self, antecedent, consequent):
        """Return support(antecedent | consequent) / support(antecedent).

        Returns 0.0 when the antecedent occurs in no transaction.
        """
        support_antecedent = self._calculate_support(antecedent)
        if support_antecedent == 0:
            return 0.0
        support_itemset = self._calculate_support(antecedent.union(consequent))
        return support_itemset / support_antecedent

    def _generate_associations(self):
        """Fill ``associations`` from every frequent itemset of size >= 2."""
        for itemset_length, itemsets in self.freq_itemsets.items():
            if itemset_length < 2:
                continue
            for itemset in itemsets:
                self._generate_associations_from_itemset(itemset)

    def _generate_associations_from_itemset(self, itemset):
        """Record every split of *itemset* into two non-empty parts."""
        itemset = frozenset(itemset)
        for subset_length in range(1, len(itemset)):
            for subset in combinations(itemset, subset_length):
                antecedent = frozenset(subset)
                consequent = itemset - antecedent
                self.associations.append((antecedent, consequent))

# Load issues from JSON file
with open('../flutter_30-issues.json', 'r', encoding='utf-8') as f:
    issues = json.load(f)

# Preprocessing: one transaction per issue, made of the issue id, its
# state_reason and the names of its labels.
transactions = []
for issue in issues:
    transaction = [str(issue["id"])]  # Convert items to strings
    # A JSON null state_reason loads as None; leave it out so that every
    # item is a string and can be joined into a CSV cell below.
    if issue["state_reason"] is not None:
        transaction.append(issue["state_reason"])
    transaction.extend(label["name"] for label in issue["labels"])
    transactions.append(transaction)

# Set minimum support and confidence
min_support = 0.3
min_confidence = 0.7

# Apply ECLAT algorithm
eclat = ECLAT(min_support)
eclat.fit(transactions)

# Get frequent itemsets and association rules
freq_itemsets = eclat.get_frequent_itemsets()
association_rules = eclat.generate_association_rules(min_confidence)
associations = eclat.associations

# Write results to CSV files.  Items are sorted inside each cell because set
# iteration order is not stable between runs.
with open('frequent_itemsets.csv', 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['Itemset', 'Support']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for itemset_length, itemsets in freq_itemsets.items():
        for itemset in itemsets:
            if all(isinstance(item, str) for item in itemset):
                writer.writerow({
                    'Itemset': ','.join(sorted(itemset)),
                    'Support': eclat._calculate_support(itemset),
                })

# Write association rules to CSV file
with open('association_rules.csv', 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['Antecedent', 'Consequent', 'Confidence']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for antecedent, consequent, confidence in association_rules:
        # map(str, ...) keeps the export from raising TypeError on an item
        # that is not a string.
        writer.writerow({
            'Antecedent': ','.join(sorted(map(str, antecedent))),
            'Consequent': ','.join(sorted(map(str, consequent))),
            'Confidence': confidence,
        })

# Write associations to CSV file
with open('associations.csv', 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['Antecedent', 'Consequent']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for antecedent, consequent in associations:
        writer.writerow({
            'Antecedent': ','.join(sorted(map(str, antecedent))),
            'Consequent': ','.join(sorted(map(str, consequent))),
        })

print("Results saved to frequent_itemsets.csv, association_rules.csv, and associations.csv")

Results saved to frequent_itemsets.csv, association_rules.csv, and associations.csv


In [None]:
import json
from itertools import combinations
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

class ECLAT:
    """Derive association rules between items that co-occur in transactions.

    Each item is mapped to the ids of the transactions containing it.
    Frequent itemsets are found depth-first by intersecting those id sets,
    and every frequent itemset of two or more items is split into all
    antecedent/consequent pairs together with the rule's confidence.

    ``min_support`` is the minimum fraction of transactions an itemset must
    appear in.  An itemset that occurs in no transaction is never frequent.
    """

    def __init__(self, min_support):
        self.min_support = min_support
        # item -> ascending ids of the transactions that contain it
        self.itemsets = {}
        # (antecedent, consequent, confidence) with frozenset antecedent/consequent
        self.associations = []

    def fit(self, transactions):
        """Index *transactions* (a sequence of item sequences) and build rules.

        Results of a previous call are discarded.
        """
        self.transactions = transactions
        self.itemsets = {}
        self.associations = []
        self._create_itemsets()
        self._generate_associations()

    def _create_itemsets(self):
        """Build the item -> transaction-id index."""
        for tid, transaction in enumerate(self.transactions):
            for item in transaction:
                tids = self.itemsets.setdefault(item, [])
                # An item repeated inside one transaction is counted once.
                if not tids or tids[-1] != tid:
                    tids.append(tid)

    def _find_frequent_itemsets(self):
        """Return a dict mapping each frequent itemset to its transaction ids."""
        total = len(self.transactions)
        if total == 0:
            return {}

        def is_frequent(tids):
            return bool(tids) and len(tids) / total >= self.min_support

        roots = []
        for item, tid_list in self.itemsets.items():
            tids = frozenset(tid_list)
            if is_frequent(tids):
                roots.append((item, tids))

        frequent = {}
        # Each stack entry is a prefix itemset plus the items that may extend
        # it, paired with the transaction ids of (prefix + item).
        stack = [(frozenset(), roots)]
        while stack:
            prefix, candidates = stack.pop()
            for position, (item, tids) in enumerate(candidates):
                itemset = prefix | {item}
                frequent[itemset] = tids
                extensions = []
                for other, other_tids in candidates[position + 1:]:
                    shared = tids & other_tids
                    if is_frequent(shared):
                        extensions.append((other, shared))
                if extensions:
                    stack.append((itemset, extensions))
        return frequent

    def _generate_associations(self):
        """Fill ``associations`` from every frequent itemset of size >= 2."""
        for itemset in self._find_frequent_itemsets():
            for subset_length in range(1, len(itemset)):
                for subset in combinations(itemset, subset_length):
                    antecedent = frozenset(subset)
                    consequent = itemset - antecedent
                    confidence = self._calculate_confidence(antecedent, consequent)
                    self.associations.append((antecedent, consequent, confidence))

    def _calculate_confidence(self, antecedent, consequent):
        """Return support(antecedent | consequent) / support(antecedent)."""
        support_antecedent = self._calculate_support(antecedent)
        if support_antecedent == 0:
            return 0  # Return default confidence if support of antecedent is zero
        support_itemset = self._calculate_support(antecedent.union(consequent))
        return support_itemset / support_antecedent

    def _calculate_support(self, itemset):
        """Return the fraction of transactions containing all of *itemset*.

        Returns 0.0 when there are no transactions; an empty itemset is
        contained in every transaction.
        """
        total = len(self.transactions)
        if total == 0:
            return 0.0
        shared = None
        for item in itemset:
            tids = self.itemsets.get(item)
            if not tids:
                return 0.0
            shared = set(tids) if shared is None else shared.intersection(tids)
        if shared is None:
            return 1.0
        return len(shared) / total

# Load issues from JSON file
with open('../flutter_30-issues.json', 'r', encoding='utf-8') as f:
    issues = json.load(f)

# Extract relevant attributes for analysis (e.g., reasons and issue types)
transactions = []
for issue in issues:
    transaction = []
    # A JSON null state_reason loads as None; leave it out so it does not
    # become a row/column label of the heatmap.
    if issue["state_reason"] is not None:
        transaction.append(issue["state_reason"])
    transaction.extend(label["name"] for label in issue["labels"])
    transactions.append(transaction)

# Set minimum support
min_support = 0.1

# Apply ECLAT algorithm
eclat = ECLAT(min_support)
eclat.fit(transactions)

# Create a DataFrame to store association strength.  Rows and columns cover
# every attribute that takes part in an association, not just the attributes
# of the first issue, so each cell addressed below exists.
attributes = sorted(
    {
        attribute
        for antecedent, consequent, _ in eclat.associations
        for attribute in antecedent | consequent
    },
    key=str,
)
association_strength = pd.DataFrame(float("nan"), index=attributes, columns=attributes)

# Calculate association strength between each attribute: keep the highest
# confidence seen for every (antecedent attribute, consequent attribute) pair.
for antecedent, consequent, confidence in eclat.associations:
    for a in antecedent:
        for c in consequent:
            current = association_strength.at[a, c]
            if pd.isna(current) or confidence > current:
                association_strength.at[a, c] = confidence

# Plot heatmap
if association_strength.empty:
    print("No associations found; nothing to plot")
else:
    plt.figure(figsize=(10, 8))
    sns.heatmap(association_strength.astype(float), annot=True, cmap="coolwarm", fmt=".2f")
    plt.title('Association Heatmap between GitHub Issue Attributes')
    plt.xlabel('Consequent')
    plt.ylabel('Antecedent')
    plt.show()
