In [1]:
'''
    Representation of trajectory and corresponding dataset in our work
'''

import numpy as np


class Itemset():
    """A single transaction, stored as a set of distinct items.

    Attributes:
        data: set built from the items passed to the constructor.
        data_length: ``len()`` of the constructor argument, i.e. duplicates
            are counted, so it can be larger than ``len(self.data)``.

    Note: the class defines ``__eq__`` without ``__hash__``, so instances are
    not hashable.
    """

    def __init__(self,data):
        self.data = set(data)
        self.data_length = len(data)

    def checkSub(self,query):
        """Return True when every element of ``query`` belongs to this itemset."""
        return all(i in self.data for i in query)

    def length(self):
        """Return the number of distinct items."""
        return len(self.data)

    def __eq__(self,other):
        """Two itemsets are equal when they hold the same items."""
        if not isinstance(other, Itemset):
            return NotImplemented
        return self.id() == other.id()

    def id(self):
        """Return the items as a sorted tuple (a canonical identifier).

        ``sorted`` is used rather than ``list.sort()``: the latter sorts in
        place and returns None, which made ``tuple(...)`` raise TypeError.
        """
        return tuple(sorted(self.data))

    #def get_line(self,index):
        #return self.data[index]


class ItemClient():
    """One client: an ordered list of the lines (transactions) added to it."""

    def __init__(self):
        # lines are kept in insertion order
        self.data = list()

    def add_line(self, line):
        """Append ``line`` at the end of this client's list."""
        self.data.append(line)

    def get_line(self, index):
        """Return the line stored at position ``index``."""
        stored = self.data
        return stored[index]

    def get_line_num(self):
        """Return how many lines this client currently holds."""
        count = len(self.data)
        return count

    def __getitem__(self, index):
        """Support ``client[index]`` by delegating to ``get_line``."""
        return self.get_line(index)


class ItemDataSet():
    """The whole dataset: an ordered list of records (one per client)."""

    def __init__(self):
        self.record = []

    def add_line(self, line):
        """Append one record to the dataset."""
        self.record.append(line)

    def get_line(self, index):
        """Return the record stored at position ``index``."""
        return self.record[index]

    def get_line_num(self):
        """Return the number of records in the dataset."""
        return len(self.record)

    def __getitem__(self, index):
        """Support ``dataset[index]`` by delegating to ``get_line``."""
        return self.get_line(index)

    # The purpose of this function is to return the idx-th client
    def get_trajectory(self, idx):
        """Return the idx-th record (same result as ``get_line``)."""
        return self.record[idx]

    def show_dataset(self, filename):
        """Append every element of every record's ``data`` to ``filename``.

        Elements are written with ``"{}".format(...)`` and no separator.
        The file is opened in append mode and is closed when the method
        returns, including when a write fails.
        """
        with open(filename, 'a') as f:
            for i in self.record:
                for j in i.data:
                    f.write("{}".format(j))


In [None]:
import pickle
import re

# Convert a txt file to a pickle file: every input line becomes one Itemset,
# and consecutive lines are grouped into ItemClient objects.

data = ItemDataSet()
num = 0  # number of lines (transactions) read so far
# FedFIM can be used in the case where the client has multiple transactions.
# For convenience reasons, in the experiments, we assume that each client has 7 transactions.
trans_num = 7
with open("T10I4D100K.txt", "r") as f:

    C = ItemClient()
    for line in f:
        num = num + 1
        # every run of digits on the line is read as one integer item
        l1 = [int(i) for i in re.findall(r'\d+', line)]
        C.add_line(Itemset(l1))
        if num % trans_num == 0:
            # the current client is full: store it and start a new one
            data.add_line(C)
            C = ItemClient()

    # keep the last client even when it holds fewer than trans_num transactions
    if C.data != []:
        data.add_line(C)

# Count the clients actually stored, so a trailing partial client is included
# (counting only full groups under-reported the total by one in that case).
t = data.get_line_num()

# Use a context manager so the pickle file is flushed and closed.
with open('connect.pickle', 'wb') as pickle_file:
    pickle.dump(data, pickle_file)

print("The total number of transactions in the dataset is: ",num)
print("Num of clients: ",t)

In [None]:
pip install bitarray

In [None]:
import pickle
import math
import numpy as np
import random
import bitarray
from datetime import datetime
import json
import os
import re  # ajouté

# --- Main classes and functions (simplified) ---

class BMCTreeNode:
    """Node of the BMC tree.

    Attributes:
        item: value registered by this node (used as the key in the parent's
            ``children`` mapping).
        count: counter attached to the node.
        bitmap_code: bitmap attached to the node.
        children: dict mapping a child's ``item`` to the child node.
    """

    def __init__(self, item, count, bitmap_code):
        self.item = item
        self.count = count
        self.bitmap_code = bitmap_code
        self.children = {}

    def get_child_registering_item(self, item):
        """Return the child keyed by ``item``, or None when there is none."""
        if item in self.children:
            return self.children[item]
        return None

    def add_child(self, child):
        """Register ``child`` under its own ``item`` (replaces any previous one)."""
        key = child.item
        self.children[key] = child

def clean_BMC_tree(root):
    """Recursively strip the ``item`` and ``children`` attributes from a tree.

    Descendants are processed first, then the two attributes are removed from
    ``root`` itself. Any other attribute of a node is left in place.
    """
    for subtree in root.children.values():
        clean_BMC_tree(subtree)
    delattr(root, 'item')
    delattr(root, 'children')

class FrequentItemsetTreeNode:
    """Node of the frequent-itemset tree; every field starts out empty."""

    def __init__(self):
        # item is unset and the count is zero until the caller fills them in
        self.item, self.count = None, 0
        # each instance gets its own fresh lists
        self.children, self.NegNodeSet = list(), list()

class NegFIN:
    def __init__(self, args, dataset):
        self.dataset = dataset
        self.min_support = args.k
        self.client_num = dataset.get_line_num()
        self.num_of_transactions = 0
        self.num_participants = args.num_participants
        self.output_file = "outputNegFIN.txt"
        self.F1 = None
        self.item_to_NodeSet = None
        self.writer = None
        self.num_of_frequent_itemsets = 0
        self.execution_time = None
        self.eta = 1 / (1 + math.exp(args.epsilon))
        self.xi = args.xi
        self.FPs = []

    def runAlgorithm(self):
        start_timestamp = datetime.now()
        self.__generate_NodeSets_of_1_itemsets()
        root = self.__create_root_of_frequent_itemset_tree()
        self.writer = open(self.output_file, 'w')
        itemset_buffer = [None] * len(self.F1)
        itemset_length = 0
        FIS_parent_buffer = [None] * len(self.F1)
        FIS_parent_length = 0
        num_of_children = len(root.children)
        for _ in range(num_of_children):
            child = root.children[0]
            itemset_buffer[itemset_length] = child.item
            del root.children[0]
            self.__construct_frequent_itemset_tree(child, itemset_buffer, itemset_length + 1, root.children,
                                                   FIS_parent_buffer, FIS_parent_length)
        self.writer.close()
        end_timestamp = datetime.now()
        self.execution_time = (end_timestamp - start_timestamp).total_seconds() * 1000

    def __generate_NodeSets_of_1_itemsets(self):
        clientresponse = []
        item_name_to_count = {}

        if self.num_participants > self.client_num:
            self.num_participants = self.client_num

        RID = random.sample(range(0, self.client_num), self.num_participants)

        num_r = -1
        for cid in range(len(RID)):
            for j in range(len(self.dataset.record[RID[cid]].data)):
                clientresponse.append([])
                num_r += 1
                fx = list(self.dataset.record[RID[cid]].data[j].data)
                for t in range(len(fx)):
                    draw = np.random.random_sample()
                    response = 1
                    if draw < self.eta:
                        response = 1 - response
                    if response == 1:
                        clientresponse[num_r].append(fx[t])
                        item_name_to_count[fx[t]] = item_name_to_count.get(fx[t], 0) + 1

        self.num_of_transactions = num_r + 1
        item_name_to_count.pop('', None)
        self.min_count = math.ceil(self.num_of_transactions * self.min_support)
        upper_thres = self.min_support * (1 - self.eta) + (1 - self.min_support) * self.eta + math.sqrt(
            -math.log(self.xi) / (2 * self.num_of_transactions))
        self.F1 = [{'name': item_name, 'count': item_count} for item_name, item_count in item_name_to_count.items()
                   if upper_thres * self.num_of_transactions <= item_count]
        self.F1.sort(key=lambda item: item['count'])
        self.F1 = tuple(self.F1)
        item_name_to_item_index = {item['name']: idx for idx, item in enumerate(self.F1)}
        self.item_to_NodeSet = {idx: [] for idx in item_name_to_item_index.values()}

        bmc_tree_root = BMCTreeNode(item=None, count=None, bitmap_code=bitarray.bitarray([False] * len(self.F1)))
        for cid in range(self.num_of_transactions):
            transaction = [item_name_to_item_index[item] for item in clientresponse[cid] if item in item_name_to_item_index]
            transaction.sort(reverse=True)
            cur_root = bmc_tree_root
            for item in transaction:
                N = cur_root.get_child_registering_item(item)
                if N is None:
                    bitmap_code = cur_root.bitmap_code.copy()
                    bitmap_code[item] = True
                    N = BMCTreeNode(item=item, count=0, bitmap_code=bitmap_code)
                    cur_root.add_child(N)
                    self.item_to_NodeSet[item].append(N)
                N.count += 1
                cur_root = N
        clean_BMC_tree(bmc_tree_root)

    def __create_root_of_frequent_itemset_tree(self):
        root = FrequentItemsetTreeNode()
        for item in range(len(self.F1)):
            child = FrequentItemsetTreeNode()
            child.item = item
            child.count = self.F1[item]['count']
            child.NegNodeSet = self.item_to_NodeSet[item]
            root.children.append(child)
        return root

    def __write_itemsets_to_file(self, N, itemset_buffer, N_itemset_length, FIS_parent_buffer, FIS_parent_length):
        file_buffer = []
        self.num_of_frequent_itemsets += 1
        itemset_string = [self.F1[itemset_buffer[i]]['name'] for i in range(N_itemset_length)]
        t = {'data': itemset_string.copy(), 'support': N.count / self.num_of_transactions}
        self.FPs.append(t)
        itemset_string.append(f'#SUP: {N.count}\n')
        file_buffer.append(' '.join(str(x) for x in itemset_string))
        if FIS_parent_length > 0:
            max_comb = 1 << FIS_parent_length
            for i in range(1, max_comb):
                itemset_string = [self.F1[itemset_buffer[i]]['name'] for i in range(N_itemset_length)]
                subset_string = [self.F1[FIS_parent_buffer[j]]['name'] for j in range(FIS_parent_length) if (i & (1 << j)) > 0]
                itemset_string.extend(subset_string)
                t = {'data': itemset_string.copy(), 'support': N.count / self.num_of_transactions}
                self.FPs.append(t)
                itemset_string.append(f'#SUP: {N.count}\n')
                line = ' '.join(str(x) for x in itemset_string)
                file_buffer.append(line)
                self.num_of_frequent_itemsets += 1
        self.writer.writelines(file_buffer)

    def __construct_frequent_itemset_tree(self, N, itemset_buffer, N_itemset_length, N_right_siblings, FIS_parent_buffer, FIS_parent_length):
        for sibling in N_right_siblings:
            child = FrequentItemsetTreeNode()
            sum_of_NegNodeSets_counts = 0
            if N_itemset_length == 1:
                for ni in N.NegNodeSet:
                    if not ni.bitmap_code[sibling.item]:
                        child.NegNodeSet.append(ni)
                        sum_of_NegNodeSets_counts += ni.count
            else:
                for nj in sibling.NegNodeSet:
                    if nj.bitmap_code[N.item]:
                        child.NegNodeSet.append(nj)
                        sum_of_NegNodeSets_counts += nj.count
            child.count = N.count - sum_of_NegNodeSets_counts
            if self.min_count <= child.count:
                if N.count == child.count:
                    FIS_parent_buffer[FIS_parent_length] = sibling.item
                    FIS_parent_length += 1
                else:
                    child.item = sibling.item
                    N.children.append(child)
        self.__write_itemsets_to_file(N, itemset_buffer, N_itemset_length, FIS_parent_buffer, FIS_parent_length)
        number_of_children = len(N.children)
        for _ in range(number_of_children):
            child = N.children[0]
            itemset_buffer[N_itemset_length] = child.item
            del N.children[0]
            self.__construct_frequent_itemset_tree(child, itemset_buffer, N_itemset_length + 1, N.children, FIS_parent_buffer, FIS_parent_length)

class FLFIM_Model:
    """Thin wrapper that runs NegFIN on a dataset with the given arguments."""

    def __init__(self, args, dataset):
        self.args, self.dataset = args, dataset

    def run(self):
        """Run a fresh NegFIN instance and return its ``FPs`` list."""
        miner = NegFIN(self.args, self.dataset)
        miner.runAlgorithm()
        found = miner.FPs
        return found

class Args:
    """Plain container for the experiment parameters.

    Stores ``xi``, ``k``, ``num_participants``, ``epsilon`` and ``dataset``
    unchanged as attributes of the same name.
    """

    def __init__(self, xi, k, num_participants, epsilon, dataset):
        self.xi, self.k = xi, k
        self.num_participants, self.epsilon = num_participants, epsilon
        # dataset attribute, read by SupportCountPickleName
        self.dataset = dataset

# --- Function to compute the F1 score ---

def compute_f1_score(predicted, truth):
    """Compare two collections of itemsets and return (precision, recall, f1).

    Each itemset is turned into a sorted tuple so that item order and
    duplicate itemsets do not matter. A score whose denominator is empty is
    reported as 0. The scores and the TP/FP/FN counts are also printed.
    """
    # Canonical, order-independent form of every itemset
    predicted_set = {tuple(sorted(itemset)) for itemset in predicted}
    truth_set = {tuple(sorted(itemset)) for itemset in truth}

    tp = len(predicted_set & truth_set)

    if predicted_set:
        precision = tp / len(predicted_set)
    else:
        precision = 0

    if truth_set:
        recall = tp / len(truth_set)
    else:
        recall = 0

    if precision + recall > 0:
        f1 = 2 * precision * recall / (precision + recall)
    else:
        f1 = 0

    print(f"Precision: {precision:.3f}, Recall: {recall:.3f}, F1-score: {f1:.3f}")
    print(f"TP: {tp}, FP: {len(predicted_set) - tp}, FN: {len(truth_set) - tp}")
    return precision, recall, f1

def SupportCountPickleName(args):
    """Build the support-count file name derived from ``args.dataset``.

    ``str(args.dataset)`` is stripped of every character that is not an ASCII
    letter, a digit, ``_`` or ``-`` before being placed in the name.
    """
    raw_label = str(args.dataset)
    sanitized_dataset = re.sub(r'[^a-zA-Z0-9_-]', '', raw_label)
    return ''.join(("supportcount_", sanitized_dataset, '.txt'))

def GetGroundTruth(args):
    """Load the stored itemsets for ``args`` from the support-count file.

    Returns the ``'data'`` entry of every unpickled record, or an empty list
    (after printing a message) when the file does not exist.
    """
    scName = SupportCountPickleName(args)
    if not os.path.isfile(scName):
        print(f"Ground truth file '{scName}' not found.")
        return []
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use it on files produced by this project.
    with open(scName,'rb') as fp:
        sc_rec = pickle.load(fp)
    return [entry['data'] for entry in sc_rec]

class groundTruth():
    """Runs NegFIN on a dataset and returns the itemsets it finds.

    NOTE(review): this performs exactly the same steps as FLFIM_Model, so its
    result goes through the same perturbation and sampling -- confirm that this
    is the intended reference for the comparison.
    """

    def __init__(self,args,dataset):
        self.args, self.dataset = args, dataset

    def run(self):
        """Run a fresh NegFIN instance and return its ``FPs`` list."""
        miner = NegFIN(self.args,self.dataset)
        miner.runAlgorithm()
        found = miner.FPs
        return found

if __name__ == "__main__":
    # Paths used by this experiment; the status messages are built from them
    # so they always name the files actually read and written.
    dataset_path = 'connect.pickle'
    results_path = 'FedFimconnect.json'

    # Load the dataset
    print(f"Chargement du dataset {dataset_path}...")
    with open(dataset_path, 'rb') as f:
        dataset = pickle.load(f)

    # Parameter grids to test
    epsilons = [1.0, 3.0, 5.0, 7.0, 9.0]
    kis = [0.1, 0.3, 0.5, 0.7, 0.9]

    results = []

    for epsilon in epsilons:
        for ki in kis:
            print(f"\nTest avec epsilon={epsilon}, three={ki}")

            args = Args(xi=0.2, k=ki, num_participants=250, epsilon=epsilon, dataset=dataset)

            # Run the noisy model
            model_noisy = FLFIM_Model(args, dataset)
            noisy_results = model_noisy.run()
            predicted_itemsets = [fp['data'] for fp in noisy_results]

            # Run the ground-truth model
            truth_model = groundTruth(args, dataset)
            truth_results = truth_model.run()
            true_itemsets = [fp['data'] for fp in truth_results]

            # Compute the scores
            precision, recall, f1 = compute_f1_score(predicted_itemsets, true_itemsets)

            results.append({
                # NOTE(review): the label says "chess" while the data comes
                # from connect.pickle -- confirm which name is intended.
                'dataset_name': "chess",
                'epsilon': epsilon,
                'min_sup': ki,
                'f1_score': f1
            })

    # Save the results to a JSON file
    with open(results_path, 'w') as json_file:
        json.dump(results, json_file, indent=4)

    print(f"\nTests terminés. Résultats sauvegardés dans {results_path}")
