In [14]:
import os
import pandas as pd
from LKC import *
from itertools import combinations
from tqdm import tqdm
from operator import itemgetter

In [15]:
# Locate the directory two levels above the current working directory and
# derive the input ("Data") and output ("Results") folders beneath it.
current_directory = os.getcwd()
two_folders_up = os.path.abspath(
    os.path.join(current_directory, os.pardir, os.pardir)
)
data_folder, results_folder = (
    os.path.join(two_folders_up, folder_name) for folder_name in ('Data', 'Results')
)

In [16]:
def low_percentage_events(numbers, percentage):
    """Select the events whose ``pr_e`` value is at most ``percentage``.

    Parameters
    ----------
    numbers
        Object exposing an iterable ``events`` and a method ``pr_e`` that
        accepts a list of events and returns a comparable number
        (presumably the event's relative frequency -- confirm in ``LKC``).
    percentage
        Inclusive upper bound for an event to be selected.

    Returns
    -------
    list
        The selected events, in the order they appear in ``numbers.events``.
    """
    selected = []
    for candidate in numbers.events:
        # pr_e is queried with a single-element list, one event at a time.
        if numbers.pr_e([candidate]) <= percentage:
            selected.append(candidate)
    return selected

def lowest_events(numbers, amount):
    """Return the ``amount`` events with the smallest ``pr_e`` values.

    Parameters
    ----------
    numbers
        Object exposing an iterable ``events`` and a method ``pr_e`` that
        accepts a list of events and returns a sortable number.
    amount
        Number of events to keep; used as the end of a slice, so a value
        larger than the number of events simply returns all of them.

    Returns
    -------
    list
        Events ordered by ascending ``pr_e`` value.  The sort is stable, so
        events with equal values keep their order from ``numbers.events``.
    """
    ranked = sorted(
        ((event, numbers.pr_e([event])) for event in numbers.events),
        key=lambda pair: pair[1],
    )
    return [event for event, _ in ranked[:amount]]

In [17]:
def calculate_cc(numbers, sensitive, name, cc_type):
    """Compute confidence figures for background sizes 1-4 and save them as CSV.

    For each background size ``l`` the function calls
    ``numbers.paper_cc_all`` and ``numbers.max_LKC`` with the sensitive
    activities as a set and records one row.  The resulting table is written
    to ``<results_folder>/<name>/CC_<cc_type>.csv``.

    Parameters
    ----------
    numbers
        Object providing ``paper_cc_all(l, sensitive_set, upper, lower)``
        (returning an ``(upper, lower)`` pair) and
        ``max_LKC(l, sensitive_set)`` (returning a ``(K, C)`` pair).
    sensitive
        Collection of sensitive activity labels; must be sortable.
    name
        Sub-folder of the module-level ``results_folder`` to write into.
    cc_type
        Suffix used in the output file name.

    Returns
    -------
    None
        The result is only written to disk.
    """
    results = []

    # The upper/lower values returned for one background size are passed
    # back into paper_cc_all for the next size, so they are carried over.
    upper_cc, lower_cc = 0, 0
    for l in [1, 2, 3, 4]:
        upper_cc, lower_cc = numbers.paper_cc_all(l, set(sensitive), upper_cc, lower_cc)
        K, C = numbers.max_LKC(l, set(sensitive))
        # Avoid a ZeroDivisionError: report 0 when the denominator is 0.
        result_cc = upper_cc / lower_cc if lower_cc != 0 else 0
        results.append([sorted(sensitive), l, result_cc, upper_cc, lower_cc, K, C])

    results_df = pd.DataFrame(
        results,
        columns=[
            "Sensitive Activities",
            "Background Size",
            "Confidence Privacy",
            "Upper CC",
            "Lower CC",
            "Max K",
            "Max C",
        ],
    )

    # Build the path from components instead of embedding a literal backslash:
    # "\C" is an invalid escape sequence and "\" is not a separator on POSIX.
    output_dir = os.path.join(results_folder, name)
    # Make sure the per-dataset folder exists so a fresh checkout can run.
    os.makedirs(output_dir, exist_ok=True)
    results_df.to_csv(os.path.join(output_dir, f"CC_{cc_type}.csv"), index=False)

In [18]:
def run_various_cc(numbers, name, sensitive, percentage=0.05, amount=4):
    """Run ``calculate_cc`` for three different choices of sensitive activities.

    1. ``"Knowledge"``: the activities passed in as ``sensitive``.
    2. ``"Percentage_<percentage>"``: the activities returned by
       ``low_percentage_events(numbers, percentage)``.
    3. ``"Amount_<amount>"``: the activities returned by
       ``lowest_events(numbers, amount)``.

    Parameters
    ----------
    numbers
        Data object forwarded to the helper functions.
    name
        Dataset name, forwarded to ``calculate_cc`` as the output sub-folder.
    sensitive
        Hand-picked sensitive activities for the ``"Knowledge"`` run.
    percentage
        Threshold for the percentage-based selection.  The default of 0.05
        yields the label ``"Percentage_0.05"``.
    amount
        Number of events for the amount-based selection.  The default of 4
        yields the label ``"Amount_4"``.
    """
    calculate_cc(numbers, sensitive, name, "Knowledge")

    # Labels are derived from the thresholds so the file name can never
    # disagree with the value that was actually used.
    rare_by_percentage = low_percentage_events(numbers, percentage)
    calculate_cc(numbers, rare_by_percentage, name, f"Percentage_{percentage}")

    rarest_by_amount = lowest_events(numbers, amount)
    calculate_cc(numbers, rarest_by_amount, name, f"Amount_{amount}")

In [19]:
# Hand-picked sensitive activities for every event log, in processing order.
# The dataset name is both the sub-folder of ``data_folder`` holding
# ``Number5.json`` and the result name passed to ``run_various_cc``.
SENSITIVE_BY_DATASET = [
    ("Traffic Fines", ["Send for Credit Collection", "Appeal to Judge"]),
    ("Sepsis Cases", ["Release A", "Release B", "Release C", "Release D", "Release E"]),
    ("BPI 2012", ["A_DECLINED", "A_CANCELLED", "A_APPROVED"]),
    ("BPI 2017", ["A_Denied", "A_Cancelled", "A_Pending"]),
    ("BPI 2020/DomesticDeclarations", ["Payment Handled"]),
    ("BPI 2020/RequestForPayment", ["Payment Handled"]),
    ("BPI 2020/InternationalDeclarations", ["Send Reminder"]),
    ("BPI 2020/PrepaidTravelCost", ["Payment Handled"]),
    ("BPI 2020/PermitLog", ["Payment Handled", "Send Reminder"]),
    ("Hospital Billing", ["DELETE", "REOPEN", "REJECT", "JOIN-PAT"]),
]

for dataset, sensitive in SENSITIVE_BY_DATASET:
    numbers = ListIntegerMap.load_from_file(
        os.path.join(data_folder, f"{dataset}/Number5.json")
    )
    run_various_cc(numbers, dataset, sensitive)
    # Drop the reference before the next log is loaded so the previous one
    # can be released instead of being held alongside the new one.
    del numbers