In [1]:
import pandas as pd
import numpy as np
import random 
from itertools import product, combinations
import copy
from tqdm import tqdm
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

def get_domains(data):
    """Return the value domains of the variables held in ``data``.

    Placeholder: nothing is derived from ``data`` yet, so the result is
    always ``None``.
    """
    domains = None  # TODO: get domains from file
    return domains

In [2]:
class Data:
	"""Randomly generated observations together with the value domain of each variable.

	``data`` is a DataFrame whose columns are ``id``, one column per variable (in
	the order of ``variable_data``) and ``y``, the value ``f`` returns for the row.
	"""

	def __init__(self, variable_data, N, f):
		"""Generate ``N`` random rows and compute their ``y`` with ``f``.

		variable_data: list of dicts with the keys 'name', 'type' ('boolean',
			'int' or 'float'), 'min', 'max' and 'step'.
		N: number of rows to generate.
		f: callable receiving the variable values of one row as positional
			arguments, in column order.
		"""
		self.data = self.generate_random_data(variable_data, N)
		self.__domains = self.extract_domains(variable_data)
		# Columns are id, <variables>, y: the positional slice keeps the variables only.
		variables = self.data.iloc[:, 1:-1]
		self.data['y'] = [f(*values) for values in variables.itertuples(index=False, name=None)]

	@staticmethod
	def _decimals(variable):
		"""Number of decimals configured in ``variable['step']``; 0 when missing or empty."""
		raw = variable.get('step')
		return int(raw) if raw else 0

	def generate_random_data(self, variable_data, N=10):
		"""Build a DataFrame of ``N`` random rows with the columns id, <variables>, y.

		Booleans are drawn uniformly. Ints and floats are drawn from a normal law
		whose mean and standard deviation are themselves drawn from the declared
		[min, max] range; the draws are then clipped to that range so that no
		value leaves the declared bounds. ``y`` is filled with ``None``.
		Variables whose type is not 'boolean', 'int' or 'float' get no column.
		"""
		data = {'id': range(N)}

		for variable in variable_data:
			name = variable['name']
			var_type = variable['type']

			if var_type == 'boolean':
				data[name] = [random.choice([False, True]) for _ in range(N)]
			elif var_type == 'int':
				min_value = int(variable['min'])
				max_value = int(variable['max'])
				# NOTE(review): the int mean interval is scaled by 1.2 while the float one is not — confirm this is intended.
				mean = np.random.uniform(((5*min_value+3*max_value)/8)*1.2, ((3*min_value+5*max_value)/8)*1.2)
				std = np.random.uniform((max_value-min_value)/15, (max_value-min_value)/5)
				values = np.random.normal(mean, std, N).astype(int)
				data[name] = np.clip(values, min_value, max_value)
			elif var_type == 'float':
				min_value = float(variable['min'])
				max_value = float(variable['max'])
				mean = np.random.uniform((3*min_value+max_value)/4, (min_value+3*max_value)/4)
				std = np.random.uniform((max_value-min_value)/100, (max_value-min_value)/5)
				values = np.clip(np.random.normal(mean, std, N), min_value, max_value)
				data[name] = values.round(self._decimals(variable)).astype(float)

		data['y'] = [None for _ in range(N)]
		return pd.DataFrame(data)

	def extract_domains(self, variable_data):
		"""Return a dict mapping each variable name to the list of its possible values.

		boolean -> [True, False]; int -> every integer from min to max included;
		float -> every multiple of 10**-step from min to max included, rounded to
		``step`` decimals so the values compare equal to the rounded data.
		An empty or missing 'step' counts as 0 decimals, as in generate_random_data.
		"""
		domains = {}
		for variable in variable_data:
			name = variable['name']
			var_type = variable['type']
			if var_type == 'boolean':
				domains[name] = [True, False]
			elif var_type == 'int':
				domains[name] = list(range(int(variable['min']), int(variable['max']) + 1))
			elif var_type == 'float':
				min_value = float(variable['min'])
				max_value = float(variable['max'])
				decimals = self._decimals(variable)
				step_value = 10 ** -decimals
				# Count the steps with integers instead of accumulating floats (np.arange drifts and drops max).
				count = int(round((max_value - min_value) / step_value))
				domains[name] = [round(min_value + k * step_value, decimals) for k in range(count + 1)]
		return domains

	def get_values(self, id1, id2=None, X=None):
		"""Return the row(s) whose ``id`` is ``id1`` (and ``id2`` when given).

		X: column names to keep. When it is None, empty or lists every column,
			the matching rows are returned as DataFrames; otherwise the selected
			columns are returned through ``DataFrame.to_dict()``.
		Returns one result when ``id2`` is None, else a pair (result1, result2).
		"""
		# Compare plain lists: `list == Index` is evaluated element-wise and cannot be used as a condition.
		select_all = X is None or len(X) == 0 or list(X) == list(self.data.columns)

		def pick(row_id):
			rows = self.data[self.data.id == row_id]
			return rows if select_all else rows[X].to_dict()

		if id2 is None:
			return pick(id1)
		return pick(id1), pick(id2)

	def V(self):
		"""Return the variable names."""
		if self.__domains is None:
			# No domains available (load() stores whatever get_domains returns):
			# fall back to the columns between 'id' and 'y'.
			return list(self.data.columns[1:-1])
		return self.__domains.keys()

	def save(self, file = 'data/data.csv'):
		"""Write the table to ``file`` as CSV, without the index."""
		self.data.to_csv(file, index=False)

	def load(self, file = 'data/data.csv'):
		"""Replace the table with the content of ``file`` and recompute the domains with get_domains."""
		self.data = pd.read_csv(file)
		self.__domains = get_domains(self.data)


In [3]:
class Bifact:
	"""AC1, AC2/AC3 and BC1-BC4 checks of a decision function over a data table.

	f: decision function; it is called with the variable values as positional
		arguments, in the order given by ``data.V()``.
	data: object exposing ``data`` (DataFrame with the columns id, <variables>, y),
		``V()`` (variable names) and the ``_Data__domains`` mapping
		(variable name -> list of possible values).
	"""

	def __init__(self,f, data):
		self.data = data
		self.f = f

	def AC1(self, id, X, y):
		"""Check that row ``id`` exists, that its decision is ``y`` and that it holds every value of ``X``.

		X: dict variable name -> value; values are compared through ``str()``.
		Returns a bool.
		"""
		df = self.data.data
		ligne = df[df.id == id]
		if len(ligne) != 1:
			return False
		if not (ligne.y.iloc[0] == y):
			return False
		return all(str(ligne[a].iloc[0]) == str(X[a]) for a in X)

	def AC23(self, id, X, y):
		"""Check the conditions the printed messages call AC2 and AC3 for ``X`` on row ``id``.

		Every non-empty subset of ``X`` is examined, proper subsets before ``X``
		itself. For a subset, each alternative assignment taken from the variable
		domains is written into the row and the decision is recomputed.
		Returns True when the first decision change found comes from an
		assignment of all the variables of ``X``; returns False when it comes
		from a proper subset, when no assignment changes the decision, or when
		the row does not exist. A message is printed in every case but the last.
		"""
		df = self.data.data
		ligne = df[df.id == id]
		if len(ligne) == 0:
			return False

		# Written as `self.data.__domains` inside this class, the name would be mangled to
		# `_Bifact__domains`; read the mapping under the name the Data class really stores it.
		variable_domains = self.data._Data__domains
		# The decision function is called positionally, in variable order, like Data does when it computes y.
		variables = list(self.data.V())
		keys = list(X)

		# Bit i of `mask` selects keys[i]; the mask with every bit set (X itself) comes last,
		# so each proper subset is tried before X.
		for mask in range(1, 1 << len(keys)):
			subX = {k: X[k] for i, k in enumerate(keys) if (mask >> i) & 1}
			actual = tuple(subX.values())
			vals = ligne.iloc[0].to_dict()
			# Lazily walk every assignment of the subset, skipping the one the row already has.
			for combination in product(*[variable_domains[var] for var in subX]):
				if combination == actual:
					continue
				x_prime = dict(zip(subX, combination))
				vals.update(x_prime)
				yhat = self.f(*[vals[var] for var in variables])

				if yhat != y and len(x_prime) == len(X):
					print("AC2 et AC3 est vérifié pour une affectation x_prime:",x_prime, " la decision est :", yhat)
					return True
				elif yhat != y:
					print("Erreur : AC2 et AC3 est vérifié pour une affectation x_prime :", x_prime, " la decision est :", yhat)
					return False

		print("Erreur : AC2 n'est pas vérifiée, veuillez saisir un superset de l'ensemble des variables sélectionné")
		return False

	def BC1_2_beta2(self, id, X, y, var_notAC_candidat=None):
		"""Tell whether ``X`` satisfies AC1 and AC23 on row ``id``.

		Returns ``(ok, None)``. ``var_notAC_candidat`` is accepted for backward
		compatibility and is not used: the superset search that consumed it could
		never run, because the function returned right after the AC23 test whatever
		its outcome.
		"""
		# X can only be a partial cause if AC1 holds.
		if not self.AC1(id, X, y):
			return False, None
		# TODO (original author): this short-circuit may need to be revisited.
		# AC23 holding means X is an actual cause, hence a partial cause. AC23 failing is
		# treated as "X is not a partial cause".
		# NOTE(review): AC23 also fails when no assignment changes the decision, in which
		# case its message suggests trying a superset of X — that search is not done here.
		return self.AC23(id, X, y), None

	def BC1_4(self, id1, id2, X, y):
		"""Evaluate the pair of candidates ``X = (X1, X2)`` on rows ``id1`` and ``id2``.

		y: pair (decision of id1, decision of id2).
		Returns bare ``False`` when a row is missing, when X1 and X2 do not cover
		the same variables, or when those variables are not all among the ones
		whose values differ between the two rows. Otherwise the supersets of X
		within the differing variables are tried from the largest to the
		smallest, and the first one accepted by BC1_2_beta2 on both rows decides:
		``(True, variables of X)`` if it is X itself, ``(False, superset)`` if it
		is larger. ``(False, None)`` when no superset is accepted.
		"""
		df = self.data.data
		ligne1 = df[df.id == id1]
		ligne2 = df[df.id == id2]
		if len(ligne1) == 0 or len(ligne2) == 0:
			return False

		# X1 and X2 must be expressed over the same variables.
		if X[0].keys() != X[1].keys():
			return False

		# Vmax: the variables whose values differ between the two rows.
		Vmax = [col for col in ligne1.columns[1:-1] if ligne1[col].iloc[0] != ligne2[col].iloc[0]]

		# X must be included in Vmax.
		X_var = list(X[0])
		if not set(X_var).issubset(Vmax):
			return False

		# Every non-empty subset of Vmax, by increasing size; walked in reverse below.
		all_combinaisons_var = []
		for nb_var in range(1, len(Vmax) + 1):
			all_combinaisons_var.extend(combinations(Vmax, nb_var))

		for combinaison in tqdm(all_combinaisons_var[::-1]):
			# Only the supersets of X are of interest.
			if not set(X_var).issubset(combinaison):
				continue

			# Read each value from its own column: selecting a mixed int/float row at once
			# upcasts the ints to floats, and AC1 compares values through str().
			dict_candidat1 = {var: ligne1[var].iloc[0] for var in combinaison}
			dict_candidat2 = {var: ligne2[var].iloc[0] for var in combinaison}

			ok1, _ = self.BC1_2_beta2(id1, dict_candidat1, y[0])
			if not ok1:
				continue
			ok2, _ = self.BC1_2_beta2(id2, dict_candidat2, y[1])
			if not ok2:
				continue

			# BC4: a strict superset of X was accepted, so X itself is rejected.
			if len(set(X_var)) < len(combinaison):
				return False, combinaison
			return True, X_var

		return False, None

	def BC(self, id1, id2, X, y):
		"""Return True when both candidates pass BC1_2_beta2 and the pair passes BC1_4.

		The helper methods return tuples (always truthy), so their first element
		is read explicitly. The result is a plain bool.
		"""
		ok1, _ = self.BC1_2_beta2(id1, X[0], y[0])
		if not ok1:
			return False
		ok2, _ = self.BC1_2_beta2(id2, X[1], y[1])
		if not ok2:
			return False
		verdict = self.BC1_4(id1, id2, X, y)
		# BC1_4 returns either a bare False or a (bool, detail) pair.
		if isinstance(verdict, tuple):
			return bool(verdict[0])
		return bool(verdict)

	def find_cause(self,id1, id2):
		"""Search a pair of candidates accepted by BC for rows ``id1`` and ``id2``.

		The subsets of the variables whose values differ between the two rows are
		tried from the largest to the smallest. Returns ``(X1, X2)``, two dicts
		variable -> value taken from row id1 and row id2, for the first subset
		accepted by BC; returns None when there is none or when a row is missing.
		"""
		df = self.data.data
		ligne1 = df[df.id == id1]
		ligne2 = df[df.id == id2]
		if len(ligne1) == 0 or len(ligne2) == 0:
			return None

		# Variables are the columns between 'id' and 'y'; keep those on which the rows disagree.
		V_diff = [var for var in df.columns[1:-1] if ligne1[var].iloc[0] != ligne2[var].iloc[0]]
		y = (ligne1.y.iloc[0], ligne2.y.iloc[0])

		for i in range(len(V_diff), 0, -1):
			for subset in combinations(V_diff, i):
				c1 = {var: ligne1[var].iloc[0] for var in subset}
				c2 = {var: ligne2[var].iloc[0] for var in subset}
				if self.BC(id1, id2, (c1, c2), y):
					return c1, c2

		return None
		

In [4]:
import tkinter as tk
from tkinter import filedialog
from tkinter import ttk
import numpy as np
import pandas as pd

# Size, in pixels, given to every window of the application
WINDOW_WIDTH, WINDOW_HEIGHT = 1200, 800

# Widgets of the "Select Variables" window; None until select_variables_window() creates them
variable_entry = variable_type_var = None
min_entry = max_entry = step_entry = None
variables_tree = None
min_label = max_label = step_label = None

# Number of variables added so far; used to suggest the names x1, x2, x3, ...
variable_counter = 0

def import_csv():
	"""Ask for a file and, when one is chosen, open the variable-selection window.

	NOTE(review): the chosen path is only tested for emptiness; nothing here reads the file.
	"""
	chosen_path = filedialog.askopenfilename()
	if not chosen_path:
		return
	select_variables_window()

def generate_random():
	"""Restart the variable numbering and open the variable-selection window."""
	global variable_counter
	# Reset first: the window reads the counter to suggest the first variable name.
	variable_counter = 0
	select_variables_window()

def add_variable():
	"""Append the variable described by the input widgets to the variables table.

	Empty fields fall back to the defaults of the selected type; booleans always
	use the defaults and ints never read the step field. Once the row is added,
	the name field receives the next suggested name and the min/max/step fields
	are reset to the int defaults.
	"""
	global variable_counter

	toggle_min_max_step_visibility()
	# Suggested names are x1, x2, x3, ...
	variable_name = variable_entry.get() or f"x{variable_counter + 1}"
	variable_type = variable_type_var.get()

	if variable_type == "boolean":
		step_value = get_default_step_value("boolean")
		min_value = get_default_min_value("boolean")
		max_value = get_default_max_value("boolean")
	else:
		min_value = min_entry.get() or get_default_min_value(variable_type)
		max_value = max_entry.get() or get_default_max_value(variable_type)
		if variable_type == "int":
			step_value = get_default_step_value("int")
		else:
			step_value = step_entry.get() or get_default_step_value(variable_type)

	if variable_name:
		variable_counter += 1
		refills = (
			(variable_entry, f"x{variable_counter + 1}"),
			(min_entry, get_default_min_value("int")),
			(max_entry, get_default_max_value("int")),
			(step_entry, get_default_step_value("int")),
		)
		for entry, text in refills:
			entry.delete(0, tk.END)
			entry.insert(0, text)
		variables_tree.insert("", "end", values=(variable_name, variable_type, min_value, max_value, step_value))

	toggle_min_max_step_visibility()

def update_variable_type(var_type = variable_type_var):
	"""Callback of the type drop-down: refresh which min/max/step widgets are shown.

	``var_type`` is not used.
	"""
	toggle_min_max_step_visibility()
	# Re-assign the value the variable already holds.
	current_type = variable_type_var.get()
	variable_type_var.set(current_type)

def select_variables_window():
	"""Open the "Select Variables" window and publish its widgets as module globals.

	Row 0 holds the inputs describing one variable, row 1 the table of the
	variables added so far, row 2 the decision-function selector, the number
	of rows to generate and the Add / Generate buttons.
	"""
	global variable_entry, variable_type_var, min_entry, max_entry, step_entry, num_rows_entry, variables_tree, variables_window
	global min_label, max_label, step_label, next_button, function_var

	def place(widget, row, column, **extra):
		# Every widget of this window is gridded with the same 5-pixel padding.
		widget.grid(row=row, column=column, padx=5, pady=5, **extra)
		return widget

	variables_window = tk.Toplevel(root)
	variables_window.title("Select Variables")
	variables_window.geometry(f"{WINDOW_WIDTH}x{WINDOW_HEIGHT}")

	# Variable name, pre-filled with the next suggested name
	place(tk.Label(variables_window, text="Variable Name:"), 0, 0)
	variable_entry = tk.Entry(variables_window)
	variable_entry.insert(0, f"x{variable_counter + 1}")
	place(variable_entry, 0, 1)

	# Variable type ("int" by default)
	place(tk.Label(variables_window, text="Variable Type:"), 0, 2)
	variable_type_var = tk.StringVar(variables_window, value="int")
	type_choices = ("int", "float", "boolean")
	place(tk.OptionMenu(variables_window, variable_type_var, *type_choices, command=update_variable_type), 0, 3)

	# Min value
	min_label = place(tk.Label(variables_window, text="Min:"), 0, 4)
	min_entry = place(tk.Entry(variables_window), 0, 5)

	# Max value
	max_label = place(tk.Label(variables_window, text="Max:"), 0, 6)
	max_entry = place(tk.Entry(variables_window), 0, 7)

	# Step value
	step_label = place(tk.Label(variables_window, text="Arrond:"), 0, 8)
	step_entry = place(tk.Entry(variables_window), 0, 9)

	# Add the variable
	place(tk.Button(variables_window, text="Add", command=add_variable), 0, 10)

	# Table of the variables added so far
	tree_columns = ("Name", "Type", "Min", "Max", "Step")
	variables_tree = ttk.Treeview(variables_window, columns=tree_columns, show="headings")
	for column_name in tree_columns:
		variables_tree.heading(column_name, text=column_name)
	place(variables_tree, 1, 0, columnspan=11)

	# Decision function selector ("and" by default)
	place(tk.Label(variables_window, text="Function:"), 2, 5)
	function_var = tk.StringVar(variables_window, value="and")
	function_choices = ("and", "or", "mean", "clustering", "monmaster", "monmaster discretisé")
	place(tk.OptionMenu(variables_window, function_var, *function_choices), 2, 6)

	# Number of rows to generate (10 by default)
	place(tk.Label(variables_window, text="N:"), 2, 7)
	num_rows_entry = tk.Entry(variables_window)
	num_rows_entry.insert(0, "10")
	place(num_rows_entry, 2, 8)

	# Second Add button
	place(tk.Button(variables_window, text="Add", command=add_variable), 2, 9)

	# Generate the data from the listed variables
	next_button = tk.Button(variables_window, text="Generate", command=lambda: display_data(*get_variable_data()))
	place(next_button, 2, 10, columnspan=11)

	toggle_min_max_step_visibility()

def get_variable_data():
	"""Read the variables table and the requested number of rows.

	Returns ``[variable_data, N]``: ``variable_data`` is a list of dicts with the
	keys 'name', 'type', 'min', 'max' and 'step', one per table row; ``N`` is the
	integer typed in the N field, or 10 when that field is empty.
	"""
	fields = ("name", "type", "min", "max", "step")
	variable_data = []
	for item in variables_tree.get_children():
		values = variables_tree.item(item, "values")
		variable_data.append({field: values[position] for position, field in enumerate(fields)})

	typed_rows = num_rows_entry.get()
	row_count = int(typed_rows) if typed_rows else 10
	return [variable_data, row_count]

def display_data(variable_data, N):
	"""Generate the data set and show it in a new "Data Table" window.

	variable_data: list of variable descriptions, as returned by get_variable_data.
	N: number of rows to generate.
	The decision function is the one selected in the function drop-down. The
	generated Data object and the two id fields are published as the globals
	``data_object``, ``id1_entry`` and ``id2_entry``, which find_cause reads.
	Raises KeyError if the selected function name is unknown.
	"""
	global id1_entry, id2_entry, data_object
	decision_functions = {
		"and": f1,
		"or": f2,
		"mean": f3,
		"clustering": f4,
		"monmaster": f5,
		"monmaster discretisé": f6,
	}
	f = decision_functions[function_var.get()]
	# Data computes the y column by applying f to every generated row.
	data_object = Data(variable_data, N,f)
	df = data_object.data

	data_window = tk.Toplevel(root)
	data_window.title("Data Table")
	data_window.geometry(f"{WINDOW_WIDTH}x{WINDOW_HEIGHT}")
	frame = tk.Frame(data_window)
	frame.grid(sticky='nsew')

	id1_label = tk.Label(frame, text="ID1:")
	id1_label.grid(row=0, column=0, padx=5, pady=5)
	id1_entry = tk.Entry(frame)
	id1_entry.insert(0, "0")
	id1_entry.grid(row=1, column=0, padx=5, pady=5)

	id2_label = tk.Label(frame, text="ID2:")
	id2_label.grid(row=2, column=0, padx=5, pady=5)
	id2_entry = tk.Entry(frame)
	id2_entry.insert(0, "1")
	id2_entry.grid(row=3, column=0, padx=5, pady=5)

	# The button was created without a command and did nothing when clicked.
	find_cause_button = tk.Button(frame, text="Find a Cause", command=find_cause)
	find_cause_button.grid(row=6, column=0, padx=5, pady=5)

	# Scrollbars sit on the table's own row (vertical) and just below it (horizontal).
	v_scrollbar = ttk.Scrollbar(frame, orient="vertical")
	v_scrollbar.grid(row=9, column=1, sticky='ns')
	h_scrollbar = ttk.Scrollbar(frame, orient="horizontal")
	h_scrollbar.grid(row=10, column=0, sticky='ew')

	columns = ["Index"] + [variable['name'] for variable in variable_data] + ['y']
	table = ttk.Treeview(frame, height=20, columns=columns,
						yscrollcommand=v_scrollbar.set, xscrollcommand=h_scrollbar.set, show="headings")
	table.grid(row=9, column=0, sticky='nsew')

	for column_name in columns:
		table.heading(column_name, text=column_name)
		table.column(column_name, width=100)

	for index, row in df.iterrows():
		table.insert("", "end", values=list(row))

	v_scrollbar.config(command=table.yview)
	h_scrollbar.config(command=table.xview)
	frame.grid_rowconfigure(0, weight=1)
	frame.grid_columnconfigure(0, weight=1)
	data_window.grid_columnconfigure(0, weight=1)
	data_window.grid_rowconfigure(0, weight=1)

	toggle_min_max_step_visibility()

def f1(*args):
	"""Logical AND: True when every argument is truthy (True when called without argument)."""
	for value in args:
		if not value:
			return False
	return True

def f2(*args):
	"""Logical OR: True when at least one argument is truthy (False when called without argument)."""
	for value in args:
		if value:
			return True
	return False

def f3(*args):
	"""Return the arithmetic mean of the arguments.

	Raises ZeroDivisionError when called without argument.
	"""
	total = sum(args)
	# The value is only returned: displaying it on every evaluation flooded the output
	# and required IPython's ``display`` to be defined.
	return int(total)/len(args) if isinstance(total, int) else float(total)/len(args)

def f4(*args):
	"""Cluster the arguments in two groups and encode their labels as an integer.

	A 2-cluster KMeans is fitted on the arguments that are plain int, float or
	bool, each taken as a one-feature sample. The labels predicted for all the
	arguments are then concatenated into a string of 0/1 digits, which is read
	as a base-2 integer.
	"""
	data = [arg for arg in args if isinstance(arg, (int, float, bool))]
	data = np.array(data).reshape(-1, 1)
	# Fixed seed: the same arguments must always give the same decision.
	kmeans = KMeans(n_clusters=2, random_state=0)
	kmeans.fit(data)
	# NOTE(review): the model is fitted on the filtered values but predicts on every argument — confirm.
	labels = kmeans.predict(np.array(args).reshape(-1, 1))
	return int(''.join(map(str, labels)), 2)

def f5(*args):
	"""Admission decision computed from five marks and three subject marks.

	args: S1, S2, S3, S4, S5, Logique, BDD, PS (exactly eight values).
	The weighted average of S1..S5 (S5 counts double) is compared with the
	thresholds 11 and 13; the candidate is eligible when Logique, BDD and PS
	are all strictly above 11.
	Returns 'Accepté' (eligible, average above 13), 'En liste d'attente'
	(eligible, average from 11 to 13 included) or 'Rejeté' (any other case).
	"""
	S1, S2, S3, S4, S5, Logique, BDD, PS = args

	weights = [1, 1, 1, 1, 2]  # weights of S1..S5 in the average
	subject_min = [11, 11, 11]  # eligibility thresholds (Logique, BDD, PS)
	low, high = 11, 13  # decision thresholds on the average

	moyenne = (S1 * weights[0] + S2 * weights[1] + S3 * weights[2] + S4 * weights[3] + S5 * weights[4]) / sum(weights)

	if not (Logique > subject_min[0] and BDD > subject_min[1] and PS > subject_min[2]):
		return 'Rejeté'
	if moyenne > high:
		return 'Accepté'
	if low <= moyenne <= high:
		return 'En liste d\'attente'
	return 'Rejeté'

def f6(*args):
	"""Admission decision computed from five marks and three eligibility flags.

	args: S1, S2, S3, S4, S5, Logique, BDD, PS (exactly eight values).
	The weighted average of S1..S5 (S5 counts double) is compared with the
	thresholds 11 and 13; the candidate is eligible when Logique, BDD and PS
	are all truthy.
	Returns 'Accepté' (eligible, average above 13), 'En liste d'attente'
	(eligible, average from 11 to 13 included) or 'Rejeté' (any other case).
	"""
	S1, S2, S3, S4, S5, Logique, BDD, PS = args

	weights = [1, 1, 1, 1, 2]  # weights of S1..S5 in the average
	low, high = 11, 13  # decision thresholds on the average

	moyenne = (S1 * weights[0] + S2 * weights[1] + S3 * weights[2] + S4 * weights[3] + S5 * weights[4]) / sum(weights)

	if not (Logique and BDD and PS):
		return 'Rejeté'
	if moyenne > high:
		return 'Accepté'
	if low <= moyenne <= high:
		return 'En liste d\'attente'
	return 'Rejeté'

def find_cause():
	"""Search a cause for the two row ids typed in the data window and display it.

	Reads the globals ``data_object``, ``id1_entry`` and ``id2_entry`` set by
	display_data, and the function drop-down ``function_var``.
	"""
	# Bifact takes the decision function first and the data second.
	# NOTE(review): the function is read from the drop-down at click time; this assumes the
	# selection has not changed since the data was generated.
	decision_functions = {
		"and": f1,
		"or": f2,
		"mean": f3,
		"clustering": f4,
		"monmaster": f5,
		"monmaster discretisé": f6,
	}
	f = decision_functions[function_var.get()]
	ModelBifact = Bifact(f, data_object)
	id1 = int(id1_entry.get())
	id2 = int(id2_entry.get())
	display("model init done")
	cause = ModelBifact.find_cause(id1, id2)
	display(cause)
	
	

def toggle_min_max_step_visibility():
	"""Show or hide the min/max/step widgets according to the selected variable type.

	int: min and max shown, step hidden; float: all shown; any other type: all hidden.
	"""
	selected_type = variable_type_var.get()
	bound_widgets = (min_label, min_entry, max_label, max_entry)
	step_widgets = (step_label, step_entry)

	shown, hidden = [], []
	(shown if selected_type in ["int", "float"] else hidden).extend(bound_widgets)
	(shown if selected_type == "float" else hidden).extend(step_widgets)

	for widget in shown:
		widget.grid()
	for widget in hidden:
		widget.grid_remove()

def get_default_min_value(var_type):
	"""Return the default minimum of a variable type, as a string (None for an unknown type)."""
	defaults = (("boolean", "False"), ("int", "0"), ("float", "0.0"))
	for known_type, default in defaults:
		if var_type == known_type:
			return default
	return None

def get_default_max_value(var_type):
	"""Return the default maximum of a variable type, as a string (None for an unknown type)."""
	defaults = (("boolean", "True"), ("int", "20"), ("float", "20.0"))
	for known_type, default in defaults:
		if var_type == known_type:
			return default
	return None

def get_default_step_value(var_type):
	"""Return the default step: "2" for floats, an empty string for every other type."""
	return "2" if var_type == "float" else ""



# Main window of the application
root = tk.Tk()
root.title("Bifact")
root.geometry("{}x{}".format(WINDOW_WIDTH, WINDOW_HEIGHT))

# Two entry points: import a CSV file or describe variables for random generation
import_button = tk.Button(root, command=import_csv, text="Import a CSV File")
generate_button = tk.Button(root, command=generate_random, text="Generate Randomly")
import_button.pack()
generate_button.pack()

# Hand control over to the Tk event loop
root.mainloop()
