In [50]:
from pyparsing import Word, alphas, nums, oneOf, infixNotation, opAssoc, ParserElement

# Enable pyparsing's packrat mode before the grammar is built.
# NOTE(review): the original note said this call makes pyparsing ignore
# whitespace; judging by its name it turns on packrat (memoized) parsing
# instead -- confirm against the pyparsing documentation.
ParserElement.enablePackrat()

# Basic tokens: an identifier starts with a letter and continues with letters,
# digits or underscores; a number is a run of digits only (no sign, no decimals).
identifier = Word(alphas, alphas + nums + "_")
number = Word(nums)

# Comparison operators accepted between two operands.
comparison_operator = oneOf("<= >= < > == !=")

# An operand is either an identifier or a number.
operand = identifier | number

# Full logical expression: binary, left-associative operators listed as
# comparison, then AND, then OR.
# NOTE(review): presumably earlier entries bind tighter, as is usual for
# infixNotation -- confirm against the pyparsing documentation.
logical_expr = infixNotation(operand,
    [
        (comparison_operator, 2, opAssoc.LEFT),
        ("AND", 2, opAssoc.LEFT),
        ("OR", 2, opAssoc.LEFT),
    ])

def extract_leaves(parsed_expr):
    """Collect the comparison leaves of a parsed logical expression.

    Parameters:
    parsed_expr (str | list): A token or a (possibly nested) list of tokens.

    Returns:
    list: One "left op right" string per three-element comparison found, in
    the order they are encountered. A bare string token yields an empty list.
    """
    comparison_ops = ("<=", ">=", "<", ">", "==", "!=")

    # A lone token (identifier, number, AND/OR) carries no comparison.
    if isinstance(parsed_expr, str):
        return []

    # Exactly [left, operator, right] with a comparison operator is a leaf.
    if len(parsed_expr) == 3 and parsed_expr[1] in comparison_ops:
        return [" ".join(parsed_expr)]

    # Anything else is a container: gather the leaves of every child.
    return [leaf for child in parsed_expr for leaf in extract_leaves(child)]

# Example usage: parse one expression and list its comparison leaves.
# A larger nested example is kept below, commented out, for manual testing.
# expression = "(((a <= 3 AND (b > 10 OR c == 10)) OR ((d != 5 AND e >= 20) OR f < 2)) AND ((g <= 7 OR h > 8) AND (i == 9 OR j != 3)))"
expression = "(a <= 3 AND b > 10)"
# parseAll=True requires the whole string to match the grammar.
parsed_expression = logical_expr.parseString(expression, parseAll=True)
# Convert the parse result to nested plain lists before walking it.
leaves = extract_leaves(parsed_expression.asList())
print("Folhas extraídas:", leaves)


Folhas extraídas: ['a <= 3', 'b > 10']


In [51]:
def lista_para_dicionario(lista):
    """
    Map each item of a list to a positional key 'x1', 'x2', ..., 'xn'.

    Parameters:
    lista (list): Items to label (strings in this notebook).

    Returns:
    dict: Keys 'x1'..'xn' in list order, each holding the matching item.
    """
    rotulado = {}
    # Numbering starts at 1, so the first item is stored under 'x1'.
    for posicao, item in enumerate(lista, start=1):
        rotulado["x" + str(posicao)] = item
    return rotulado

# Example usage: label the leaves extracted in the previous cell.
# lista = ['a <= 3', 'b > 10']
dicionario = lista_para_dicionario(leaves)
print(dicionario)

{'x1': 'a <= 3', 'x2': 'b > 10'}


In [52]:
def substituir_valores_por_chaves(string, dicionario):
    """
    Replace occurrences of the dictionary values in a string by their keys.

    All replacements happen in a single left-to-right pass, so text inserted
    by one replacement is never rewritten by another. Longer values are tried
    first, and a value that starts or ends with a word character only matches
    when it is not glued to another word character. This keeps 'a <= 3' from
    matching inside 'a <= 30' or 'ba <= 3'.

    Parameters:
    string (str): The string in which the replacements are made.
    dicionario (dict): Maps each key to the value it should stand for.
        Empty values are ignored; if two keys share a value, the first wins.

    Returns:
    str: The string with the values replaced by their keys.
    """
    # Local import: this cell runs before the notebook's later `import re`.
    import re

    chave_por_valor = {}
    for chave, valor in dicionario.items():
        if valor:
            chave_por_valor.setdefault(valor, chave)
    if not chave_por_valor:
        return string

    def _padrao_delimitado(valor):
        """Escape a value and guard its word-character edges with lookarounds."""
        padrao = re.escape(valor)
        if re.match(r"\w", valor[0]):
            padrao = r"(?<!\w)" + padrao
        if re.match(r"\w", valor[-1]):
            padrao = padrao + r"(?!\w)"
        return padrao

    # Longest first so the alternation prefers the most specific value.
    valores = sorted(chave_por_valor, key=len, reverse=True)
    padrao = re.compile("|".join(_padrao_delimitado(valor) for valor in valores))
    # A callable replacement avoids backslash processing of the key text.
    return padrao.sub(lambda achado: chave_por_valor[achado.group(0)], string)

# Abstract the original expression: every leaf text becomes its 'xN' key.
nova_string = substituir_valores_por_chaves(expression, dicionario)
print(nova_string)

(x1 AND x2)


In [53]:
 # Replace 'AND' with '&' and 'OR' with '|'.
 # NOTE(review): these are plain substring replacements, so any 'AND'/'OR'
 # left inside an identifier would be rewritten as well; the cell also
 # overwrites nova_string in place.
nova_string = nova_string.replace('AND', '&').replace('OR', '|')
nova_string

'(x1 & x2)'

In [54]:
from sympy import symbols, sympify
from sympy.logic.boolalg import to_dnf


# Logical expression as a string (the '&' / '|' form built in the previous cell)
expressao_str = nova_string

# # Convert the string into a symbolic expression
# expressao_logica = sympify(expressao_str)

# # Show the logical expression
# print(expressao_logica)
# Convert to disjunctive normal form. The string is passed directly to to_dnf
# (the explicit sympify step above is commented out).
dnf_expr=to_dnf(expressao_str, simplify=True, force=True)
print(dnf_expr)


x1 & x2


In [55]:
def reverter_chaves_por_valores(string, dicionario):
    """
    Replace occurrences of the dictionary keys in a string by their values.

    All replacements happen in a single left-to-right pass, so a restored
    value is never rewritten even if it contains another key. Longer keys are
    tried first, and a key that starts or ends with a word character only
    matches as a whole token, so 'x1' does not rewrite the start of 'x10'.

    Parameters:
    string (str): The string in which the replacements are made.
    dicionario (dict): Maps each key to the value that replaces it.
        Empty keys are ignored.

    Returns:
    str: The string with the keys replaced by the original values.
    """
    # Local import: this cell runs before the notebook's later `import re`.
    import re

    chaves = [chave for chave in dicionario if chave]
    if not chaves:
        return string

    def _padrao_delimitado(chave):
        """Escape a key and guard its word-character edges with lookarounds."""
        padrao = re.escape(chave)
        if re.match(r"\w", chave[0]):
            padrao = r"(?<!\w)" + padrao
        if re.match(r"\w", chave[-1]):
            padrao = padrao + r"(?!\w)"
        return padrao

    # Longest first so the alternation prefers the most specific key.
    chaves.sort(key=len, reverse=True)
    padrao = re.compile("|".join(_padrao_delimitado(chave) for chave in chaves))
    # A callable replacement avoids backslash processing of the value text.
    return padrao.sub(lambda achado: dicionario[achado.group(0)], string)

# Put the original leaf texts back into the DNF form computed above.
string_final = reverter_chaves_por_valores(str(dnf_expr), dicionario)
print(f"String final com valores originais: {string_final}")

String final com valores originais: a <= 3 & b > 10


In [56]:
#VERSAO 1
##############
# import re
# import json

# def parse_expression(expression):
#     """
#     Parse the expression into its components: variable, operator, and value.
#     """
#     match = re.match(r"(\w+)\s*(>=|<=|>|<|==|!=)\s*(.+)", expression)
#     if match:
#         variable, operator, value = match.groups()
#         return variable, operator, value
#     else:
#         raise ValueError(f"Invalid expression format: {expression}")

# def load_variable_ranges_from_file(file_path):
#     with open(file_path, 'r') as file:
#         json_data = file.read()
#     variable_ranges = json.loads(json_data)
#     return {var['name']: var for var in variable_ranges}

# # def normalize_value(value, var_type):
# #     """
# #     Normalize the value based on its type (integer or float).
# #     """
# #     if var_type == 'integer':
# #         return int(value)
# #     elif var_type == 'float':
# #         return float(value)
# #     else:
# #         raise ValueError(f"Unsupported variable type: {var_type}")


# def get_expression_range(var_name, operator, value, min_val, max_val):
#     """
#     Determine the range of values that satisfy the expression.
#     """
#     value = float(value)
#     if operator == '>=':
#         return (value, max_val)
#     elif operator == '<=':
#         return (min_val, value)
#     elif operator == '>':
#         return (value + 1e-9, max_val)  # Adding a small value to make it exclusive
#     elif operator == '<':
#         return (min_val, value - 1e-9)  # Subtracting a small value to make it exclusive
#     elif operator == '==':
#         return (value, value)
#     elif operator == '!=':
#         return [(min_val, value - 1e-9), (value + 1e-9, max_val)]  # Returns two ranges
#     else:
#         raise ValueError(f"Unsupported operator: {operator}")

# def calculate_jaccard_similarity(expr1, expr2, variable_ranges):
#     """
#     Calculate the Jaccard similarity between two expressions.
#     The expressions should be of the format: "variable operator value".
#     """
#     try:
#         var1, op1, val1 = parse_expression(expr1)
#         var2, op2, val2 = parse_expression(expr2)
#     except ValueError as e:
#         print(e)
#         return 0.0

#     # Ensure the variable names are the same
#     if var1 != var2:
#         return 0.0

#     # Get variable ranges
#     var_info = variable_ranges.get(var1)
#     if not var_info:
#         print(f"Variable {var1} not found in ranges.")
#         return 0.0

#     min_val = var_info['min_value']
#     max_val = var_info['max_value']

#     # Get ranges for each expression
#     range1 = get_expression_range(var1, op1, val1, min_val, max_val)
#     range2 = get_expression_range(var2, op2, val2, min_val, max_val)

#     # Calculate the intersection and union of the ranges
#     if isinstance(range1, list) or isinstance(range2, list):
#         # Handle != case where range might be a list of two ranges
#         intersection = []
#         union = [(min_val, max_val)]
#         if isinstance(range1, list):
#             for r in range1:
#                 if isinstance(range2, list):
#                     for s in range2:
#                         intersection.append(max(r[0], s[0]))
#                         intersection.append(min(r[1], s[1]))
#                 else:
#                     intersection.append(max(r[0], range2[0]))
#                     intersection.append(min(r[1], range2[1]))
#         else:
#             for s in range2:
#                 intersection.append(max(range1[0], s[0]))
#                 intersection.append(min(range1[1], s[1]))
#     else:
#         intersection = (max(range1[0], range2[0]), min(range1[1], range2[1]))
#         union = (min(range1[0], range2[0]), max(range1[1], range2[1]))

#     # Calculate intersection length
#     if intersection[0] <= intersection[1]:
#         intersection_length = intersection[1] - intersection[0]
#     else:
#         intersection_length = 0

#     # Calculate union length
#     union_length = union[1] - union[0]

#     # Calculate the Jaccard similarity
#     similarity = intersection_length / union_length
#     return similarity



# variable_ranges = load_variable_ranges_from_file("variables_config.json")

# expr1 = "height > 1"
# expr2 = "height > 1"

# similarity = calculate_jaccard_similarity(expr1, expr2, variable_ranges)
# similarity

In [57]:
#VERSAO 2
##############

# import re
# import json

# def parse_expression(expression):
#     """
#     Parse the expression into its components: variable, operator, and value.
#     """
#     match = re.match(r"(\w+)\s*(>=|<=|>|<|==|!=)\s*(.+)", expression)
#     if match:
#         variable, operator, value = match.groups()
#         return variable, operator, value
#     else:
#         raise ValueError(f"Invalid expression format: {expression}")

# def load_variable_ranges_from_file(file_path):
#     with open(file_path, 'r') as file:
#         json_data = file.read()
#     variable_ranges = json.loads(json_data)
#     return {var['name']: var for var in variable_ranges}

# def get_expression_range(var_name, operator, value, min_val, max_val):
#     """
#     Determine the range of values that satisfy the expression.
#     """
#     value = float(value)
#     if operator == '>=':
#         return (value, max_val)
#     elif operator == '<=':
#         return (min_val, value)
#     elif operator == '>':
#         return (value + 1e-9, max_val)  # Adding a small value to make it exclusive
#     elif operator == '<':
#         return (min_val, value - 1e-9)  # Subtracting a small value to make it exclusive
#     elif operator == '==':
#         return (value, value)
#     elif operator == '!=':
#         return [(min_val, value - 1e-9), (value + 1e-9, max_val)]  # Returns two ranges
#     else:
#         raise ValueError(f"Unsupported operator: {operator}")

# def calculate_jaccard_similarity(expr1, expr2, variable_ranges):

#     """
#     Calculate the Jaccard similarity between two expressions.
#     The expressions should be of the format: "variable operator value".
#     """
#     try:
#         var1, op1, val1 = parse_expression(expr1)
#         var2, op2, val2 = parse_expression(expr2)
#     except ValueError as e:
#         print(e)
#         return 0.0

#     # Ensure the variable names are the same
#     if var1 != var2:
#         return 0.0

#     # Get variable ranges
#     var_info = variable_ranges.get(var1)
#     if not var_info:
#         print(f"Variable {var1} not found in ranges.")
#         return 0.0

#     min_val = var_info['min_value']
#     max_val = var_info['max_value']

#     # Get ranges for each expression
#     range1 = get_expression_range(var1, op1, val1, min_val, max_val)
#     range2 = get_expression_range(var2, op2, val2, min_val, max_val)

#     # Calculate intersection and union considering the equality operator cases
#     if op1 == '==' and op2 != '==':
#         if range2[0] <= range1[0] <= range2[1]:
#             intersection_length = 1
#             union_length = range2[1] - range2[0]
#         else:
#             return 0.0
#     elif op2 == '==' and op1 != '==':
#         if range1[0] <= range2[0] <= range1[1]:
#             intersection_length = 1
#             union_length = range1[1] - range1[0]
#         else:
#             return 0.0
#     elif op1 == '==' and op2 == '==':
#         if val1 == val2:
#             return 1.0
#         else:
#             return 0.0
#     else:
#         if isinstance(range1, list) or isinstance(range2, list):
#             # Handle != case where range might be a list of two ranges
#             intersection = []
#             union = [(min_val, max_val)]
#             if isinstance(range1, list):
#                 for r in range1:
#                     if isinstance(range2, list):
#                         for s in range2:
#                             intersection.append((max(r[0], s[0]), min(r[1], s[1])))
#                     else:
#                         intersection.append((max(r[0], range2[0]), min(r[1], range2[1])))
#             else:
#                 for s in range2:
#                     intersection.append((max(range1[0], s[0]), min(range1[1], s[1])))
#         else:
#             intersection = (max(range1[0], range2[0]), min(range1[1], range2[1]))
#             union = (min(range1[0], range2[0]), max(range1[1], range2[1]))

#         # Calculate intersection length
#         if isinstance(intersection, list):
#             intersection_length = 0
#             for r in intersection:
#                 if r[0] <= r[1]:
#                     intersection_length += r[1] - r[0]
#         else:
#             if intersection[0] <= intersection[1]:
#                 intersection_length = intersection[1] - intersection[0]
#             else:
#                 intersection_length = 0

#         # Calculate union length
#         union_length = union[1] - union[0] if isinstance(union, tuple) else max_val - min_val

#     # Calculate the Jaccard similarity
#     similarity = intersection_length / union_length
#     return similarity

# # Load the variable ranges from the file
# variable_ranges = load_variable_ranges_from_file('variables_config.json')

# # Test cases
# test_cases = [
#     ("height >= 30", "height == 40"),  # equality with range
#     ("height == 30", "height == 30"),  # equality with equality (same)
#     ("height == 30", "height == 40"),  # equality with equality (different)
#     ("height >= 30", "height <= 40"),  # range with range
#     ("height != 30", "height <= 40"),  # inequality with range
# ]

# results = {f"{leaf1} vs {leaf2}": calculate_jaccard_similarity(leaf1, leaf2, variable_ranges)
#            for leaf1, leaf2 in test_cases}

# results


In [58]:
import re
import json

def parse_expression(expression):
    """
    Split a comparison such as "height >= 30" into its three parts.

    The match is anchored at the start of the string: a word-character name,
    one of the operators >=, <=, >, <, ==, !=, then the rest of the string as
    the value. Whitespace around the operator is optional.

    Returns:
    tuple: (variable, operator, value), all strings.

    Raises:
    ValueError: If the expression does not have that shape.
    """
    parsed = re.match(r"(\w+)\s*(>=|<=|>|<|==|!=)\s*(.+)", expression)
    if parsed is None:
        raise ValueError(f"Invalid expression format: {expression}")
    variable, operator, value = parsed.groups()
    return variable, operator, value

def load_variable_ranges_from_file(file_path):
    """
    Read a JSON file of variable descriptions and index them by name.

    Parameters:
    file_path (str): Path to a JSON document that decodes to an iterable of
        objects, each with a 'name' entry.

    Returns:
    dict: Each 'name' mapped to its full description object.
    """
    with open(file_path, 'r') as handle:
        entries = json.load(handle)

    by_name = {}
    for entry in entries:
        by_name[entry['name']] = entry
    return by_name

def get_expression_range(var_name, operator, value, min_val, max_val):
    """
    Translate "variable operator value" into the interval(s) it allows.

    Parameters:
    var_name: Name of the variable (not used by the computation).
    operator (str): One of '>=', '<=', '>', '<', '==', '!='.
    value: Threshold; converted with float().
    min_val, max_val: Lower and upper bound of the variable's domain.

    Returns:
    tuple | list: A (low, high) tuple, or for '!=' a list with the two
    (low, high) tuples on either side of the excluded value.

    Raises:
    ValueError: If the value is not numeric or the operator is unsupported.
    """
    threshold = float(value)
    # Strict comparisons are approximated by nudging the bound by 1e-9.
    just_above = threshold + 1e-9
    just_below = threshold - 1e-9

    if operator == '==':
        return (threshold, threshold)
    if operator == '!=':
        return [(min_val, just_below), (just_above, max_val)]
    if operator == '<=':
        return (min_val, threshold)
    if operator == '<':
        return (min_val, just_below)
    if operator == '>=':
        return (threshold, max_val)
    if operator == '>':
        return (just_above, max_val)
    raise ValueError(f"Unsupported operator: {operator}")

def calculate_jaccard_similarity(expr1, expr2, variable_ranges):
    """
    Calculate a Jaccard-style similarity between two comparison expressions.

    Both expressions must have the format "variable operator value".

    How the score is built:
    - Expressions that fail to parse, that are about different variables, or
      whose variable is missing from ``variable_ranges`` score 0.0 (a message
      is printed for the parse and lookup failures).
    - '==' against '==' scores 1.0 when both values are numerically equal and
      0.0 otherwise ("30" and "30.0" are the same value).
    - '==' against any other operator scores 0.0 when the point is outside
      the other expression's interval(s); otherwise the point counts as an
      intersection of length 1 over the span of the other expression.
    - Every other pair uses the summed length of the pairwise interval
      overlaps divided by the length of the span covering both expressions
      (the span, not the exact set union).
    - When that span has no positive length the score is 1.0 if both
      expressions describe the same range and 0.0 otherwise, instead of
      dividing by zero.

    Parameters:
    expr1 (str), expr2 (str): The expressions to compare.
    variable_ranges (dict): Maps a variable name to a dict holding its
        'min_value' and 'max_value'.

    Returns:
    float: The similarity score.

    Raises:
    ValueError: If a value is not numeric (raised by get_expression_range).
    """
    try:
        var1, op1, val1 = parse_expression(expr1)
        var2, op2, val2 = parse_expression(expr2)
    except ValueError as e:
        print(e)
        return 0.0

    # Expressions about different variables are never similar.
    if var1 != var2:
        return 0.0

    # Look up the variable's domain.
    var_info = variable_ranges.get(var1)
    if not var_info:
        print(f"Variable {var1} not found in ranges.")
        return 0.0

    min_val = var_info['min_value']
    max_val = var_info['max_value']

    # Interval (or, for '!=', list of two intervals) allowed by each expression.
    range1 = get_expression_range(var1, op1, val1, min_val, max_val)
    range2 = get_expression_range(var2, op2, val2, min_val, max_val)

    def _as_intervals(rng):
        """Return a range as a list of (low, high) tuples."""
        return rng if isinstance(rng, list) else [rng]

    def _bounds(rng):
        """Return the lowest low bound and highest high bound of a range."""
        intervals = _as_intervals(rng)
        return min(r[0] for r in intervals), max(r[1] for r in intervals)

    def _contains(rng, point):
        """Tell whether the point lies inside any interval of the range."""
        return any(low <= point <= high for low, high in _as_intervals(rng))

    def calculate_intersection_union(range1, range2):
        """
        Helper function to calculate intersection and union of two ranges.
        """
        overlaps = [
            (max(r1[0], r2[0]), min(r1[1], r2[1]))
            for r1 in _as_intervals(range1)
            for r2 in _as_intervals(range2)
        ]
        # Pairs that do not overlap come out inverted (low > high); skip them.
        intersection_length = sum(high - low for low, high in overlaps if low <= high)

        lower1, upper1 = _bounds(range1)
        lower2, upper2 = _bounds(range2)
        union_length = max(upper1, upper2) - min(lower1, lower2)

        return intersection_length, union_length

    if op1 == '==' and op2 == '==':
        # Compare the parsed floats, not the raw strings.
        return 1.0 if range1 == range2 else 0.0

    if op1 == '==' or op2 == '==':
        # One side is a single point, the other an interval or a '!=' pair.
        point, other = (range1[0], range2) if op1 == '==' else (range2[0], range1)
        # Test the actual intervals so the value excluded by '!=' is a miss.
        if not _contains(other, point):
            return 0.0
        lower, upper = _bounds(other)
        intersection_length = 1
        union_length = upper - lower
    else:
        intersection_length, union_length = calculate_intersection_union(range1, range2)

    # Guard the division: a span without positive length has no ratio.
    if union_length <= 0:
        return 1.0 if range1 == range2 else 0.0

    # Calculate the Jaccard similarity
    similarity = intersection_length / union_length
    return similarity

# Load the variable ranges from the file (path is relative to the working directory)
variable_ranges_from_file = load_variable_ranges_from_file('variables_config.json')

# Test cases: one pair of expressions per operator combination of interest
test_cases = [
    ("height == 30", "height <= 40"),  # equality with range
    ("height == 30", "height == 30"),  # equality with equality (same)
    ("height == 30", "height == 40"),  # equality with equality (different)
    ("height >= 30", "height <= 40"),  # range with range
    ("height != 30", "height <= 40"),  # inequality with range
    ("height != 30", "height == 40")   # inequality with equality
]

# Similarity of every pair, keyed by a readable "expr1 vs expr2" label
results = {f"{expr1} vs {expr2}": calculate_jaccard_similarity(expr1, expr2, variable_ranges_from_file)
           for expr1, expr2 in test_cases}

# # Convert results to DataFrame and display
# results_df = pd.DataFrame(results.items(), columns=["Test Case", "Similarity"])
# results_df
results


{'height == 30 vs height <= 40': 0.025,
 'height == 30 vs height == 30': 1.0,
 'height == 30 vs height == 40': 0.0,
 'height >= 30 vs height <= 40': 0.1,
 'height != 30 vs height <= 40': 0.39999999998,
 'height != 30 vs height == 40': 0.01}

In [59]:
# Compare a strict bound with its inclusive counterpart. The strict bound is
# offset by 1e-9 in get_expression_range, so the score is not exactly 1.
leaf1 = "height > 1"
leaf2 = "height >= 1"

similarity = calculate_jaccard_similarity(leaf1, leaf2, variable_ranges_from_file)
similarity

0.999999999989899

In [60]:
# Known limitation: the OR case is not handled yet.
# Known limitation: interval-style input such as 30<=var<=40 is not handled either.

# First expression to compare: parse it and extract its comparison leaves.
expression1 = "(a <= 3 AND b > 10 AND c == 10)"
parsed_expression1 = logical_expr.parseString(expression1, parseAll=True)
leaves1 = extract_leaves(parsed_expression1.asList())
print("Folhas extraídas:", leaves1)

Folhas extraídas: ['a <= 3', 'b > 10', 'c == 10']


In [61]:
# Second expression to compare: same variables as expression1, different bounds.
expression2 = "(a <= 4 AND b > 6 AND c == 9)"
parsed_expression2 = logical_expr.parseString(expression2, parseAll=True)
leaves2 = extract_leaves(parsed_expression2.asList())
print("Folhas extraídas:", leaves2)

Folhas extraídas: ['a <= 4', 'b > 6', 'c == 9']


In [62]:
# Split each clause and build a dictionary keyed by the clause's variable
def parse_clauses(clauses):
    """
    Index clauses such as 'a <= 3' by their leading variable name.

    Parameters:
    clauses (iterable of str): Clauses whose first whitespace-separated token
        is the variable name.

    Returns:
    dict: Variable name mapped to the full clause; when a variable appears
    more than once, the last clause wins.

    Raises:
    ValueError: If a clause has fewer than two whitespace-separated parts.
    """
    by_variable = {}
    for clause in clauses:
        # Two-target unpacking rejects clauses that lack a second part.
        name, _remainder = clause.split(maxsplit=1)
        by_variable[name] = clause
    return by_variable

# Index both leaf lists by variable name
parsed_vector1 = parse_clauses(leaves1)
parsed_vector2 = parse_clauses(leaves2)

# Build the list of (clause from expression 1, clause from expression 2)
# pairs; variables that appear in only one expression are skipped.
result_tuples = []
for variable in parsed_vector1:
    if variable in parsed_vector2:
        result_tuples.append((parsed_vector1[variable], parsed_vector2[variable]))

# Show the result
print(result_tuples)

[('a <= 3', 'a <= 4'), ('b > 10', 'b > 6'), ('c == 10', 'c == 9')]


In [63]:
# Similarity of each matched clause pair. This rebinds `results`, replacing
# the dictionary built for the earlier test cases.
results = {f"{leaf1} vs {leaf2}": calculate_jaccard_similarity(leaf1, leaf2, variable_ranges_from_file)
for leaf1, leaf2 in result_tuples}

results


{'a <= 3 vs a <= 4': 0.75, 'b > 10 vs b > 6': 0.0, 'c == 10 vs c == 9': 0.0}

In [None]:
#### calcular a similaridade de Jaccard com média ponderada

In [None]:
##### calcular a outra similaridade, com penalização por falta de features


In [None]:
#### calcular a similaridade total para uma expressão

In [None]:
# realizar os passos 1, 2 e 3 para a pós-condição


In [None]:
# calcular a similaridade total para o cenário completo