In [None]:
from google.colab import drive
import os
import pandas as pd

# Make Google Drive available to the Colab runtime under /content/drive.
drive.mount('/content/drive')

# Dataset location inside the mounted Drive; `df` is consumed by later cells.
csv_file_path = '/content/drive/My Drive/Projet 2CS/DATASET/dataset.csv'
df = pd.read_csv(filepath_or_buffer=csv_file_path)

# Report which file was read.
print(f"DataFrame loaded successfully from: {csv_file_path}")


In [None]:
!pip install astor
!pip install deap



In [None]:
import ast
import astor

In [None]:
# Program under repair: the 'Content' field of the dataset row at position 13.
erroneous_program = df['Content'].iloc[13]
print(erroneous_program)

# Echo the program again with 1-based line numbers so statements can be
# referenced by line.
lines = erroneous_program.split('\n')
for i, line in enumerate(lines, start=1):
    print("Line {}: {}".format(i, line))

# Spot-check of the element at index 5 of `lines`.
print('lines[5] ', lines[5])


In [None]:
import subprocess
import re

def evaluate_program(erroneous_program):
    # Execute the erroneous program as a subprocess
    result = subprocess.run(['python', '-c', erroneous_program], capture_output=True, text=True)

    # Initialize counters
    total_tests = 0
    total_failed_tests = 0

    # Extract information from the stderr
    stderr_output = result.stderr

    # Use regex to find the number of tests ran
    match = re.search(r'Ran (\d+) tests', stderr_output)
    if match:
        total_tests = int(match.group(1))

    # Use regex to find the number of failures and errors
    match_failures = re.search(r'FAILED \((failures=(\d+))?(, )?(errors=(\d+))?\)', stderr_output)
    if match_failures:
        failures = match_failures.group(2)
        errors = match_failures.group(5)
        if failures:
            total_failed_tests += int(failures)
        if errors:
            total_failed_tests += int(errors)

    # Calculate the number of successful tests
    successful_tests = total_tests - total_failed_tests
    failed_tests = total_failed_tests

    print('total_tests : ', total_tests)
    print('failed_tests (including errors) : ', total_failed_tests)
    print('successful_tests : ', successful_tests)

    return total_tests, failed_tests, successful_tests

In [None]:
def replace_expression_condition(code, line_number, old_expression, new_expression):
    """Replace the condition of a ``while`` statement located on a given line.

    Args:
        code: Python source code to transform.
        line_number: 1-based line number of the ``while`` statement.
        old_expression: Source text of the condition expected on that line.
            The replacement only happens when the existing condition has the
            same AST as this expression.
        new_expression: Source text of the condition to install instead.

    Returns:
        The source regenerated from the (possibly modified) AST with
        ``astor.to_source``. Formatting and comments of the input are
        therefore not preserved, even when nothing was replaced.

    Raises:
        SyntaxError: if ``code`` itself cannot be parsed. A syntax error in
            ``old_expression``/``new_expression`` is only reported with a
            print and leaves the matching loop unchanged.
    """
    print("Old Expression:", old_expression)
    print("New Expression:", new_expression)
    tree = ast.parse(code)

    class ReplaceExpression(ast.NodeTransformer):
        """Swap the test of the ``while`` found on ``line_number``."""

        def visit_While(self, node):
            if getattr(node, 'lineno', None) == line_number:
                try:
                    old_expr_ast = ast.parse(old_expression, mode='eval').body
                    new_expr_ast = ast.parse(new_expression, mode='eval').body
                except SyntaxError as e:
                    print("SyntaxError:", e)
                    return node

                if self.compare_expr(node.test, old_expr_ast):
                    node.test = new_expr_ast

            # generic_visit walks every child field, including the `orelse`
            # list. Calling self.visit() on that list directly (as was done
            # before) raises AttributeError because a list is not an AST node.
            self.generic_visit(node)
            return node

        def compare_expr(self, expr1, expr2):
            # ast.dump omits positions by default, so this is a structural
            # comparison of the two expressions.
            return ast.dump(expr1) == ast.dump(expr2)

    transformer = ReplaceExpression()
    transformed_tree = transformer.visit(tree)

    modified_code = astor.to_source(transformed_tree)

    return modified_code


In [None]:
from pyparsing import Forward, Literal, Word, alphas, nums, alphanums

# Grammar for prefix expressions of the form  op(expr, expr)  whose leaves are
# identifiers or integer literals.
expr = Forward()
identifier = Word(alphas + '_', alphanums + '_')
# The minus sign of a negative literal is kept as its own token. Suppressing
# it (as was done before) silently turned "-5" into "5"; with the sign kept,
# the two-token case of infix_action renders it as "-(5)".
operand = Word(nums) | (Literal('-') + Word(nums))
# Alternatives are tried in order, so '<=' and '>=' must stay ahead of '<' and '>'.
op = (
    Literal('+') | Literal('-') | Literal('*') | Literal('/')
    | Literal('<=') | Literal('>=') | Literal('<') | Literal('==')
    | Literal('!=') | Literal('>') | Literal('and') | Literal('or')
    | Literal('while_')
)
open_paren = Literal("(").suppress()
close_paren = Literal(")").suppress()
comma = Literal(",").suppress()
expr <<= op + open_paren + expr + comma + expr + close_paren | identifier | operand

# Define the infix notation with the correct operator precedence
def infix_action(tokens):
    """Render one parsed prefix node as infix source text.

    ``tokens`` is the token sequence of a single match:
      * ``['while_', cond, body]`` -> ``"while cond: body"``
      * ``[x]``                    -> ``x`` unchanged
      * ``[op, x]``                -> ``"op(x)"``
      * ``[op, lhs, rhs]``         -> ``"(lhs op rhs)"``
    """
    head = tokens[0]
    if head == 'while_':
        return "while {}: {}".format(tokens[1], tokens[2])

    count = len(tokens)
    if count == 1:
        return head
    if count == 2:
        # Prefix/unary form.
        return "{}({})".format(head, tokens[1])
    return "({} {} {})".format(tokens[1], head, tokens[2])

# Attach infix_action as the parse action of `expr`, so parsing a prefix
# expression yields its infix rendering.
expr.setParseAction(infix_action)


# Emits an empty line in the cell output; it has no other effect.
print()




In [None]:
import ast
import keyword
import builtins

def extract_variables_constants(erroneous_program):
    """Collect candidate variable names and literal constants from a program.

    A name counts as a variable when it is read (non-Store context) somewhere
    in the program and is none of the following:
      * a Python keyword or a builtin name,
      * the name of a function or class defined in the program,
      * a name bound by an import (``import x``, ``import x as y``,
        ``from m import x [as y]``),
      * a name read inside a function whose name starts with ``test_``.

    Constants are the int/float/str literals of the program, except the
    string ``'__main__'``.

    Args:
        erroneous_program: Python source code.

    Returns:
        ``(variables, constants)`` as two lists. Both are sorted (variables
        alphabetically; constants with numbers first, then strings) so the
        result, and therefore the ARGn -> variable mapping built from it,
        does not depend on set iteration order.

    Raises:
        SyntaxError: if the program cannot be parsed.
    """
    import sys  # local import: only needed for the legacy-version guard below

    tree = ast.parse(erroneous_program)
    nodes = list(ast.walk(tree))

    # Names introduced by definitions and imports; these are never variables.
    declared_names = set()
    for node in nodes:
        if isinstance(node, (ast.FunctionDef, ast.ClassDef)):
            declared_names.add(node.name)
        elif isinstance(node, (ast.Import, ast.ImportFrom)):
            for alias in node.names:
                declared_names.add(alias.name)
                # The identifier actually bound in the namespace is the alias
                # when there is one, otherwise the first component of a
                # dotted module path (`import a.b` binds `a`).
                declared_names.add(alias.asname or alias.name.split('.')[0])

    builtins_names = set(dir(builtins))

    # Names read inside test methods belong to the test harness, not to the
    # code being repaired.
    test_method_names = set()
    for node in nodes:
        if isinstance(node, ast.FunctionDef) and node.name.startswith("test_"):
            for child_node in ast.walk(node):
                if isinstance(child_node, ast.Name) and not isinstance(child_node.ctx, ast.Store):
                    test_method_names.add(child_node.id)

    excluded = set(keyword.kwlist) | declared_names | builtins_names | test_method_names

    variables = set()
    constants = set()
    for node in nodes:
        if isinstance(node, ast.Name):
            if not isinstance(node.ctx, ast.Store) and node.id not in excluded:
                variables.add(node.id)
        elif isinstance(node, ast.Constant):  # literals on Python 3.8+
            value = node.value
            if isinstance(value, (int, float, str)) and value != '__main__':
                constants.add(value)
        elif sys.version_info < (3, 8) and isinstance(node, ast.Num):
            # Python <= 3.7 represents numeric literals as ast.Num. The name
            # is only touched on those versions because it is a deprecated
            # alias that newer Python versions drop.
            constants.add(node.n)

    # Numbers sort before strings; within each group natural ordering applies.
    ordered_constants = sorted(constants, key=lambda c: (isinstance(c, str), c))
    return sorted(variables), ordered_constants

# Extract the variable names and constants of the program under repair.
# `variables` is read by a later cell to size the GP primitive set.
variables, constants = extract_variables_constants(erroneous_program)


In [None]:
import operator
import sys
import random
import numpy as np
import threading
import queue
from deap import algorithms, base, creator, tools, gp
import ast
import astor
import time  # Importer le module time pour mesurer le temps d'exécution


# Primitive set for the condition search: one input argument per extracted
# variable, the six binary comparison operators, and an ephemeral constant.
pset = gp.PrimitiveSet("MAIN", arity=len(variables))
for comparison in (operator.lt, operator.le, operator.eq,
                   operator.ne, operator.gt, operator.ge):
    pset.addPrimitive(comparison, arity=2)
pset.addEphemeralConstant("rand101", lambda: random.randint(0, 1))

# Single-objective fitness with weight -1.0 (lower is better) and the tree
# type carrying it.
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)

# Toolbox entries: tree generator, individual and population factories, and
# the tree compiler.
toolbox = base.Toolbox()
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("compile", gp.compile, pset=pset)

def translate_expr(individual, variables):
    """Rewrite the string form of a GP individual using operator symbols and variable names.

    Args:
        individual: Object whose ``str()`` is a prefix expression such as
            ``"lt(ARG0, ge(ARG1, 1))"``.
        variables: Sequence of variable names; ``ARGi`` is replaced by
            ``variables[i]``.

    Returns:
        The prefix expression with ``lt/le/eq/ne/gt/ge`` replaced by
        ``< <= == != > >=`` and every ``ARGi`` replaced by its variable name,
        e.g. ``"<(a, >=(b, 1))"``. An ``ARGi`` whose index is outside
        ``variables`` is left untouched.
    """
    operator_symbols = {
        "lt": "<", "le": "<=", "eq": "==",
        "ne": "!=", "gt": ">", "ge": ">=",
    }

    expr = str(individual)
    # Whole-token substitution of the primitive names.
    expr = re.sub(r'\b(lt|le|eq|ne|gt|ge)\b',
                  lambda match: operator_symbols[match.group(1)], expr)

    # Kept as in the original: any literal "rand101" is replaced by a fresh
    # random integer (and one random number is drawn on every call).
    # NOTE(review): the ephemeral is registered with randint(0, 1) but this
    # uses randint(-10, 10) -- confirm which range is intended.
    expr = expr.replace("rand101", str(random.randint(-10, 10)))

    def substitute_argument(match):
        index = int(match.group(1))
        return variables[index] if index < len(variables) else match.group(0)

    # Replace ARGn as a whole token in a single pass. Sequential str.replace
    # of "ARG1" would also rewrite the prefix of "ARG10".."ARG19".
    return re.sub(r'\bARG(\d+)\b', substitute_argument, expr)

def get_variable_name(node, variables):
    """Return ``node.id`` when ``node`` is an ``ast.Name`` listed in ``variables``, else ``None``."""
    if isinstance(node, ast.Name) and node.id in variables:
        return node.id
    return None


def has_duplicate_vars(expr, variables):
    """Tell whether some operation in ``expr`` uses the same variable twice.

    Every binary operation and every comparison in the expression is
    inspected; the result is ``True`` when one of them has the same known
    variable as two of its direct operands (``a < a``, ``a + a``,
    ``a < b < a``).

    Operands that are not known variables (literals, sub-expressions, names
    missing from ``variables``) are ignored. Previously two such operands
    compared equal as ``None == None`` and were reported as duplicates.

    Args:
        expr: Source text of a Python expression.
        variables: Collection of variable names to consider.

    Returns:
        ``True`` if a duplicate is found, ``False`` otherwise, including
        when ``expr`` cannot be parsed.
    """
    try:
        tree = ast.parse(expr, mode='eval')
    except (SyntaxError, ValueError):
        # Best effort: an unparsable expression is reported as "no duplicates".
        return False

    for node in ast.walk(tree):
        if isinstance(node, ast.BinOp):
            operands = [node.left, node.right]
        elif isinstance(node, ast.Compare):
            operands = [node.left, *node.comparators]
        else:
            continue

        names = [
            name
            for name in (get_variable_name(operand, variables) for operand in operands)
            if name is not None
        ]
        if len(names) != len(set(names)):
            return True
    return False



def evalSymbReg_with_timeout(individual, variables, timeout_event, result_queue,
                             line_number=9, old_expression="a != 0", penalty=25):
    """Worker body: evaluate one GP individual and publish its score.

    The individual is translated to an infix condition, substituted for
    ``old_expression`` in the ``while`` statement on ``line_number`` of the
    module-level ``erroneous_program``, and the patched program's tests are
    run.

    Args:
        individual: GP individual to evaluate.
        variables: Variable names used to translate ``ARGn`` placeholders.
        timeout_event: ``threading.Event`` set when this function is done,
            whatever the outcome.
        result_queue: Queue that receives exactly one
            ``(failed_tests, total_tests)`` tuple.
        line_number: Line of the ``while`` statement to patch (default 9,
            the previously hard-coded value).
        old_expression: Condition expected on that line (default
            ``"a != 0"``, the previously hard-coded value).
        penalty: Score reported when the candidate cannot be evaluated or
            when no test ran at all.
    """
    try:
        prefix_expression = translate_expr(individual, variables)
        # The parse yields a single token, the infix string; take it directly
        # instead of slicing the repr of the ParseResults object.
        new_expression = str(expr.parseString(str(prefix_expression))[0]).strip()

        erroneous_code = replace_expression_condition(
            erroneous_program, line_number, old_expression, new_expression)
        total_tests, failed_tests, successful_tests = evaluate_program(erroneous_code)

        if total_tests == 0:
            # No test ran (e.g. the patched program crashed before the test
            # runner produced a summary). Without this, such a candidate
            # would score 0 failures and look like a perfect repair.
            failed_tests = penalty
        if has_duplicate_vars(new_expression, variables):
            # Penalise conditions that use the same variable on both sides.
            failed_tests += 10
        result_queue.put((failed_tests, total_tests))
    except Exception as e:
        print("Evaluation error:", e)
        # Always publish a result. The previous code read `total_tests` here,
        # which is unbound when the failure happens before the tests ran; the
        # resulting NameError left the queue empty.
        result_queue.put((penalty, 0))
    finally:
        timeout_event.set()  # signal the end of the evaluation

def evalSymbReg(individual, variables, timeout=3, penalty=25):
    """Fitness function: number of failing tests for ``individual`` (lower is better).

    The evaluation runs in a separate thread so that a candidate which never
    finishes can be abandoned after ``timeout`` seconds.

    Args:
        individual: GP individual to evaluate.
        variables: Variable names used to translate ``ARGn`` placeholders.
        timeout: Seconds to wait for the evaluation (default 3).
        penalty: Fitness returned on timeout or when no result is available
            (default 25).

    Returns:
        A one-element tuple ``(failed_tests,)`` as expected by DEAP.
    """
    timeout_event = threading.Event()  # set by the worker when it is done
    result_queue = queue.Queue()       # receives (failed_tests, total_tests)

    # Daemon thread: an abandoned (timed-out) evaluation must not keep the
    # interpreter alive.
    eval_thread = threading.Thread(
        target=evalSymbReg_with_timeout,
        args=(individual, variables, timeout_event, result_queue),
        daemon=True,
    )
    eval_thread.start()
    eval_thread.join(timeout=timeout)

    if not timeout_event.is_set():
        print("Evaluation timed out.")
        return (penalty,)

    # The worker has finished, so a result is either already queued or will
    # never arrive. A blocking get() would hang forever in the latter case.
    try:
        evaluation_result = result_queue.get_nowait()
    except queue.Empty:
        evaluation_result = None

    if evaluation_result is None:
        print("Evaluation failed.")
        return (penalty,)

    failed_tests, _total_tests = evaluation_result
    return (failed_tests,)

def main():
    """Search for a replacement loop condition with genetic programming.

    Runs up to 10 generations on a population of 100 trees. Generation 0 is
    the initial population; every later generation is produced by tournament
    selection followed by crossover and mutation. The search stops early as
    soon as a candidate with fitness 0 (no failing test) is found. Progress
    statistics, the best expression found and the total run time are printed.
    """
    start_time = time.time()
    random.seed(42)
    variables, _ = extract_variables_constants(erroneous_program)

    if len(variables) < 1:
        print("Erreur: Nombre incorrect de variables extraites.")
        return

    variables_list = list(variables)
    pop = toolbox.population(n=100)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("std", np.std)
    stats.register("min", np.min)
    stats.register("max", np.max)

    toolbox.register("evaluate", evalSymbReg, variables=variables_list)
    toolbox.register("select", tools.selTournament, tournsize=3)
    toolbox.register("mate", gp.cxOnePoint)
    toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
    toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)

    best_solution = None          # best individual seen so far
    best_fitness = float('inf')   # minimisation: start above any real score

    for gen in range(10):
        if gen > 0:
            # Apply selection before variation. Previously the registered
            # selection operator was never called, so the population was
            # only ever mutated/recombined without any selection pressure.
            parents = toolbox.select(pop, len(pop))
            pop = algorithms.varAnd(parents, toolbox, cxpb=0.5, mutpb=0.1)

        # Only individuals without a valid fitness (new or modified by
        # varAnd) need to be evaluated; each evaluation runs a subprocess.
        pending = [ind for ind in pop if not ind.fitness.valid]
        fitnesses = toolbox.map(toolbox.evaluate, pending)
        for ind, fit in zip(pending, fitnesses):
            ind.fitness.values = fit
            if fit[0] < best_fitness:
                # Keep an independent copy of the best individual.
                best_solution = toolbox.clone(ind)
                best_fitness = fit[0]

        record = stats.compile(pop)
        print("Génération {}: Min {}, Max {}, Avg {}, Std {}".format(gen, record["min"], record["max"], record["avg"], record["std"]))

        # Stop as soon as a candidate passes every test case.
        if best_fitness == 0:
            best_expr_str = translate_expr(best_solution, variables_list)
            best_expr_str2 = expr.parseString(str(best_expr_str))[0]
            print("Tous les cas de test réussis. Meilleur individu trouvé.")
            print("Meilleure expression infixée de l'individu:", best_expr_str2)
            break

    if best_fitness != 0:
        # No perfect candidate: report the best one found over all generations.
        best_expr_str = translate_expr(best_solution, variables_list)
        best_expr_str2 = expr.parseString(str(best_expr_str))[0]
        print("Best individual infix expression:", best_expr_str2)
        print("Meilleure expression infixée après toutes les générations:", best_expr_str2)
    end_time = time.time()
    execution_time = end_time - start_time
    print("execution time:",execution_time)
if __name__ == "__main__":
    main()
