In [None]:
from modules.engine import *
from modules.log_reader import read_qsym_log
import logging
import pandas as pd
from dataclasses import asdict
from tqdm import tqdm
from abc import abstractmethod

In [None]:
# Configure the root logger so that INFO-level (and more severe) records are emitted.
logging.basicConfig(level=logging.INFO)

In [None]:
# Optional sanity check (disabled by default): for every pair of queries with
# equal dependencies where one path is a prefix of the other, the constraints
# of the shorter query must be a prefix of the constraints of the longer one.
perform_assumption_assertion = False
if perform_assumption_assertion:
    for i, first in enumerate(tqdm(queries)):
        # Only pairs (i, j) with j > i are compared.
        for second in queries[i + 1:]:
            if first.dependencies != second.dependencies:
                continue

            # On equal path lengths the second query is treated as the shorter one.
            if len(first.path) < len(second.path):
                shorter, longer = first, second
            else:
                shorter, longer = second, first

            # Skip pairs whose paths do not share the shorter path as a prefix.
            if shorter.path != longer.path[:len(shorter.path)]:
                continue

            assert shorter.constraints == longer.constraints[:len(shorter.constraints)], f"{shorter}, {longer}"

    print("Assumption of same prefices generate same constraints holds.")

### Strategies
The solver selection strategy is the component that determines how solvers are used or generated for queries.

#### Basic
Generates a new solver for each dependency set and never upgrades it, so the solver is always empty.

##### Rationale
Works as a base case (with the benefit of dependency sets).

#### Negation
Generates a new solver for each branch negation. By negation we mean the negation of the last constraint that symbolic/concolic execution engines check to make a diverging test case.

##### Rationale
The longer queries of the current program execution, as well as those generated by the new diverging execution, will share a common prefix.

#### Exact Path
Generates a new solver for almost every query.

##### Rationale
This strategy is only useful when there are many exactly repeated queries. It can also be used to measure the number of repeated queries and to check the sanity of concolic executions.


In [None]:
class LengthSolverSelectionStrategy(SolverSelectionStrategy):
    """Base class for strategies that accept or replace a found solver.

    Subclasses decide, through ``_is_appropriate``, whether the solver found
    for a query can be used as-is, and, through ``_get_upgrade_parts``, which
    path and constraints a newly created solver is built from.
    """

    def __init__(self, log_level=logging.DEBUG, use_copy=False, use_exact_for_optimistic=True):
        """Set up the strategy.

        Args:
            log_level: Forwarded to the base strategy constructor.
            use_copy: Forwarded to the base ``_create_solver`` on every
                solver creation.
            use_exact_for_optimistic: When true, an
                ``ExactPathSolverSelectionStrategy`` is created and stored as
                ``self.exact_strategy``.
        """
        super().__init__(log_level=log_level)
        self.use_copy = use_copy
        if use_exact_for_optimistic:
            # Keyword arguments are required here: the constructor's positional
            # order is (log_level, use_copy, ...), so passing
            # (use_copy, log_level) positionally would swap the two values.
            self.exact_strategy = ExactPathSolverSelectionStrategy(
                log_level=log_level, use_copy=use_copy)

    def _create_solver(self, path, constraints, existing_solver: MySolver | None = None):
        """Create a solver via the base class, applying this strategy's ``use_copy``."""
        return super()._create_solver(path, constraints, existing_solver, self.use_copy)

    def get_solver(self, found_solver: MySolver | None, query: Query) -> MySolver:
        """Return the solver to use for ``query``.

        Args:
            found_solver: Solver already available for the query, or ``None``.
            query: Query to solve; the action of its last path element is read
                from ``query.path[-1][1]``.

        Returns:
            The found solver, a solver created from the upgrade parts, or an
            empty solver (optimistic queries without a found solver).
        """
        # NOTE: delegating optimistic queries to ``self.exact_strategy`` is
        # currently disabled; they reuse the found solver unchanged.
        if query.path[-1][1] == BranchAction.OPTIMISTIC:
            if found_solver is None:
                return self.create_empty_solver()
            return found_solver

        if found_solver is None:
            path, constraints = self._get_upgrade_parts(query)
            solver = self._create_solver(path, constraints)
            self._log("No solver available for query with id = %d, created a new one: %s", query.id, solver)
            return solver

        self._log("Found solver for query with id = %d: %s",
                  query.id, found_solver)
        if self._is_appropriate(found_solver, query):
            self._log("Solver is appropriate")
            return found_solver

        # The found solver is not sufficient: build a new one from the upgrade
        # parts, handing the found solver over as the existing solver.
        self._log("Creating a new solver for a longer path")
        path, constraints = self._get_upgrade_parts(query)
        return self._create_solver(path, constraints, found_solver)

    @abstractmethod
    def _is_appropriate(self, found_solver: MySolver, query: Query) -> bool:
        """Return whether ``found_solver`` can be used for ``query`` unchanged."""
        pass

    @abstractmethod
    def _get_upgrade_parts(self, query):
        """Return the ``(path, constraints)`` pair a new solver is created from."""
        pass

    def __setattr__(self, __name: str, __value):
        # Mirror assignments of ``create_empty_solver`` onto the nested exact
        # strategy (once it exists) so both strategies share the same value.
        if __name == "create_empty_solver" and hasattr(self, "exact_strategy"):
            setattr(self.exact_strategy, __name, __value)
        return super().__setattr__(__name, __value)


class NegationSolverSelectionStrategy(LengthSolverSelectionStrategy):
    """Strategy that works with the query's path and constraints minus their last element."""

    def _is_appropriate(self, found_solver: MySolver, query: Query) -> bool:
        """Accept a solver whose ``stack_path_len`` is at least ``len(query.path) - 1``."""
        available = found_solver.stack_path_len
        required = len(query.path) - 1
        return available >= required

    def _get_upgrade_parts(self, query):
        """Return the query's path and constraints, each without its final element."""
        prefix_path = query.path[:-1]
        prefix_constraints = query.constraints[:-1]
        return prefix_path, prefix_constraints


class ExactPathSolverSelectionStrategy(LengthSolverSelectionStrategy):
    """Strategy that works with the query's complete path and constraints."""

    def __init__(self, log_level=logging.DEBUG, use_copy=False, use_exact_for_optimistic=False):
        """Forward all options to the parent strategy unchanged."""
        super().__init__(
            log_level=log_level,
            use_copy=use_copy,
            use_exact_for_optimistic=use_exact_for_optimistic,
        )

    def _is_appropriate(self, found_solver: MySolver, query: Query) -> bool:
        """Accept only a solver whose ``stack_path_len`` equals the query's path length."""
        available = found_solver.stack_path_len
        required = len(query.path)
        return available == required

    def _get_upgrade_parts(self, query):
        """Return the query's full path and full constraints."""
        full_path = query.path
        full_constraints = query.constraints
        return full_path, full_constraints


class LengthRatioSolverSelectionStrategy(ExactPathSolverSelectionStrategy):
    """Strategy that accepts a solver covering at least ``ratio`` of the query's path length."""

    def __init__(self, ratio = 3/4, log_level=logging.DEBUG, use_copy=False, use_exact_for_optimistic=True) -> None:
        """Store ``ratio`` after forwarding the remaining options to the parent."""
        super().__init__(log_level, use_copy, use_exact_for_optimistic)
        self.ratio = ratio

    def _is_appropriate(self, found_solver: MySolver, query: Query) -> bool:
        """Accept a solver whose ``stack_path_len`` is at least ``len(query.path) * ratio``.

        A rejection is logged together with both lengths.
        """
        path_len = len(query.path)
        if found_solver.stack_path_len >= path_len * self.ratio:
            return True
        self._log("%d was not enough for %d", found_solver.stack_path_len, path_len)
        return False


In [None]:
# Strategies correctness
# Optional check (disabled by default): every strategy must produce the same
# result as the basic strategy on the first 300 queries.
perform_strategy_assertions = False

logging.getLogger().setLevel(logging.INFO)

if perform_strategy_assertions:
    target_queries = queries[:300]

    # Reference results come from a pool driven by the basic strategy.
    pool = SolverPool()
    pool.selection_strategy = BasicSolverSelectionStrategy()
    base_results = tuple(map(pool.solve, tqdm(target_queries)))

    strategies = [
        NegationSolverSelectionStrategy(use_copy=False),
        NegationSolverSelectionStrategy(use_copy=True),
        ExactPathSolverSelectionStrategy(),
        LengthRatioSolverSelectionStrategy(),
    ]
    for strategy in strategies:
        name = strategy.__class__.__name__
        logging.info("Asserting %s", name)

        # Each strategy gets its own fresh pool with prefix assertions enabled.
        pool = SolverPool()
        pool.selection_strategy = strategy
        pool.enable_solve_prefix_assertions()
        for i, q in enumerate(tqdm(target_queries)):
            result = pool.solve(q)
            assert result == base_results[i], f"Different result found for the {i}th query, in {name} strategy.\nDetails:\nQuery: {q.to_json_serializable()},\nStrategy Result: {result}, Base Result: {base_results[i]}"


In [None]:
def expand_times_column(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` whose ``times`` column is spread into one column per key.

    Each ``times`` cell must offer ``keys()`` and ``values()`` (e.g. a dict).
    The input frame itself is left unmodified.
    """
    def unpack(row):
        # Turn one row's mapping into a Series indexed by the mapping's keys.
        times = row["times"]
        return pd.Series(times.values(), index=times.keys())

    without_times = df.drop(columns=["times"])
    expanded = df.apply(unpack, axis=1)
    return without_times.join(expanded)

def describe(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df.describe()`` with an extra ``sum`` row holding the column sums."""
    summary = df.describe()
    totals = df.sum().to_frame("sum").T
    return pd.concat([summary, totals])

In [None]:
def evaluate(queries, strategy, max_solvers=200, repeat_count=5):
    """Solve all ``queries`` ``repeat_count`` times and summarise the pool statistics.

    Every repetition uses a fresh ``SolverPool(max_solvers=max_solvers)`` with
    ``strategy`` as its selection strategy. Timing statistics are described per
    run and averaged across runs; cache and counting statistics are taken from
    the pool of the last run.

    Returns:
        A dict with the keys ``cache``, ``solvers_counts``, ``solvers_times``
        and ``trees_times``.
    """
    def times_frame(stats_by_key):
        # One row per statistics entry, built from its ``times`` attribute.
        return pd.DataFrame([entry.times for entry in stats_by_key.values()])

    def average(frames):
        # Mean of equally labelled rows across runs; the "count" column is dropped.
        per_label_mean = pd.concat(frames).groupby(level=0).mean()
        return per_label_mean.transpose().drop(columns=["count"])

    per_run = []
    with tqdm(total=repeat_count * len(queries)) as progress:
        for _ in range(repeat_count):
            pool = SolverPool(max_solvers=max_solvers)
            pool.selection_strategy = strategy
            for query in queries:
                pool.solve(query)
                progress.update()

            solver_times = times_frame(pool.statistics.solvers)
            tree_times = times_frame(pool.statistics.trees)
            per_run.append((describe(solver_times), describe(tree_times)))

    solvers_times = average([solver_part for solver_part, _ in per_run])
    trees_times = average([tree_part for _, tree_part in per_run])

    # Counting statistics is same for each run, so the last pool is used.
    cache_stats = pool.statistics.cache
    raw_counts = pd.DataFrame(
        [asdict(entry) for entry in pool.statistics.solvers.values()])
    solvers_counts = describe(raw_counts.drop(columns=["times"])).transpose()

    return {
        "cache": cache_stats,
        "solvers_counts": solvers_counts,
        "solvers_times": solvers_times,
        "trees_times": trees_times,
    }


def evaluate_and_save(name, queries, strategy, max_solvers=200, repeat_count=5):
    """Run ``evaluate`` and write its result to ``./evaluations/<name>.json``.

    The ``evaluations`` directory is created when missing. The cache statistics
    are converted with ``asdict`` and the three frames with ``to_dict`` before
    being serialised as JSON.
    """
    import json
    from pathlib import Path

    result = evaluate(queries, strategy, max_solvers, repeat_count)

    to_write = {"cache": asdict(result["cache"])}
    for key in ("solvers_counts", "solvers_times", "trees_times"):
        to_write[key] = result[key].to_dict()

    Path("./evaluations").mkdir(exist_ok=True)
    with Path(f"./evaluations/{name}.json").open("w") as sink:
        json.dump(to_write, sink)

    logging.info("Wrote evaluation result to %s.json", name)


In [None]:
def remove_repeated_queries(queries):
    """Remove repeated queries from ``queries`` in place, keeping first occurrences.

    Two queries are considered repeated when their ``dependencies``, ``path``
    and ``constraints`` all compare equal. Repeated entries are first marked
    with ``None`` and then dropped, so the list handed back to the caller
    contains only queries (entries that were already ``None`` are dropped too).

    Args:
        queries: Mutable list of queries; modified in place.
    """
    total = len(queries)
    removed = 0
    # Exactly total * (total - 1) / 2 unordered pairs are visited.
    with tqdm(total=total * (total - 1) // 2) as pb:
        for i, first in enumerate(queries):
            later = total - i - 1
            if first is not None:
                for j in range(i + 1, total):
                    second = queries[j]
                    if second is None:
                        continue
                    if (first.dependencies == second.dependencies
                            and first.path == second.path
                            and first.constraints == second.constraints):
                        removed += 1
                        queries[j] = None
            # Account for all pairs (i, j > i) at once, including skipped ones.
            pb.update(later)

    # Drop the placeholders so callers never receive ``None`` entries.
    queries[:] = [query for query in queries if query is not None]
    logging.info("Removed %d repeated queries from %d", removed, total)

In [None]:
from multiprocessing import Pool

# Log files to evaluate; commented-out entries are currently skipped.
log_files = [
    # "quicksort_medium_export.log",
    # "The_longest_Road.log",
    "pattern_finder.log",
]
# Strategy instances, keyed by the short name used in the result file names.
strategies = {
    "basic": BasicSolverSelectionStrategy(),
    "negnocopy": NegationSolverSelectionStrategy(use_copy=False),
    "negcopy": NegationSolverSelectionStrategy(use_copy=True),
    "exactnocopy": ExactPathSolverSelectionStrategy(use_copy=False),
    "ratio3_4nocopy": LengthRatioSolverSelectionStrategy(3/4, use_copy=False),
    "ratio1_2nocopy": LengthRatioSolverSelectionStrategy(1/2, use_copy=False),
}
max_solvers_sizes = [200, 400, 1000]


def evaluate_for_strategy_max_solvers(strategy_name, max_solvers):
    """Evaluate one (strategy, pool size) combination and save the result.

    Reads the module-level ``log_file`` and ``eval_queries`` assigned by the
    loop below; both take part in the name of the result file.
    """
    name = f"{strategy_name}__{log_file}_{len(eval_queries)}__{max_solvers}"
    logging.info("Evaluating: %s", name)
    evaluate_and_save(
        name,
        eval_queries,
        strategies[strategy_name],
        max_solvers=max_solvers,
        repeat_count=2,
    )


for log_file in log_files:
    logging.info("Reading the log file: %s", log_file)
    eval_queries = read_qsym_log(log_file)[:5000]
    logging.info("Removing repeated queries")
    remove_repeated_queries(eval_queries)

    # Fan every (strategy, pool size) combination out to worker processes.
    with Pool() as p:
        p.starmap(
            evaluate_for_strategy_max_solvers,
            [(strategy, max_solvers)
             for strategy in strategies
             for max_solvers in max_solvers_sizes],
        )