# Evaluate localization strategies

This notebook does a comparative evaluation of different localization strategies.
- Defines a base interface for localization
- Implements a few localization strategies
- Defines an evaluator the runs a test suite on those localization strategies
- Evaluator dumps the results in a pandas dataframe
- Uses Milvus as the vector database
- Uses OpenAI's embeddings model
- Uses langchain's abstractions for processing

In [1]:
import os
import tempfile
import pandas as pd
from typing import Dict, List, Tuple
from abc import ABC, abstractmethod
from langchain_core.documents import Document
from langchain_milvus import Milvus
from langchain_openai import OpenAIEmbeddings

## Base interface for localization strategies

In [2]:
class Strategy(ABC):
    @abstractmethod
    def localize(self, issue: Dict[str, str], top_n: int) -> List[Tuple[str, str]]:
        """
        Localizes the issue to a set of relevant packages and files.

        Args:
            issue (Dict[str, str]): A dictionary containing issue details with at least:
                - `title` (str): The title of the issue.
                - `description` (str): The detailed description of the issue.
            top_n (int): The maximum number of localization results to return.

        Returns:
            List[Tuple[str, str]]: A list of tuples representing relevant localization results,
                each containing `package` (str) and `file` (str).
        """
        pass

## Semantic vector search strategy

This implements a simple semantic vector search strategy. It uses Milvus as the vector database and OpenAI's embeddings model. Implementation may be used as-is for multiple strategies by feeding in different types of sources. E.g.,
- **Code file embeddings**: Providing a `source_dir` pointing to code files will directly embed code
- **Code semantics embeddings**: Providing a `source_dir` pointing to semantic descriptions of code files will embed code semantics

In [None]:
class SemanticVectorSearchStrategy(Strategy):
    def __init__(self, source_dir: str, root_package_name: str, embeddings, strategy_name: str):
        self.strategy_name = strategy_name
        self.vector_store = self.create_vector_store(source_dir, root_package_name, embeddings)

    def create_vector_store(self, folder_path: str, root_package_name: str, embeddings) -> Milvus:
        """Creates a Milvus vector store from the files in the specified folder."""
        documents = self.create_documents(folder_path, root_package_name)
        with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp_file:
            uri = tmp_file.name
        return Milvus.from_documents(
            documents,
            embeddings,
            collection_name=root_package_name,
            connection_args={"uri": uri},
        )
    
    def create_documents(self, folder_path: str, root_package_name: str) -> List[Document]:
        """Create a list of Document instances from the files in the specified folder."""
        documents = []
        for root, _, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)
                with open(file_path, "r") as f:
                    page_content = f.read()
                if not page_content.strip():
                    continue
                filename = file.split('.')[0]
                relative_path = os.path.relpath(root, folder_path)
                package = (f"{root_package_name}/{relative_path.replace(os.sep, '/')}"
                           if relative_path != "." else root_package_name)
                document = Document(
                    page_content=page_content,
                    metadata={"file": filename, "package": package}
                )
                documents.append(document)
        return documents

    def localize(self, issue: Dict[str, str], top_n: int) -> List[Tuple[str, str]]:
        query_string = f"{issue['title']}: {issue['description']}"
        results = self.vector_store.similarity_search(query_string, k=top_n)
        return [(res.metadata["package"], res.metadata["file"]) for res in results]

## Evaluator

In [4]:
class LocalizationEvaluator:
    def __init__(self, issues_dir: str, strategies_to_evaluate: List[Strategy]):
        self.issues_dir = issues_dir
        self.strategies = strategies_to_evaluate

    def evaluate(self) -> pd.DataFrame:
        """Evaluates each strategy on all test issues and returns a DataFrame with results."""
        df = pd.DataFrame(columns=["Test case"] + [f"Results ({strategy.strategy_name})" for strategy in self.strategies])

        # Iterate over files in the issues directory
        for root, _, files in os.walk(self.issues_dir):
            for file in files:
                file_path = os.path.join(root, file)
                test_case_name = os.path.splitext(file)[0].replace('_', ' ').replace('-', ' ')
                with open(file_path, 'r') as f:
                    issue_content = f.read().strip()
                issue = {"title": test_case_name, "description": issue_content}

                # Collect localization results for each strategy
                results = {}
                for strategy in self.strategies:
                    localization_results = strategy.localize(issue, top_n=5)
                    results[f"Results ({strategy.strategy_name})"] = localization_results

                # Append the data to the DataFrame
                row_data = {"Test case": test_case_name, **results}
                df = pd.concat([df, pd.DataFrame([row_data])], ignore_index=True)
        return df

**Test setup**

In [5]:
projects_store = "/Users/pdhoolia/se-agent-projects"
repo_full_name = "conversational-ai/se-agent"
src_dir = "se_agent"

code_dir = os.path.join(projects_store, repo_full_name, "repo", src_dir)
code_semantics_dir = os.path.join(projects_store, repo_full_name, "metadata", "package_details")
test_issues_folder = "test/issues"

**Embeddings**

In [6]:
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

**Strategies**

In [7]:
code_file_embeddings = SemanticVectorSearchStrategy(code_dir, src_dir, embeddings, strategy_name="Code File Embeddings")
code_semantics_embeddings = SemanticVectorSearchStrategy(code_semantics_dir, src_dir, embeddings, strategy_name="Code Semantics Embeddings")

strategies_to_evaluate = [code_file_embeddings, code_semantics_embeddings]

**Evaluate**

In [8]:
evaluator = LocalizationEvaluator(
    issues_dir=test_issues_folder,
    strategies_to_evaluate=strategies_to_evaluate
)

evaluation_results = evaluator.evaluate()

**Display results**

In [30]:
# Create a copy of the DataFrame for display purposes
display_df = evaluation_results.copy()

# Transform the 'Results' columns to display only the file name
display_df['Results (Code File Embeddings)'] = display_df['Results (Code File Embeddings)'].apply(lambda lst: [item[1] for item in lst])
display_df['Results (Code Semantics Embeddings)'] = display_df['Results (Code Semantics Embeddings)'].apply(lambda lst: [item[1] for item in lst])

# Set the index to start from 1
display_df.index = display_df.index + 1

# Apply left alignment to all columns, including headers
df_style = display_df.style \
    .set_table_attributes("style='width:100%'") \
    .set_properties(**{'text-align': 'left'}) \
    .set_table_styles([{
        'selector': 'th',
        'props': [('text-align', 'left')]
    }])

df_style

Unnamed: 0,Test case,Results (Code File Embeddings),Results (Code Semantics Embeddings)
2,avoid user annoyance,"['project', 'issue_analyzer', 'change_suggester', 'localizer', 'onboard_agent']","['issue_analyzer', 'github_listener', 'onboard_agent', 'change_suggester', 'localizer']"
3,structured output from semantic summar genaration task,"['package_summary', 'change_suggester', 'localizer', 'file_analyzer', 'project']","['package_summary', 'change_suggester', 'localizer', 'file_analyzer', '__init__']"
4,retry llm call on ratelimiterror,"['api', 'retry_with_backoff', 'localizer', 'change_suggester', 'project']","['retry_with_backoff', 'api', 'localizer', 'package_summary', 'change_suggester']"
5,rag for localization,"['localizer', 'change_suggester', 'project', 'api', 'issue_analyzer']","['localizer', 'change_suggester', 'api', 'issue_analyzer', 'package_summary']"
6,tool based code structure name gen,"['package_summary', 'change_suggester', 'file_analyzer', 'localizer', 'project']","['package_summary', 'change_suggester', '__init__', 'localizer', '__init__']"
