# In [3]:
import weave
import asyncio
from docstringify.ingest import load_documents_from_folder # (Assumed helper)
from docstringify.loader import load_embeddings
from docstringify.retrievers import get_semantic_retriever, get_hybrid_retriever
from docstringify.main import generate_docstring_pipeline

# 1. Define an Evaluation Class (The Model)
class DocstringModel(weave.Model):
    """Weave model that generates a docstring for a code snippet.

    The model retrieves context for the target code with a retriever chosen
    by ``retrieval_strategy`` and passes both to
    ``generate_docstring_pipeline``.

    Attributes:
        retrieval_strategy: Name of the retriever to use; ``"semantic"`` and
            ``"hybrid"`` are the supported values.
    """

    retrieval_strategy: str
    # Private state (underscore-prefixed). Annotated as ``object`` because the
    # builtin ``any`` is a function, not a type.
    _embeddings: object = None
    _docs: object = None

    def __init__(self, strategy, documents):
        """Create a model for one retrieval strategy.

        Args:
            strategy: Value stored as ``retrieval_strategy``.
            documents: Documents handed to the retriever factory on predict.
        """
        super().__init__(retrieval_strategy=strategy)
        self._embeddings = load_embeddings()
        self._docs = documents

    @weave.op()
    def predict(self, target_code: str):
        """Generate a docstring for ``target_code``.

        Args:
            target_code: Source code to document; also used as the
                retrieval query.

        Returns:
            Whatever ``generate_docstring_pipeline`` returns for the target
            code and the retrieved context.

        Raises:
            ValueError: If ``retrieval_strategy`` is not a supported value.
        """
        retriever_factories = {
            "semantic": get_semantic_retriever,
            "hybrid": get_hybrid_retriever,
        }
        try:
            build_retriever = retriever_factories[self.retrieval_strategy]
        except KeyError:
            # Previously an unknown strategy left `retriever` unbound and
            # surfaced as an opaque UnboundLocalError further down.
            supported = ", ".join(sorted(retriever_factories))
            raise ValueError(
                f"Unknown retrieval_strategy {self.retrieval_strategy!r}; "
                f"expected one of: {supported}"
            ) from None

        # NOTE(review): the retriever is rebuilt on every call, as before;
        # caching it may be worthwhile if construction is expensive -- confirm.
        retriever = build_retriever(self._docs, self._embeddings)

        # Retrieve
        context = retriever.invoke(target_code)

        # Generate
        return generate_docstring_pipeline(target_code, context)

# 2. Define a Scoring Function
@weave.op()
def check_docstring_quality(target_code: str, output: str):
    """Score a generated docstring using simple string heuristics.

    No LLM is called here; in a real scenario an LLM-based grader could
    replace these checks.

    Args:
        target_code: The code the docstring was generated for. Not used by
            the current checks.
        output: The generated docstring text.

    Returns:
        A dict with three keys:
            ``has_google_style``: True when both ``"Args:"`` and
                ``"Returns:"`` occur in ``output``.
            ``is_detailed``: True when ``output`` spans more than two lines.
            ``score``: Fraction of the three individual checks that passed.
    """
    args_present = "Args:" in output
    returns_present = "Returns:" in output
    # More than two lines is the same as at least two newline characters.
    multi_line = output.count('\n') > 1

    passed = sum((args_present, returns_present, multi_line))
    report = {}
    report["has_google_style"] = args_present and returns_present
    report["is_detailed"] = multi_line
    report["score"] = passed / 3
    return report

# 3. Run Evaluation
def run_eval():
    """Evaluate the semantic and hybrid retrieval strategies with Weave.

    Initializes the ``docstringify-eval`` Weave project, loads documents from
    ``./src``, builds one ``DocstringModel`` per strategy and scores both on
    a small inline dataset with ``check_docstring_quality``.

    ``weave.Evaluation.evaluate`` is a coroutine function, so the evaluations
    are awaited inside a single ``asyncio.run`` call; calling ``evaluate``
    without awaiting it would never execute the evaluation.

    Note:
        ``asyncio.run`` cannot be called from a running event loop (for
        example inside a Jupyter cell); there, await the evaluations
        directly instead of calling this function.
    """
    weave.init("docstringify-eval")

    # Load some sample code as your "Dataset"
    docs = load_documents_from_folder("./src")
    examples = [
        {"target_code": "def calculate_mean(data): return sum(data)/len(data)"},
        {"target_code": "def connect_db(uri): ..."},
    ]

    # Compare two strategies
    semantic_model = DocstringModel(strategy="semantic", documents=docs)
    hybrid_model = DocstringModel(strategy="hybrid", documents=docs)

    evaluation = weave.Evaluation(
        dataset=examples,
        scorers=[check_docstring_quality],
    )

    async def _evaluate_all():
        # Run the strategies one after the other so the progress messages
        # line up with the evaluation that is executing.
        print("Evaluating Semantic Search...")
        await evaluation.evaluate(semantic_model)

        print("Evaluating Hybrid Search...")
        await evaluation.evaluate(hybrid_model)

    asyncio.run(_evaluate_all())

# Run the evaluation only when this file is the entry point, not on import.
if __name__ == "__main__":
    run_eval()

# Cell output: ModuleNotFoundError: No module named 'docstringify.ingest'