# Helper Functions

In [1]:
import os
import json
import re
import pandas as pd
from dotenv import load_dotenv
from genai import Credentials, Client
from genai.text.generation import TextGenerationParameters
from genai.text.tokenization import (
    TextTokenizationParameters,
    TextTokenizationReturnOptions,
    TextTokenizationCreateResults,
)
from genai.credentials import Credentials
import sys
sys.path.append('../../app')
from utils import eval_using_model, generate_text_using_OpenAI, generate_prompt, generate_text
from langchain.evaluation import (
    Criteria,
    load_evaluator,
    EvaluatorType
)
from langchain_community.chat_models import ChatOpenAI

In [2]:
def get_response(model_id, api_key, openai_key, file, instruction, functions, classes, documentation, imports, other, functions_code, functions_doc, classes_code, classes_doc):
    """Build a prompt for one dataset entry and generate a response for it.

    The chunked dataset JSON is loaded from a fixed relative path, the entry
    stored under ``file`` is looked up, and its code chunks are handed to
    ``generate_prompt`` together with the caller-supplied arguments.

    Args:
        model_id: Model identifier. The exact value ``"OpenAI/gpt3.5"``
            routes the request to ``generate_text_using_OpenAI``; any other
            value is forwarded to ``generate_text``.
        api_key: Passed to ``generate_text`` as ``genai_key``.
        openai_key: Passed to ``generate_text_using_OpenAI``.
        file: Key of the dataset entry to use.
        instruction: Forwarded unchanged to ``generate_prompt``.
        functions, classes, documentation, imports, other, functions_code,
        functions_doc, classes_code, classes_doc: Forwarded unchanged to
            ``generate_prompt`` next to the matching ``*_text`` chunk.
            NOTE(review): presumably include/exclude switches for each
            chunk -- confirm against ``utils.generate_prompt``.

    Returns:
        Tuple ``(prompt, result, actual_doc)`` where ``actual_doc`` is the
        entry's ``"markdown"`` value.
    """
    dataset_path = "../../data/raw/chunked_data.json"

    with open(dataset_path, "r", encoding="utf-8") as handle:
        dataset = json.load(handle)

    chunks = dataset[file]["code_chunks"]
    reference_doc = dataset[file]["markdown"]

    # Each caller-supplied argument is paired with the chunk text it refers to.
    prompt_kwargs = {
        "instruction": instruction,
        "functions": functions,
        "functions_text": chunks["functions"],
        "classes": classes,
        "classes_text": chunks["classes"],
        "documentation": documentation,
        "documentation_text": chunks["documentation"],
        "imports": imports,
        "imports_text": chunks["imports"],
        "other": other,
        "other_text": chunks["other"],
        "functions_code": functions_code,
        "functions_code_text": chunks["functions_code"],
        "functions_doc": functions_doc,
        "functions_doc_text": chunks["functions_docstrings"],
        "classes_code": classes_code,
        "classes_code_text": chunks["classes_code"],
        "classes_doc": classes_doc,
        "classes_doc_text": chunks["classes_docstrings"],
    }
    built_prompt = generate_prompt(**prompt_kwargs)

    if model_id != "OpenAI/gpt3.5":
        generated = generate_text(
            model_id,
            built_prompt,
            decoding_method="sample",
            max_new_tokens=1024,
            temperature=0.7,
            top_k=50,
            top_p=0.50,
            genai_key=api_key,
        )
    else:
        generated = generate_text_using_OpenAI(built_prompt, openai_key)

    return built_prompt, generated, reference_doc

In [3]:
def extract_scores(gpt_score):
    """Parse the five rubric scores out of an evaluator's text output.

    The text is scanned for ``<word>: <integer>`` pairs. Any amount of
    whitespace (including none) may follow the colon. When a name occurs
    more than once, the last occurrence wins.

    Args:
        gpt_score: Text containing entries such as ``"Accuracy: 4"``.

    Returns:
        Tuple of ints ``(accuracy, relevance, clarity, completeness,
        readability)``.

    Raises:
        KeyError: If one or more of the five criteria is absent from the
            text; the message lists every missing criterion.
    """
    # `\s*` instead of a single `\s`, so "Accuracy:4" and "Accuracy:  4"
    # are recognised as well as "Accuracy: 4".
    pattern = r'(\w+):\s*(\d+)'
    evaluation_scores = {
        name: int(value) for name, value in re.findall(pattern, gpt_score)
    }

    required = ('Accuracy', 'Relevance', 'Clarity', 'Completeness', 'Readability')
    missing = [name for name in required if name not in evaluation_scores]
    if missing:
        # Still a KeyError, as before, but naming everything that is absent.
        raise KeyError(
            f"no score found for {', '.join(missing)} in evaluator output"
        )

    return tuple(evaluation_scores[name] for name in required)

In [4]:
def langchain_scores(generated_patch, prompt, actual_doc):
    """Score a generated text with three LangChain criteria evaluators.

    A ``ChatOpenAI`` model (``gpt-4``, temperature 0) acts as the judge.
    Every raw evaluation result is printed before its ``'score'`` entry is
    read.

    Args:
        generated_patch: Text under evaluation, passed as ``prediction``.
        prompt: Prompt that produced the text, passed as ``input``.
        actual_doc: Reference text; used only by the correctness evaluator.

    Returns:
        Tuple ``(helpfulness, correctness, logical)`` holding the
        ``'score'`` value of each evaluation.
    """
    judge = ChatOpenAI(model="gpt-4", temperature=0)

    # 1) Built-in "helpfulness" criterion, no reference needed.
    helpfulness_result = load_evaluator(
        "criteria", llm=judge, criteria="helpfulness"
    ).evaluate_strings(prediction=generated_patch, input=prompt)
    print(helpfulness_result)
    helpfulness = helpfulness_result['score']

    # 2) Built-in "correctness" criterion, judged against the reference text.
    correctness_result = load_evaluator(
        "labeled_criteria", llm=judge, criteria="correctness"
    ).evaluate_strings(
        prediction=generated_patch, input=prompt, reference=actual_doc
    )
    print(correctness_result)
    correctness = correctness_result['score']

    # 3) Custom "logical" criterion defined inline.
    logical_criterion = {
        "logical": "Is the output logical and complete? Does it capture all required fields"
    }
    logical_result = load_evaluator(
        EvaluatorType.CRITERIA, criteria=logical_criterion, llm=judge
    ).evaluate_strings(prediction=generated_patch, input=prompt)
    print(logical_result)
    logical = logical_result['score']

    return helpfulness, correctness, logical

In [5]:
def append_row_to_dataframe(df, prompt, generated_patch, gpt_accuracy_score, gpt_relevance_score, gpt_clarity_score, gpt_completeness_score, gpt_readability_score, langchain_helpfulness, langchain_correctness, langchain_logical):
    """Return a new DataFrame consisting of ``df`` plus one evaluation row.

    ``df`` itself is not modified. The result index is renumbered from 0.

    Args:
        df: Existing results frame (may be empty).
        prompt: Stored in the ``'prompt'`` column.
        generated_patch: Stored in the ``'response'`` column.
        gpt_accuracy_score, gpt_relevance_score, gpt_clarity_score,
        gpt_completeness_score, gpt_readability_score: Stored in the
            columns of the same names.
        langchain_helpfulness, langchain_correctness, langchain_logical:
            Stored in the columns of the same names.

    Returns:
        A DataFrame with the new row appended as the last row.
    """
    new_row = {
        'prompt': prompt,
        'response': generated_patch,
        'gpt_accuracy_score': gpt_accuracy_score,
        'gpt_relevance_score': gpt_relevance_score,
        'gpt_clarity_score': gpt_clarity_score,
        'gpt_completeness_score': gpt_completeness_score,
        'gpt_readability_score': gpt_readability_score,
        'langchain_helpfulness': langchain_helpfulness,
        'langchain_correctness': langchain_correctness,
        'langchain_logical': langchain_logical,
    }
    row_frame = pd.DataFrame([new_row])

    # A frame with no rows and no columns contributes nothing, so the
    # one-row frame already is the result.
    if df.empty and df.columns.empty:
        return row_frame

    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # supported replacement and also works on older versions.
    return pd.concat([df, row_frame], ignore_index=True)