In [2]:
# Azure AI project coordinates. This mapping is handed to
# ContentSafetyEvaluator(project_scope=...) and to evaluate(azure_ai_project=...)
# in later cells.
azure_ai_project = dict(
    subscription_id="2e0ec2a0-1280-41cb-a8b4-a48dcc8b213c",
    resource_group_name="rg-nickwardai",
    project_name="nickward-westus",
)

In [5]:
import os
import warnings

# Environment variables that configure the Azure OpenAI connection.
REQUIRED_ENV_VARS = (
    "AZURE_OPENAI_API_KEY",
    "AZURE_OPENAI_ENDPOINT",
    "AZURE_OPENAI_DEPLOYMENT",
    "AZURE_OPENAI_VERSION",
)


def find_missing_settings(names, environ=None):
    """Return the entries of ``names`` that are unset or empty in ``environ``.

    Args:
        names: Iterable of environment variable names to check.
        environ: Mapping to look the names up in; defaults to ``os.environ``.

    Returns:
        A list of the missing names, in the order they appear in ``names``.
    """
    if environ is None:
        environ = os.environ
    return [name for name in names if not environ.get(name)]


# os.getenv returns None for an unset variable, so these assignments never
# raise; the check below makes a misconfigured environment visible right away.
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_DEPLOYMENT = os.getenv("AZURE_OPENAI_DEPLOYMENT")
AZURE_OPENAI_VERSION = os.getenv("AZURE_OPENAI_VERSION")

_missing_settings = find_missing_settings(REQUIRED_ENV_VARS)
if _missing_settings:
    # Warn instead of raising so the cell still runs exactly as before.
    warnings.warn(
        "Azure OpenAI environment variables are not set: "
        + ", ".join(_missing_settings)
    )

# Per-model endpoint/key settings, keyed by model name.
env_var = {
    "gpt4o": {
        "endpoint": AZURE_OPENAI_ENDPOINT,
        "key": AZURE_OPENAI_API_KEY
    }
}

In [6]:
from promptflow.core import AzureOpenAIModelConfiguration
# Azure OpenAI connection settings, gathered in one mapping and then expanded
# as keyword arguments into the model configuration object.
_model_settings = {
    "azure_endpoint": AZURE_OPENAI_ENDPOINT,
    "api_key": AZURE_OPENAI_API_KEY,
    "api_version": AZURE_OPENAI_VERSION,
    "azure_deployment": AZURE_OPENAI_DEPLOYMENT,
}
configuration = AzureOpenAIModelConfiguration(**_model_settings)


In [7]:
from promptflow.evals.evaluators import ContentSafetyEvaluator, RelevanceEvaluator, CoherenceEvaluator, GroundednessEvaluator, FluencyEvaluator, SimilarityEvaluator
# The content-safety evaluator is constructed from the Azure AI project scope.
content_safety_evaluator = ContentSafetyEvaluator(project_scope=azure_ai_project)

# The remaining evaluators are all constructed from the same model
# configuration, in the order listed below.
(
    relevance_evaluator,
    coherence_evaluator,
    groundedness_evaluator,
    fluency_evaluator,
    similarity_evaluator,
) = (
    evaluator_cls(model_config=configuration)
    for evaluator_cls in (
        RelevanceEvaluator,
        CoherenceEvaluator,
        GroundednessEvaluator,
        FluencyEvaluator,
        SimilarityEvaluator,
    )
)

In [None]:
from app_target import ModelEndpoints
import pathlib
import random
from promptflow.evals.evaluate import evaluate
# Model names to evaluate; each one is passed to ModelEndpoints together with
# env_var (presumably used as a key into env_var -- confirm in app_target).
models = ["gpt4o"]

# Dataset location: data.jsonl in the current working directory. The path is
# joined with pathlib and converted to a string before being passed as `data`.
path = str(pathlib.Path.cwd() / "data.jsonl")

# Evaluators to run, keyed by the name used in evaluator_config below.
evaluators = {
    "content_safety": content_safety_evaluator,
    "coherence": coherence_evaluator,
    "relevance": relevance_evaluator,
    "groundedness": groundedness_evaluator,
    "fluency": fluency_evaluator,
    "similarity": similarity_evaluator,
}

# Column mappings: the question (and context) come from the dataset row, the
# answer comes from the target's output.
_question_answer = {
    "question": "${data.question}",
    "answer": "${target.answer}",
}
_question_answer_context = {
    "answer": "${target.answer}",
    "context": "${data.context}",
    "question": "${data.question}",
}
# NOTE(review): confirm that every evaluator accepts exactly the inputs mapped
# here; the mappings are carried over unchanged from the original cell.
evaluator_config = {
    "content_safety": dict(_question_answer),
    "coherence": dict(_question_answer),
    "relevance": dict(_question_answer_context),
    "groundedness": dict(_question_answer_context),
    "fluency": dict(_question_answer_context),
    "similarity": dict(_question_answer_context),
}

# Keep every model's result instead of only the last one. `results` is still
# bound to the most recent run, as before.
results_by_model = {}
for model in models:
    # Random 4-digit suffix to tell repeated runs apart by name.
    randomNum = random.randint(1111, 9999)
    results = evaluate(
        azure_ai_project=azure_ai_project,
        evaluation_name="Eval-Run-" + str(randomNum) + "-" + model.title(),
        data=path,
        target=ModelEndpoints(env_var, model),
        evaluators=evaluators,
        evaluator_config=evaluator_config,
    )
    results_by_model[model] = results