### Goal
Run flows using sdk and allowing for external configuration of RAG parameters. 

### Check Connections

In [1]:
import promptflow

pf = promptflow.PFClient()

# List all the available connections
for c in pf.connections.list():
    print(c.name + " (" + c.type + ")")

azureopenai_local (AzureOpenAI)
aoai_connection (AzureOpenAI)
default_connection (CognitiveSearch)
cosmodb_connection (Custom)
acs_connection (CognitiveSearch)
azure_form_recognizer_connection (Custom)
acs_connection_test (CognitiveSearch)
aoai_connection_test (AzureOpenAI)
postgres_connection (Custom)


### Test the flow

In [4]:
output = pf.flows.test(
    "../../rag-azure-search/flow.dag.yaml",
    inputs={
        "chat_history": [],
        "query": "What is the growth rate of Azure ML revenue in FY23Q1?",
    },
)

print(output)

2024-03-13 21:46:01 -0700   37812 execution.flow     INFO     Start executing nodes in thread pool mode.
2024-03-13 21:46:01 -0700   37812 execution.flow     INFO     Start to run 13 nodes with concurrency level 16.
2024-03-13 21:46:01 -0700   37812 execution.flow     INFO     Executing node ConfigLoader. node run id: 912e8b7d-89eb-4cee-9ad4-2aff174f6c8d_ConfigLoader_0
2024-03-13 21:46:01 -0700   37812 execution.flow     INFO     Executing node CheckExistingContext. node run id: 912e8b7d-89eb-4cee-9ad4-2aff174f6c8d_CheckExistingContext_0
2024-03-13 21:46:01 -0700   37812 execution.flow     INFO     Executing node QueryParser. node run id: 912e8b7d-89eb-4cee-9ad4-2aff174f6c8d_QueryParser_0
2024-03-13 21:46:01 -0700   37812 execution.flow     INFO     Executing node FormatConversation. node run id: 912e8b7d-89eb-4cee-9ad4-2aff174f6c8d_FormatConversation_0
2024-03-13 21:46:01 -0700   37812 execution.flow     INFO     Node CheckExistingContext completes.
2024-03-13 21:46:01 -0700   37812 e

### Run the flow with the benchmark data file

In [5]:
flow_path = "../../rag-azure-search/flow.dag.yaml"
data_path = "../../datasets/evalset.csv"

column_mapping = {
    "chat_history": "${data.chat_history}",
    "query": "${data.question}",
}

run_topk3 = pf.run(flow=flow_path, data=data_path, column_mapping=column_mapping)
pf.stream(run_topk3)
print(run_topk3)

2024-03-13 21:47:18 -0700   27608 execution.bulk     INFO     Process 27608 started.
2024-03-13 21:47:19 -0700   27912 execution.bulk     INFO     Process 27912 started.
2024-03-13 21:47:19 -0700   11604 execution.bulk     INFO     Process 11604 started.
2024-03-13 21:47:19 -0700   35524 execution.bulk     INFO     Process 35524 started.
2024-03-13 21:47:19 -0700   12872 execution.bulk     INFO     Process 12872 started.
2024-03-13 21:47:19 -0700   10716 execution.bulk     INFO     Process 10716 started.
2024-03-13 21:47:19 -0700   38824 execution.bulk     INFO     Process 38824 started.
2024-03-13 21:47:19 -0700   35348 execution.bulk     INFO     Process 35348 started.
2024-03-13 21:47:19 -0700   39144 execution.bulk     INFO     Process 39144 started.
2024-03-13 21:47:20 -0700    4568 execution.bulk     INFO     Process 4568 started.
2024-03-13 21:47:25 -0700   24564 execution.bulk     INFO     Process 24564 started.

Run name: "rag_azure_search_variant_0_20240313_214628_356508"
Run

In [6]:
pf.get_details(run_topk3)

Unnamed: 0,inputs.chat_history,inputs.query,inputs.line_number,outputs.fetched_docs,outputs.log_params,outputs.reply
0,[],What is the quarterly revenue of Microsoft Clo...,0,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...","According to the retrieved documents, the Micr..."
1,[],How many Azure Arc customers does Microsoft cu...,1,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...","According to the retrieved documents, Microsof..."
2,[],Which companies run SAP on Azure in FY23Q1?,2,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...","According to the retrieved documents, companie..."
3,[],What is the annual recurring revenue of GitHub...,3,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...","According to the retrieved documents, GitHub h..."
4,[],How many monthly active users does Power BI ha...,4,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...","According to the retrieved documents, Power BI..."
5,[],What is the quarterly revenue of Microsoft Clo...,5,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '2'}, 'to...","According to the retrieved documents, the Micr..."
6,[],How many Azure Arc customers does Microsoft cu...,6,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '2'}, 'to...","According to the retrieved document, Microsoft..."
7,[],What are some examples of companies using Azur...,7,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '2'}, 'to...","According to the retrieved documents, over 200..."
8,[],How many developers are using GitHub Copilot a...,8,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '2'}, 'to...","According to the retrieved documents, GitHub i..."
9,[],What is the growth rate of Azure Machine Learn...,9,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...",The retrieved documents do not provide any inf...


In [8]:
ddf = pf.get_details(run_topk3)
#ddf.to_csv("checkoutput.csv")

## Evaluate 

In [9]:
eval_flow_path = "../../evaluator/eval_aistudio_score/"
data_path = "../../datasets/evalset.csv"

eval_run_topk3 = pf.run(
    flow=eval_flow_path,
    run=run_topk3,
    data=data_path,
    column_mapping={
        "question": "${data.question}",
        "answer": "${data.answer}",
    },
    display_name="eval_multiple_score_topk3",
)
pf.stream(eval_run_topk3)
print(eval_run_topk3)

2024-03-13 21:48:44 -0700   42384 execution.bulk     INFO     Process 42384 started.
2024-03-13 21:48:45 -0700   36144 execution.bulk     INFO     Process 36144 started.
2024-03-13 21:48:45 -0700   37608 execution.bulk     INFO     Process 37608 started.
2024-03-13 21:48:45 -0700   43936 execution.bulk     INFO     Process 43936 started.
2024-03-13 21:48:45 -0700   43456 execution.bulk     INFO     Process 43456 started.
2024-03-13 21:48:45 -0700   43688 execution.bulk     INFO     Process 43688 started.
2024-03-13 21:48:45 -0700   19832 execution.bulk     INFO     Process 19832 started.
2024-03-13 21:48:45 -0700   42972 execution.bulk     INFO     Process 42972 started.
2024-03-13 21:48:45 -0700   38556 execution.bulk     INFO     Process 38556 started.
2024-03-13 21:48:45 -0700   30240 execution.bulk     INFO     Process 30240 started.

Run name: "eval_aistudio_score_variant_0_20240313_214834_346155"
Run status: "Completed"
Start time: "2024-03-13 21:48:34.346155"
Duration: "0:01:42.

In [10]:
pf.get_details(eval_run_topk3)

Unnamed: 0,inputs.question,inputs.answer,inputs.line_number,inputs.context,inputs.log_params,inputs.reply,outputs.coherence,outputs.fluency,outputs.gpt_similarity,outputs.groundedness,outputs.relevance
0,What is the quarterly revenue of Microsoft Clo...,"$25 billion in quarterly revenue, up 24 percen...",0,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,4,5,1,1
1,How many Azure Arc customers does Microsoft cu...,"More than 8,500 Arc customers, more than doubl...",1,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,4,5,5,5
2,Which companies run SAP on Azure in FY23Q1?,"Companies like Chobani, Munich RE, Sodexo, Vol...",2,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,5,5,3,1
3,What is the annual recurring revenue of GitHub...,GitHub is now at $1 billion in annual recurrin...,3,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,4,4,5,1,1
4,How many monthly active users does Power BI ha...,Power BI is the market leader in business inte...,4,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,5,5,1,1
5,What is the quarterly revenue of Microsoft Clo...,"$27 billion in quarterly revenue, up 22 percen...",5,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,4,5,1,1
6,How many Azure Arc customers does Microsoft cu...,"More than 12,000 Arc customers, double the num...",6,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,4,5,1,1
7,What are some examples of companies using Azur...,"Over 200 customers, including KPMG and Al Jaze...",7,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,5,5,3,1
8,How many developers are using GitHub Copilot a...,More than one million people have used GitHub ...,8,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,4,5,3,1
9,What is the growth rate of Azure Machine Learn...,Azure ML revenue alone has increased more than...,9,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,4,4,5,3,1


In [11]:
pf.get_metrics(eval_run_topk3)

{'relevance': 1.4,
 'coherence': 4.8,
 'fluency': 4.3,
 'groundedness': 2.2,
 'gpt_similarity': 5.0}

In [12]:
pf.visualize(eval_run_topk3)

The HTML file is generated at 'C:\\Users\\hokhadiv\\AppData\\Local\\Temp\\pf-visualize-detail-ekop591c.html'.
Trying to view the result in a web browser...
Successfully visualized from the web browser.


### Multiple runs 
Using config.yaml file

In [13]:
import yaml
import promptflow

pf = promptflow.PFClient()

with open("./runs_config.yaml", "r") as file:
    runs_config = yaml.safe_load(file)

# Access the contents of the config.yaml file
print(runs_config)

{'Run1': {'maxTokens': 3500, 'topK': 5, 'searchType': 'vector', 'indexName': 'msft-transcripts-1'}, 'Run2': {'maxTokens': 3500, 'topK': 5, 'searchType': 'filter_vector', 'indexName': 'msft-transcripts-1'}, 'Run3': {'maxTokens': 3500, 'topK': 5, 'searchType': 'filter_hybrid', 'indexName': 'msft-transcripts-1'}}


In [14]:
# function to update param_config systematically 
import os 
def update_param_config(flow_path, config):
    base_dir = os.path.dirname(flow_path)
    param_config_path = os.path.join(base_dir, 'param_config.yaml')

    with open(param_config_path, 'r') as file:
        param_dict = yaml.safe_load(file)

    param_dict.update(config)

    with open(param_config_path, 'w') as file:
        yaml.dump(param_dict, file)


In [17]:
from datetime import datetime

run_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")


def run_dags(
    flow_path="../../rag-azure-search/flow.dag.yaml",
    data_path="../../datasets/evalset.csv",
    eval_flow_path="../../evaluator/eval_aistudio_score/",
    config=None,
):
    column_mapping = {
        "chat_history": "${data.chat_history}",
        "query": "${data.question}",
    }
    update_param_config(flow_path, config)

    copilot_run_from_config = pf.run(
        flow=flow_path,
        data=data_path,
        column_mapping=column_mapping,
        display_name="rag_azuresearch_run_from_config_" + run_time,
    )


    # pf.stream(current_run)
    # print(run_topk3)

    eval_run_from_config = pf.run(
        flow=eval_flow_path,
        run=copilot_run_from_config,
        data=data_path,
        column_mapping={
            "question": "${data.question}",
            "answer": "${data.answer}",
            "reply": "${run.outputs.reply}",
            "context": "${run.outputs.fetched_docs}",
            "log_params": "${run.outputs.log_params}",
        },
        display_name="eval_run_azuresearch_from_config_" + run_time,
    )
    return copilot_run_from_config, eval_run_from_config

In [18]:
i = 0
run_logs = {}
for run in runs_config:
    i += 1
    print(runs_config[run])
    copilot_run_from_config, eval_run_from_config = run_dags(config=runs_config[run])
    run_logs[i] = eval_run_from_config

{'maxTokens': 3500, 'topK': 5, 'searchType': 'vector', 'indexName': 'msft-transcripts-1'}
{'maxTokens': 3500, 'topK': 5, 'searchType': 'filter_vector', 'indexName': 'msft-transcripts-1'}
{'maxTokens': 3500, 'topK': 5, 'searchType': 'filter_hybrid', 'indexName': 'msft-transcripts-1'}


In [19]:
for run_id in run_logs:
    print(run_id)
    pf.visualize(run_logs[run_id])

1
The HTML file is generated at 'C:\\Users\\hokhadiv\\AppData\\Local\\Temp\\pf-visualize-detail-mq5bku6g.html'.
Trying to view the result in a web browser...
Successfully visualized from the web browser.
2
The HTML file is generated at 'C:\\Users\\hokhadiv\\AppData\\Local\\Temp\\pf-visualize-detail-4bkhmyoe.html'.
Trying to view the result in a web browser...
Successfully visualized from the web browser.
3
The HTML file is generated at 'C:\\Users\\hokhadiv\\AppData\\Local\\Temp\\pf-visualize-detail-kuy8d9a2.html'.
Trying to view the result in a web browser...
Successfully visualized from the web browser.
