### Goal
Run flows using sdk and allowing for external configuration of RAG parameters. 

### Check Connections

In [1]:
import promptflow

pf = promptflow.PFClient()

# List all the available connections
for c in pf.connections.list():
    print(c.name + " (" + c.type + ")")

azureopenai_local (AzureOpenAI)
aoai_connection (AzureOpenAI)
default_connection (CognitiveSearch)
cosmodb_connection (Custom)
acs_connection (CognitiveSearch)
azure_form_recognizer_connection (Custom)
acs_connection_test (CognitiveSearch)
aoai_connection_test (AzureOpenAI)
postgres_connection (Custom)


### Test the flow

In [2]:
output = pf.flows.test(
    "rag-copilot/flow.dag.yaml",
    inputs={
        "chat_history": [],
        "query": "What is the growth rate of Azure ML revenue in FY23Q1?",
        "topK": 5,
        "searchType": "filter_vector",
        "index_name": "msft-transcripts-1"
    },
)

print(output)



2024-03-11 17:46:59 -0700   45772 execution.flow     INFO     Start executing nodes in thread pool mode.
2024-03-11 17:46:59 -0700   45772 execution.flow     INFO     Start to run 12 nodes with concurrency level 16.
2024-03-11 17:46:59 -0700   45772 execution.flow     INFO     Executing node CheckExistingContext. node run id: c30f2360-7691-4e77-9b09-2d6b9e4f33e7_CheckExistingContext_0
2024-03-11 17:46:59 -0700   45772 execution.flow     INFO     Executing node QueryParser. node run id: c30f2360-7691-4e77-9b09-2d6b9e4f33e7_QueryParser_0
2024-03-11 17:46:59 -0700   45772 execution.flow     INFO     Executing node FormatConversation. node run id: c30f2360-7691-4e77-9b09-2d6b9e4f33e7_FormatConversation_0
2024-03-11 17:46:59 -0700   45772 execution.flow     INFO     Node CheckExistingContext completes.
2024-03-11 17:46:59 -0700   45772 execution.flow     INFO     Node QueryParser completes.
2024-03-11 17:46:59 -0700   45772 execution.flow     INFO     Node FormatConversation completes.
2024

### Run the flow with the benchmark data file

In [3]:
flow_path = "rag-copilot/flow.dag.yaml"
data_path = "./datasets/evalset.csv"

column_mapping = {
    "chat_history": "${data.chat_history}",
    "query": "${data.question}",
    "topK": 3,
    "searchType": "filter_vector",
    "index_name": "msft-transcripts-1"
}

run_topk3 = pf.run(flow=flow_path, data=data_path, column_mapping=column_mapping)
pf.stream(run_topk3)
print(run_topk3)

2024-03-11 17:47:14 -0700   22480 execution.bulk     INFO     Process 22480 started.
2024-03-11 17:47:14 -0700   42884 execution.bulk     INFO     Process 42884 started.
2024-03-11 17:47:15 -0700   27348 execution.bulk     INFO     Process 27348 started.
2024-03-11 17:47:15 -0700    9020 execution.bulk     INFO     Process 9020 started.
2024-03-11 17:47:15 -0700   17168 execution.bulk     INFO     Process 17168 started.
2024-03-11 17:47:15 -0700   11592 execution.bulk     INFO     Process 11592 started.
2024-03-11 17:47:15 -0700   46324 execution.bulk     INFO     Process 46324 started.
2024-03-11 17:47:15 -0700   40260 execution.bulk     INFO     Process 40260 started.
2024-03-11 17:47:15 -0700   22972 execution.bulk     INFO     Process 22972 started.
2024-03-11 17:47:15 -0700   45920 execution.bulk     INFO     Process 45920 started.

Run name: "rag_copilot_variant_0_20240311_174702_913055"
Run status: "Completed"
Start time: "2024-03-11 17:47:02.912035"
Duration: "0:00:22.422181"
O

In [4]:
pf.get_details(run_topk3)

Unnamed: 0,inputs.chat_history,inputs.query,inputs.topK,inputs.searchType,inputs.index_name,inputs.line_number,inputs.indexName,inputs.maxTokens,outputs.fetched_docs,outputs.log_params,outputs.reply
0,[],What is the quarterly revenue of Microsoft Clo...,3,filter_vector,msft-transcripts-1,0,msft-transcripts-1,3500,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...","According to [doc0], the Microsoft Cloud excee..."
1,[],How many Azure Arc customers does Microsoft cu...,3,filter_vector,msft-transcripts-1,1,msft-transcripts-1,3500,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...","According to the retrieved documents, Microsof..."
2,[],Which companies run SAP on Azure in FY23Q1?,3,filter_vector,msft-transcripts-1,2,msft-transcripts-1,3500,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...","According to the retrieved documents, companie..."
3,[],What is the annual recurring revenue of GitHub...,3,filter_vector,msft-transcripts-1,3,msft-transcripts-1,3500,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...","According to the retrieved documents, GitHub h..."
4,[],How many monthly active users does Power BI ha...,3,filter_vector,msft-transcripts-1,4,msft-transcripts-1,3500,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...","According to the retrieved documents, Power BI..."
5,[],What is the quarterly revenue of Microsoft Clo...,3,filter_vector,msft-transcripts-1,5,msft-transcripts-1,3500,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '2'}, 'to...","According to the retrieved documents, the Micr..."
6,[],How many Azure Arc customers does Microsoft cu...,3,filter_vector,msft-transcripts-1,6,msft-transcripts-1,3500,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '2'}, 'to...","According to the retrieved document, Microsoft..."
7,[],What are some examples of companies using Azur...,3,filter_vector,msft-transcripts-1,7,msft-transcripts-1,3500,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '2'}, 'to...","According to the retrieved documents, over 200..."
8,[],How many developers are using GitHub Copilot a...,3,filter_vector,msft-transcripts-1,8,msft-transcripts-1,3500,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '2'}, 'to...","According to the retrieved documents, GitHub i..."
9,[],What is the growth rate of Azure Machine Learn...,3,filter_vector,msft-transcripts-1,9,msft-transcripts-1,3500,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...",The retrieved documents do not provide any inf...


## Evaluate 

In [9]:
eval_flow_path = "evaluator/eval_aistudio_score/"
data_path = "./datasets/evalset.csv"

eval_run_topk3 = pf.run(
    flow=eval_flow_path,
    run=run_topk3,
    data=data_path,
    column_mapping={
        "question": "${data.question}",
        "answer": "${data.answer}",
        "reply": "${run.outputs.reply}",
        "context": "${run.outputs.fetched_docs}",
        "log_params": "${run.outputs.log_params}",
    },
    display_name="eval_multiple_score_topk3",
)
pf.stream(eval_run_topk3)
print(eval_run_topk3)

2024-03-11 18:02:11 -0700   23544 execution.bulk     INFO     Process 23544 started.
2024-03-11 18:02:11 -0700   44164 execution.bulk     INFO     Process 44164 started.
2024-03-11 18:02:11 -0700   31512 execution.bulk     INFO     Process 31512 started.
2024-03-11 18:02:11 -0700   42844 execution.bulk     INFO     Process 42844 started.
2024-03-11 18:02:11 -0700   46424 execution.bulk     INFO     Process 46424 started.
2024-03-11 18:02:11 -0700   20648 execution.bulk     INFO     Process 20648 started.
2024-03-11 18:02:19 -0700   31040 execution.bulk     INFO     Process 31040 started.
2024-03-11 18:02:19 -0700   44548 execution.bulk     INFO     Process 44548 started.
2024-03-11 18:02:20 -0700   44160 execution.bulk     INFO     Process 44160 started.
2024-03-11 18:02:20 -0700   25896 execution.bulk     INFO     Process 25896 started.
  File "c:\Users\hokhadiv\AppData\Local\miniconda3\envs\appliedaipf\lib\site-packages\promptflow\tools\common.py", line 240, in wrapper
    time.sleep

In [11]:
pf.get_details(eval_run_topk3)

Unnamed: 0,inputs.question,inputs.answer,inputs.reply,inputs.context,inputs.log_params,inputs.line_number,outputs.coherence,outputs.fluency,outputs.gpt_similarity,outputs.groundedness,outputs.relevance
0,What is the quarterly revenue of Microsoft Clo...,"$25 billion in quarterly revenue, up 24 percen...","According to [doc0], the Microsoft Cloud excee...","{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...",0,5,4,5,5,5
1,How many Azure Arc customers does Microsoft cu...,"More than 8,500 Arc customers, more than doubl...","According to the retrieved documents, Microsof...","{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...",1,5,4,5,5,5
2,Which companies run SAP on Azure in FY23Q1?,"Companies like Chobani, Munich RE, Sodexo, Vol...","According to the retrieved documents, companie...","{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...",2,5,5,5,5,4
3,What is the annual recurring revenue of GitHub...,GitHub is now at $1 billion in annual recurrin...,"According to the retrieved documents, GitHub h...","{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...",3,5,3,5,5,5
4,How many monthly active users does Power BI ha...,Power BI is the market leader in business inte...,"According to the retrieved documents, Power BI...","{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...",4,5,4,5,1,5
5,What is the quarterly revenue of Microsoft Clo...,"$27 billion in quarterly revenue, up 22 percen...","According to the retrieved documents, the Micr...","{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '2'}, 'to...",5,5,4,5,5,5
6,How many Azure Arc customers does Microsoft cu...,"More than 12,000 Arc customers, double the num...","According to the retrieved document, Microsoft...","{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '2'}, 'to...",6,5,3,5,5,5
7,What are some examples of companies using Azur...,"Over 200 customers, including KPMG and Al Jaze...","According to the retrieved documents, over 200...","{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '2'}, 'to...",7,5,5,5,5,4
8,How many developers are using GitHub Copilot a...,More than one million people have used GitHub ...,"According to the retrieved documents, GitHub i...","{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '2'}, 'to...",8,5,5,5,5,4
9,What is the growth rate of Azure Machine Learn...,Azure ML revenue alone has increased more than...,The retrieved documents do not provide any inf...,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...",9,4,4,5,1,1


In [12]:
pf.get_metrics(eval_run_topk3)

{'relevance': 4.3,
 'coherence': 4.9,
 'fluency': 4.1,
 'groundedness': 4.2,
 'gpt_similarity': 5.0}

In [13]:
pf.visualize(eval_run_topk3)

The HTML file is generated at 'C:\\Users\\hokhadiv\\AppData\\Local\\Temp\\pf-visualize-detail-yik6pq2a.html'.
Trying to view the result in a web browser...
Successfully visualized from the web browser.


### Multiple runs 
Using config.yaml file

In [14]:
import yaml
import promptflow

pf = promptflow.PFClient()

with open("./runs_config.yaml", "r") as file:
    runs_config = yaml.safe_load(file)

# Access the contents of the config.yaml file
print(runs_config)

{'Run1': {'maxTokens': 3500, 'topK': 1, 'searchType': 'filter_vector', 'indexName': 'msft-transcripts-1'}, 'Run2': {'maxTokens': 3500, 'topK': 3, 'searchType': 'filter_vector', 'indexName': 'msft-transcripts-1'}}


In [15]:
from datetime import datetime

run_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")


def run_dags(
    flow_path="rag-copilot/flow.dag.yaml",
    data_path="./datasets/evalset.csv",
    eval_flow_path="evaluator/eval_aistudio_score/",
    config=None,
):
    column_mapping = {
        "chat_history": "${data.chat_history}",
        "query": "${data.question}",
        "topK": config["topK"],
        "maxTokens": config["maxTokens"],
        "searchType": "filter_vector",
        "indexName": config["indexName"]
    }

    copilot_run_from_config = pf.run(
        flow=flow_path,
        data=data_path,
        column_mapping=column_mapping,
        display_name="copilot_run_from_config_" + run_time,
    )
    # pf.stream(current_run)
    # print(run_topk3)

    eval_run_from_config = pf.run(
        flow=eval_flow_path,
        run=copilot_run_from_config,
        data=data_path,
        column_mapping={
            "question": "${data.question}",
            "answer": "${data.answer}",
            "reply": "${run.outputs.reply}",
            "context": "${run.outputs.fetched_docs}",
            "log_params": "${run.outputs.log_params}",
        },
        display_name="eval_run_from_config_" + run_time,
    )
    return copilot_run_from_config, eval_run_from_config

In [16]:
i = 0
run_logs = {}
for run in runs_config:
    i += 1
    print(runs_config[run])
    copilot_run_from_config, eval_run_from_config = run_dags(config=runs_config[run])
    run_logs[i] = eval_run_from_config

{'maxTokens': 3500, 'topK': 1, 'searchType': 'filter_vector', 'indexName': 'msft-transcripts-1'}
{'maxTokens': 3500, 'topK': 3, 'searchType': 'filter_vector', 'indexName': 'msft-transcripts-1'}


In [17]:
for run_id in run_logs:
    pf.visualize(run_logs[run_id])

The HTML file is generated at 'C:\\Users\\hokhadiv\\AppData\\Local\\Temp\\pf-visualize-detail-izucjinc.html'.
Trying to view the result in a web browser...
Successfully visualized from the web browser.
The HTML file is generated at 'C:\\Users\\hokhadiv\\AppData\\Local\\Temp\\pf-visualize-detail-o4xdxhf_.html'.
Trying to view the result in a web browser...
Successfully visualized from the web browser.
