### Goal
Run flows using sdk and allowing for external configuration of RAG parameters. 

### Check Connections

In [20]:
import promptflow

pf = promptflow.PFClient()

# List all the available connections
for c in pf.connections.list():
    print(c.name + " (" + c.type + ")")

azureopenai_local (AzureOpenAI)
aoai_connection (AzureOpenAI)
default_connection (CognitiveSearch)
cosmodb_connection (Custom)
acs_connection (CognitiveSearch)
azure_form_recognizer_connection (Custom)
acs_connection_test (CognitiveSearch)
aoai_connection_test (AzureOpenAI)
postgres_connection (Custom)


### Test the flow

In [21]:
output = pf.flows.test(
    "rag-copilot/flow.dag.yaml",
    inputs={
        "chat_history": [],
        "query": "What is the growth rate of Azure ML revenue in FY23Q1?",
    },
)

print(output)

{'fetched_docs': '{"retrieved_documents": [{"[doc0]": {"ticker": "MSFT", "quarter": "1", "year": "23", "content": "hiring resulting in mid-to- high-single-digits revenue growth or low to mid- teens growth in constant currency. And in Dynamics, we expect revenue growth in the low-double-digits or the low 20s in constant currency driven by continued share gains in Dynamics 365. For Intelligent Cloud we expect revenue to grow between 22 and 24 percent in constant currency or $21.25 to $21.55 billion US dollars. Revenue will continue to be driven by Azure which, as a reminder, can have quarterly variability primarily from our per-user business and from in- period revenue recognition depending on the mix of contracts. "}}]}', 'log_params': {'filter': {'Year': '23', 'Quarter': '1'}, 'topK': 1, 'maxTokens': 3500, 'searchType': 'filter_vector', 'indexName': 'msft-transcripts-1'}, 'reply': 'The retrieved document mentions the expected revenue growth rate for Intelligent Cloud, which includes Az

### Run the flow with the benchmark data file

In [22]:
flow_path = "rag-copilot/flow.dag.yaml"
data_path = "./datasets/evalset.csv"

column_mapping = {
    "chat_history": "${data.chat_history}",
    "query": "${data.question}",
}

run_topk3 = pf.run(flow=flow_path, data=data_path, column_mapping=column_mapping)
pf.stream(run_topk3)
print(run_topk3)

2024-03-12 09:56:11 -0700   18528 execution.bulk     INFO     Process 18528 started.
2024-03-12 09:56:11 -0700   20876 execution.bulk     INFO     Process 20876 started.
2024-03-12 09:56:13 -0700    8188 execution.bulk     INFO     Process 8188 started.
2024-03-12 09:56:13 -0700   40920 execution.bulk     INFO     Process 40920 started.
2024-03-12 09:56:13 -0700   23268 execution.bulk     INFO     Process 23268 started.
2024-03-12 09:56:13 -0700   18600 execution.bulk     INFO     Process 18600 started.
2024-03-12 09:56:13 -0700   46124 execution.bulk     INFO     Process 46124 started.
2024-03-12 09:56:13 -0700   51144 execution.bulk     INFO     Process 51144 started.
2024-03-12 09:56:13 -0700   51936 execution.bulk     INFO     Process 51936 started.
2024-03-12 09:56:16 -0700   44540 execution.bulk     INFO     Process 44540 started.
2024-03-12 09:56:16 -0700   31320 execution.bulk     INFO     Process 31320 started.
2024-03-12 09:56:21 -0700   48088 execution.bulk     INFO     Proc

In [24]:
pf.get_details(run_topk3)

Unnamed: 0,inputs.chat_history,inputs.query,inputs.line_number,outputs.fetched_docs,outputs.log_params,outputs.reply
0,[],What is the quarterly revenue of Microsoft Clo...,0,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...","According to the retrieved document [doc0], th..."
1,[],How many Azure Arc customers does Microsoft cu...,1,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...",The retrieved document mentions that Microsoft...
2,[],Which companies run SAP on Azure in FY23Q1?,2,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...","Based on the retrieved document, companies lik..."
3,[],What is the annual recurring revenue of GitHub...,3,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...",The retrieved document does not provide any in...
4,[],How many monthly active users does Power BI ha...,4,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...",The retrieved document does not provide inform...
5,[],What is the quarterly revenue of Microsoft Clo...,5,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '2'}, 'to...",The retrieved document does not provide the qu...
6,[],How many Azure Arc customers does Microsoft cu...,6,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '2'}, 'to...","According to the retrieved document, Microsoft..."
7,[],What are some examples of companies using Azur...,7,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '2'}, 'to...",The retrieved document mentions that Microsoft...
8,[],How many developers are using GitHub Copilot a...,8,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '2'}, 'to...","According to the retrieved document, as of FY2..."
9,[],What is the growth rate of Azure Machine Learn...,9,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...",The retrieved document does not provide inform...


In [27]:
ddf = pf.get_details(run_topk3)
ddf.to_csv("checkoutput.csv")

## Evaluate 

In [28]:
eval_flow_path = "evaluator/eval_aistudio_score/"
data_path = "./datasets/evalset.csv"

eval_run_topk3 = pf.run(
    flow=eval_flow_path,
    run=run_topk3,
    data=data_path,
    column_mapping={
        "question": "${data.question}",
        "answer": "${data.answer}",
    },
    display_name="eval_multiple_score_topk3",
)
pf.stream(eval_run_topk3)
print(eval_run_topk3)

2024-03-12 10:01:25 -0700    6624 execution.bulk     INFO     Process 6624 started.
2024-03-12 10:01:25 -0700   49880 execution.bulk     INFO     Process 49880 started.
2024-03-12 10:01:25 -0700   51696 execution.bulk     INFO     Process 51696 started.
2024-03-12 10:01:25 -0700   50276 execution.bulk     INFO     Process 50276 started.
2024-03-12 10:01:25 -0700   41200 execution.bulk     INFO     Process 41200 started.
2024-03-12 10:01:25 -0700   51640 execution.bulk     INFO     Process 51640 started.
2024-03-12 10:01:25 -0700   48152 execution.bulk     INFO     Process 48152 started.
2024-03-12 10:01:30 -0700   35164 execution.bulk     INFO     Process 35164 started.
2024-03-12 10:01:30 -0700   50240 execution.bulk     INFO     Process 50240 started.
2024-03-12 10:01:31 -0700   22212 execution.bulk     INFO     Process 22212 started.

Run name: "eval_aistudio_score_variant_0_20240312_100120_184491"
Run status: "Completed"
Start time: "2024-03-12 10:01:20.184491"
Duration: "0:01:29.7

In [29]:
pf.get_details(eval_run_topk3)

Unnamed: 0,inputs.question,inputs.answer,inputs.line_number,inputs.context,inputs.log_params,inputs.reply,outputs.coherence,outputs.fluency,outputs.gpt_similarity,outputs.groundedness,outputs.relevance
0,What is the quarterly revenue of Microsoft Clo...,"$25 billion in quarterly revenue, up 24 percen...",0,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,4,5,1,1
1,How many Azure Arc customers does Microsoft cu...,"More than 8,500 Arc customers, more than doubl...",1,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,3,5,5,5
2,Which companies run SAP on Azure in FY23Q1?,"Companies like Chobani, Munich RE, Sodexo, Vol...",2,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,4,5,5,3,1
3,What is the annual recurring revenue of GitHub...,GitHub is now at $1 billion in annual recurrin...,3,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,4,5,1,1
4,How many monthly active users does Power BI ha...,Power BI is the market leader in business inte...,4,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,4,5,1,1
5,What is the quarterly revenue of Microsoft Clo...,"$27 billion in quarterly revenue, up 22 percen...",5,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,4,5,1,1
6,How many Azure Arc customers does Microsoft cu...,"More than 12,000 Arc customers, double the num...",6,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,3,5,1,1
7,What are some examples of companies using Azur...,"Over 200 customers, including KPMG and Al Jaze...",7,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,5,5,3,1
8,How many developers are using GitHub Copilot a...,More than one million people have used GitHub ...,8,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,5,5,3,1
9,What is the growth rate of Azure Machine Learn...,Azure ML revenue alone has increased more than...,9,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,4,4,5,3,1


In [30]:
pf.get_metrics(eval_run_topk3)

{'relevance': 1.4,
 'coherence': 4.8,
 'fluency': 4.1,
 'groundedness': 2.2,
 'gpt_similarity': 5.0}

In [31]:
pf.visualize(eval_run_topk3)

The HTML file is generated at 'C:\\Users\\hokhadiv\\AppData\\Local\\Temp\\pf-visualize-detail-3lchnuuj.html'.
Trying to view the result in a web browser...
Successfully visualized from the web browser.


### Multiple runs 
Using config.yaml file

In [14]:
import yaml
import promptflow

pf = promptflow.PFClient()

with open("./runs_config.yaml", "r") as file:
    runs_config = yaml.safe_load(file)

# Access the contents of the config.yaml file
print(runs_config)

{'Run1': {'maxTokens': 3500, 'topK': 1, 'searchType': 'filter_vector', 'indexName': 'msft-transcripts-1'}, 'Run2': {'maxTokens': 3500, 'topK': 3, 'searchType': 'filter_vector', 'indexName': 'msft-transcripts-1'}}


In [34]:
# function to update param_config systematically 
import os 
def update_param_config(flow_path, config):
    base_dir = os.path.dirname(flow_path)
    param_config_path = os.path.join(base_dir, 'param_config.yaml')

    with open(param_config_path, 'r') as file:
        param_dict = yaml.safe_load(file)

    param_dict.update(config)

    with open(param_config_path, 'w') as file:
        yaml.dump(param_dict, file)


In [35]:
from datetime import datetime

run_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")


def run_dags(
    flow_path="rag-copilot/flow.dag.yaml",
    data_path="./datasets/evalset.csv",
    eval_flow_path="evaluator/eval_aistudio_score/",
    config=None,
):
    column_mapping = {
        "chat_history": "${data.chat_history}",
        "query": "${data.question}",
    }
    update_param_config(flow_path, config)

    copilot_run_from_config = pf.run(
        flow=flow_path,
        data=data_path,
        column_mapping=column_mapping,
        display_name="copilot_run_from_config_" + run_time,
    )


    # pf.stream(current_run)
    # print(run_topk3)

    eval_run_from_config = pf.run(
        flow=eval_flow_path,
        run=copilot_run_from_config,
        data=data_path,
        column_mapping={
            "question": "${data.question}",
            "answer": "${data.answer}",
            "reply": "${run.outputs.reply}",
            "context": "${run.outputs.fetched_docs}",
            "log_params": "${run.outputs.log_params}",
        },
        display_name="eval_run_from_config_" + run_time,
    )
    return copilot_run_from_config, eval_run_from_config

In [36]:
i = 0
run_logs = {}
for run in runs_config:
    i += 1
    print(runs_config[run])
    copilot_run_from_config, eval_run_from_config = run_dags(config=runs_config[run])
    run_logs[i] = eval_run_from_config

{'maxTokens': 3500, 'topK': 1, 'searchType': 'filter_vector', 'indexName': 'msft-transcripts-1'}
{'maxTokens': 3500, 'topK': 3, 'searchType': 'filter_vector', 'indexName': 'msft-transcripts-1'}


In [40]:
for run_id in run_logs:
    print(run_id)
    pf.visualize(run_logs[run_id])

1
The HTML file is generated at 'C:\\Users\\hokhadiv\\AppData\\Local\\Temp\\pf-visualize-detail-7isqmd5k.html'.
Trying to view the result in a web browser...
Successfully visualized from the web browser.
2
The HTML file is generated at 'C:\\Users\\hokhadiv\\AppData\\Local\\Temp\\pf-visualize-detail-xag0ngps.html'.
Trying to view the result in a web browser...
Successfully visualized from the web browser.


In [53]:
run_logs[1].name

'eval_aistudio_score_variant_0_20240312_102500_238868'