### Goal
Run flows using the promptflow SDK, allowing external configuration of the RAG parameters.

### Check Connections

In [1]:
import promptflow

# Local promptflow client; later cells reuse it as `pf`.
pf = promptflow.PFClient()

# Print every registered connection as "name (type)".
for conn in pf.connections.list():
    label = conn.name + " (" + conn.type + ")"
    print(label)

azureopenai_local (AzureOpenAI)
aoai_connection (AzureOpenAI)
default_connection (CognitiveSearch)
cosmodb_connection (Custom)
acs_connection (CognitiveSearch)
azure_form_recognizer_connection (Custom)
acs_connection_test (CognitiveSearch)
aoai_connection_test (AzureOpenAI)
postgres_connection (Custom)


### Test the flow

In [2]:
# Single-question test of the RAG flow, starting from an empty chat history.
test_inputs = {
    "chat_history": [],
    "query": "What is the growth rate of Azure ML revenue in FY23Q1?",
}
output = pf.flows.test("../../rag-cosmos-mongo/flow.dag.yaml", inputs=test_inputs)

print(output)



2024-03-15 08:36:24 -0700   22004 execution.flow     INFO     Start executing nodes in thread pool mode.
2024-03-15 08:36:24 -0700   22004 execution.flow     INFO     Start to run 15 nodes with concurrency level 16.
2024-03-15 08:36:24 -0700   22004 execution.flow     INFO     Executing node ConfigLoader. node run id: aed520b1-ccf4-409a-bc6f-c97fd9004408_ConfigLoader_0
2024-03-15 08:36:24 -0700   22004 execution.flow     INFO     Executing node CheckExistingContext. node run id: aed520b1-ccf4-409a-bc6f-c97fd9004408_CheckExistingContext_0
2024-03-15 08:36:24 -0700   22004 execution.flow     INFO     Executing node QueryParser. node run id: aed520b1-ccf4-409a-bc6f-c97fd9004408_QueryParser_0
2024-03-15 08:36:24 -0700   22004 execution.flow     INFO     Executing node FormatConversation. node run id: aed520b1-ccf4-409a-bc6f-c97fd9004408_FormatConversation_0
2024-03-15 08:36:24 -0700   22004 execution.flow     INFO     Executing node Embedding. node run id: aed520b1-ccf4-409a-bc6f-c97fd9004

### Run the flow with the benchmark data file

In [3]:
# Flow definition and benchmark dataset used for the batch run.
flow_path = "../../rag-cosmos-mongo/flow.dag.yaml"
data_path = "../../datasets/evalset.csv"

# Bind each flow input to a column of the dataset.
column_mapping = dict(
    chat_history="${data.chat_history}",
    query="${data.question}",
)

run_mongo_topk3 = pf.run(
    flow=flow_path,
    data=data_path,
    column_mapping=column_mapping,
)
# Stream the run's logs, then print the run summary.
pf.stream(run_mongo_topk3)
print(run_mongo_topk3)

2024-03-15 08:36:34 -0700   41700 execution.bulk     INFO     Process 41700 started.
2024-03-15 08:36:34 -0700   14304 execution.bulk     INFO     Process 14304 started.
2024-03-15 08:36:34 -0700   43744 execution.bulk     INFO     Process 43744 started.
2024-03-15 08:36:34 -0700    7644 execution.bulk     INFO     Process 7644 started.
2024-03-15 08:36:34 -0700   33012 execution.bulk     INFO     Process 33012 started.
2024-03-15 08:36:34 -0700   42908 execution.bulk     INFO     Process 42908 started.
2024-03-15 08:36:34 -0700   38116 execution.bulk     INFO     Process 38116 started.
2024-03-15 08:36:34 -0700   48124 execution.bulk     INFO     Process 48124 started.
2024-03-15 08:36:34 -0700   19560 execution.bulk     INFO     Process 19560 started.
2024-03-15 08:36:39 -0700   42392 execution.bulk     INFO     Process 42392 started.

Run name: "rag_cosmos_mongo_variant_0_20240315_083629_665343"
Run status: "Completed"
Start time: "2024-03-15 08:36:29.665343"
Duration: "0:00:17.0782

In [4]:
# Display the per-row inputs and outputs recorded for the batch run.
pf.get_details(run_mongo_topk3)

Unnamed: 0,inputs.chat_history,inputs.query,inputs.line_number,outputs.fetched_docs,outputs.log_params,outputs.reply
0,[],What is the quarterly revenue of Microsoft Clo...,0,"{""retrieved_documents"": [{""[doc[0]"": {""content...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...",The retrieved documents do not provide informa...
1,[],How many Azure Arc customers does Microsoft cu...,1,"{""retrieved_documents"": [{""[doc[0]"": {""content...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...",The retrieved documents do not provide informa...
2,[],Which companies run SAP on Azure in FY23Q1?,2,"{""retrieved_documents"": [{""[doc[0]"": {""content...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...",The retrieved documents do not provide informa...
3,[],What is the annual recurring revenue of GitHub...,3,"{""retrieved_documents"": [{""[doc[0]"": {""content...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...",The retrieved documents do not provide any inf...
4,[],How many monthly active users does Power BI ha...,4,"{""retrieved_documents"": [{""[doc[0]"": {""content...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...",The retrieved documents do not provide informa...
5,[],What is the quarterly revenue of Microsoft Clo...,5,"{""retrieved_documents"": [{""[doc[0]"": {""content...","{'filter': {'Year': '23', 'Quarter': '2'}, 'to...","According to the retrieved document, the Micro..."
6,[],How many Azure Arc customers does Microsoft cu...,6,"{""retrieved_documents"": [{""[doc[0]"": {""content...","{'filter': {'Year': '23', 'Quarter': '2'}, 'to...",The retrieved documents do not provide any inf...
7,[],What are some examples of companies using Azur...,7,"{""retrieved_documents"": [{""[doc[0]"": {""content...","{'filter': {'Year': '23', 'Quarter': '2'}, 'to...",The retrieved documents do not contain any inf...
8,[],How many developers are using GitHub Copilot a...,8,"{""retrieved_documents"": [{""[doc[0]"": {""content...","{'filter': {'Year': '23', 'Quarter': '2'}, 'to...",The retrieved documents do not provide any inf...
9,[],What is the growth rate of Azure Machine Learn...,9,"{""retrieved_documents"": [{""[doc[0]"": {""content...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...",The retrieved documents do not provide informa...


In [5]:
# Keep the batch-run details in a variable for ad-hoc inspection.
ddf = pf.get_details(run_mongo_topk3)
# Optional: uncomment the next line to export the details to a CSV file.
#ddf.to_csv("checkoutput.csv")

### Evaluate

In [6]:
# Evaluator flow, scored against the same benchmark dataset as the batch run.
eval_flow_path = "../../evaluator/eval_aistudio_score/"
data_path = "../../datasets/evalset.csv"

# `data.*` columns come from the dataset, `run.outputs.*` from the batch run.
eval_column_mapping = {
    "question": "${data.question}",
    "answer": "${data.answer}",
    "reply": "${run.outputs.reply}",
    "context": "${run.outputs.fetched_docs}",
    "log_params": "${run.outputs.log_params}",
}

eval_mongo_run_topk3 = pf.run(
    flow=eval_flow_path,
    run=run_mongo_topk3,
    data=data_path,
    column_mapping=eval_column_mapping,
    display_name="eval_mongo_aistudio_score_topk3",
)
# Stream the evaluation logs, then print the run summary.
pf.stream(eval_mongo_run_topk3)
print(eval_mongo_run_topk3)

2024-03-15 08:36:51 -0700   42568 execution.bulk     INFO     Process 42568 started.
2024-03-15 08:36:51 -0700   37748 execution.bulk     INFO     Process 37748 started.
2024-03-15 08:36:51 -0700   41340 execution.bulk     INFO     Process 41340 started.
2024-03-15 08:36:51 -0700   47680 execution.bulk     INFO     Process 47680 started.
2024-03-15 08:36:51 -0700    4552 execution.bulk     INFO     Process 4552 started.
2024-03-15 08:36:51 -0700    5356 execution.bulk     INFO     Process 5356 started.
2024-03-15 08:36:51 -0700   27236 execution.bulk     INFO     Process 27236 started.
2024-03-15 08:36:51 -0700   15696 execution.bulk     INFO     Process 15696 started.
2024-03-15 08:36:51 -0700    1468 execution.bulk     INFO     Process 1468 started.
2024-03-15 08:36:55 -0700   47408 execution.bulk     INFO     Process 47408 started.
  File "c:\Users\hokhadiv\AppData\Local\miniconda3\envs\appliedaipf\lib\site-packages\promptflow\tools\common.py", line 240, in wrapper
    time.sleep(re

In [7]:
# Display the per-row evaluator inputs and scores for the evaluation run.
# NOTE(review): in the saved output every row carries the same inputs.reply and
# inputs.context and an empty inputs.log_params, which does not match the replies
# shown for run_mongo_topk3 -- confirm the evaluator inputs before trusting the scores.
pf.get_details(eval_mongo_run_topk3)

Unnamed: 0,inputs.question,inputs.answer,inputs.line_number,inputs.context,inputs.log_params,inputs.reply,outputs.coherence,outputs.fluency,outputs.gpt_similarity,outputs.groundedness,outputs.relevance
0,What is the quarterly revenue of Microsoft Clo...,"$25 billion in quarterly revenue, up 24 percen...",0,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,2,5,1,1
1,How many Azure Arc customers does Microsoft cu...,"More than 8,500 Arc customers, more than doubl...",1,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,3,5,5,5
2,Which companies run SAP on Azure in FY23Q1?,"Companies like Chobani, Munich RE, Sodexo, Vol...",2,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,5,5,1,1
3,What is the annual recurring revenue of GitHub...,GitHub is now at $1 billion in annual recurrin...,3,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,4,4,5,3,1
4,How many monthly active users does Power BI ha...,Power BI is the market leader in business inte...,4,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,5,5,1,1
5,What is the quarterly revenue of Microsoft Clo...,"$27 billion in quarterly revenue, up 22 percen...",5,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,4,5,1,1
6,How many Azure Arc customers does Microsoft cu...,"More than 12,000 Arc customers, double the num...",6,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,4,5,1,1
7,What are some examples of companies using Azur...,"Over 200 customers, including KPMG and Al Jaze...",7,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,5,5,3,1
8,How many developers are using GitHub Copilot a...,More than one million people have used GitHub ...,8,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,4,5,5,3,1
9,What is the growth rate of Azure Machine Learn...,Azure ML revenue alone has increased more than...,9,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,4,4,5,3,1


In [8]:
# Display the run-level metrics reported for the evaluation run.
pf.get_metrics(eval_mongo_run_topk3)

{'relevance': 1.4,
 'coherence': 4.7,
 'fluency': 4.1,
 'groundedness': 2.2,
 'gpt_similarity': 5.0}

In [9]:
# Generate the HTML report for the evaluation run and open it in a web browser.
pf.visualize(eval_mongo_run_topk3)

The HTML file is generated at 'C:\\Users\\hokhadiv\\AppData\\Local\\Temp\\pf-visualize-detail-ltifu8mg.html'.
Trying to view the result in a web browser...
Successfully visualized from the web browser.


### Multiple runs
Using the `runs_config.yaml` file, which defines one set of RAG parameters per run.

In [10]:
import yaml
import promptflow

# Re-create the promptflow client for this section.
pf = promptflow.PFClient()

# Parse runs_config.yaml into a dict.
with open("./runs_config.yaml", "r") as config_file:
    runs_config = yaml.safe_load(config_file)

# Show the parsed contents of runs_config.yaml.
print(runs_config)

{'Run1': {'maxTokens': 3500, 'searchType': 'vector', 'dbName': 'earning_calls', 'topK': 3}, 'Run2': {'maxTokens': 3500, 'searchType': 'vector', 'dbName': 'earning_calls', 'topK': 5}}


In [11]:
# function to update param_config systematically 
import os 
def update_param_config(flow_path, config):
    """Merge ``config`` into the ``param_config.yaml`` located next to the flow file.

    Args:
        flow_path: Path to the flow definition file (e.g. ``flow.dag.yaml``).
            ``param_config.yaml`` is looked up in the same directory.
        config: Mapping of parameter names to values. Keys already present in
            the file are overwritten and new keys are added. ``None`` means
            "no overrides" and leaves the file untouched.

    Raises:
        FileNotFoundError: If ``param_config.yaml`` does not exist and
            ``config`` is not ``None``.
    """
    if config is None:
        # Nothing to merge; dict.update(None) would raise TypeError.
        return

    base_dir = os.path.dirname(flow_path)
    param_config_path = os.path.join(base_dir, 'param_config.yaml')

    with open(param_config_path, 'r') as file:
        # safe_load returns None for an empty document; start from an empty dict then.
        param_dict = yaml.safe_load(file) or {}

    param_dict.update(config)

    # Rewrite the whole file with the merged parameters.
    with open(param_config_path, 'w') as file:
        yaml.dump(param_dict, file)


In [12]:
from datetime import datetime

run_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")


def run_dags(
    flow_path="../../rag-cosmos-mongo/flow.dag.yaml",
    data_path="../../datasets/evalset.csv",
    eval_flow_path="../../evaluator/eval_aistudio_score/",
    config=None,
):
    """Run the RAG flow over a dataset, then evaluate that run.

    Args:
        flow_path: Path to the flow definition to run.
        data_path: Path to the benchmark dataset used by both runs.
        eval_flow_path: Path to the evaluator flow.
        config: Optional mapping of parameter overrides that is written to the
            flow's ``param_config.yaml`` before the run. ``None`` leaves the
            existing parameter file as it is.

    Returns:
        A ``(flow_run, eval_run)`` tuple as returned by the two ``pf.run`` calls.
    """
    # Stamp the names per call so that successive configurations get distinct
    # display names instead of sharing one timestamp taken when the cell ran.
    call_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    column_mapping = {
        "chat_history": "${data.chat_history}",
        "query": "${data.question}",
    }

    # Only rewrite the parameter file when overrides were actually supplied;
    # passing None through would fail inside update_param_config.
    if config is not None:
        update_param_config(flow_path, config)

    copilot_run_from_config = pf.run(
        flow=flow_path,
        data=data_path,
        column_mapping=column_mapping,
        display_name="rag_mongo_run_from_config_" + call_time,
    )

    # The evaluator reads ground truth from the dataset and the replies,
    # retrieved documents and logged parameters from the flow run above.
    eval_run_from_config = pf.run(
        flow=eval_flow_path,
        run=copilot_run_from_config,
        data=data_path,
        column_mapping={
            "question": "${data.question}",
            "answer": "${data.answer}",
            "reply": "${run.outputs.reply}",
            "context": "${run.outputs.fetched_docs}",
            "log_params": "${run.outputs.log_params}",
        },
        display_name="eval_run_mongo_aistudio_scores_from_config_" + call_time,
    )
    return copilot_run_from_config, eval_run_from_config

In [13]:
# Launch one flow run plus one evaluation run per entry in runs_config and
# keep the evaluation runs under a 1-based run number.
run_logs = {}
for run_number, run_name in enumerate(runs_config, start=1):
    run_params = runs_config[run_name]
    print(run_params)
    copilot_run_from_config, eval_run_from_config = run_dags(config=run_params)
    run_logs[run_number] = eval_run_from_config

{'maxTokens': 3500, 'searchType': 'vector', 'dbName': 'earning_calls', 'topK': 3}


{'maxTokens': 3500, 'searchType': 'vector', 'dbName': 'earning_calls', 'topK': 5}


In [14]:
# Print each run number and open the HTML report of its evaluation run.
for run_id, eval_run in run_logs.items():
    print(run_id)
    pf.visualize(eval_run)

1
The HTML file is generated at 'C:\\Users\\hokhadiv\\AppData\\Local\\Temp\\pf-visualize-detail-75p_gi3g.html'.
Trying to view the result in a web browser...
Successfully visualized from the web browser.
2
The HTML file is generated at 'C:\\Users\\hokhadiv\\AppData\\Local\\Temp\\pf-visualize-detail-l3oi13hv.html'.
Trying to view the result in a web browser...
Successfully visualized from the web browser.


### Run different flow variants
Now, let's evaluate the same flow, but using a node variant.

In [23]:
from datetime import datetime

run_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

def run_dags(
    flow_path="../../rag-cosmos-mongo/flow.dag.yaml",
    data_path="../../datasets/evalset.csv",
    eval_flow_path="../../evaluator/eval_aistudio_score/",
    variant=None,
    config=None,
):
    """Run the RAG flow (optionally on a node variant), then evaluate that run.

    Args:
        flow_path: Path to the flow definition to run.
        data_path: Path to the benchmark dataset used by both runs.
        eval_flow_path: Path to the evaluator flow.
        variant: Optional node variant reference, e.g.
            ``"${DetermineReply.variant_1}"``. ``None`` passes no variant to
            ``pf.run`` and leaves the variant out of the display names.
        config: Optional mapping of parameter overrides that is written to the
            flow's ``param_config.yaml`` before the run. ``None`` leaves the
            existing parameter file as it is.

    Returns:
        A ``(flow_run, eval_run)`` tuple as returned by the two ``pf.run`` calls.
    """
    # Stamp the names per call so that successive configurations get distinct
    # display names instead of sharing one timestamp taken when the cell ran.
    call_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    column_mapping = {
        "chat_history": "${data.chat_history}",
        "query": "${data.question}",
    }

    # Only rewrite the parameter file when overrides were actually supplied;
    # passing None through would fail inside update_param_config.
    if config is not None:
        update_param_config(flow_path, config)

    # Concatenating None into the names would raise TypeError, so the variant
    # part is only added when a variant was requested.
    if variant is None:
        flow_display_name = "rag_mongo_run_from_config_" + call_time
        eval_display_name = "eval_run_mongo_aistudio_scores_from_config_" + call_time
    else:
        flow_display_name = (
            "rag_mongo_run__variant_" + variant + "_from_config_" + call_time
        )
        eval_display_name = (
            "eval_run_mongo_variant_" + variant
            + "_aistudio_scores_from_config_" + call_time
        )

    copilot_run_from_config = pf.run(
        flow=flow_path,
        data=data_path,
        column_mapping=column_mapping,
        variant=variant,
        display_name=flow_display_name,
    )

    # The evaluator reads ground truth from the dataset and the replies,
    # retrieved documents and logged parameters from the flow run above.
    eval_run_from_config = pf.run(
        flow=eval_flow_path,
        run=copilot_run_from_config,
        data=data_path,
        column_mapping={
            "question": "${data.question}",
            "answer": "${data.answer}",
            "reply": "${run.outputs.reply}",
            "context": "${run.outputs.fetched_docs}",
            "log_params": "${run.outputs.log_params}",
        },
        display_name=eval_display_name,
    )
    return copilot_run_from_config, eval_run_from_config

In [24]:
# Same sweep over runs_config, this time on variant_1 of the DetermineReply node;
# evaluation runs are kept under a 1-based run number.
run_logs = {}
for run_number, run_name in enumerate(runs_config, start=1):
    run_params = runs_config[run_name]
    print(run_params)
    copilot_run_from_config, eval_run_from_config = run_dags(
        variant="${DetermineReply.variant_1}",
        config=run_params,
    )
    run_logs[run_number] = eval_run_from_config

{'maxTokens': 3500, 'searchType': 'vector', 'dbName': 'earning_calls', 'topK': 3}
{'maxTokens': 3500, 'searchType': 'vector', 'dbName': 'earning_calls', 'topK': 5}




In [25]:
# Print each run number and open the HTML report of its evaluation run.
for run_id, eval_run in run_logs.items():
    print(run_id)
    pf.visualize(eval_run)

1
The HTML file is generated at 'C:\\Users\\hokhadiv\\AppData\\Local\\Temp\\pf-visualize-detail-tzot0ewi.html'.
Trying to view the result in a web browser...
Successfully visualized from the web browser.
2
