### Goal
Run flows using sdk and allowing for external configuration of RAG parameters. 

### Check Connections

In [15]:
import promptflow

pf = promptflow.PFClient()

# List all the available connections
for c in pf.connections.list():
    print(c.name + " (" + c.type + ")")

azureopenai_local (AzureOpenAI)
aoai_connection (AzureOpenAI)
default_connection (CognitiveSearch)
cosmodb_connection (Custom)
acs_connection (CognitiveSearch)
azure_form_recognizer_connection (Custom)
acs_connection_test (CognitiveSearch)
aoai_connection_test (AzureOpenAI)
postgres_connection (Custom)


### Test the flow

In [16]:
output = pf.flows.test(
    "../../rag-cosmos-postgres/flow.dag.yaml",
    inputs={
        "chat_history": [],
        "query": "What is the growth rate of Azure ML revenue in FY23Q1?",
    },
)

print(output)



{'fetched_docs': '{"retrieved_documents": [{"[doc0]": {"ticker": "MSFT", "quarter": 1, "year": "23", "content": "For Intelligent Cloud we expect revenue to grow between 22 and 24 percent in constant currency or $21.25 to $21.55 billion US dollars.\\n\\nRevenue will continue to be driven by Azure which, as a reminder, can have quarterly variability primarily from our per-user business and from in-period revenue recognition depending on the mix of contracts. \\n\\nWe expect Azure revenue growth to be sequentially lower by roughly 5 points on a constant currency basis. Azure revenue will continue to be driven by strong growth in consumption with some impact from the Q1 \\n\\n\\n\\ntrends noted earlier. And our per-user business should continue to benefit from Microsoft 365 suite momentum, though we expect moderation in growth rates given the size of the installed base. \\n\\nIn our on-premises server business, we expect revenue to decline low-single digits as demand for our hybrid solutio

### Run the flow with the benchmark data file

In [17]:
flow_path = "../../rag-cosmos-postgres/flow.dag.yaml"
data_path = "../../datasets/evalset.csv"

column_mapping = {
    "chat_history": "${data.chat_history}",
    "query": "${data.question}",
}

run_postgres_topk3 = pf.run(flow=flow_path, data=data_path, column_mapping=column_mapping)
pf.stream(run_postgres_topk3)
print(run_postgres_topk3)

2024-03-14 14:16:31 -0700   35032 execution.bulk     INFO     Process 35032 started.
2024-03-14 14:16:31 -0700   45036 execution.bulk     INFO     Process 45036 started.
2024-03-14 14:16:31 -0700   45748 execution.bulk     INFO     Process 45748 started.
2024-03-14 14:16:31 -0700   39416 execution.bulk     INFO     Process 39416 started.
2024-03-14 14:16:31 -0700   47364 execution.bulk     INFO     Process 47364 started.
2024-03-14 14:16:33 -0700   13524 execution.bulk     INFO     Process 13524 started.
2024-03-14 14:16:33 -0700   41100 execution.bulk     INFO     Process 41100 started.
2024-03-14 14:16:33 -0700   46708 execution.bulk     INFO     Process 46708 started.
2024-03-14 14:16:33 -0700   36520 execution.bulk     INFO     Process 36520 started.
2024-03-14 14:16:33 -0700    1284 execution.bulk     INFO     Process 1284 started.

Run name: "rag_cosmos_postgres_variant_0_20240314_141553_398126"
Run status: "Completed"
Start time: "2024-03-14 14:15:53.397125"
Duration: "0:01:02.6

In [18]:
pf.get_details(run_postgres_topk3)

Unnamed: 0,inputs.chat_history,inputs.query,inputs.line_number,outputs.fetched_docs,outputs.log_params,outputs.reply
0,[],What is the quarterly revenue of Microsoft Clo...,0,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...","According to the retrieved document [doc0], Mi..."
1,[],How many Azure Arc customers does Microsoft cu...,1,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...","According to the retrieved document, Microsoft..."
2,[],Which companies run SAP on Azure in FY23Q1?,2,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...","According to the retrieved documents, companie..."
3,[],What is the annual recurring revenue of GitHub...,3,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...",The retrieved documents do not provide informa...
4,[],How many monthly active users does Power BI ha...,4,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...",The retrieved documents do not provide informa...
5,[],What is the quarterly revenue of Microsoft Clo...,5,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '2'}, 'to...","According to the retrieved document [doc0], th..."
6,[],How many Azure Arc customers does Microsoft cu...,6,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '2'}, 'to...","According to the retrieved document [doc1], Mi..."
7,[],What are some examples of companies using Azur...,7,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '2'}, 'to...","According to the retrieved document, over 200 ..."
8,[],How many developers are using GitHub Copilot a...,8,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '2'}, 'to...",The retrieved documents do not provide any inf...
9,[],What is the growth rate of Azure Machine Learn...,9,"{""retrieved_documents"": [{""[doc0]"": {""ticker"":...","{'filter': {'Year': '23', 'Quarter': '1'}, 'to...",The retrieved documents do not provide informa...


In [19]:
ddf = pf.get_details(run_postgres_topk3)
#ddf.to_csv("checkoutput.csv")

## Evaluate 

In [20]:
eval_flow_path = "../../evaluator/eval_aistudio_score/"
data_path = "../../datasets/evalset.csv"

eval_postgres_run_topk3 = pf.run(
    flow=eval_flow_path,
    run=run_postgres_topk3,
    data=data_path,
    column_mapping={
        "question": "${data.question}",
        "answer": "${data.answer}",
    },
    display_name="eval_postgres_aistudio_score_topk3",
)
pf.stream(eval_postgres_run_topk3)
print(eval_postgres_run_topk3)

2024-03-14 14:17:01 -0700   33012 execution.bulk     INFO     Process 33012 started.
2024-03-14 14:17:01 -0700   30820 execution.bulk     INFO     Process 30820 started.
2024-03-14 14:17:01 -0700   38008 execution.bulk     INFO     Process 38008 started.
2024-03-14 14:17:01 -0700    4760 execution.bulk     INFO     Process 4760 started.
2024-03-14 14:17:01 -0700   13660 execution.bulk     INFO     Process 13660 started.
2024-03-14 14:17:01 -0700   35844 execution.bulk     INFO     Process 35844 started.
2024-03-14 14:17:01 -0700   40524 execution.bulk     INFO     Process 40524 started.
2024-03-14 14:17:01 -0700   33768 execution.bulk     INFO     Process 33768 started.
2024-03-14 14:17:01 -0700   29704 execution.bulk     INFO     Process 29704 started.
2024-03-14 14:17:04 -0700   33608 execution.bulk     INFO     Process 33608 started.
  File "c:\Users\hokhadiv\AppData\Local\miniconda3\envs\appliedaipf\lib\site-packages\promptflow\tools\common.py", line 240, in wrapper
    time.sleep(

In [21]:
pf.get_details(eval_postgres_run_topk3)

Unnamed: 0,inputs.question,inputs.answer,inputs.line_number,inputs.context,inputs.log_params,inputs.reply,outputs.coherence,outputs.fluency,outputs.gpt_similarity,outputs.groundedness,outputs.relevance
0,What is the quarterly revenue of Microsoft Clo...,"$25 billion in quarterly revenue, up 24 percen...",0,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,2,5,1,1
1,How many Azure Arc customers does Microsoft cu...,"More than 8,500 Arc customers, more than doubl...",1,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,3,5,5,5
2,Which companies run SAP on Azure in FY23Q1?,"Companies like Chobani, Munich RE, Sodexo, Vol...",2,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,5,5,1,1
3,What is the annual recurring revenue of GitHub...,GitHub is now at $1 billion in annual recurrin...,3,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,4,5,5,1,1
4,How many monthly active users does Power BI ha...,Power BI is the market leader in business inte...,4,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,5,5,1,1
5,What is the quarterly revenue of Microsoft Clo...,"$27 billion in quarterly revenue, up 22 percen...",5,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,5,5,1,1
6,How many Azure Arc customers does Microsoft cu...,"More than 12,000 Arc customers, double the num...",6,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,3,5,1,1
7,What are some examples of companies using Azur...,"Over 200 customers, including KPMG and Al Jaze...",7,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,5,5,3,1
8,How many developers are using GitHub Copilot a...,More than one million people have used GitHub ...,8,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,5,4,5,1,1
9,What is the growth rate of Azure Machine Learn...,Azure ML revenue alone has increased more than...,9,"[""More than 8,500 Arc customers, more than dou...",{},Over 8500 Arc customers. This is more than dou...,4,4,5,3,1


In [22]:
pf.get_metrics(eval_postgres_run_topk3)

{'relevance': 1.4,
 'coherence': 4.8,
 'fluency': 4.1,
 'groundedness': 1.8,
 'gpt_similarity': 5.0}

In [23]:
pf.visualize(eval_postgres_run_topk3)

The HTML file is generated at 'C:\\Users\\hokhadiv\\AppData\\Local\\Temp\\pf-visualize-detail-lhh_hgyf.html'.
Trying to view the result in a web browser...
Successfully visualized from the web browser.


### Multiple runs 
Using config.yaml file

In [24]:
import yaml
import promptflow

pf = promptflow.PFClient()

with open("./runs_config.yaml", "r") as file:
    runs_config = yaml.safe_load(file)

# Access the contents of the config.yaml file
print(runs_config)

{'Run1': {'maxTokens': 3500, 'topK': 3, 'searchType': 'vector', 'tableName': 'msft_transcript'}, 'Run2': {'maxTokens': 3500, 'topK': 3, 'searchType': 'hybrid', 'tableName': 'msft_transcript'}}


In [25]:
# function to update param_config systematically 
import os 
def update_param_config(flow_path, config):
    base_dir = os.path.dirname(flow_path)
    param_config_path = os.path.join(base_dir, 'param_config.yaml')

    with open(param_config_path, 'r') as file:
        param_dict = yaml.safe_load(file)

    param_dict.update(config)

    with open(param_config_path, 'w') as file:
        yaml.dump(param_dict, file)


In [26]:
from datetime import datetime

run_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")


def run_dags(
    flow_path="../../rag-cosmos-postgres/flow.dag.yaml",
    data_path="../../datasets/evalset.csv",
    eval_flow_path="../../evaluator/eval_aistudio_score/",
    config=None,
):
    column_mapping = {
        "chat_history": "${data.chat_history}",
        "query": "${data.question}",
    }
    update_param_config(flow_path, config)

    copilot_run_from_config = pf.run(
        flow=flow_path,
        data=data_path,
        column_mapping=column_mapping,
        display_name="rag_postgres_run_from_config_" + run_time,
    )


    # pf.stream(current_run)
    # print(run_topk3)

    eval_run_from_config = pf.run(
        flow=eval_flow_path,
        run=copilot_run_from_config,
        data=data_path,
        column_mapping={
            "question": "${data.question}",
            "answer": "${data.answer}",
            "reply": "${run.outputs.reply}",
            "context": "${run.outputs.fetched_docs}",
            "log_params": "${run.outputs.log_params}",
        },
        display_name="eval_run_postgres_aistudio_scores_from_config_" + run_time,
    )
    return copilot_run_from_config, eval_run_from_config

In [27]:
i = 0
run_logs = {}
for run in runs_config:
    i += 1
    print(runs_config[run])
    copilot_run_from_config, eval_run_from_config = run_dags(config=runs_config[run])
    run_logs[i] = eval_run_from_config

{'maxTokens': 3500, 'topK': 3, 'searchType': 'vector', 'tableName': 'msft_transcript'}




{'maxTokens': 3500, 'topK': 3, 'searchType': 'hybrid', 'tableName': 'msft_transcript'}




In [29]:
for run_id in run_logs:
    print(run_id)
    pf.visualize(run_logs[run_id])

1
The HTML file is generated at 'C:\\Users\\hokhadiv\\AppData\\Local\\Temp\\pf-visualize-detail-4n8w4nix.html'.
Trying to view the result in a web browser...
Successfully visualized from the web browser.
2
The HTML file is generated at 'C:\\Users\\hokhadiv\\AppData\\Local\\Temp\\pf-visualize-detail-6iyac4r1.html'.
Trying to view the result in a web browser...
Successfully visualized from the web browser.
