# TEAM 80


## Importing necessary libraries

Ensure that the Python environment you are running this in has all the libraries present in [requirements.txt](experimentation/requirements.txt).

In [None]:
import langchain
import openai
import json
import os
import pandas as pd
import time
import re
import nltk

from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import Document

from langchain.prompts import PromptTemplate
from langchain.callbacks import get_openai_callback
from langchain.llms import OpenAI
from langchain.agents import AgentExecutor
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.schema import StrOutputParser
from langchain.chat_models import ChatOpenAI
import stanza

## Loading Stanza Pipeline for sentence splitting

In [None]:
# Fetch the English Stanza models before building the pipeline.
stanza.download('en')
# Pipeline with tokenization, POS tagging and constituency parsing; get_clauses
# reads the per-sentence constituency parse produced by this object.
nlp = stanza.Pipeline(lang='en', processors='tokenize,pos,constituency')

In [None]:
def get_verb_phrases(t):
    """Collect the verb phrases of a constituency (sub)tree as plain strings.

    Args:
        t: A tree node exposing ``label()``, ``height()``, ``leaves()``,
            ``len()`` and integer indexing.

    Returns:
        list[str]: One space-joined string per verb phrase found. A VP that
        directly contains more than one VP child is split into those children
        instead of being returned whole.
    """
    children = [t[idx] for idx in range(len(t))]
    vp_children = [child for child in children if child.label() == "VP"]
    is_vp = t.label() == "VP"

    # A VP with at most one nested VP is treated as a single verb phrase.
    if is_vp and len(vp_children) <= 1:
        return [' '.join(t.leaves())]

    # Coordinated VP: descend only into its VP children; any other node:
    # descend into every child.
    candidates = vp_children if is_vp else children

    collected = []
    for child in candidates:
        # Only nodes taller than 2 are explored further.
        if child.height() > 2:
            collected.extend(get_verb_phrases(child))
    return collected

def get_pos(t):
    """Find the tree positions to remove from a clause to leave its subject.

    Args:
        t: A tree node exposing ``label()``, ``height()``, ``treeposition()``,
            ``len()`` and integer indexing.

    Returns:
        tuple[list, list]: ``(vp_pos, sub_conj_pos)`` where ``vp_pos`` holds the
        positions of verb-phrase children and ``sub_conj_pos`` holds the
        positions of the non-clause siblings of nested clauses.
    """
    clause_labels = ("S", "SBAR", "SBARQ", "SINV", "SQ")
    vp_pos = []
    sub_conj_pos = []
    children = [t[i] for i in range(len(t))]
    labels = [child.label() for child in children]

    # Compare labels exactly. A substring search for "S" would also fire on
    # tags such as NNS, POS or LST and wrongly mark ordinary tokens for removal.
    has_clause_child = any(label in clause_labels for label in labels)

    if has_clause_child:
        for child in children:
            if child.label() in clause_labels:
                nested_vp, nested_conj = get_pos(child)
                vp_pos.extend(nested_vp)
                sub_conj_pos.extend(nested_conj)
            else:
                # Sibling of a nested clause (e.g. the word introducing it).
                sub_conj_pos.append(child.treeposition())
    elif "VP" in labels:
        for child in children:
            if child.label() == "VP":
                vp_pos.append(child.treeposition())
    else:
        # No VP and no clause at this level: look deeper, skipping nodes of
        # height 2 or less.
        for child in children:
            if child.height() > 2:
                nested_vp, nested_conj = get_pos(child)
                vp_pos.extend(nested_vp)
                sub_conj_pos.extend(nested_conj)
    return (vp_pos, sub_conj_pos)

def print_clauses(parse_str):
    """Split one bracketed constituency parse into simple clause strings.

    Despite its name, this function prints nothing; it returns the clauses.

    Args:
        parse_str: Bracketed parse string accepted by
            ``nltk.tree.ParentedTree.fromstring``.

    Returns:
        list[str]: One entry per (clause, verb phrase) pair, built as the
        clause's remaining subject text, a space, and the verb phrase text.
    """
    sent_tree = nltk.tree.ParentedTree.fromstring(parse_str)
    clause_level_list = ["S", "SBAR", "SBARQ", "SINV", "SQ"]
    clause_list = []
    sub_trees = []
    # Walk a snapshot of the subtrees in reverse, since matching subtrees are
    # deleted from sent_tree while looping.
    for sub_tree in reversed(list(sent_tree.subtrees())):
        if sub_tree.label() in clause_level_list:
            # A clause directly under another clause is left attached to its parent.
            # NOTE(review): assumes every clause node has a parent (e.g. a ROOT
            # wrapper); parent() returning None would raise here — confirm.
            if sub_tree.parent().label() in clause_level_list:
                continue
            # Skip an S whose only child is a VP.
            if (len(sub_tree) == 1 and sub_tree.label() == "S" and sub_tree[0].label() == "VP"
                    and not sub_tree.parent().label() in clause_level_list):
                continue
            sub_trees.append(sub_tree)
            del sent_tree[sub_tree.treeposition()]
    for t in sub_trees:
        # Read the verb phrases and removal positions before mutating t.
        verb_phrases = get_verb_phrases(t)
        vp_pos, sub_conj_pos = get_pos(t)
        # Delete in reverse order of the collected positions.
        for i in reversed(vp_pos):
            del t[i]
        for i in reversed(sub_conj_pos):
            del t[i]
        # Whatever is left of the clause is used as the subject text.
        subject_phrase = ' '.join(t.leaves())
        for i in verb_phrases:
            clause_list.append(subject_phrase + " " + i)
    return clause_list

def get_clauses(query):
    """Break a query into clause strings using the Stanza constituency parse.

    Args:
        query: The user query text.

    Returns:
        list[str]: Clauses gathered from every parsed sentence, or ``[query]``
        when no clause was extracted.
    """
    parsed = nlp(query)
    all_clauses = []
    for sentence in parsed.sentences:
        all_clauses.extend(print_clauses(parse_str=str(sentence.constituency)))
    # Fall back to the untouched query so callers always get something to search with.
    return all_clauses if all_clauses else [query]

## OpenAI API Key
To set the OPENAI_API_KEY as an environment variable run the following command:

```
export OPENAI_API_KEY="<Your API Key>"
```

In [None]:
# Keep a key that is already exported in the environment; the placeholder is
# only used when OPENAI_API_KEY is not set (replace it, or preferably export
# the real key before starting the notebook so it never lands in the file).
os.environ.setdefault('OPENAI_API_KEY', "<YOUR_OPENAI_API_KEY>")

## Loading the API documentation and usage examples
The required datasets [.json] -

*   API/Tool Descriptions (api_desc.json)
*   Tool Usage Example (examples.json)
*   Queries (PS_queries.json)

These can be loaded either by


1.   Uploading the datasets to Google Drive and downloading them by file ID with ```gdown``` (as shown).
2.   By directly uploading the datasets to the runtime

In [None]:
# Download the three JSON datasets by Google Drive file ID; -O sets the output file name.
!gdown 1nJZpWHRdowbdzdRI7ylb1RrZUV_2E6k9 -O examples.json # PS examples
!gdown 1Q2srYniRZYhDzN52k1Tk4Q8YUzArmPfF -O api_desc.json # Devrev OG
!gdown 1JCjW2f0fTsL6W7r7QjQO2FrwKNUj37ao -O PS_queries.json # Devrev OG

Downloading...
From: https://drive.google.com/uc?id=1nJZpWHRdowbdzdRI7ylb1RrZUV_2E6k9
To: /content/examples.json
100% 6.71k/6.71k [00:00<00:00, 21.4MB/s]
Downloading...
From: https://drive.google.com/uc?id=1Q2srYniRZYhDzN52k1Tk4Q8YUzArmPfF
To: /content/api_desc.json
100% 34.7k/34.7k [00:00<00:00, 62.4MB/s]
Downloading...
From: https://drive.google.com/uc?id=1JCjW2f0fTsL6W7r7QjQO2FrwKNUj37ao
To: /content/PS_queries.json
100% 1.99k/1.99k [00:00<00:00, 7.86MB/s]


In [None]:
#Loading the JSON files as a list

def _load_json(path):
    """Read and parse one UTF-8 encoded JSON file."""
    with open(path, 'r', encoding='utf-8') as file:
        return json.load(file)

# Relative paths: the download cell writes these files into the current working
# directory, so read them from there rather than from a fixed /content folder.
documentation_file_path = 'api_desc.json'
documentation = _load_json(documentation_file_path)

examples_file_path = 'examples.json'
examples = _load_json(examples_file_path)

queries_file_path = 'PS_queries.json'
queries = _load_json(queries_file_path)

print(documentation)
print(examples)
print(queries)

[{'tool_name': 'artifacts.locate', 'description': 'Gets the download URL for the artifact.', 'arguments': [{'argument_name': 'id', 'argument_description': 'The ID of the artifact to get the URL for.', 'argument_type': 'string', 'examples': 'ARTIFACT-12345'}, {'argument_name': 'version', 'argument_description': 'The version of the artifact that needs to be fetched.', 'argument_type': 'string', 'examples': []}]}, {'tool_name': 'artifacts.prepare', 'description': 'Creates an artifact and generates an upload URL for its data.\n', 'arguments': []}, {'tool_name': 'auth-tokens.create', 'description': 'Creates a JWT corresponding to the requested token type for the\nauthenticated user.\n', 'arguments': []}, {'tool_name': 'auth-tokens.delete', 'description': 'Revokes the token that matches the given token ID issued under the\ngiven Dev organization.\n', 'arguments': []}, {'tool_name': 'auth-tokens.get', 'description': 'Gets the token metadata corresponding to the given token ID under the\ngiven

## Retrievers for relevant tools and examples
- The following section implements the retriever to filter out only the relevant tools and examples from the complete tool description bank and usage examples respectively.
- The `get_tools` function returns the names of the relevant tools, the tools together with their arguments, and the embedding cost for the user query; similarly, the `get_examples` function returns the relevant tool usage examples along with the embedding cost.
- Additionally, it calculates the number of tokens used to create embeddings.

In [None]:
# Inputs a string and returns the number of tokens in the string

import tiktoken

def num_tokens_from_string(string: str) -> int:
    """Count the tokens of ``string`` under the ``cl100k_base`` encoding."""
    tokenizer = tiktoken.get_encoding("cl100k_base")
    return len(tokenizer.encode(string))

In [None]:
#Applying semantic search on the API descriptions

# One document per tool description; the metadata keeps the tool's position in
# `documentation` so a search hit can be mapped back to the full tool entry.
API_descriptions = [
    Document(page_content=tool_spec['description'], metadata={"index": position})
    for position, tool_spec in enumerate(documentation)
]
API_descriptions_vector_store = FAISS.from_documents(
    documents=API_descriptions,
    embedding=OpenAIEmbeddings(),
)
API_retriever = API_descriptions_vector_store.as_retriever()

def get_tools(query):
    """Retrieve the tools relevant to a query, searching once per clause.

    Args:
        query: The user query text.

    Returns:
        tuple: ``(tool_names, tools_with_arguments, cost)`` where the first two
        lists follow the order in which tools were first retrieved and ``cost``
        is the token-based embedding cost estimate.
    """
    clause_texts = get_clauses(query)

    # NOTE(review): 0.0001/10000 is the per-token rate assumed here, and the
    # description cost is charged once per clause — confirm both against the
    # embedding model's pricing and how the index is actually built.
    embedding_cost = 0.0
    for description in API_descriptions:
        embedding_cost += num_tokens_from_string(description.page_content)*0.0001/10000
    embedding_cost *= len(clause_texts)

    # Dict keys give de-duplication while keeping first-seen order.
    ranked_indices = {}
    for clause in clause_texts:
        embedding_cost += num_tokens_from_string(clause)*0.0001/10000
        for hit in API_retriever.get_relevant_documents(clause):
            ranked_indices.setdefault(hit.metadata["index"], None)

    tools_with_arguments = [documentation[idx] for idx in ranked_indices]
    tool_names = [tool['tool_name'] for tool in tools_with_arguments]
    return tool_names, tools_with_arguments, embedding_cost

In [None]:
#Applying semantic search on the API usage examples

# One document per example query; the metadata keeps the example's position in
# `examples` so a search hit can be mapped back to the full example entry.
API_usage_examples = [
    Document(page_content=usage['Query'], metadata={"index": position})
    for position, usage in enumerate(examples)
]
API_usage_examples_vector_score = FAISS.from_documents(
    documents=API_usage_examples,
    embedding=OpenAIEmbeddings(),
)
Examples_retriever = API_usage_examples_vector_score.as_retriever()
def get_examples(query):
    """Retrieve the usage examples most similar to the query.

    Args:
        query: The user query text.

    Returns:
        tuple: ``(matching_examples, cost)`` — the entries of ``examples`` for
        the retrieved documents, and the token-based embedding cost estimate.
    """
    # NOTE(review): 0.0001/10000 is the per-token rate assumed here — confirm
    # against the embedding model's pricing.
    embedding_cost = num_tokens_from_string(query)*0.0001/10000
    for example_doc in API_usage_examples:
        embedding_cost += num_tokens_from_string(example_doc.page_content)*0.0001/10000
    matches = Examples_retriever.get_relevant_documents(query)
    selected = [examples[match.metadata["index"]] for match in matches]
    return selected, embedding_cost

# PIRO Prompts

## Planning

In [None]:
# Prompt for the planning stage. Placeholders: {query} (the user query) and
# {tools} (the retrieved tool descriptions). It asks for a JSON list of tool
# calls in call order, using the $$PREV[i] notation for arguments that depend
# on an earlier call's output.
planning_template ="""
Given a query and a list of APIs with their arguments, find whether the APIs can solve the query or not. If not, return empty list. Else, extract values or variables from the query that correspond to each argument of the tools that needs to be called. If some argument value isn't known, use tools that can retrieve those argument values. Return a JSON file containing the tools in the order they need to be called. Identify cases where an API argument depends on the output of a previous API call. Replace such arguments with the notation $$PREV[i] where 'i' represents the order of the previous API call in the chain. Do NOT explain yourself, only return the JSON.

Query: {query}
Tools: {tools}

Answer:
"""

## Improvement with Examples

Note: The same template is also used for the reflection step.

In [None]:
# Prompt shared by the improvement and reflection stages. Placeholders:
# {examples} (retrieved usage examples), {query} and {solution} (the JSON from
# the previous stage).
# NOTE(review): the text refers to a "list of available tools", but this
# template has no tools placeholder — confirm whether one should be added.
improvement_template = """
Given a list of examples, each containing a query and the corresponding APIs to be called to solve the query, find whether the JSON present in current solution solves the current query.
If yes, modify its format to match with that of solutions in examples and return the modified JSON.
If not, modify the current JSON solution to include necessary tools from list of available tools.
If no solution is available, return a blank list.
Remember to use double quotes instead of single quotes in JSON output.

Examples : {examples}

Current Query : {query}
Current Solution : {solution}

Answer:
"""

## Optimization

In [None]:
# Prompt for the final optimization stage. Placeholders: {query}, {solution}
# (the JSON from the reflection stage) and {tools} (the retrieved tool
# descriptions). It asks the model to remove redundant API calls and to return
# the solution unchanged when nothing needs fixing.
optimization_template = """
Given a query, solution and available APIs, optimize the number of APIs calls in the solution. Do not use API calls unless absolutely necessary. Look for redundancy and check for mistakes in the combination of API calls. Return current solution if no changes are necessary.

Current Query : {query}
Current Solution : {solution}
Available APIs : {tools}

Final solution (same format as current solution):
"""

# Execution

The ```get_result``` function returns the final predicted output, the total time taken to generate it, and the cost for a given user query.

>Note: To avoid the ```RateLimitError```, keep the sleep time between queries at 10 seconds or more.

In [None]:
import time
import pandas as pd

# Chat model used by every stage in get_result.
model_name = 'gpt-3.5-turbo'
# Results table: one row per processed query (query, final output, time, cost).
df = pd.DataFrame(columns=['query', 'f_output','time','cost'])

In [None]:
def _run_stage(template, inputs, temperature):
    """Run one prompt template through the chat model.

    Args:
        template: Prompt template text with ``{placeholder}`` fields.
        inputs: Mapping of placeholder names to values.
        temperature: Sampling temperature for the chat model.

    Returns:
        tuple: ``(output_text, cost)`` where ``cost`` is the total cost
        reported by the OpenAI callback for this call.
    """
    prompt = PromptTemplate.from_template(template=template)
    chain = prompt | ChatOpenAI(temperature=temperature, model_name=model_name) | StrOutputParser()
    with get_openai_callback() as cb:
        output = chain.invoke(inputs)
    return output, cb.total_cost


def get_result(Query):
    """Run the planning, improvement, reflection and optimization stages on a query.

    Args:
        Query: The user query text.

    Returns:
        tuple: ``(final_output, time_taken, cost)`` — the text returned by the
        optimization stage, the elapsed wall-clock seconds, and the summed
        retrieval and model cost.
    """
    start = time.time()

    #Getting the relevant APIs
    _, Tools, c1 = get_tools(Query)

    #Selecting the first 3 relevant examples
    # get_examples returns (examples, cost); slice the example list itself,
    # because slicing the returned 2-tuple with [:3] leaves it unchanged.
    Examples, c2 = get_examples(Query)
    Examples = Examples[:3]

    #Calculating the cost of the API calls
    cost = c1 + c2

    #Planning
    Solution1, stage_cost = _run_stage(
        planning_template, {"query": Query, "tools": Tools}, temperature=0.1)
    cost += stage_cost
    print(f"Output 1: {Solution1}")

    #Improvement
    Solution2, stage_cost = _run_stage(
        improvement_template,
        {"examples": Examples, "query": Query, "solution": Solution1},
        temperature=0)
    cost += stage_cost
    print(f"Output 2: {Solution2}")

    #Reflection (same template, applied to the improved solution)
    Solution3, stage_cost = _run_stage(
        improvement_template,
        {"examples": Examples, "query": Query, "solution": Solution2},
        temperature=0)
    cost += stage_cost
    print(f"Output 3: {Solution3}")

    #Optimization
    Solution4, stage_cost = _run_stage(
        optimization_template,
        {"query": Query, "solution": Solution3, "tools": Tools},
        temperature=0)
    cost += stage_cost
    print(f"Output 4: {Solution4}")

    end = time.time()
    time_taken = end-start

    return Solution4 , time_taken , cost

In [None]:
#Running the pipeline on the queries

def _strip_code_fence(text):
  """Remove a surrounding Markdown code fence and a leading ``json`` tag.

  The tag is removed as a prefix: str.lstrip("json") would strip any leading
  run of the characters j, s, o and n and could eat real content.
  """
  # Drop outer whitespace first so a fence followed by a newline is still removed.
  body = text.strip().strip("`")
  if body.startswith("json"):
    body = body[len("json"):]
  return body

for count, query in enumerate(queries, start=1):
  print("Query no. - ", count)
  print(query)
  f_output, time_taken, cost = get_result(query)
  f_output = _strip_code_fence(f_output)

  df.loc[len(df.index)] = [query, f_output, time_taken, cost]
  # Pause between queries to stay under the API rate limit.
  time.sleep(10)
  print("=========================================================================")

Query no. -  1
Add work items 'AB12-XYZ' and 'DEF-456' to sprint 'Sprint123' and retrieve the ID of the current sprint.
Output 1: ```json
[
  {
    "tool": "get_sprint_id",
    "arguments": {}
  },
  {
    "tool": "add_work_items_to_sprint",
    "arguments": {
      "work_ids": ["AB12-XYZ", "DEF-456"],
      "sprint_id": "$$PREV[0]"
    }
  }
]
```
Output 2: ```json
[
  {
    "tool_name": "get_sprint_id_by_name",
    "arguments": [
      {
        "argument_name": "sprint_name",
        "argument_value": "Sprint123"
      }
    ]
  },
  {
    "tool_name": "add_work_items_to_sprint",
    "arguments": [
      {
        "argument_name": "work_ids",
        "argument_value": ["AB12-XYZ", "DEF-456"]
      },
      {
        "argument_name": "sprint_id",
        "argument_value": "$$PREV[0]"
      }
    ]
  }
]
```
Output 4: ```json
[
  {
    "tool_name": "get_sprint_id",
    "arguments": []
  },
  {
    "tool_name": "add_work_items_to_sprint",
    "arguments": [
      {
        "argument_na



Output 1: ```json
[
  {
    "tool": "works_list",
    "arguments": {
      "issue.rev_orgs": ["REV-123"],
      "ticket.severity": ["high", "medium"],
      "owned_by": ["DEVU-123"]
    }
  }
]
```
Output 2: ```json
[
  {
    "tool_name": "works_list",
    "arguments": [
      {
        "argument_name": "ticket.rev_org",
        "argument_value": "REV-123"
      },
      {
        "argument_name": "ticket.severity",
        "argument_value": ["high", "medium"]
      },
      {
        "argument_name": "owned_by",
        "argument_value": "DEVU-123"
      }
    ]
  }
]
```
Output 4: ```json
[
  {
    "tool_name": "works_list",
    "arguments": [
      {
        "argument_name": "ticket.rev_org",
        "argument_value": "REV-123"
      },
      {
        "argument_name": "ticket.severity",
        "argument_value": ["high", "medium"]
      },
      {
        "argument_name": "owned_by",
        "argument_value": "DEVU-123"
      }
    ]
  }
]
```
Query no. -  8
Retrieve work items cre

  "error": {
    "message": "The server had an error while processing your request. Sorry about that! You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID e63f27b58d7e5ed882f40e6e41e9a051 in your message.)",
    "type": "server_error",
    "param": null,
    "code": null
  }
}
 500 {'error': {'message': 'The server had an error while processing your request. Sorry about that! You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID e63f27b58d7e5ed882f40e6e41e9a051 in your message.)', 'type': 'server_error', 'param': None, 'code': None}} {'Date': 'Tue, 12 Dec 2023 10:35:11 GMT', 'Content-Type': 'application/json', 'Content-Length': '366', 'Connection': 'keep-alive', 'access-control-allow-origin': '*', 'openai-organization': 'user-pooaeyireisstw1wayrpzylz', 'openai-processing-ms': '90019', 'openai-version': '2020-10-01', '

Output 1: ```json
[
    {
        "tool": "works.list",
        "arguments": {
            "issue.rev_orgs": ["REV-123"]
        }
    },
    {
        "tool": "get_similar_work_items",
        "arguments": {
            "work_id": "$$PREV[0]"
        }
    },
    {
        "tool": "summarize_objects",
        "arguments": {
            "objects": "$$PREV[1]"
        }
    }
]
```
Output 2: ```json
[
    {
        "tool_name": "works_list",
        "arguments": [
            {
                "argument_name": "issue.rev_orgs",
                "argument_value": "REV-123"
            }
        ]
    },
    {
        "tool_name": "get_similar_work_items",
        "arguments": [
            {
                "argument_name": "work_id",
                "argument_value": "ENH-123"
            }
        ]
    },
    {
        "tool_name": "summarize_objects",
        "arguments": [
            {
                "argument_name": "objects",
                "argument_value": "$$PREV[1]"
        

In [None]:
# Display the collected results table.
df

Unnamed: 0,query,f_output,time,cost
0,Add work items 'AB12-XYZ' and 'DEF-456' to spr...,"\n[\n {\n ""tool_name"": ""get_sprint_id"",\n ...",35.58404,0.05497
1,Search for work items similar to 'WK-789' and ...,"\n[\n {\n ""tool_name"": ""get_similar_work_i...",67.974623,0.08317
2,"Search for 'Item123', prioritize similar items...","\n[\n {\n ""tool_name"": ""works_list"",\n ...",40.737921,0.061159
3,"Search for 'Object456', add its ID to 'Sprint7...","\n[\n {\n ""tool_name"": ""search_object_by_n...",58.61255,0.047
4,Retrieve work items owned by 'DEVU-123' in 'FE...,"\n[\n {\n ""tool_name"": ""works_list"",\n ...",61.400264,0.06661
5,Search for 'CAPL-123' and 'PROD-123' by name. ...,"\n[\n {\n ""tool_name"": ""works_list"",\n ...",44.267059,0.07033
6,Filter work items in 'REV-123' with severity '...,"\n[\n {\n ""tool_name"": ""works_list"",\n ...",39.57218,0.047221
7,Retrieve work items created by 'DEVU-123' in '...,"\n[\n {\n ""tool_name"": ""works_list"",...",36.344761,0.07205
8,Find work items related to 'REV-123'. Get simi...,"\n[\n {\n ""tool_name"": ""works_list"",...",139.648842,0.0763
9,Search for 'CAPL-123' and 'ENH-123'. Retrieve ...,"\n[\n {\n ""tool_name"": ""works.list"",\n ...",32.618044,0.0474


In [None]:
#Exporting the results table to an Excel file in the current working directory
df.to_excel("Queries and output.xlsx")