# Reverse Chain

##  Importing necessary libraries

Ensure that the Python environment you are running this in has all the libraries present in [requirements.txt](requirements.txt).


In [None]:
import langchain
import openai
import json
import os
import pandas as pd
import time

from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import Document

from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
from langchain.agents import AgentExecutor
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.schema import StrOutputParser
from langchain.chat_models import ChatOpenAI

## OpenAI API Key


To set the OPENAI_API_KEY as an environment variable run the following command:
```
export OPENAI_API_KEY=<Your API Key>
```


## Loading the API documentation and usage examples

The required datasets (all `.json` files) are:
- API/Tool Descriptions (api_desc.json)
- Tool Usage Example (examples.json)
- Queries (PS_queries.json)
  
  These can be loaded in either of two ways:
  1. Upload the datasets to Google Drive and download them by file id with `gdown` (as shown below).
  2. Upload the datasets directly to the runtime.


In [None]:
!gdown 1gG6Ghpkjxqz7vlPjCs2pTdmOSqjt0EIM
!gdown 1nJZpWHRdowbdzdRI7ylb1RrZUV_2E6k9
!gdown 1JCjW2f0fTsL6W7r7QjQO2FrwKNUj37ao

Downloading...
From: https://drive.google.com/uc?id=1gG6Ghpkjxqz7vlPjCs2pTdmOSqjt0EIM
To: /content/api_desc.json
100% 8.81k/8.81k [00:00<00:00, 34.0MB/s]
Downloading...
From: https://drive.google.com/uc?id=1nJZpWHRdowbdzdRI7ylb1RrZUV_2E6k9
To: /content/examples.json
100% 6.71k/6.71k [00:00<00:00, 25.2MB/s]


In [None]:
# Load the three JSON datasets used by the rest of the notebook.

# Locations of the datasets on the runtime.
documentation_file_path = '/content/api_desc.json'
examples_file_path = '/content/examples.json'
queries_file_path = '/content/PS_queries.json'

# Tool/API descriptions.
with open(documentation_file_path, 'r') as file:
    documentation = json.load(file)

# Tool usage examples.
with open(examples_file_path, 'r') as file:
    examples = json.load(file)

# Queries to be solved.
with open(queries_file_path, 'r') as file:
    queries = json.load(file)

# Echo what was loaded as a quick sanity check.
print(documentation)
print(examples)
print(queries)

[{'tool': 'works_list', 'description': 'Returns a list of work items matching the request', 'arguments': [{'argument_name': ' applies_to_part', 'argument_description': 'Filters for work belonging to any of the provided parts', 'argument_type': 'array of strings', 'examples': ['FEAT-123', 'ENH-123', 'PROD-123', 'CAPL-123']}, {'argument_name': 'created_by', 'argument_description': 'Filters for work created by any of these users', 'argument_type': 'array of strings', 'examples': ['DEVU-123']}, {'argument_name': 'issue.priority', 'argument_description': 'Filters for issues with any of the provided priorities. Allowed values: p0, p1, p2,p3', 'argument_type': 'array of strings', 'examples': []}, {'argument_name': 'issue.rev_orgs', 'argument_description': 'Filters for issues with any of the provided Rev organizations', 'argument_type': 'array of strings', 'examples': ['REV-123']}, {'argument_name': 'limit', 'argument_description': "The maximum number of works to return. The default is '50'", 

## Retrievers for relevant tools and examples

- The following section implements the retriever to filter out only the relevant tools and examples from the complete tool description bank and usage examples respectively.
- The `get_tools` function returns the set of relevant tools and their arguments based on the user query; similarly, the `get_examples` function returns the set of relevant tool usage examples.
- The `retrieve_args` function returns the set of arguments of a tool used for a specific query.




In [None]:
# Semantic search over the API descriptions: wrap every tool description in a
# Document that remembers the tool's position inside `documentation`.
API_descriptions = [
    Document(
        page_content=tool_entry['description'],
        metadata={"index": position},
    )
    for position, tool_entry in enumerate(documentation)
]

# Embed the descriptions into a FAISS vector store and expose it as a retriever.
API_descriptions_vector_store = FAISS.from_documents(
    API_descriptions,
    OpenAIEmbeddings(),
)
API_retriever = API_descriptions_vector_store.as_retriever()

# Function to get the relevant documents (tools) for a query
def get_tools(query):
    """Return the tools retrieved for ``query``.

    Returns a pair ``(tools, arguments)``: ``tools`` is the list of tool
    names and ``arguments`` is the list of the corresponding full entries
    from ``documentation``, both in the order the retriever returned them.
    """
    matched_entries = []
    for doc in API_retriever.get_relevant_documents(query):
        # Each Document carries the position of its tool in `documentation`.
        matched_entries.append(documentation[doc.metadata["index"]])
    tool_names = [entry['tool'] for entry in matched_entries]
    return tool_names, matched_entries

In [None]:
# Semantic search over the API usage examples: wrap every example query in a
# Document that remembers the example's position inside `examples`.
API_usage_examples = [
    Document(
        page_content=example_entry['Query'],
        metadata={"index": position},
    )
    for position, example_entry in enumerate(examples)
]

# Embed the example queries into a FAISS vector store and expose it as a retriever.
# NOTE(review): "vector_score" looks like a typo for "vector_store"; the name is
# kept because other cells may reference it.
API_usage_examples_vector_score = FAISS.from_documents(
    API_usage_examples,
    OpenAIEmbeddings(),
)
Examples_retriever = API_usage_examples_vector_score.as_retriever()

# Function to get the relevant documents (usage examples) for a query
def get_examples(query):
    """Return the entries of ``examples`` retrieved for ``query``.

    The entries are returned in the order the retriever produced them.
    """
    relevant = []
    for doc in Examples_retriever.get_relevant_documents(query):
        # Each Document carries the position of its example in `examples`.
        relevant.append(examples[doc.metadata["index"]])
    return relevant

In [None]:
# Function to retrieve the arguments of a tool for a given query
def retrieve_args(llm_tool, query):
    """Return the ``'arguments'`` entry of ``llm_tool`` for ``query``.

    Only the tools retrieved by ``get_tools(query)`` are searched; the first
    entry whose ``'tool'`` equals ``llm_tool`` wins. Returns ``None`` when no
    retrieved tool has that name.
    """
    candidates = get_tools(query)[1]
    return next(
        (entry['arguments'] for entry in candidates if entry['tool'] == llm_tool),
        None,
    )


[{'argument_name': 'objects',
  'argument_description': 'List of objects to summarize',
  'argument_type': 'array of objects',
  'examples': []}]

# Reverse Chain Prompts


In [None]:
# Prompt for the first reverse-chain step: given the candidate APIs ({tools})
# and the user request ({query}), ask the model to name the single final API
# that should be called.
r1_template = """ We are given some APIs -
{tools}
If someone is saying: "{query}"
Which final API should we use for this instruction?
Only return API code. Only return one word!"""

In [None]:
# Prompt for the argument-extraction step: for the arguments of the current
# tool ({args}), ask the model to return a JSON object mapping each argument
# either to a value taken from the user request ({query}) or to the name of
# one of the available APIs ({tools}) that can supply it.
r2_template = """You are an argument extractor. For each argument, you need to
determine whether you can extract the value from user input
directly or you need to use an API to get the value. The output
should be in json format, key is the argument, and value is the
value of argument or the API name, return None if you cannot get
value or API name. Return only the json object.
The Arguments to be extracted are:
{args}
The API you can use includes:
{tools}
......
Now, Let's start.
=>
If someone is saying: "{query}"
Arguments :"""

# Execution

In [None]:
# Name of the chat model used to generate the solution(s)
model = "gpt-4"

# Empty results table: one row per query holding the query text, the final
# output and the time taken.
df2 = pd.DataFrame(
    columns=[
        'query',
        'f_output',
        'time',
    ],
)

In [None]:
from collections import deque
def hasnoargs(args):
    """Return True when ``args`` describes a tool that takes no arguments.

    A tool counts as argument-less when its argument list is missing or
    empty, or when it holds exactly one entry whose ``'argument_name'`` is
    empty or absent. Any list with more than one entry is treated as having
    arguments.

    Args:
        args: List of argument-description dicts, or ``None``.

    Returns:
        bool: ``True`` if there are no real arguments, ``False`` otherwise.
    """
    # Missing or empty lists used to crash on len()/args[0]; treat them as
    # "no arguments" instead.
    if not args:
        return True
    if len(args) > 1:
        return False
    # Single entry: it is a placeholder only when its name is empty.
    return not args[0].get('argument_name')

def _parse_argument_reply(raw):
    """Parse the argument-extractor reply into a dict without executing it."""
    # Local imports keep this cell self-contained.
    import ast
    import re

    text = raw.strip()
    # Drop an optional Markdown code fence such as ```json ... ```.
    fenced = re.search(r"```(?:json)?\s*(.*?)```", text, re.DOTALL)
    if fenced:
        text = fenced.group(1).strip()
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        # The prompt tells the model to answer "None" for unknown values, so
        # the reply may use Python literal syntax instead of strict JSON.
        try:
            parsed = ast.literal_eval(text)
        except (ValueError, SyntaxError) as err:
            raise ValueError(
                f"Could not parse the model reply as a JSON object: {raw!r}"
            ) from err
    if not isinstance(parsed, dict):
        raise ValueError(f"Expected a JSON object from the model, got: {raw!r}")
    return parsed


# Function using the templates to generate the solution(s) for a query with the total computation time
def get_results_r(Query,model_name=model,temp=0.1):
    """Build a tool-call plan for ``Query`` by chaining backwards from the final tool.

    The model is first asked for the final tool. Each pending tool's
    arguments are then extracted; any argument whose value is the name of a
    retrieved tool queues that tool for processing. At most 10
    argument-extraction rounds are run.

    Args:
        Query: The user request.
        model_name: Chat model name passed to ``ChatOpenAI``.
        temp: Sampling temperature passed to ``ChatOpenAI``.

    Returns:
        tuple: ``(fin_solution, elapsed)``. ``fin_solution`` is
        ``{'tools': [...]}`` with one ``{'tool', 'arguments'}`` entry per
        processed tool, most recently processed first. ``elapsed`` is the
        wall-clock time in seconds.

    Raises:
        ValueError: If an argument-extraction reply cannot be parsed as a
            JSON object.
    """
    start = time.time()

    fin_solution = {'tools': []}
    _, Tools = get_tools(Query)
    tool_names = [tool['tool'] for tool in Tools]

    # Both chains share one chat model instance.
    llm = ChatOpenAI(temperature=temp, model_name=model_name)
    r1_chain = PromptTemplate.from_template(template=r1_template) | llm | StrOutputParser()
    r2_chain = PromptTemplate.from_template(template=r2_template) | llm | StrOutputParser()

    # Step 1: ask for the final tool. Strip whitespace so the name can match
    # the 'tool' fields exactly.
    last_tool = r1_chain.invoke({"query": Query, "tools": Tools}).strip()
    print(f"Output 1: {last_tool}")
    tools_left = deque([last_tool])

    i = 0
    while i < 10 and tools_left:
        last_tool = tools_left.popleft()
        cur_args = retrieve_args(last_tool, Query)
        # retrieve_args returns None when the tool was not retrieved for this
        # query; record such tools without arguments instead of crashing.
        if not cur_args or hasnoargs(cur_args):
            fin_solution['tools'] = [{'tool': last_tool, 'arguments': []}] + fin_solution['tools']
            continue
        # The reply is untrusted text: parse it as data, never eval() it.
        Solution2 = _parse_argument_reply(
            r2_chain.invoke({"query": Query, "tools": Tools, "args": cur_args})
        )
        print(f"Output {2+i}: {Solution2}")
        # An argument whose value names a known tool must be resolved by
        # calling that tool, so queue it.
        for value in Solution2.values():
            if value in tool_names:
                tools_left.append(value)
        fin_solution['tools'] = [{'tool': last_tool, 'arguments': Solution2}] + fin_solution['tools']
        i += 1
    print(fin_solution)

    # Do not name this `time`: assigning to it would make `time` local to the
    # function and break the time.time() call above.
    elapsed = time.time() - start

    return fin_solution, elapsed

In [None]:
# Run every query through the reverse chain and record its result.
for query in queries:
    # get_results_r returns exactly two values: the solution dict and the
    # elapsed time.
    f_output, time_taken = get_results_r(query)
    # Store the solution as JSON text so the cell can be written to Excel.
    # default=str is a deliberate best-effort fallback for values that JSON
    # cannot encode. The row is added in place because DataFrame.append no
    # longer exists in pandas 2.x.
    df2.loc[len(df2)] = [query, json.dumps(f_output, default=str), time_taken]
    # Pause between queries (presumably to stay under API rate limits).
    time.sleep(5)

Output 1: add_work_items_to_sprint
Output 2: {'work_ids': 'get_similar_work_items', 'sprint_id': 'get_sprint_id'}
Output 3: {'work_id': 'WK-789', 'work_ids': 'get_similar_work_items', 'sprint_id': 'get_sprint_id'}
Output 4: {'work_id': 'WK-789'}
{'tools': [{'tool': 'get_sprint_id', 'arguments': []}, {'tool': 'get_similar_work_items', 'arguments': {'work_id': 'WK-789'}}, {'tool': 'get_sprint_id', 'arguments': []}, {'tool': 'get_similar_work_items', 'arguments': {'work_id': 'WK-789', 'work_ids': 'get_similar_work_items', 'sprint_id': 'get_sprint_id'}}, {'tool': 'add_work_items_to_sprint', 'arguments': {'work_ids': 'get_similar_work_items', 'sprint_id': 'get_sprint_id'}}]}


In [None]:
# Export the collected results to an .xlsx file in the working directory.
df2.to_excel(
    'results.xlsx',
)