In [3]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from llm_client import VicunaLLM
from langchain.memory import ConversationBufferMemory
from langchain.agents import Tool, AgentType, initialize_agent


In [4]:
from llm_client import VicunaLLM

In [5]:
# Instantiate the project-local VicunaLLM wrapper; later cells reuse this `llm` object.
llm = VicunaLLM()

In [None]:
## Create chunks of the document for QnA
print("Creating Chunks...")
# Read with an explicit encoding so the result does not depend on the
# platform's locale default.
with open("airplanehistory.txt", encoding="utf-8") as f:
    book = f.read()
# Split the text using chunk_size=1000 and chunk_overlap=100.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
texts = text_splitter.split_text(book)
# Show one chunk as a quick sanity check of the split.
texts[15]

In [2]:

from langchain.embeddings import HuggingFaceInstructEmbeddings

# Load the "hkunlp/instructor-xl" embedding model; model_kwargs sets the device to "cuda".
instructor_embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl", 
                                                      model_kwargs={"device": "cuda"})

  from tqdm.autonotebook import trange


load INSTRUCTOR_Transformer
max_seq_length  512


In [8]:
# Embed and store the texts
# Supplying a persist_directory will store the embeddings on disk
persist_directory = 'db'

## Here is the new embedding model being used
embedding = instructor_embeddings


# Each chunk is tagged with its index (as a string) under the "source" metadata key.
vectordb = Chroma.from_texts(
    texts, embedding, metadatas=[{"source": str(i)} for i in range(len(texts))],
                                 persist_directory=persist_directory)
# Persist the db to disk
vectordb.persist()
# Now we can load the persisted database from disk, and use it as normal. 
# vectordb = Chroma(persist_directory=persist_directory, 
#                   embedding_function=embedding)



Using embedded DuckDB with persistence: data will be stored in: db


In [31]:
# Build a retriever over the vector store, passing k=5 to the search.
retriever = vectordb.as_retriever(search_kwargs={"k": 5})
docs = retriever.get_relevant_documents("When was the first flight made and who all were present?")
# Display the second retrieved document.
docs[1]

Document(page_content='The first flights with the power machine were made on December 17, 1903.\nOnly five persons besides ourselves were present. These were Messrs.\nJohn T. Daniels, W. S. Dough, and A. D. Etheridge, of the Kill Devil\nLife-Saving Station; Mr. W. C. Brinkley, of Manteo; and Mr. John Ward,\nof Naghead. Although a general invitation had been extended to the\npeople living within five or six miles, not many were willing to face\nthe rigors of a cold December wind in order to see, as they no doubt\nthought, another flying machine not fly. The first flight lasted only 12\nseconds, a flight very modest compared with that of birds, but it was,\nnevertheless, the first in the history of the world in which a machine\ncarrying a man had raised itself by its own power into the air in free\nflight, had sailed forward on a level course without reduction of speed,\nand had finally landed without being wrecked. The second and third\nflights were a little longer, and the fourth laste

In [34]:
from langchain.chains import RetrievalQA
# Create the retrieval QA chain ("stuff" chain type) on top of `llm` and `retriever`;
# return_source_documents=True so the response also carries the source documents.
qa_chain = RetrievalQA.from_chain_type(llm=llm, 
                                  chain_type="stuff", 
                                  retriever=retriever, 
                                  return_source_documents=True)

import textwrap
def wrap_text_preserve_newlines(text, width=110):
    """Wrap ``text`` to ``width`` columns while keeping its existing line breaks.

    Every newline-separated line is wrapped on its own with ``textwrap.fill``
    and the wrapped pieces are joined back together with newline characters.
    """
    return '\n'.join(
        textwrap.fill(segment, width=width) for segment in text.split('\n')
    )

def process_llm_response(llm_response):
    """Print the wrapped ``result`` text, then the ``source`` tag of each source document.

    ``llm_response`` is indexed with the keys ``'result'`` and
    ``'source_documents'``; each source document's ``metadata['source']`` is
    printed on its own line under a "Sources:" heading.
    """
    answer_text = wrap_text_preserve_newlines(llm_response['result'])
    print(answer_text)
    print('\n\nSources:')
    source_docs = llm_response["source_documents"]
    for doc in source_docs:
        print(doc.metadata['source'])

In [35]:
# Ask the QA chain a question, then print the answer together with its sources.
query = "When was the first flight made and which attempt was the longest flight duration?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

 The first flight was made on December 17, 1903. The fourth flight was the longest, lasting 59 seconds.


Sources:
24
24
56
56
20


In [None]:
# Display the raw response object returned by the QA chain for the last query.
llm_response

In [50]:

from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.utilities import GoogleSearchAPIWrapper
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser
from langchain.prompts import StringPromptTemplate
from langchain import OpenAI, SerpAPIWrapper, LLMChain
from typing import List, Union, Any, Optional, Type
from langchain.schema import AgentAction, AgentFinish
import re
from langchain import PromptTemplate
from langchain.tools import BaseTool
from langchain.utilities import GoogleSerperAPIWrapper

from llm_client import VicunaLLM

# Agent prompt in an instruction / input / response layout. It describes the single
# "Google Search" tool, the Thought / Action / Action Input / Observation format and one
# worked example. {input} and {agent_scratchpad} are the placeholders filled in at format time.
template = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Answer the following questions as best you can. You have access to the following tools:

Google Search: A wrapper around Google Search. Useful for when you need to answer questions about current events. The input is the question to search relavant information.
Before providing a definitive answer,no need to double-checkcredibility of these sources.

Strictly use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [Google Search]
Action Input: the input to the action, should be a question.
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

For examples:
Question: How old is CEO of Microsoft wife?
Thought: First, I need to find who is the CEO of Microsoft.
Action: Google Search
Action Input: Who is the CEO of Microsoft?
Observation: Satya Nadella is the CEO of Microsoft.
Thought: Now, I should find out Satya Nadella's wife.
Action: Google Search
Action Input: Who is Satya Nadella's wife?
Observation: Satya Nadella's wife's name is Anupama Nadella.
Thought: Then, I need to check Anupama Nadella's age.
Action: Google Search
Action Input: How old is Anupama Nadella?
Observation: Anupama Nadella's age is 50.
Thought: I now know the final answer.
Final Answer: Anupama Nadella is 50 years old.

### Input:
{input}

### Response:
{agent_scratchpad}"""

# Prompt used to condense a raw tool observation: it presents the thought, the query and
# the observation, and asks for a short summary of the information related to the question.
temp_Ins = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Question: {thought}
Query: {query}
Observation: {observation}

### Input:
Make a short summary of useful information from the result observation that is related to the question.

### Response:"""

# Wrap the summarisation template; its three placeholders are declared as input variables.
prompt_Ins = PromptTemplate(
    input_variables=["thought", "query", "observation"],
    template=temp_Ins,
)


class CustomPromptTemplate(StringPromptTemplate):
    """Agent prompt that condenses the latest tool observation before rendering.

    On every ``format`` call the observation of the most recent intermediate
    step is summarised by the module-level ``llm`` (using ``prompt_Ins``) and
    written back into ``intermediate_steps``. A scratchpad built from all
    steps is then substituted into ``template`` as ``agent_scratchpad``.
    """

    input_variables: List[str]
    """A list of the names of the variables the prompt template expects."""

    template: str
    """The prompt template."""

    template_format: str = "f-string"
    """Declared format of the template; ``format`` below always uses ``str.format``."""

    validate_template: bool = False
    """Whether or not to try validating the template."""

    def format(self, **kwargs) -> str:
        """Render the template for the current agent state.

        Args:
            **kwargs: Template variables. Must contain ``intermediate_steps``,
                a list of ``(action, observation)`` pairs whose ``action``
                exposes ``log`` (and ``tool_input``, used as a fallback).

        Returns:
            The template with ``agent_scratchpad`` and the remaining keyword
            arguments substituted.

        Raises:
            KeyError: If ``intermediate_steps`` is not supplied.
        """
        intermediate_steps = kwargs.pop("intermediate_steps")

        if intermediate_steps:
            last_action, last_observation = intermediate_steps[-1]
            regex = r"Thought\s*\d*\s*:(.*?)\nAction\s*\d*\s*:(.*?)\nAction\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)\nObservation"
            log_text = last_action.log
            if len(intermediate_steps) > 1:
                # The scratchpad ends with "Thought:", so follow-up logs are
                # wrapped to give the regex its leading and trailing markers.
                log_text = 'Thought: ' + log_text + '\nObservation'
            match = re.search(regex, log_text, re.DOTALL)
            if match:
                thought = match.group(1).strip()
                query = match.group(3).strip()
            else:
                # The log did not follow the expected layout (e.g. the
                # "Observation" marker is absent). Fall back to the action's
                # own fields instead of failing on `None.group(...)`.
                thought = str(last_action.log).strip()
                query = str(last_action.tool_input).strip()

            summary_prompt = prompt_Ins.format(
                thought=thought, query=query, observation=last_observation
            )
            # Written back into the caller's list on purpose, so the condensed
            # observation replaces the raw one for this step.
            intermediate_steps[-1] = (last_action, llm(summary_prompt))

        thoughts = ""
        for action, observation in intermediate_steps:
            thoughts += action.log
            thoughts += f" {observation}\nThought:"
        # Set the agent_scratchpad variable to that value
        kwargs["agent_scratchpad"] = thoughts
        return self.template.format(**kwargs)


prompt = CustomPromptTemplate(input_variables=["input", "intermediate_steps"],
                              template=template, validate_template=False)

In [51]:
class CustomOutputParser(AgentOutputParser):
    """Turn raw LLM text into the agent's next action or its final answer."""

    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        """Parse one LLM completion.

        Args:
            llm_output: The text produced by the LLM for the current step.

        Returns:
            ``AgentFinish`` when the text contains "Final Answer:" (the output
            is whatever follows its last occurrence), otherwise an
            ``AgentAction`` holding the tool name and the tool input.

        Raises:
            ValueError: If neither a final answer nor an
                "Action: ... / Action Input: ..." pair can be found.
        """
        # Check if agent should finish
        if "Final Answer:" in llm_output:
            return AgentFinish(
                # Return values is generally always a dictionary with a single `output` key
                return_values={"output": llm_output.split("Final Answer:")[-1].strip()},
                log=llm_output,
            )
        # Parse out the action and action input
        regex = r"Action\s*\d*\s*:(.*?)\nAction\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"
        match = re.search(regex, llm_output, re.DOTALL)
        if not match:
            raise ValueError(f"Could not parse LLM output: `{llm_output}`")
        action = match.group(1).strip()
        # The greedy capture runs to the end of the text, so an echoed
        # "\nObservation:" / "\nObservations:" stop marker would otherwise be
        # sent to the tool as part of the query. Keep only what precedes it.
        action_input = re.split(r"\n\s*Observations?\s*:", match.group(2), maxsplit=1)[0]
        # Strip surrounding whitespace first so that enclosing quotes are
        # removed even when a newline follows them.
        tool_input = action_input.strip().strip('"')
        # The log keeps the untouched LLM text.
        return AgentAction(tool=action, tool_input=tool_input, log=llm_output)


output_parser = CustomOutputParser()

In [52]:
import os
from getpass import getpass

# Do not hard-code the key (and do not overwrite an exported one with an
# empty string): reuse SERPER_API_KEY from the environment when it is set,
# otherwise ask for it without echoing it into the notebook.
if not os.environ.get("SERPER_API_KEY"):
    os.environ["SERPER_API_KEY"] = getpass("SERPER_API_KEY: ")

search = GoogleSerperAPIWrapper()
# Expose the search wrapper to the agent under the name used in the prompt.
tool_google = Tool(
    name="Google Search",
    func=search.run,
    description="useful for when you need to ask with search",
)
# NOTE: despite its name, this list holds Tool objects (not tool-name strings).
tool_names = [tool_google]

In [54]:
# Chain that renders the custom agent prompt and sends it to the LLM.
llm_chain = LLMChain(llm=llm, prompt=prompt)
agent = LLMSingleActionAgent(
    llm_chain=llm_chain,
    output_parser=output_parser,
    # Generation is cut at the point where a tool observation would start.
    stop=["\nObservation:", "\nObservations:"],
    # `tool_names` holds Tool objects; allowed_tools is given their names.
    allowed_tools=[tool.name for tool in tool_names],
)
agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tool_names, verbose=True)

In [55]:
# Run the search agent on a sample question; verbose=True on the executor prints the trace.
agent_executor.run("which phone has top DXOmark score for camera in android in 2023?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Thought: First, I need to understand what DXOmark means and how it rates cameras.
Action: Google Search
Action Input: What is DXOmark and how does it rate cameras?
Observation:[0m

Observation:[36;1m[1;3mFor our DxOMark camera sensor reviews, we measure the image quality performance only of camera sensors that are capable of capturing images in RAW format, ... Missing: Observation. The restructured scoring system (see above: zoom and wide are now combined in the Zoom sub-score, which is on par now with Photo and Video) means that devices ... DXOMARK's Camera (previously called DxOMark Mobile) scores and rankings are based on testing every camera using an identical, extensive process ... Missing: Observation. First, apertures can be larger, which means more information can be captured in a given exposure, which in turn means less noise in captured images. Second, the ... The new DXOMARK Camera ranking. Just like Exposure, 

'According to DXOmark ratings, the Huawei P60 Pro is the phone with the top DXOmark score for camera in Android in 2023.'

In [59]:
# Run the search agent on a second sample question.
agent_executor.run("who was captain of team that won the first season of ICC Men's T20 World Cup?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Thought: First, I need to make sure it's the correct tournament and year.
Action: Google Search
Action Input: When did the first season of ICC Men's T20 World Cup happen?
Observation:[0m

Observation:[36;1m[1;3mThe first season of Twenty20 in England was a relative success. The first Twenty20 match held at Lord's, on 15 July 2004 between Middlesex and Surrey, attracted ... The first official Twenty20 matches were played on 13 June 2003 between the English counties in the T20 Blast. The first season of Twenty20 in England was a ... The inaugural match of the T20 World Cup was held in 2007, hosted by South Africa. The rest, as we know, is history. No, the ODI World Cup has been held in Australia in 1992 and 2015, but this is the first time they are hosting the T20 World Cup. It was ... Missing: Observation. Everything you need to know for Sunday's ICC Men's T20 World Cup final between Pakistan and England at the MCG. The fi

"Mahendra Singh Dhoni was the captain of the winning team in the first season of ICC Men's T20 World Cup."

In [60]:
# Call the LLM directly (no agent, no retrieval) with a USER/ASSISTANT chat-format prompt
# that asks for a summary of the dialogue enclosed in triple backticks.
llm.predict("""A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions/tasks.
USER: Summarize the conversation between 2 people given inside tripple backticks ```

```Sam: hey overheard rick say something 
Sam: i don't know what to do :-/ 
Naomi: what did he say?? 
Sam: he was talking on the phone with someone 
Sam: i don't know who 
Sam: and he was telling them that he wasn't very happy here 
Naomi: damn!!! 
Sam: he was saying he doesn't like being my roommate 
Naomi: wow, how do you feel about it? 
Sam: i thought i was a good rommate 
Sam: and that we have a nice place 
Naomi: that's true man!!! 
Naomi: i used to love living with you before i moved in with me boyfriend 
Naomi: i don't know why he's saying that 
Sam: what should i do??? 
Naomi: honestly if it's bothering you that much you should talk to him
Naomi: see what's going on 
Sam: i don't want to get in any kind of confrontation though 
Sam: maybe i'll just let it go 
Sam: and see how it goes in the future 
Naomi: it's your choice sam 
Naomi: if i were you i would just talk to him and clear the air.```
ASSISTANT:"""
)

' Sam overhears Rick saying something negative about their living situation on the phone. Naomi suggests talking to Rick to address the issue, but Sam is hesitant due to potential conflict.'