In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

### Making Jupyter Notebook eligible for the code run

In [130]:
# !pip install playwright
# !playwright install

### Installing Dependencies

In [8]:
#!pip install torch --index-url https://download.pytorch.org/whl/cu118
#!pip install langchain tiktoken wikipedia  duckduckgo-search bitsandbytes trl
#!pip install -q accelerate==0.21.0

In [3]:
print('Successfully installed bitsandbytes-0.41.3.post2 curl-cffi-0.5.10 duckduckgo-search-4.1.0 jsonpatch-1.33 langchain-0.0.351 langchain-community-0.0.4 langchain-core-0.1.1 langsmith-0.0.72 packaging-23.2 shtab-1.6.5 tiktoken-0.5.2 trl-0.7.4 tyro-0.6.0 wikipedia-1.4.0')

Successfully installed bitsandbytes-0.41.3.post2 curl-cffi-0.5.10 duckduckgo-search-4.1.0 jsonpatch-1.33 langchain-0.0.351 langchain-community-0.0.4 langchain-core-0.1.1 langsmith-0.0.72 packaging-23.2 shtab-1.6.5 tiktoken-0.5.2 trl-0.7.4 tyro-0.6.0 wikipedia-1.4.0


In [114]:
device='cuda'
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:33:58_PDT_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0


### Parameters to quantize the model

In [6]:
import torch
import transformers
from transformers import BitsAndBytesConfig

use_4bit = True
bnb_4bit_compute_dtype = "float16"
bnb_4bit_quant_type = "nf4"
use_nested_quant = True #double quantization

compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

### Readying the model

In [7]:
from transformers import AutoModelForCausalLM
model_name = "/kaggle/input/mistral/pytorch/7b-instruct-v0.1-hf/1"

model = AutoModelForCausalLM.from_pretrained(
    model_name, 
    quantization_config = bnb_config, #quantization config file
    do_sample=True,
    device_map='auto'
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

### Tokenization

In [75]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_name,
                                         add_eos_token=True,
                                         eos_token_id=2)
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = "right"

### Creating a text generation transformers(HF) pipeline

In [153]:
from transformers import pipeline
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.1,
    return_full_text=False,
    max_new_tokens=200,
)

from langchain.llms import HuggingFacePipeline
llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

### Instantiating the langchain utilities

In [185]:
from langchain.utilities import WikipediaAPIWrapper
wikipedia = WikipediaAPIWrapper(top_k_results=1, lang='en', load_all_available_meta=False, doc_content_chars_max=800)

from langchain.tools import DuckDuckGoSearchRun
search = DuckDuckGoSearchRun()

### Instantiating the langchain tools

In [186]:
from langchain.agents import Tool 
tools = []

wikipedia_tool = Tool(    
    name="wikipedia",    
    func=wikipedia.run,    
    description="wikipedia tool" )

duckduckgo_tool = Tool(    
    name="search",    
    func=search.run,    
    description="web search tool")

tools.append(wikipedia_tool)
tools.append(duckduckgo_tool) 

In [187]:
print(tools)

[Tool(name='wikipedia', description='wikipedia search tool', func=<bound method WikipediaAPIWrapper.run of WikipediaAPIWrapper(wiki_client=<module 'wikipedia' from '/opt/conda/lib/python3.10/site-packages/wikipedia/__init__.py'>, top_k_results=1, lang='en', load_all_available_meta=False, doc_content_chars_max=800)>), Tool(name='search', description='web search tool', func=<bound method BaseTool.run of DuckDuckGoSearchRun()>)]


In [188]:
duckduckgo_tool?

[0;31mSignature:[0m      [0mduckduckgo_tool[0m[0;34m([0m[0mtool_input[0m[0;34m:[0m [0;34m'str'[0m[0;34m,[0m [0mcallbacks[0m[0;34m:[0m [0;34m'Callbacks'[0m [0;34m=[0m [0;32mNone[0m[0;34m)[0m [0;34m->[0m [0;34m'str'[0m[0;34m[0m[0;34m[0m[0m
[0;31mType:[0m           Tool
[0;31mString form:[0m    name='search' description='web search tool' func=<bound method BaseTool.run of DuckDuckGoSearchRun()>
[0;31mFile:[0m           /opt/conda/lib/python3.10/site-packages/langchain_core/tools.py
[0;31mDocstring:[0m      Tool that takes in function or coroutine directly.
[0;31mInit docstring:[0m Initialize tool.
[0;31mCall docstring:[0m Make tool callable.

### Initializaing the Agent for zero shot Retrieval Action Chain

In [193]:
from langchain.agents import initialize_agent 


N=3
zero_shot_agent = initialize_agent(   
    tools=tools,    
    llm=llm,    
    verbose=True,    
    max_interations=2, 
    handle_parsing_errors=True
)

In [194]:
initialize_agent?

[0;31mSignature:[0m
[0minitialize_agent[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mtools[0m[0;34m:[0m [0mSequence[0m[0;34m[[0m[0mlangchain_core[0m[0;34m.[0m[0mtools[0m[0;34m.[0m[0mBaseTool[0m[0;34m][0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mllm[0m[0;34m:[0m [0mlangchain_core[0m[0;34m.[0m[0mlanguage_models[0m[0;34m.[0m[0mbase[0m[0;34m.[0m[0mBaseLanguageModel[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0magent[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mlangchain[0m[0;34m.[0m[0magents[0m[0;34m.[0m[0magent_types[0m[0;34m.[0m[0mAgentType[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcallback_manager[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mlangchain_core[0m[0;34m.[0m[0mcallbacks[0m[0;34m.[0m[0mbase[0m[0;34m.[0m[0mBaseCallbackManager[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0magent_path[0m[0;34m:[0m [0mOptional[0m[0;34m[[0

In [195]:
print(zero_shot_agent)

verbose=True tags=['zero-shot-react-description'] agent=ZeroShotAgent(llm_chain=LLMChain(prompt=PromptTemplate(input_variables=['agent_scratchpad', 'input'], template='Answer the following questions as best you can. You have access to the following tools:\n\nwikipedia: wikipedia search tool\nsearch: web search tool\n\nUse the following format:\n\nQuestion: the input question you must answer\nThought: you should always think about what to do\nAction: the action to take, should be one of [wikipedia, search]\nAction Input: the input to the action\nObservation: the result of the action\n... (this Thought/Action/Action Input/Observation can repeat N times)\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n\nBegin!\n\nQuestion: {input}\nThought:{agent_scratchpad}'), llm=HuggingFacePipeline(pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x78c6091077f0>)), output_parser=MRKLOutputParser(), allowed_tools=['wi

In [196]:
zero_shot_agent.run?

[0;31mSignature:[0m
[0mzero_shot_agent[0m[0;34m.[0m[0mrun[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0margs[0m[0;34m:[0m [0mAny[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcallbacks[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mList[0m[0;34m[[0m[0mlangchain_core[0m[0;34m.[0m[0mcallbacks[0m[0;34m.[0m[0mbase[0m[0;34m.[0m[0mBaseCallbackHandler[0m[0;34m][0m[0;34m,[0m [0mlangchain_core[0m[0;34m.[0m[0mcallbacks[0m[0;34m.[0m[0mbase[0m[0;34m.[0m[0mBaseCallbackManager[0m[0;34m,[0m [0mNoneType[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mtags[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mList[0m[0;34m[[0m[0mstr[0m[0;34m][0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmetadata[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mDict[0m[0;34m[[0m[0mstr[0m[0;34m,[0m [0mAny[0m[0;34m][0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;

### Examples

In [197]:
product='SageMaker'

In [198]:
zero_shot_agent.run(product)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I should use the wikipedia search tool to find out more about SageMaker
Action: wikipedia
Action Input: SageMaker[0m

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Observation: [36;1m[1;3mPage: Amazon SageMaker
Summary: Amazon SageMaker is a cloud based machine-learning platform that allows the creation, training, and deployment by developers of machine-learning (ML) models on the cloud. It can be used to deploy ML models on embedded systems and edge-devices. SageMaker was launched in November 2017.[0m
Thought:[32;1m[1;3m I should use the wikipedia search tool to find out more about SageMaker
Action: wikipedia
Action Input: SageMaker[0m

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Observation: [36;1m[1;3mPage: Amazon SageMaker
Summary: Amazon SageMaker is a cloud based machine-learning platform that allows the creation, training, and deployment by developers of machine-learning (ML) models on the cloud. It can be used to deploy ML models on embedded systems and edge-devices. SageMaker was launched in November 2017.[0m
Thought:[32;1m[1;3m I should use the web search tool to find out more about SageMaker
Action: search
Action Input: SageMaker[0m

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Observation: [33;1m[1;3mAmazon SageMaker Feature Store is a fully managed, purpose-built repository to store, share, and manage features for machine learning (ML) models. Features are inputs to ML models used during training and inference. For example, in an application that recommends a music playlist, features could include song ratings, listening duration, and ... SageMaker HyperPod accelerates FM training at scale: Many organizations want to train their own models using graphics processing units (GPU)-based and Trainium-based compute instances at low cost. However, the volume of data, size of the models, and time required for training models has exponentially increased the complexity of training a model ... The SageMaker Studio UI enables you to access and discover SageMaker resources and ML tooling such as Jobs, Endpoints, and Pipelines in a consistent manner, regardless of your IDE of choice. SageMaker Studio contains a default private space that only you can access and run in

'Amazon SageMaker is a cloud based machine-learning platform that allows the creation, training, and deployment by developers of machine-learning (ML) models on the cloud. It can be used to deploy ML models on embedded systems and edge-devices. SageMaker was launched in November 2017. It has many features such as Amazon SageMaker Feature Store, SageMaker HyperPod accelerates FM training at scale, and SageMaker Studio UI.'

### Examples

In [199]:
import time

In [205]:
product2 = 'AWS lambda'

In [None]:
def generate_description(product):
    start_time = time.time()
    description = zero_shot_agent.run(f"Fetch basic information on {product}. \
    and write an introductory three-line description on it.")
    print(description)
    end_time = time.time()
    print(f'\nTime Taken= {end_time - start_time}')
    return description

In [209]:
generate_description(product2)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I should start by searching for the software on wikipedia.
Action: wikipedia
Action Input: AWS lambda[0m

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Observation: [36;1m[1;3mPage: AWS Lambda
Summary: AWS Lambda is an event-driven, serverless computing platform provided by Amazon as a part of Amazon Web Services. It is designed to enable developers to run code without provisioning or managing servers. It executes code in response to events and automatically manages the computing resources required by that code. It was introduced on November 13, 2014.Node.js, Python, Java, Go, Ruby, and C# (through .NET) are all officially supported as of 2018. In late 2018, custom runtime support was added to AWS Lambda.
AWS Lambda supports running native Linux executables by calling them from a supported runtime, such as Node.js. For example, Haskell code can be run on Lambda.AWS Lambda was primarily designed for tasks such as handling image and object uploads to Amazon S3, up[0m
Thought:[32;1m[1;3m I should also search for the software on the web.
Action: search
Action Input: AWS lambda[0m

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Observation: [33;1m[1;3mAWS Lambda Last updated September 27, 2023 Implementation Enter the Lambda console Select a Lambda blueprint and configure it Review Lambda settings Invoke Lambda function and verify results Monitor your metrics Delete the Lambda function Conclusion Congratulations! You have created your first AWS Lambda function. Learn how to use AWS Lambda, a serverless computing platform that executes your code based on events from AWS services. See how to upload code, trigger events, use concepts, compare with AWS EC2 and Elastic Beanstalk, and explore use cases and best practices of AWS Lambda function. AWS Lambda is an event-driven computing service that runs code and manages the underlying resources automatically. It supports different programming languages, triggers, and integrations with other AWS services. Learn what AWS Lambda is, how it works, its features, pricing, and examples. AWS Lambda is a compute service that runs your code in response to events and automat

'AWS Lambda is an event-driven, serverless computing platform provided by Amazon as a part of Amazon Web Services. It is designed to enable developers to run code without provisioning or managing servers. It executes code in response to events and automatically manages the computing resources required by that code. It supports different programming languages, triggers, and integrations with other AWS services. AWS Lambda now supports Amazon Linux 2023 (AL2023) as a managed runtime and container base image.'

### Final Comments

- The above code is not full proof. The tools and agents developed by LangChain are not full proof. As of 20/12/23, they seem to be mostly in development phase. 
- I ran the code on Kaggle. But using paid GPUs of collab would have given my the functionality of working in a browser. The Retrival Tools needs to fetch data from external websites. And it seems after some time, they forget access.
- I could haveused Google Search API from langchain as well, but it is paid. 
- But the code is a good exmaple of two things:
    - One, langchain is not full proof developer ready. We depend too much on the prompts already generated for us.
    - The workflow of a typical LLM based search query is easily visible in this code.