In [1]:
#pip install --upgrade openai
#pip install --upgrade transformers

### Understanding LangChain: A Modular Framework for LLMs

* LangChain is fundamentally a framework designed for Large Language Models (LLMs).

* It enables the development of various applications such as chatbots, Generative Question-Answering (GQA), content summarization, and beyond.

* The essence of the framework lies in its ability to "chain" diverse components, facilitating the creation of sophisticated functionalities utilizing LLMs.
  * Chains are composed of various elements across different modules, including:

    * **Prompt templates:** Pre-designed templates tailored for specific interactions, ranging from chatbot dialogues to Explain Like I'm Five (ELI5) question-answering formats.

    * **LLMs:** A range of Large Language Models such as ChatGPT, Bard, Claude, etc.
    * **Agents:** Agents leverage LLMs to determine necessary actions. They can employ tools like web search or calculators, integrated into a cohesive operational loop.
    * **Memory:** Both short-term and long-term memory functionalities.

* Our primary aim here is to delve into the functionality that enables the transformation of unstructured text into structured data, extracting valuable insights.

### Core Components of LangChain

* Chains are composed of various modules that can be combined to enhance the capabilities of LLMs.

Key Modules Include:

  * Prompt Templates: Customizable templates suited for different interaction styles, including chatbot conversations.
  * LLMs: Incorporation of various Large Language Models such as ChatGPT, Bard, Claude, etc.
  * Agents: Agents utilize LLMs to determine the necessary actions, employing tools like web searches or calculators within a logical operational loop.
  * Memory Modules: These include both short-term and long-term memory functionalities.



In [8]:
from openai import OpenAI

# No API key is passed explicitly, so the client has to locate credentials on
# its own (NOTE(review): presumably the OPENAI_API_KEY environment variable —
# confirm for your setup).
client = OpenAI()


prompt = """ What is the most populated city in the state of Hawaii. 
Provide city name and no additional information."""


# Send ONLY the user turn. Deliberately no trailing assistant turn: seeding the
# conversation with an assistant message that already says "Honolulu" would
# hand the model the expected answer instead of letting it answer the question.
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [{"type": "text", "text": prompt}],
        },
    ],
    temperature=0.5,
    max_tokens=2048,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
    response_format={"type": "text"},
)
# Printed in full to show the shape of the returned ChatCompletion object
# (choices, usage, model, ...); the answer text is read from it in the next cell.
print(response)

ChatCompletion(id='chatcmpl-AOVj1u94YOzazBYUVVsKuWk15QdQP', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Honolulu', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1730405187, model='gpt-4o-2024-08-06', object='chat.completion', service_tier=None, system_fingerprint='fp_159d8341cc', usage=CompletionUsage(completion_tokens=2, prompt_tokens=34, total_tokens=36, completion_tokens_details=CompletionTokensDetails(audio_tokens=None, reasoning_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=None, cached_tokens=0)))


In [13]:
response.choices[0].message.content

'Honolulu'

In [14]:
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
# Chat model shared by the LangChain chains in this notebook. No model name is
# passed, so the library default applies; the recorded outputs in this notebook
# report 'gpt-3.5-turbo-0125' as the model that answered.
model = ChatOpenAI()

In [15]:
# Fixed question: the text contains no {placeholders}, so the chain built from
# it is invoked with an empty input dict.
prompt_str = (
    "What is the most populated city in the state of Hawaii. \n"
    "Provide city name and no additional information."
)

prompt = PromptTemplate.from_template(prompt_str)


In [16]:
chain = prompt | model


In [17]:
chain.invoke({})

AIMessage(content='Honolulu', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 2, 'prompt_tokens': 28, 'total_tokens': 30, 'completion_tokens_details': {'audio_tokens': None, 'reasoning_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-70a704b7-a9de-4866-9d9e-570e9ca46ecf-0', usage_metadata={'input_tokens': 28, 'output_tokens': 2, 'total_tokens': 30})

### Prompts Are First-Class Objects in LangChain

* Prompts can be easily tailored to incorporate runtime variables.
* They can also be customized with examples for more precise and context-relevant responses.

In [18]:
# Same question as before, but the state is now a {state} placeholder that is
# filled in when the chain is invoked.
prompt_str = (
    "What is the most populated city in the state of {state}.\n"
    "\n"
    "Provide city name and no additional information."
)

prompt = PromptTemplate.from_template(prompt_str)

In [19]:
chain = prompt | model

In [20]:
# Fill the {state} slot at call time and show only the reply text.
response = chain.invoke(dict(state="Hawaii"))
response.content

'Honolulu'

In [21]:
# Same chain, different runtime value for {state}.
response = chain.invoke(dict(state="California"))
response.content

'Los Angeles'

In [22]:
# Same chain, different runtime value for {state}.
response = chain.invoke(dict(state="Georgia"))
response.content

'Atlanta'

In [23]:
# Few-shot prompt: two worked "State / City" examples precede the state to be
# answered, so the reply is steered toward the same layout.
prompt_str = (
    "What is the most populated city in the state provided below.\n"
    "\n"
    "Provide city name and no additional information. \n"
    "\n"
    "Examples:\n"
    "\n"
    "State: Hawaii\n"
    "City: Honolulu\n"
    "\n"
    "State: California\n"
    "City: Los Angeles\n"
    "\n"
    "State: {state}\n"
)

prompt = PromptTemplate.from_template(prompt_str)

chain = prompt | model


In [24]:
# Run the few-shot chain and display the complete message object.
response = chain.invoke(dict(state="Georgia"))

response

AIMessage(content='City: Atlanta', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 3, 'prompt_tokens': 51, 'total_tokens': 54, 'completion_tokens_details': {'audio_tokens': None, 'reasoning_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-a7b1eb9d-51a4-4b0d-a271-dd391f3e4170-0', usage_metadata={'input_tokens': 51, 'output_tokens': 3, 'total_tokens': 54})

In [25]:
response.content

'City: Atlanta'

In [26]:
# Few-shot prompt whose examples answer with a JSON object. The braces of the
# JSON examples are doubled so they are kept as literal text; only {state} is
# a template variable.
prompt_str = (
    "What is the most populated city in the state provided below.\n"
    "\n"
    "Provide city name and no additional information. \n"
    "\n"
    "Examples:\n"
    "\n"
    "State: Hawaii\n"
    '{{"City": "Honolulu"}}\n'
    "\n"
    "State: California\n"
    '{{"City": "Los Angeles"}}\n'
    "\n"
    "State: {state}\n"
)

prompt = PromptTemplate.from_template(prompt_str)

chain = prompt | model


In [27]:
# Run the JSON-example chain and display the complete message object.
response = chain.invoke(dict(state="Georgia"))

response

AIMessage(content='{"City": "Atlanta"}', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 6, 'prompt_tokens': 56, 'total_tokens': 62, 'completion_tokens_details': {'audio_tokens': None, 'reasoning_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-ff3d71f4-dce8-4295-8756-dc7044a7fa64-0', usage_metadata={'input_tokens': 56, 'output_tokens': 6, 'total_tokens': 62})

In [28]:
response.content

'{"City": "Atlanta"}'

In [29]:
import json
# The reply text shown above is a JSON object, so it can be decoded into a
# plain dict and read by key.
data = json.loads(response.content)
data

{'City': 'Atlanta'}

In [30]:
data["City"]

'Atlanta'

In [38]:
# Instruction text placed ahead of the few-shot examples. The trailing spaces
# are part of the string.
prompt_prefix = (
    "What is the most populated city in the state provided below. \n"
    "Provide city name and no additional information. "
)


In [31]:
# One dict per worked example; the keys match the placeholders used by the
# per-example template.
prompt_examples = [
    dict(ExampleState=state_name, ExampleCity=city_name)
    for state_name, city_name in (
        ("Hawaii", "Honolulu"),
        ("California", "Los Angeles"),
    )
]
prompt_examples

[{'ExampleState': 'Hawaii', 'ExampleCity': 'Honolulu'},
 {'ExampleState': 'California', 'ExampleCity': 'Los Angeles'}]

In [33]:
# Layout of a single example: one line for the state, one for the city.
example_prompt_str = "\n".join(["State: {ExampleState}", "City: {ExampleCity}"])
print(example_prompt_str)

State: {ExampleState}
City: {ExampleCity}


In [34]:
# Template applied to every entry of the examples list; both placeholders are
# declared explicitly.
example_prompt = PromptTemplate(
    template=example_prompt_str,
    input_variables=["ExampleState", "ExampleCity"],
)

example_prompt


PromptTemplate(input_variables=['ExampleCity', 'ExampleState'], input_types={}, partial_variables={}, template='State: {ExampleState}\nCity: {ExampleCity}')

In [35]:
print(example_prompt.format(**prompt_examples[0]))

State: Hawaii
City: Honolulu


In [36]:
print(example_prompt.format(**prompt_examples[1]))

State: California
City: Los Angeles


In [39]:
# Imported from langchain_core.prompts, the same package this notebook already
# uses for PromptTemplate, instead of the legacy `langchain.prompts.few_shot`
# path.
from langchain_core.prompts import FewShotPromptTemplate

# Assembles the final prompt as: prefix, then each example rendered through
# `example_prompt`, then the suffix carrying the runtime {state}; the pieces
# are joined by a blank line.
execute_fewshot_prompt = FewShotPromptTemplate(
    prefix=prompt_prefix,
    examples=prompt_examples,
    example_prompt=example_prompt,
    example_separator="\n\n",
    suffix="State: {state}",
    input_variables=["state"],
)

In [40]:
# Render the few-shot prompt for one state to inspect the exact text that the
# model will receive.
data = dict(state="Georgia")
print(execute_fewshot_prompt.format(**data))

What is the most populated city in the state provided below. 
Provide city name and no additional information. 

State: Hawaii
City: Honolulu

State: California
City: Los Angeles

State: Georgia


In [41]:
# Pipe the few-shot prompt into the chat model; `data` supplies the {state}
# value. The recorded reply follows the examples' "City: <name>" layout.
chain = execute_fewshot_prompt | model
chain.invoke(data)

AIMessage(content='City: Atlanta', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 3, 'prompt_tokens': 49, 'total_tokens': 52, 'completion_tokens_details': {'audio_tokens': None, 'reasoning_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-cc4b1420-0625-431b-8c9b-163d895504a8-0', usage_metadata={'input_tokens': 49, 'output_tokens': 3, 'total_tokens': 52})

In [45]:
from pydantic import BaseModel, Field


In [46]:
# Schema for the structured answer: a single string field named `City`.
# NOTE(review): the docstring and the Field description are presumably
# forwarded to the model as schema text when this class is used for structured
# output — treat them as prompt text and confirm before rewording.
class CityParser(BaseModel):
    """
    this object holds information about the most populated city in the 
    given state.
    """
    # `...` in the default position marks the field as required (pydantic convention).
    City: str = Field(..., description="The name of the most populous city") 

In [47]:
# Imported from langchain_core, consistent with the other langchain_core
# imports in this notebook, instead of the legacy `langchain.output_parsers`
# path.
from langchain_core.output_parsers import PydanticOutputParser

# Parser that turns a JSON string into a validated CityParser instance.
cityParser = PydanticOutputParser(pydantic_object=CityParser)


In [52]:
# Feed the parser a hand-written JSON string to show the object it produces.
output = cityParser.parse('{"City": "Atlanta"}')
output


CityParser(City='Atlanta')

In [53]:
output.City

'Atlanta'

In [60]:
prompt

PromptTemplate(input_variables=['state'], input_types={}, partial_variables={}, template='What is the most populated city in the state provided below.\n\nProvide city name and no additional information. \n\nExamples:\n\nState: Hawaii\n{{"City": "Honolulu"}}\n\nState: California\n{{"City": "Los Angeles"}}\n\nState: {state}\n')

In [63]:
# Attach the CityParser schema to the chat model; the recorded result of
# invoking the chain is a CityParser instance rather than an AIMessage.
structured_llm = model.with_structured_output(CityParser)
# `prompt` is whatever was last assigned to that name; the repr displayed just
# before this cell shows the JSON-example template.
structured_chain = prompt | structured_llm
structured_chain.invoke({"state": "Georgia"})


CityParser(City='Atlanta')

In [None]:
pip install huggingface_hub

In [65]:
import os
from getpass import getpass

# Later cells pass HUGGINGFACEHUB_API_TOKEN to the Hugging Face LLM wrappers,
# so it must be defined here or they fail with NameError on a fresh kernel.
# Prefer an environment variable (allows unattended "Run All"); otherwise ask
# interactively without echoing the token.
HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or getpass(
    "Hugging Face API token: "
)

In [67]:
from langchain.llms import HuggingFaceHub

repo_id_flan = "google/flan-t5-xxl"

# Hosted FLAN-T5 model accessed with the token defined earlier in the notebook.
llm_google_flan = HuggingFaceHub(
    huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
    repo_id=repo_id_flan,
    model_kwargs=dict(temperature=1, max_length=64),
)

In [None]:
# Input for the few-shot prompt's {state} placeholder.
data = dict(state="Georgia")

data

In [None]:
print(execute_fewshot_prompt.format(**data))

In [None]:
# Route the same few-shot prompt to the FLAN-T5 model instead of the chat model.
chain = execute_fewshot_prompt | llm_google_flan 
# NOTE: the result name is spelled `reponse` (not `response`); the following
# cell reads this exact name, so rename both together or neither.
reponse = chain.invoke(data)


In [None]:
reponse

In [None]:
from langchain.llms import HuggingFaceHub

# Alternative repo id left for reference: "meta-llama/Llama-2-13b-chat-hf"
repo_id_mistral = "mistralai/Mistral-7B-Instruct-v0.1"

# Hosted Mistral instruct model, configured like the FLAN-T5 wrapper.
llm_mistral = HuggingFaceHub(
    huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
    repo_id=repo_id_mistral,
    model_kwargs=dict(temperature=1, max_length=64),
)

# Same few-shot prompt, now feeding the Mistral model.
chain = execute_fewshot_prompt | llm_mistral

reponse = chain.invoke(data)

reponse

In [None]:
# Cut generation at the first newline so only the answer line is kept.
# `stop` is given as a list of strings (the documented type) rather than a
# bare string, which would otherwise be treated as a sequence of characters.
chain = execute_fewshot_prompt | llm_mistral.bind(stop=["\n"])

reponse = chain.invoke(data)

reponse

In [None]:
# cityParser needs a JSON object such as {"City": "..."}. The plain few-shot
# prompt demonstrates "City: <name>" answers, which the parser cannot decode,
# so this chain uses `prompt` — the template whose examples answer in JSON —
# and stops at the first newline to keep just that one JSON line.
chain = prompt | llm_mistral.bind(stop=["\n"]) | cityParser

reponse = chain.invoke(data)
reponse

In [None]:
from langchain.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Defined for real (not commented out) because a later cell pipes the few-shot
# prompt into `ollama_llama_llm`; without this definition that cell raises
# NameError on a fresh kernel.
# NOTE(review): invoking it presumably requires a local Ollama server with the
# "llama2" model pulled — confirm before running.
ollama_llama_llm = Ollama(
    model="llama2", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
)

In [None]:
data

In [None]:
# Route the few-shot prompt to the Ollama-backed model. `ollama_llama_llm`
# must already exist in the kernel before this cell runs.
chain = execute_fewshot_prompt | ollama_llama_llm

reponse = chain.invoke(data)
reponse