## LCEL

In [3]:
from langchain_google_genai import ChatGoogleGenerativeAI


import os
from dotenv import load_dotenv
load_dotenv()
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", google_api_key=GOOGLE_API_KEY)


In [9]:
response = llm.invoke("Tell me a joke")
display(response.content)

"Why don't scientists trust atoms? \n\nBecause they make up everything!\n"

## Output parsers

In [11]:
from langchain_core.output_parsers import StrOutputParser

output_parser = StrOutputParser()
chain = llm | output_parser

result = chain.invoke("Tell me a joke")
print(result)

Why don't scientists trust atoms? 

Because they make up everything!



## Structured Output

In [14]:
from typing import List
from pydantic import BaseModel, Field

class MobileReview(BaseModel):
    phone_model: str = Field(description="Name and model of the phone")
    rating: float = Field(description="Rating of the phone")
    pros: List[str] = Field(description="List of positive aspects")
    cons: List[str] = Field(description="List of negative aspects")
    summary: str = Field(description="Brief summary of the review")
    
class ResponseFormatter(BaseModel):
    answer: str = Field(description="The answer to the user's question")
    followup_question: str = Field(description="A followup question the user could ask")

model = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=GOOGLE_API_KEY)
model_with_tools = model.bind_tools([ResponseFormatter])
model_with_tools = model_with_tools | output_parser
ai_msg = model_with_tools.invoke("What is the powerhouse of the cell?") 
ai_msg   

'The powerhouse of the cell is the mitochondrion.\n'

- One of the most useful features of LangChain is the ability to create <strong>prompt templates</strong>.
- A prompt template is a string that contains a placeholder for input variable(s).

- A prompt template refers to a reproducible way to generate a prompt


## Prompt templates can contain the following:
    - Instructions to the language model.
    - A set of few shot examples to help the language model generate a better response.
    - A question to the language model.

In [7]:
from langchain import PromptTemplate
 
template = """
<s>[INST] <<SYS>>
Act as an Astronomer engineer who is teaching high school students.
<</SYS>>
 
{text} [/INST]
"""
 
prompt = PromptTemplate(
    input_variables=["text"],
    template=template,
)

The variable must be surrounded by `{}`. The `input_variables` argument is a list of variable names that will be used to format the template.

In [8]:
text = "Explain what is the solar system in 2-3 sentences"
print(prompt.format(text=text))


<s>[INST] <<SYS>>
Act as an Astronomer engineer who is teaching high school students.
<</SYS>>
 
Explain what is the solar system in 2-3 sentences [/INST]



- Streaming text output is gaining popularity among large language models (LLMs) and chatbots, offering a more dynamic experience for users.
- Unlike traditional generation methods that wait for completion, streaming sends text incrementally, allowing for a more interactive experience.
- With Lanchain we can turn on streaming token using the `StreamingStdOutCallbackHandler` handler. 

In [6]:
# Callbacks support token-wise streaming
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

In [45]:
import os
from os.path import abspath as abspath

In [43]:
[abspath(model_path) for model_path in (os.listdir(os.path.join(os.pardir, "models")))]

['/home/feba6204/AI/RAG/gpt4all-master/api/Mistral-7B-Instruct-v0.3-Q4_K_M.gguf',
 '/home/feba6204/AI/RAG/gpt4all-master/api/llama-2-7b-chat.Q2_K.gguf',
 '/home/feba6204/AI/RAG/gpt4all-master/api/llama-3.2-1b-instruct-q8_0.gguf']

In [48]:
model_dir = (abspath(os.path.join(os.pardir, "models")))
model_dir

'/home/feba6204/AI/RAG/gpt4all-master/models'

In [50]:
for model_path in (os.listdir(model_dir)):
    full_path = os.path.join(model_dir, model_path)
    print(full_path)

/home/feba6204/AI/RAG/gpt4all-master/models/Mistral-7B-Instruct-v0.3-Q4_K_M.gguf
/home/feba6204/AI/RAG/gpt4all-master/models/llama-2-7b-chat.Q2_K.gguf
/home/feba6204/AI/RAG/gpt4all-master/models/llama-3.2-1b-instruct-q8_0.gguf


In [53]:
import os
model_dir = os.path.join(os.pardir, "models")
model_paths = [os.path.join(model_dir,model_path) for model_path in os.listdir(model_dir)]
print(model_paths)
print(model_paths[1])

['../models/Mistral-7B-Instruct-v0.3-Q4_K_M.gguf', '../models/llama-2-7b-chat.Q2_K.gguf', '../models/llama-3.2-1b-instruct-q8_0.gguf']
../models/llama-2-7b-chat.Q2_K.gguf


In [68]:
model_path = "../models/llama-3.2-1b-instruct-q8_0.gguf"
llm = LlamaCpp(
    model_path=model_paths[0],
    temperature=0.5,
    max_tokens=1024,
    top_p=1,
    callback_manager=callback_manager,
    verbose=True,  # Verbose is required to pass to the callback manager
)

output = llm.invoke(prompt.format(text=text))
print(output)

llama_model_loader: loaded meta data with 29 key-value pairs and 291 tensors from ../models/Mistral-7B-Instruct-v0.3-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = Mistral-7B-Instruct-v0.3
llama_model_loader: - kv   2:                          llama.block_count u32              = 32
llama_model_loader: - kv   3:                       llama.context_length u32              = 32768
llama_model_loader: - kv   4:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.attention.head_count u32              = 32
llama_model_loader: - kv   7:              llama

llama_model_loader: - kv  10:                          general.file_type u32              = 15
llama_model_loader: - kv  11:                           llama.vocab_size u32              = 32768
llama_model_loader: - kv  12:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv  13:                       tokenizer.ggml.model str              = llama
llama_model_loader: - kv  14:                         tokenizer.ggml.pre str              = default
llama_model_loader: - kv  15:                      tokenizer.ggml.tokens arr[str,32768]   = ["<unk>", "<s>", "</s>", "[INST]", "[...
llama_model_loader: - kv  16:                      tokenizer.ggml.scores arr[f32,32768]   = [0.000000, 0.000000, 0.000000, 0.0000...
llama_model_loader: - kv  17:                  tokenizer.ggml.token_type arr[i32,32768]   = [2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...
llama_model_loader: - kv  18:                tokenizer.ggml.bos_token_id u32              = 1
llama_model_loader: 

The solar system is a collection of celestial bodies, including the Sun, planets (Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, and Neptune), dwarf planets (Pluto), asteroids, comets, and other small bodies. It extends outward from the Sun, which is a star with significant impact on the existence of life on Earth. The solar system formed approximately 4.6 billion years ago from the gravitational collapse of a giant molecular cloud.

llama_perf_context_print:        load time =    3559.59 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    49 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   107 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   26986.61 ms /   156 tokens


The solar system is a collection of celestial bodies, including the Sun, planets (Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, and Neptune), dwarf planets (Pluto), asteroids, comets, and other small bodies. It extends outward from the Sun, which is a star with significant impact on the existence of life on Earth. The solar system formed approximately 4.6 billion years ago from the gravitational collapse of a giant molecular cloud.


In [69]:
output

'The solar system is a collection of celestial bodies, including the Sun, planets (Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, and Neptune), dwarf planets (Pluto), asteroids, comets, and other small bodies. It extends outward from the Sun, which is a star with significant impact on the existence of life on Earth. The solar system formed approximately 4.6 billion years ago from the gravitational collapse of a giant molecular cloud.'

In [57]:
print(type(llm))

<class 'langchain_community.llms.llamacpp.LlamaCpp'>


In [60]:
from langchain_community.llms import GPT4All
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from typing import List
from langchain_core.documents import Document
import os
from chroma_utils import vectorstore
from pydantic_models import ModelName

# create retriever
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})

output_parser = StrOutputParser()


contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)


# reformulate the user's question based on chat history
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])

# used to generalize the final answer based on the retrieved context and chat history
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI assistant. Use the following context to answer the user's question."),
    ("system", "Context: {context}"),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}")
])

def get_rag_chain(model=ModelName.LLAMA_3B.value):
    history_aware_retriever = create_history_aware_retriever(llm, retriever, contextualize_q_prompt)
    question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
    rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)    
    return rag_chain

from langchain import PromptTemplate
 
template = """
<s>[INST] <<SYS>>
Given a chat history and the latest user question which might reference context in the chat history, formulate a standalone question which can be understood without the chat history. Do NOT answer the question, just reformulate it if needed and otherwise return it as is.
<</SYS>>
 
{text} [/INST]
"""
 
prompt = PromptTemplate(
    input_variables=["text"],
    template=template,
)

# if __name__ == '__main__':
#     print("testing")

  from .autonotebook import tqdm as notebook_tqdm


In [67]:
print(qa_prompt)

input_variables=['chat_history', 'context', 'input'] input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annotated[langchain_core.messages.chat.ChatMessageChunk, Tag(tag='ChatMessageChunk')], typing.Annotated[langchain_core.messages.system.SystemMessageChunk, Tag(tag='SystemMessageChunk')], typing.Annotated[langchain_

In [63]:
rag_chain = get_rag_chain()
chat_history = []
rag_chain.invoke({
    "input": "What is the solar system?",
    "chat_history": chat_history
})['answer']

Llama.generate: 1 prefix-match hit, remaining 460 prompt tokens to eval



Assistant: The solar system is a collection of celestial bodies, which revolve around the Sun. It consists
of eight planets (Mercury, Venus, Earth, Mars, Jupiter, Saturn), five dwarf planets (

llama_perf_context_print:        load time =    4127.23 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   460 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    50 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   68497.88 ms /   510 tokens


'\nAssistant: The solar system is a collection of celestial bodies, which revolve around the Sun. It consists\nof eight planets (Mercury, Venus, Earth, Mars, Jupiter, Saturn), five dwarf planets ('