In [None]:
import os
from dotenv import load_dotenv

# Load settings from a .env file into the process environment; the Azure OpenAI
# and Qdrant values read with os.getenv(...) later in the notebook come from here.
load_dotenv()

In [None]:
from langchain_openai import AzureChatOpenAI

# Azure OpenAI chat model used by the pipelines below. API version, endpoint and
# deployment name are read from environment variables rather than hard-coded.
gpt_4_turbo = AzureChatOpenAI(
    api_version =os.getenv("AZURE_OPENAI_API_VERSION"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    azure_deployment=os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT"),
    temperature=0.4,
    streaming=True,
)

In [None]:
from langchain_core.prompts import ChatPromptTemplate
# System instructions for the baseline (no retrieval) chat prompt.
# Adjacent literals are concatenated at compile time, so this is one single-line
# string whose sentences are separated by one space (trailing space included).
qa_system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know. "
)
# Two-message chat prompt: the system instructions defined above, followed by
# the user's message filled in through the `question` template variable.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        ("human", "{question}")
    ])

In [None]:
# Baseline without retrieval: pipe the prompt straight into the chat model.
# Deliberately not named `chain`: the RAG section later imports the `chain`
# decorator from langchain_core.runnables, which would overwrite this variable
# and break the cell if it were re-run after that import.
baseline_chain = prompt | gpt_4_turbo
baseline_chain.invoke("How many paws a dog has?")

## Building RAG

### Document loading and processing

Loading text and PDF files

In [None]:
# Directory holding the source documents (trailing slash kept: later cells
# build paths by appending a file name directly to this value).
filepath = './data/'

# os.path.join avoids the doubled slash of f"{filepath}/...", and the explicit
# encoding keeps the read independent of the platform's default code page
# (assumes the transcript is stored as UTF-8 — TODO confirm).
with open(os.path.join(filepath, "Jak_zbudować_chatbot.txt"), encoding="utf-8") as file:
    webinar_1 = file.read()

In [None]:
import re
# Naive sentence split on runs of '.', '!' or '?'. Fragments are stripped and
# blank ones dropped, so the trailing empty string after the final punctuation
# mark and whitespace-only pieces do not inflate the sentence count.
webinar_1_sentences = [
    fragment.strip()
    for fragment in re.split(r'[.!?]+', webinar_1)
    if fragment.strip()
]
len(webinar_1_sentences)

In [None]:
# Display the sentences produced by the regex split for inspection.
webinar_1_sentences

In [None]:
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders import TextLoader

# Load every .txt file under `filepath` (recursively) as a document.
# TextLoader otherwise opens files with the platform default encoding, which
# breaks on non-ASCII text on some systems; request UTF-8 explicitly and fall
# back to encoding detection if a file cannot be decoded that way.
loader = DirectoryLoader(
    filepath,
    glob="**/*.txt",
    loader_cls=TextLoader,
    loader_kwargs={"encoding": "utf-8", "autodetect_encoding": True},
)
text_docs = loader.load()
len(text_docs)

In [None]:
from langchain_community.document_loaders import PyPDFLoader
# Path of the sample PDF: the file name is appended directly to `filepath`,
# so `filepath` must end with a path separator.
pdf_filepath = filepath + "Manual-Leadership-2024.pdf"

# Load the PDF with PyPDFLoader and show how many items the loader returned.
loader = PyPDFLoader(pdf_filepath)
pdf_document = loader.load()
len(pdf_document)

In [None]:
# Display what the PDF loader returned for inspection.
pdf_document

In [None]:
from pdf2image import convert_from_path

# Render the same PDF to images with pdf2image, as input for an OCR step;
# the number of images is displayed (presumably one per page — confirm).
images=convert_from_path(pdf_path=pdf_filepath)
len(images)

...and now choose your OCR library and perform OCR

#### Chunking text into smaller pieces

In [None]:
import tiktoken
def tiktoken_len(text):
    """Return the length of ``text`` in tokens, not words or characters.

    The text is encoded with the tiktoken encoding registered for "gpt-4"
    and the number of resulting tokens is returned.
    """
    encoder = tiktoken.encoding_for_model("gpt-4")
    return len(encoder.encode(text))

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Lengths are measured with `tiktoken_len`, so `chunk_size` and `chunk_overlap`
# are expressed in tokens rather than characters.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 300,
    chunk_overlap = 30,
    length_function = tiktoken_len,
)

In [None]:
# Split the loaded text documents into chunks and show how many were produced.
docs = text_splitter.split_documents(text_docs)
len(docs)

In [None]:
# Inspect the last chunk.
docs[-1]

TIP: *When processing multiple documents, it is worth saving your chunks to disk (it saves time on later runs).*

In [None]:
from datetime import datetime
# Persist the chunks as JSON Lines (one serialized document per line).
# The POSIX timestamp of "now" makes the file name unique per run.
now = datetime.now()
timestamp = now.timestamp()
with open(f'langchain_documents-{timestamp}.jsonl', 'w', encoding="UTF-8") as jsonl_file:
    for doc in docs:
        # print() appends the newline that terminates each JSONL record.
        print(doc.json(), file=jsonl_file)

## Vector Store

In [None]:
from langchain_qdrant import QdrantVectorStore
from langchain_community.vectorstores import Qdrant

Adding an embedding model

In [None]:
from langchain_openai import AzureOpenAIEmbeddings
# Embedding model used to vectorize chunks and queries; the Azure deployment
# name comes from the environment.
# NOTE(review): endpoint, API version and key are not passed here — presumably
# the client picks them up from environment variables; confirm.
azure_embeddings = AzureOpenAIEmbeddings(
    azure_deployment=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT"),
)

Creating a new Qdrant collection

In [None]:
# Qdrant collection used by the cells below, and the server URL taken from the
# QDRANT_URL environment variable (None if the variable is not set).
collection_name = "langchain-demo"
url=os.getenv("QDRANT_URL")

In [None]:
# Build a vector store from the chunks: `docs` are embedded with
# `azure_embeddings` and stored in `collection_name` on the server at `url`,
# with gRPC explicitly not preferred.
qdrant = QdrantVectorStore.from_documents(
    docs,
    azure_embeddings,
    url = url,
    prefer_grpc=False,
    collection_name=collection_name
)

We can also connect to an existing collection

For Qdrant < v1.10

In [None]:
import qdrant_client

# Connect to the existing collection through an explicit Qdrant client and the
# `Qdrant` vector store class from langchain_community.
# NOTE: `qdrant2` is reassigned by the QdrantVectorStore cell further down.
client = qdrant_client.QdrantClient(url=url)

qdrant2 = Qdrant(
    client=client,
    collection_name=collection_name,
    embeddings=azure_embeddings,
    )

For Qdrant >= v1.10

In [None]:
# Attach to the already-populated collection instead of re-embedding the chunks.
qdrant2 = QdrantVectorStore.from_existing_collection(collection_name, azure_embeddings, url=url)

### Similarity Search
Now let us run the most basic search over our vector stores

In [None]:
# Sample question reused by the retrieval cells below.
query = "Which large language models (llm) are good?"

# Embed the query with the vector store's embedding model. Show only the vector
# length and its first few components instead of dumping the whole vector into
# the cell output.
query_embedding = qdrant2.embeddings.embed_query(query)
len(query_embedding), query_embedding[:5]

In [None]:
# Similarity search returning hits together with their scores: at most k=5
# results, filtered by score_threshold=0.44 (presumably a minimum score — confirm).
results = qdrant2.similarity_search_with_score(query=query,score_threshold=0.44, k=5)
results

### Pipeline
Time to finally create our RAG application pipeline

In [None]:
# Expose the vector store as a retriever, using the same threshold (0.44) and
# result limit (k=5) as the direct similarity search above.
retriever = qdrant2.as_retriever(
    search_type="similarity_score_threshold", 
    search_kwargs={
        "score_threshold": 0.44, 
        "k": 5
        })

# Smoke-test the retriever on the sample query.
retriever.invoke(query)

In [None]:
from typing import List

from langchain_core.documents import Document
from langchain_core.runnables import chain


@chain
def retriever_with_score(query: str) -> List[Document]:
    """Retrieve documents for ``query`` and record each hit's similarity score.

    Runs ``qdrant2.similarity_search_with_score`` and stores every score in
    the matching document's ``metadata["score"]``.

    Args:
        query: The search text.

    Returns:
        The retrieved documents as a list, each annotated with its score.
        An empty list is returned when the search yields no hits (unpacking
        ``zip(*[])`` into two names would raise ``ValueError``).
    """
    scored_docs = []
    for doc, score in qdrant2.similarity_search_with_score(query):
        doc.metadata["score"] = score
        scored_docs.append(doc)

    return scored_docs

# Run the score-annotating retriever on the sample query.
retriever_with_score.invoke(query)

In [None]:
# System instructions for the RAG prompt. Each trailing backslash joins the next
# line onto the current one, so every sentence must end with ". " before it;
# otherwise the sentences run together in the text sent to the model.
# The retrieved context is injected through the `context` template variable,
# separated from the instructions by a blank line.
qa_system_prompt = """You are an assistant for question-answering tasks. \
The questions you receive should be in regards to artificial intelligence and generative AI. \
Use the following pieces of retrieved context to answer the question. \
If you don't know the answer, just say that you don't know.

{context}
"""
# RAG chat prompt: the system message carries the instructions and the
# `context` variable, the human message carries the `question` variable.
rag_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        ("human", "{question}")
    ])

In [None]:
# Show the prompt's input schema to check which variables it expects.
rag_prompt.input_schema.schema()

In [None]:
def format_docs(docs):
    """Render retrieved documents as one context string for the prompt.

    Each document becomes ``<quote source=SOURCE>PAGE_CONTENT</quote>``
    followed by a blank line.

    Args:
        docs: Iterable of objects exposing ``metadata`` (a dict) and
            ``page_content``.

    Returns:
        The concatenated quotes, or an empty string when ``docs`` is empty.
        Documents without a ``"source"`` metadata entry are labelled
        ``unknown`` instead of raising ``KeyError``.
    """
    return "".join(
        f'<quote source={doc.metadata.get("source", "unknown")}>{doc.page_content}</quote>\n\n'
        for doc in docs
    )

In [None]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# RAG pipeline: the incoming question is sent to the retriever (whose documents
# are rendered by `format_docs`) and also passed through unchanged; both fill
# `rag_prompt`, which is answered by the chat model and parsed to a string.
pipeline = (
    {
        "context": retriever | format_docs, 
        "question": RunnablePassthrough()
    }
    | rag_prompt
    | gpt_4_turbo
    | StrOutputParser()
)

In [None]:
from pprint import pprint

# Run the RAG pipeline on a sample question and pretty-print the answer.
pprint(pipeline.invoke("What is the difference between various LLMs?"))

### Multiquery 

In [None]:
from langchain.retrievers.multi_query import MultiQueryRetriever

In [None]:
# Wrap the threshold retriever in a MultiQueryRetriever driven by the chat
# model, then run it on the sample query and display the returned documents.
new_retriever = MultiQueryRetriever.from_llm(retriever=retriever,llm=gpt_4_turbo)
unique_docs = new_retriever.invoke(query)
unique_docs

In [None]:
# Same pipeline shape as the basic RAG pipeline, but the context comes from the
# multi-query retriever.
multi_query_pipeline = (
    {
        "context": new_retriever | format_docs, 
        "question": RunnablePassthrough()
    }
    | rag_prompt
    | gpt_4_turbo
    | StrOutputParser()
)

# Run it on a sample question and pretty-print the answer.
pprint(multi_query_pipeline.invoke("What is the difference between various LLMs?"))

## AWS Bedrock (Claude Opus)

In [None]:
from langchain_aws import ChatBedrock

# Claude 3 Opus served through AWS Bedrock in us-west-2, with temperature 0.
# NOTE(review): no credentials are passed here — presumably resolved from the
# ambient AWS configuration; confirm.
claude_opus = ChatBedrock(
    region_name="us-west-2",
    model_id="anthropic.claude-3-opus-20240229-v1:0",
    model_kwargs=dict(temperature=0),
)

In [None]:
# Multi-query RAG pipeline answered by Claude Opus instead of the Azure model;
# retrieval and prompt are identical to `multi_query_pipeline`.
multi_query_pipeline_opus = (
    {
        "context": new_retriever | format_docs,
        "question": RunnablePassthrough()
    }
    | rag_prompt
    | claude_opus
    | StrOutputParser()
)

# Invoke the Opus pipeline defined above; calling `multi_query_pipeline` here
# would silently re-run the GPT-4 pipeline and never exercise Claude.
pprint(multi_query_pipeline_opus.invoke("What is the difference between various LLMs?"))

### Tavily Search

In [None]:
from langchain_community.retrievers import TavilySearchAPIRetriever

# Web-search retriever backed by the Tavily API, configured with k=3.
# NOTE(review): no API key is passed — presumably read from the environment; confirm.
tavily_retriever = TavilySearchAPIRetriever(k=3)

# Smoke-test it on a sample question.
tavily_retriever.invoke("What is the difference between various LLMs?")

### Ensemble Retriever
Time to combine the results from two different retrievers and rerank them

In [None]:
from langchain.retrievers import EnsembleRetriever

# Combine the multi-query vector-store retriever with the Tavily web retriever,
# giving both equal weight (0.5 each).
ensemble_retriever = EnsembleRetriever(retrievers=[new_retriever, tavily_retriever], weights=[0.5, 0.5])

# Smoke-test the combined retriever on a sample question.
ensemble_retriever.invoke("What is the difference between various LLMs?")

In [None]:
# Same pipeline shape as the basic RAG pipeline, but the context comes from the
# ensemble retriever (vector store + web search).
ensemble_pipeline = (
    {
        "context": ensemble_retriever | format_docs, 
        "question": RunnablePassthrough()
    }
    | rag_prompt
    | gpt_4_turbo
    | StrOutputParser()
)

# Run it on a sample question and pretty-print the answer.
pprint(ensemble_pipeline.invoke("What is the difference between various LLMs?"))