# LangChain OpenAI Setup

In [1]:
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader

In [2]:
# Caching
from langchain.cache import InMemoryCache
from langchain.cache import SQLiteCache
from langchain.globals import set_llm_cache


# Which LLM response cache to install: 'in_memory' or 'sqlite'.
cacheType = 'in_memory'

# Each supported cache type maps to a zero-argument factory, so only the
# selected backend is ever constructed.
_cache_factories = {
    'in_memory': InMemoryCache,
    'sqlite': lambda: SQLiteCache(database_path=".langchain.db"),
}

# An unrecognised cacheType installs no cache at all.
_make_cache = _cache_factories.get(cacheType)
if _make_cache is not None:
    set_llm_cache(_make_cache())

# Setup LLM - Google Flan T5 Large

In [4]:
# import torch
# import transformers
# from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
# from langchain.llms import HuggingFacePipeline

# tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")

# # torch_dtype=torch.float16, low_cpu_mem_usage=True,
                                            
# model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large",
#                                               load_in_8bit=False,
#                                               device_map='cuda:0'
#                                               )

# pipe = pipeline(
#     "text2text-generation",
#     model=model, 
#     tokenizer=tokenizer, 
#     max_length=2048,
#     temperature=0,
#     repetition_penalty=1.15,
#     batch_size=10
# )

In [5]:
import torch
import transformers
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig, pipeline
from langchain.llms import HuggingFacePipeline

# Hugging Face model id shared by the tokenizer and the model weights.
MODEL_ID = "TheBloke/wizardLM-7B-HF"

tokenizer = LlamaTokenizer.from_pretrained(MODEL_ID)

# Loading options for the causal LM: 4-bit loading, placed on the first CUDA
# device, float16 dtype, reduced CPU memory use while loading.
model_load_kwargs = dict(
    load_in_4bit=True,
    device_map='cuda:0',
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)
model = LlamaForCausalLM.from_pretrained(MODEL_ID, **model_load_kwargs)

# Generation settings forwarded to the text-generation pipeline.
# NOTE(review): temperature/top_p are sampling parameters and do_sample is not
# set here — confirm they have the intended effect.
generation_kwargs = dict(
    max_length=1024,
    temperature=0,
    top_p=0.95,
    repetition_penalty=1.15,
)
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    **generation_kwargs,
)

You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. If you see this, DO NOT PANIC! This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=True`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


bin C:\Users\Tapiwa\AppData\Roaming\Python\Python311\site-packages\bitsandbytes\libbitsandbytes_cuda118_nocublaslt.dll


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
# Wrap the transformers pipeline in LangChain's HuggingFacePipeline LLM wrapper.
# The summarisation chain and the question-generation cells use this object as `chat`.
chat = HuggingFacePipeline(pipeline=pipe)

In [7]:
# Quick smoke test: send a single prompt through the wrapped pipeline.
chat('What is the capital on Canada')

  warn_deprecated(


'?\nCanada does not have a capital city. Instead, Ottawa is the capital of Canada and serves as the seat of government for both the federal and provincial/territorial governments.'

## HF Instructor Embeddings

In [14]:
from langchain.embeddings import HuggingFaceInstructEmbeddings, HuggingFaceBgeEmbeddings

# Embedding model later passed to Chroma.from_documents to vectorise the chunks.
model_name = "BAAI/bge-small-en"
# NOTE(review): the LLM above is placed on 'cuda:0' while the embeddings target
# 'cuda:1' — presumably a second GPU is available; confirm.
model_kwargs = {"device": "cuda:1"}
encode_kwargs = {"normalize_embeddings": True}
embedding = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)


# Load Multiple documents and process

In [16]:
# Load the files under ./new_articles/ that match the glob "./*.txt", reading
# each one with TextLoader.
loader = DirectoryLoader('./new_articles/', glob="./*.txt", loader_cls=TextLoader)

documents = loader.load()

In [18]:
from langchain.schema import AIMessage, HumanMessage, SystemMessage

# Import chat templates
from langchain.prompts import (
    ChatPromptTemplate,
    PromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)


In [37]:
from langchain.chains.summarize import load_summarize_chain
# Summarisation chain driven by the `chat` LLM, using the "refine" chain type.
chain = load_summarize_chain(llm=chat, chain_type="refine")



In [40]:
# Trial run: split only the first loaded document (chunk_size=500,
# chunk_overlap=200) and run the summarisation chain over those chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=200)
texts = text_splitter.split_documents([documents[0]])

chain.run(texts)



''

## Pydantic 

In [None]:
from typing import List, Optional

from pydantic import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser

In [None]:
# One question/answer pair; QADocument.items holds a list of these.
class QAModel(BaseModel):
    question: str = Field(description="Question")
    answer: str = Field(description="Answer to question")

# Structured result the output parser builds from the LLM reply: the source
# identifier, the summary text and the generated question/answer pairs.
# Documented with comments rather than a docstring so the schema text embedded
# in the format instructions is not altered by documentation.
class QADocument(BaseModel):
    # The prompt tells the LLM these two may be left blank, and both default to
    # None, so they are typed Optional to accept a missing or null value.
    metadata: Optional[dict] = None
    original: Optional[str] = None
    source: str = Field(description="Metadata for the source document")
    summary: str = Field(description="Original content being processed")
    items: List[QAModel] = []

In [None]:
from langchain.schema import Document

def creat_document_lc(page_content:str, metadata:dict):
    """Wrap a text body and its metadata dict in a LangChain ``Document``."""
    document_fields = {"page_content": page_content, "metadata": metadata}
    return Document(**document_fields)

def format_page_content(response: QAModel):
    """Render one question/answer pair as a ``Question:`` line followed by an ``Answer:`` line."""
    question_line = f"Question: {response.question}"
    answer_line = f"Answer: {response.answer}"
    return "\n".join((question_line, answer_line))

def format_page_metadata(result: QADocument):
    """Build the metadata dict for an exported document.

    Returns a dict with the document's ``source`` and ``qaCount``, the number
    of question/answer items (0 when ``items`` is empty or None).
    """
    qa_items = result.items
    metadata = {'source': result.source}
    metadata['qaCount'] = len(qa_items) if qa_items else 0
    # Additional properties here if necessary
    return metadata

def export_document(result: QADocument) -> Document:
    """Flatten a parsed QADocument into a single LangChain ``Document``.

    The page content is laid out as a summary section, one block per
    question/answer pair, then the original text. Metadata comes from
    ``format_page_metadata``.

    Args:
        result: Parsed QA document; ``items`` may be empty or None.

    Returns:
        The ``Document`` created by ``creat_document_lc``.
    """
    metadata = format_page_metadata(result)

    sections = [f"***Summary***:\n\n{result.summary}\n\n***Possible QA***\n"]
    # Treat a missing item list as "no QA pairs", matching how
    # format_page_metadata counts items, instead of failing on None.
    sections.extend(f"\n{format_page_content(item)}\n" for item in (result.items or []))
    sections.append(f"\n***Original***\n\n{result.original}")

    return creat_document_lc(page_content="".join(sections), metadata=metadata)
        

In [None]:
# Prompt made of a single human message: the request text followed by the
# format instructions. generate_request_for_questions fills in both slots.
human_template = """{request}\n{format_instructions}"""
human_prompt = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages([human_prompt])

In [None]:
def generate_request_for_questions(source_name: str, summary: str):
    """Build the prompt messages asking the LLM for questions and answers about a summary.

    Reads the module-level ``chat_prompt`` and ``parser``; both must be defined
    before this function is called.

    Args:
        source_name: Value placed on the ``source:`` line of the prompt.
        summary: Text to cover, embedded between ``###`` markers.

    Returns:
        The message list from ``chat_prompt.format_prompt(...).to_messages()``.
    """
    q_template1 = f"""Create a few questions and answers covering the below text starting and ending with ### :
    ###{summary}###

    Text metadata to be included in results:
    source: {source_name}
    summary: [This should be the text provided in above with ###]
    original: [This can be left blank]
    metadata: [This can be left blank]
    """

    request = chat_prompt.format_prompt(request=q_template1,
                                       format_instructions=parser.get_format_instructions()).to_messages()
    
    return request


In [None]:
# Output parser bound to the QADocument schema. It supplies the format
# instructions embedded in the prompt and parses the LLM reply.
parser = PydanticOutputParser(pydantic_object=QADocument)
# parser.get_format_instructions()

In [None]:
def process_document(document: Document) -> Document:
    """Summarise a document, generate QA pairs for it and wrap both in a new Document.

    Pipeline: generate a summary -> generate questions -> create the enriched
    document wrapper.

    Args:
        document: Source document; ``metadata['source']`` is used as its name.

    Returns:
        The enriched document from ``export_document``. If any step fails, the
        error is printed and the original ``document`` is returned unchanged,
        so the caller always receives a Document.
    """
    doc_name = document.metadata['source']
    print("Processing document: ", doc_name)

    # Fallback result: without it a failure below would leave `final_doc`
    # unbound and the final `return` would raise UnboundLocalError.
    final_doc = document

    try:
        # Create summary
        summary_resp = chain.run([document])

        # Create request for questions
        question_request = generate_request_for_questions(source_name=doc_name, summary=summary_resp)

        # A text-completion LLM only accepts a string prompt, so flatten the
        # message list into plain text before invoking the model.
        prompt_text = "\n".join(message.content for message in question_request)
        question_response = chat.invoke(prompt_text, temperature=0.0)

        # Chat models return a message object with `.content`; plain LLMs
        # return the generated string directly.
        response_text = getattr(question_response, "content", question_response)
        parsed_questions_qadoc = parser.parse(response_text)

        # Set core details
        parsed_questions_qadoc.original = document.page_content

        # Generate new doc
        final_doc = export_document(parsed_questions_qadoc)

    except Exception as error:
        # Best effort: report the failure and keep the unenriched document.
        print("An exception occurred:", error)

    print("Done processing document: ", doc_name)

    return final_doc
    
    
    

In [None]:
# question_request[0].content
# NOTE(review): scratch cell. `question_request` is only assigned in a later
# cell, so on a fresh kernel this raises NameError. chat_prompt is built from a
# single message, so index 1 also looks out of range — confirm and consider
# removing this cell.
chat(question_request[1].content, temperature=0.0)


In [None]:
# Step-by-step walkthrough of the enrichment pipeline on a single document.
document = documents[1]

# Create summary
doc_name = document.metadata['source']
summary_resp = chain.run([document])

# Create request for questions
question_request = generate_request_for_questions(source_name=doc_name, summary=summary_resp)

# A text-completion LLM only accepts a string prompt, so flatten the message
# list into plain text before invoking the model.
prompt_text = "\n".join(message.content for message in question_request)
question_response = chat.invoke(prompt_text, temperature=0.0)

# Chat models return a message object with `.content`; plain LLMs return the
# generated string directly.
response_text = getattr(question_response, "content", question_response)
parsed_questions_qadoc = parser.parse(response_text)

# Set core details
parsed_questions_qadoc.original = document.page_content

# Generate new doc
final_doc = export_document(parsed_questions_qadoc)

In [None]:
# Enrich every loaded document in turn, collecting the results in input order.
enriched_documents = []
for source_doc in documents:
    enriched_documents.append(process_document(document=source_doc))
    

In [None]:
# Preview the first 300 characters of the third enriched document.
enriched_documents[2].page_content[:300]

# Retriever

In [None]:
# Split the enriched documents into chunks (chunk_size=1000, chunk_overlap=200)
# for indexing. This rebinds `text_splitter` and `texts` from the earlier trial cell.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(enriched_documents)

In [None]:
# Inspect the fourth chunk produced by the splitter.
texts[3]

In [None]:
# Directory handed to Chroma as its persist_directory.
persist_dir = 'articles_db_flant5'

# Build the Chroma vector store from the chunks, embedding them with `embedding`.
vecordb = Chroma.from_documents(documents=texts,
                                embedding=embedding,
                                persist_directory=persist_dir)

# NOTE(review): newer Chroma releases reportedly persist automatically when a
# persist_directory is given — confirm whether this explicit call is still needed.
vecordb.persist()

In [None]:
# Retriever over the vector store, configured with search_kwargs k=5.
retriever = vecordb.as_retriever(search_kwargs={"k": 5})

In [None]:
# Sanity-check retrieval with a sample query.
docs = retriever.get_relevant_documents("Databricks Okera")

In [None]:
# Number of documents returned for the sample query.
len(docs)

# Make Chain

In [None]:
# Retrieval QA chain ("refine" chain type) that also returns the source
# documents. The notebook's LLM object is named `chat`; the name `llm` is never
# defined, so referencing it here would raise NameError.
qa_chain = RetrievalQA.from_chain_type(llm=chat,
                                       chain_type="refine",
                                       retriever=retriever,
                                       return_source_documents=True)

In [None]:
## Cite sources
def process_llm_response(llm_response):
    """Print the answer text, then the ``source`` of each supporting document.

    Args:
        llm_response: Mapping with a ``'result'`` entry and a
            ``'source_documents'`` iterable whose items expose ``metadata['source']``.
    """
    answer = llm_response['result']
    print(answer)
    print('\n\nSources:')
    cited_documents = llm_response["source_documents"]
    for cited_doc in cited_documents:
        print(cited_doc.metadata['source'])

In [None]:
# Full example: ask one question through the QA chain and print the answer
# together with its cited sources.
query = "How much money did Pando raise?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# Break it down: a broader question on the same topic, answered and cited the same way.
query = "What is the news about Pando?"
llm_response = qa_chain(query)
process_llm_response(llm_response)
# llm_response

In [None]:
# Follow-up question about who led the Pando round.
query = "Who led the round in Pando?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# Question about the Databricks acquisition.
query = "What did databricks acquire?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# General question about generative AI, answered and cited the same way.
query = "What is generative ai?"
llm_response = qa_chain(query)
process_llm_response(llm_response)