# This is a Pixegami tutorial
### From this video: https://www.youtube.com/watch?v=tcqEUSNCn8I

In [31]:
from dotenv import load_dotenv
import os
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores.chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate
from datetime import datetime
import json
# Load variables from a .env file into the process environment
# (presumably where the OpenAI API key lives — confirm).
load_dotenv()
# Directory handed to DirectoryLoader in load_and_split_documents().
# NOTE(review): absolute, machine-specific path — it will not resolve on another machine.
directory_path='/Users/matansharon/python/chat_with_docs/data/text'
def get_all_docs(path='/Users/matansharon/python/chat_with_docs/AI_Apps/chat_with_txt/docs.json'):
    """Return the value stored under the ``'docs'`` key of a JSON file.

    Args:
        path: Location of the JSON file. Defaults to the original
            machine-specific location so existing zero-argument calls keep
            working; pass another path to read a different file.

    Returns:
        Whatever the file stores under its top-level ``'docs'`` key.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        KeyError: If the JSON object has no ``'docs'`` key.
    """
    # NOTE(review): another cell in this notebook reads the key 'documents'
    # from a 'docs.json' — confirm which key this file actually uses.
    # JSON is UTF-8 by specification; do not rely on the platform default encoding.
    with open(path,'r',encoding='utf-8') as f:
        data=json.load(f)
    return data['docs']
def load_and_split_documents():
    """Load every document under ``directory_path`` and split it into chunks.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter.split_documents``
        with chunk_size=1000, chunk_overlap=100, ``len`` as the length
        function and ``add_start_index`` enabled.
    """
    splitter_options = {
        'chunk_size': 1000,
        'chunk_overlap': 100,
        'length_function': len,
        'add_start_index': True,
    }
    raw_documents = DirectoryLoader(directory_path).load()
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    return splitter.split_documents(raw_documents)
def create_db(chunks):
    """Build the Chroma store in ``chroma_db`` on first use, otherwise reopen it.

    Args:
        chunks: Documents to embed; only used when the ``chroma_db`` path
            does not exist yet.

    Returns:
        A newly built ``Chroma`` instance, or the result of ``load_db()``
        when the path already exists.
    """
    persist_dir='chroma_db'
    if os.path.exists(persist_dir):
        # Existing store: `chunks` is ignored and nothing is re-embedded.
        return load_db()
    return Chroma.from_documents(
        documents=chunks,
        embedding=OpenAIEmbeddings(),
        persist_directory=persist_dir,
    )
def load_db():
    """Open the Chroma store persisted under ``chroma_db``.

    Returns:
        A ``Chroma`` instance that uses ``OpenAIEmbeddings`` as its
        embedding function.
    """
    return Chroma(
        persist_directory="chroma_db",
        embedding_function=OpenAIEmbeddings(),
    )
def get_results_with_scores(query,db,k=3,min_score=None):
    """Fetch the chunks most relevant to ``query`` together with their scores.

    Args:
        query: Text to search for.
        db: Object exposing ``similarity_search_with_relevance_scores(query, k=...)``.
        k: Number of results to request; defaults to 3.
        min_score: Optional relevance cut-off. ``None`` (the default) returns
            the search results untouched; otherwise pairs whose score is
            below ``min_score`` are dropped.

    Returns:
        The ``(document, score)`` pairs returned by ``db``, filtered by
        ``min_score`` when one is given.
    """
    results=db.similarity_search_with_relevance_scores(query,k=k)
    if min_score is None:
        return results
    return [pair for pair in results if pair[1]>=min_score]
def get_prompt_template(results,query):
    """Render the question-answering prompt for ``query``.

    Args:
        results: Sequence whose items carry a document at index 0; each
            document's ``page_content`` becomes one context section.
        query: The user's question.

    Returns:
        The value of ``ChatPromptTemplate.format`` with the joined context
        and the query filled in.
    """
    template="""
    answer the question base only on the following context:
    {context}
    answer the question base on the above context: {query}
    
    """
    # Context sections are separated by a "---" line surrounded by blank lines.
    section_separator = "\n\n---\n\n"
    joined_context = section_separator.join([hit[0].page_content for hit in results])
    chat_prompt = ChatPromptTemplate.from_template(template)
    return chat_prompt.format(context=joined_context,query=query)
def get_response(query,db,model):
    """Answer ``query`` using context retrieved from ``db``.

    Args:
        query: The user's question.
        db: Vector store passed on to ``get_results_with_scores``.
        model: Object with an ``invoke(prompt)`` method whose result has a
            ``content`` attribute.

    Returns:
        The ``content`` attribute of the model's reply.
    """
    retrieved=get_results_with_scores(query,db)
    prompt=get_prompt_template(retrieved,query)
    return model.invoke(prompt).content
def main_app():
    """Prepare the vector store and the chat model.

    Returns:
        ``(db, model)``: the store from ``create_db`` built on the chunks of
        ``load_and_split_documents``, and a default ``ChatOpenAI`` instance.
    """
    db=create_db(load_and_split_documents())
    # for GPT-4 use this: 'gpt-4-turbo-preview'
    return db,ChatOpenAI()


828

In [20]:
# The PDF loader is imported in this cell rather than in the top import cell.
from langchain_community.document_loaders import PyPDFLoader
# NOTE(review): absolute, machine-specific path to qlora.pdf — it will not resolve on another machine.
path='/Users/matansharon/python/chat_with_docs/data/pdf/qlora.pdf'
# `doc` holds whatever PyPDFLoader.load() returns; it is passed to split_documents() in the next cell.
doc=PyPDFLoader(path).load()


In [21]:
# Same splitter settings as load_and_split_documents(): chunk_size=1000, chunk_overlap=100.
chunks=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=100,length_function=len,add_start_index=True).split_documents(doc)
# A comprehension keeps its loop variable local, so the module-level `doc`
# (the loaded PDF) is not rebound to the last chunk and this cell can be re-run.
splited_str=[chunk.page_content for chunk in chunks]

In [8]:
# NOTE(review): `db` and `model` are not assigned in any visible cell; presumably they
# came from `db,model=main_app()` — confirm, otherwise this fails on a fresh kernel.
print(get_response('who is the authors of the paper "Attention Is All You Need"',db,model))

The authors of the paper "Attention Is All You Need" are Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Łukasz Kaiser, and Illia Polosukhin.


In [12]:
# Baseline: ask the chat model directly, with no retrieved context.
chat=ChatOpenAI(model='gpt-4-turbo-preview')
# Same question text as the retrieval-backed get_response() call on db2,
# so the two answers can be compared like for like.
print(chat.invoke('what is qlora?'))

content='As of my last update in 2023, "Qlora" does not appear to be a widely recognized term or brand in major global contexts, including technology, science, health, or entertainment. It\'s possible that "Qlora" could be a specific product, service, or concept that is localized, newly introduced, or not broadly known outside a particular niche or industry.\n\nIf you are referring to a specific product, brand, or concept named "Qlora," it would be helpful to have more context or details to provide a more accurate and helpful response. It\'s also possible that new developments related to "Qlora" could have emerged after my last update. Please check the most recent sources for the latest information.'


In [22]:
# Embed `chunks` (presumably the PDF split from the cell above — confirm) into a separate Chroma store.
# NOTE(review): 'chorma_test' looks like a typo for 'chroma_test'; left unchanged because it is the on-disk path.
db2=Chroma.from_documents(documents=chunks,embedding=OpenAIEmbeddings(),persist_directory='chorma_test')

In [23]:
# Ask about QLoRA again, this time through get_response() so the prompt carries context retrieved from db2.
print(get_response('what is qlora?',db2,chat))

QLORA is an efficient finetuning approach designed to reduce memory usage significantly, enabling the finetuning of a 65B parameter model on a single 48GB GPU while maintaining full 16-bit finetuning task performance. It operates by backpropagating gradients through a frozen, 4-bit quantized pretrained language model into Low Rank Adapters (LoRA). The approach incorporates several innovations to conserve memory without compromising performance, including the introduction of a new data type (4-bit NormalFloat), Double Quantization to save on bits per parameter, and Paged Optimizers to manage memory spikes. QLORA allows for an extensive study of instruction finetuning and chatbot performance across a wide range of model sizes and architectures, making the finetuning of high-quality Large Language Models (LLMs) more accessible and widely available.


In [30]:
# Read docs.json from the working directory and show its 'documents' entry.
import json
with open('docs.json') as f:
    data = json.load(f)
# `data` is fully parsed at this point, so printing does not need the open file handle.
print(data['documents'])


['attention', 'pytorch', 'qlora']
