# This is the Pixegami tutorial
### From this video: https://www.youtube.com/watch?v=tcqEUSNCn8I

In [50]:
from dotenv import load_dotenv
import os
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import TextSplitter, RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores.chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate
from datetime import datetime
import json
import PyPDF2
import chromadb
# Default-configured Chroma client; not referenced again in the visible cells.
chroma_client = chromadb.Client()

# Load variables from a local .env file before reading the API key below.
load_dotenv()
# Not referenced again in the visible cells.
# NOTE(review): presumably the langchain_openai classes read the key from the
# environment themselves - confirm.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Folder read by get_documents_names() and load_all_docs_in_data_folder().
DIRECTORY_PATH='/Users/matansharon/python/chat_with_docs/AI_Apps/chat_with_txt/data'





def get_documents_names():
    """Return the names of all entries found directly in ``DIRECTORY_PATH``."""
    return os.listdir(DIRECTORY_PATH)
def load_all_docs_in_data_folder(documents_names):
    """Extract the full text of each named PDF inside ``DIRECTORY_PATH``.

    Args:
        documents_names: File names (not full paths) located directly in
            ``DIRECTORY_PATH``. Each one is opened with ``PyPDF2.PdfReader``.

    Returns:
        A list with one string per entry of ``documents_names``, in the same
        order; each string is the concatenated text of all pages of that file.
    """
    data_documents = []
    for name in documents_names:
        # os.path.join inserts the separator itself; no manual '/' needed.
        reader = PyPDF2.PdfReader(os.path.join(DIRECTORY_PATH, name))
        # Join once instead of growing a string page by page; ``or ''`` keeps
        # the join safe if a page yields no text at all.
        data_documents.append(
            ''.join(page.extract_text() or '' for page in reader.pages)
        )
    return data_documents
    
    

def split_text(text:str):
    """Split ``text`` into overlapping chunks.

    A ``RecursiveCharacterTextSplitter`` is configured with a chunk size of
    1000 and an overlap of 100, both measured with ``len``.

    Returns:
        Whatever the splitter's ``split_text`` produces for ``text``.
    """
    splitter_options = dict(
        chunk_size=1000,
        chunk_overlap=100,
        length_function=len,
        add_start_index=True,
    )
    return RecursiveCharacterTextSplitter(**splitter_options).split_text(text)

def create_new_db(chunks):
    """Build, or fall back to opening, the Chroma store kept in ``chroma_db``.

    Args:
        chunks: Texts to embed and store; may be empty.

    Returns:
        A ``Chroma`` vector store:

        * built from ``chunks`` when any are given;
        * built from a single empty text when ``chunks`` is empty and the
          ``chroma_db`` path does not exist yet;
        * otherwise whatever ``load_db()`` returns.
    """
    path = 'chroma_db'
    if not chunks and os.path.exists(path):
        # Nothing new to index and the path already exists: reuse it.
        return load_db()

    # With nothing to index yet, a single empty text is used as a placeholder.
    texts = chunks if chunks else ['']
    return Chroma.from_texts(
        texts=texts,
        embedding=OpenAIEmbeddings(model='text-embedding-3-small'),
        persist_directory=path,
    )

def load_db():
    """Open the Chroma store persisted in the ``chroma_db`` directory.

    The store is given an ``OpenAIEmbeddings`` instance for the
    ``text-embedding-3-small`` model as its embedding function.
    """
    embedding_function = OpenAIEmbeddings(model='text-embedding-3-small')
    return Chroma(
        persist_directory="chroma_db",
        embedding_function=embedding_function,
    )

def get_results_with_scores(query, db):
    """Query ``db`` for the top 3 matches to ``query``.

    Delegates to ``db.similarity_search_with_relevance_scores`` with ``k=3``
    and returns its result unchanged.
    """
    return db.similarity_search_with_relevance_scores(query, k=3)
def get_prompt_template(results,query):
    """Format the question-answering prompt for ``query``.

    Args:
        results: Sequence whose items carry a document at index 0; the
            ``page_content`` of each document is used as context.
        query: The question inserted into the prompt.

    Returns:
        The prompt produced by ``ChatPromptTemplate.format``, with the
        contexts joined by a ``---`` separator line.
    """
    template="""
    answer the question base only on the following context:
    {context}
    answer the question base on the above context: {query}
    
    """
    separator = "\n\n---\n\n"
    joined_context = separator.join(entry[0].page_content for entry in results)
    chat_prompt = ChatPromptTemplate.from_template(template)
    return chat_prompt.format(context=joined_context, query=query)

def get_response(query, db, model):
    """Answer ``query`` using context retrieved from ``db``.

    Retrieves matches with ``get_results_with_scores``, formats them into a
    prompt with ``get_prompt_template``, and passes that prompt to
    ``model.invoke``.

    Returns:
        The ``content`` attribute of the model's reply.
    """
    matches = get_results_with_scores(query, db)
    prompt = get_prompt_template(matches, query)
    return model.invoke(prompt).content

def main_app():
    """Load every document, chunk it, and build the store and chat model.

    Returns:
        A ``(db, model)`` tuple: the store returned by ``create_new_db`` for
        all chunks, and a default ``ChatOpenAI`` instance.
    """
    names = get_documents_names()
    documents = load_all_docs_in_data_folder(documents_names=names)

    # Flatten the per-document chunk lists into a single list.
    splited_docs = []
    for document in documents:
        splited_docs.extend(split_text(document))

    print('splited_docs',len(splited_docs))
    db = create_new_db(splited_docs)
    model = ChatOpenAI()
    return db, model
# Build the store and chat model once; the cells below reuse `db` and `model`.
db,model=main_app()


splited_docs 101
here


In [46]:
# Inspect the raw retrieval result (document, relevance score) for a sample question.
get_results_with_scores('what is Finetuning?',db)

[(Document(page_content='Our QLORAfinetuning method is the first method that enables the finetuning of 33B parameter\nmodels on a single consumer GPU and 65B parameter models on a single professional GPU, while\nnot degrading performance relative to a full finetuning baseline. We have demonstrated that our\nbest 33B model trained on the Open Assistant dataset can rival ChatGPT on the Vicuna benchmark.\nSince instruction finetuning is an essential tool to transform raw pretrained LLMs into ChatGPT-like\nchatbots, we believe that our method will make finetuning widespread and common in particular for\nthe researchers that have the least resources, a big win for the accessibility of state of the art NLP\ntechnology. QLORAcan be seen as an equalizing factor that helps to close the resource gap between\nlarge corporations and small teams with consumer GPUs.\nAnother potential source of impact is deployment to mobile phones. We believe our QLORAmethod'),
  0.21952786243575118),
 (Document(pa

In [47]:

# Build the formatted prompt for a sample question and display it.
temp=get_prompt_template(get_results_with_scores('what is Finetuning?',db),'what is Finetuning?')
temp

'Human: \n    answer the question base only on the following context:\n    Our QLORAfinetuning method is the first method that enables the finetuning of 33B parameter\nmodels on a single consumer GPU and 65B parameter models on a single professional GPU, while\nnot degrading performance relative to a full finetuning baseline. We have demonstrated that our\nbest 33B model trained on the Open Assistant dataset can rival ChatGPT on the Vicuna benchmark.\nSince instruction finetuning is an essential tool to transform raw pretrained LLMs into ChatGPT-like\nchatbots, we believe that our method will make finetuning widespread and common in particular for\nthe researchers that have the least resources, a big win for the accessibility of state of the art NLP\ntechnology. QLORAcan be seen as an equalizing factor that helps to close the resource gap between\nlarge corporations and small teams with consumer GPUs.\nAnother potential source of impact is deployment to mobile phones. We believe our QL

In [51]:
# Send the formatted prompt held in `temp` to the chat model.
model.invoke(temp)

AIMessage(content='Finetuning is the process of adjusting and optimizing a pre-trained language model by further training it on a specific dataset or task, in order to improve its performance on that particular task. In the context provided, finetuning is used to transform raw pretrained large language models (LLMs) into chatbot models like ChatGPT, with the goal of achieving state-of-the-art natural language processing (NLP) technology.')

In [52]:
# Baseline: ask a fresh chat model the bare question, without any retrieved context.
model2=ChatOpenAI()
model2.invoke("what is qlora?")

AIMessage(content='There is no widely known or established meaning for the term "qlora." It is possible that it could be a unique or made-up word with no specific definition. If you have more context or information about the term, please provide it so that a more accurate answer can be given.')

In [55]:
# Ask the question through get_response, which adds context retrieved from `db`.
get_response("what is qlora?",db,model)

'QLORA is a technique that achieves high-fidelity 4-bit finetuning using 4-bit NormalFloat (NF4) quantization and Double Quantization. It also introduces Paged Optimizers to prevent memory spikes during gradient checkpointing. It has one low-precision storage data type (4-bit) and one computation data type (BFloat16). When using a QLORA weight tensor, the tensor is dequantized to BFloat16 and matrix multiplication is performed in 16-bit.'

In [None]:
# len(chunks)

# model=ChatOpenAI()
# query='what is qlora?'

# response=get_response(query,db,model)
# print(response)
# db=create_new_db('')

In [None]:
import chromadb.utils.embedding_functions as embedding_functions
import chromadb
from PyPDF2 import PdfReader
# PDF to read.
path='/Users/matansharon/python/chat_with_docs/data/pdf/qlora.pdf'
doc=PdfReader(path)
# The document name is the last '/'-separated component of the path.
doc_name=path.rsplit('/', 1)[-1]

# Gather the extracted text of every page, then join it into one string.
page_texts=[]
for page in doc.pages:
    page_texts.append(page.extract_text())
text=''.join(page_texts)

# Split the full text into overlapping chunks (size 1000, overlap 100).
chunks=RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
    length_function=len,
    add_start_index=True,
).split_text(text)




In [None]:
# Ask a question about a different paper through the same retrieval pipeline.
print(get_response('who is the authors of the paper "Attention Is All You Need"',db,model))

In [None]:
# Second chat model with an explicit model name (earlier cells call ChatOpenAI() with no arguments).
chat=ChatOpenAI(model='gpt-4-turbo-preview')


In [None]:
# NOTE(review): `content` is not defined in any visible cell - confirm it is set
# elsewhere before running, otherwise this raises NameError.
print(chat.invoke(f'base on this: {content} what is qlora?'))