In [8]:
import torch

from transformers import BitsAndBytesConfig
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_huggingface.llms import HuggingFacePipeline

from langchain.memory import ConversationBufferMemory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain.chains import ConversationalRetrievalChain

from langchain_chroma import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain import hub

## Read pdf file

In [9]:
Loader = PyPDFLoader
FILE_PATH = (
    "./docs/Writing_a_scientific_article__A_step-by-step_guide_for_beginners_.pdf"
)
loader = Loader(FILE_PATH)
documents = loader.load()

print("Number of documents: ", len(documents))
documents[0]

Number of documents:  7


Document(metadata={'source': './docs/Writing_a_scientific_article__A_step-by-step_guide_for_beginners_.pdf', 'page': 0}, page_content='Research  paper\nWriting  a  scientiﬁc  article:  A  step-by-step  guide  for  beginners\nF.  Ecarnot *,  M.-F.  Seronde,  R.  Chopard,  F.  Schiele,  N.  Meneveau\nEA3920,  Department  of  Cardiology,  University  Hospital  Jean-Minjoz,  3,  Boulevard  Fleming,  25000  Besanc ¸on,  France\n1.  Background\nEvery  researcher  has  been  face  to  face  with  a  blank  page  at  some\nstage  of  their  career,  wondering  where  to  start  and  what  to  write\nﬁrst.  Describing  one’s  research  work  in  a  format  that  is\ncomprehensible  to  others,  and  acceptable  for  publication  is  no\neasy  task.  When  you  invest  a  lot  of  time,  energy  and  often  money  in\nyour  research,  you  become  intimately  and  emotionally  involved.\nNaturally,  you  are  convinced  of  the  value  of  your  research,  and  of\nits  importance  for  the  sci

## Initialize text splitter and split document

In [10]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

In [11]:
docs = text_splitter.split_documents(documents)

print("Number of mini-documents: ", len(docs))
docs[0]

Number of mini-documents:  59


Document(metadata={'source': './docs/Writing_a_scientific_article__A_step-by-step_guide_for_beginners_.pdf', 'page': 0}, page_content='Research  paper\nWriting  a  scientiﬁc  article:  A  step-by-step  guide  for  beginners\nF.  Ecarnot *,  M.-F.  Seronde,  R.  Chopard,  F.  Schiele,  N.  Meneveau\nEA3920,  Department  of  Cardiology,  University  Hospital  Jean-Minjoz,  3,  Boulevard  Fleming,  25000  Besanc ¸on,  France\n1.  Background\nEvery  researcher  has  been  face  to  face  with  a  blank  page  at  some\nstage  of  their  career,  wondering  where  to  start  and  what  to  write\nﬁrst.  Describing  one’s  research  work  in  a  format  that  is\ncomprehensible  to  others,  and  acceptable  for  publication  is  no\neasy  task.  When  you  invest  a  lot  of  time,  energy  and  often  money  in\nyour  research,  you  become  intimately  and  emotionally  involved.\nNaturally,  you  are  convinced  of  the  value  of  your  research,  and  of\nits  importance  for  the  sci

## Initialize text vectorization

In [12]:
embedding = HuggingFaceEmbeddings()

2024-09-11 15:51:31 - Use pytorch device_name: cuda
2024-09-11 15:51:31 - Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2


## Create vector database and retriever

In [14]:
vector_db = Chroma.from_documents(documents=docs, embedding=embedding)

retriever = vector_db.as_retriever()

QUERY = "How to write abstract?"
result = retriever.invoke(QUERY)

print("Number of relevant documents: ", len(result))
result[0]

Number of relevant documents:  4


Document(metadata={'page': 4, 'source': './docs/Writing_a_scientific_article__A_step-by-step_guide_for_beginners_.pdf'}, page_content='preparation.\nThere  are  a  few  main  points  to  remember  for  the  preparation  of\nthe  abstract,  but  space  is  limited,  so  you  must  keep  it  short.  The\nmain  pointers  for  the  abstract  are  outlined  in  Table  4.  If  you  have\ngiven  sufﬁcient  time  and  thought  to  preparing  your  project,  and\nwriting  the  resulting  article,  the  preparation  of  the  abstract  should\nnot  be  time-consuming.  You  will  easily  ﬁnd  a  sentence  or  two  in\nthe  introduction  that  can  be  re-used  in  the  abstract  (perhaps  with\nsome  shortening  necessary).  Similarly,  the  results  will  be  mainly\ncopy-and-pasted  from  the  results  section  of  the  article.  The\nconclusion  can  be  formulated  as  the  main  take-home  message  to\ncome  out  of  your  work.  Indeed,  the  hardest  part  of  the  abstract  is\noften  sho

## Load LLMs (Vicuna)

In [16]:
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

MODEL_NAME = "microsoft/phi-2"

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, quantization_config=nf4_config, low_cpu_mem_usage=True
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

model_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    pad_token_id=tokenizer.eos_token_id,
    device_map="auto",
)

llm = HuggingFacePipeline(
    pipeline=model_pipeline,
)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Downloading shards: 100%|██████████| 2/2 [05:53<00:00, 176.98s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:04<00:00,  2.29s/it]


In [18]:
from pprint import pprint

prompt = hub.pull("rlm/rag-prompt")


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

USER_QUESTION = "How to write abstract?"
output = rag_chain.invoke(USER_QUESTION)
pprint(output)

('Human: You are an assistant for question-answering tasks. Use the following '
 "pieces of retrieved context to answer the question. If you don't know the "
 "answer, just say that you don't know. Use three sentences maximum and keep "
 'the answer concise.\n'
 'Question: How to write abstract? \n'
 'Context: preparation.\n'
 'There  are  a  few  main  points  to  remember  for  the  preparation  of\n'
 'the  abstract,  but  space  is  limited,  so  you  must  keep  it  short.  '
 'The\n'
 'main  pointers  for  the  abstract  are  outlined  in  Table  4.  If  you  '
 'have\n'
 'given  sufﬁcient  time  and  thought  to  preparing  your  project,  and\n'
 'writing  the  resulting  article,  the  preparation  of  the  abstract  '
 'should\n'
 'not  be  time-consuming.  You  will  easily  ﬁnd  a  sentence  or  two  in\n'
 'the  introduction  that  can  be  re-used  in  the  abstract  (perhaps  '
 'with\n'
 'some  shortening  necessary).  Similarly,  the  results  will  be  mainly\n'
 'cop