In [1]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain import PromptTemplate
from langchain.document_loaders import PyPDFLoader
from langchain.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
from langchain.vectorstores import Chroma
import chromadb

import sys
import os


class SuppressStdout:
    def __enter__(self):
        self._original_stdout = sys.stdout
        self._original_stderr = sys.stderr
        sys.stdout = open(os.devnull, 'w')
        sys.stderr = open(os.devnull, 'w')

    def __exit__(self, exc_type, exc_val, exc_tb):
        sys.stdout.close()
        sys.stdout = self._original_stdout
        sys.stderr = self._original_stderr

# Load the PDF and convert it to text
loader = PyPDFLoader("/workspaces/codespaces-blank/pdfs2/Nuno_CV.pdf")
documents = loader.load()

# Split the documents
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
all_splits = text_splitter.split_documents(documents)

# Now you can proceed with your vectorstore creation
with SuppressStdout():
    embeddings = HuggingFaceEmbeddings(model_name="thenlper/gte-small")
    client = chromadb.Client()
    vectorstore = Chroma.from_documents(
        documents=all_splits,
        embedding=embeddings,
        client=client
    )




In [2]:
query = "Tell me about the candidate's CV experience with the Python programming language."

# Prompt
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible and format it in a way that it can be easily read by the user.
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)

# Create a custom callback handler
class MultiLineStreamingCallbackHandler(StreamingStdOutCallbackHandler):
    def __init__(self):
        self.current_line = ""

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        self.current_line += token
        print(token, end="", flush=True)
        
        if token in ['.', '!', '?']:
            print("\n")
            self.current_line = ""

# Initialize the Ollama model with the custom callback handler
llm = Ollama(
    model="phi3",
    callback_manager=CallbackManager([MultiLineStreamingCallbackHandler()]),
    num_ctx=2048
)

# Create the RetrievalQA chain
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)

# Run the query and print the result
print("\nQuestion:", query)
print("\nAnswer:\n")
result = qa_chain({"query": query})
print("\n")  # Add a newline at the end for better readability


Question: Tell me about the candidate's CV experience with the Python programming language.

Answer:



  warn_deprecated(


 The candidate is proficient in using Python for various tasks such as stand-ups, retrospective meetings, developing Django Projects integrating Dash and Plotly dashboards, working with genomics data sources like Clinvar, Ensembl, dbSNP, maintaining ETL pipelines utilizing Pandas and PySpark to implement machine learning algorithms for forecasting Laboratory Growth.



