In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Source PDFs (relative to the notebook's working directory) that form the knowledge base
cv_doc_path, linkedin_doc_path = (
    "docs/CV_june_24.pdf",
    "docs/LinkedIn Profile as of 20-25.pdf",
)
doc_paths = [cv_doc_path, linkedin_doc_path]

# Load documents
def load_docs(doc_paths):
    """Load every PDF in ``doc_paths`` and return all loaded documents in one flat list.

    Each path is opened with ``PyPDFLoader``; the documents each loader returns
    are concatenated in the order the paths were given.
    """
    return [
        page
        for pdf_path in doc_paths
        for page in PyPDFLoader(pdf_path).load()
    ]

# Load all pages from the source PDFs
documents = load_docs(doc_paths)

# Recursive character splitter: chunks of up to 500 characters with 50 characters
# of overlap. It splits on separators and length, not on semantic similarity.
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)

# Apply splitter to documents
split_docs = splitter.split_documents(documents)

# Embed the chunks and build the in-memory FAISS index
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
vector_store = FAISS.from_documents(split_docs, embeddings)

# Persist the index to disk. The later cells call FAISS.load_local("faiss_index", ...),
# which cannot work after a kernel restart unless this directory has been written first.
vector_store.save_local("faiss_index")

# Quick retrieval sanity check against the freshly built index
retrieved_docs = vector_store.similarity_search("Where did oli go University")
print(retrieved_docs)


  from .autonotebook import tqdm as notebook_tqdm


[Document(id='9904b230-f3b8-40c4-9ff7-eafef507e661', metadata={'source': 'docs/LinkedIn Profile as of 20-25.pdf', 'page': 2, 'page_label': '3'}, page_content='foundation and transformative experiences I gained at Bristol, for which I am immensely grateful. \nPost Aug/2023 \nI have recently returned from the European Universities Futsal Championship in Split, Croatia, where \nI had the privilege to represent the University of Bristol on an international stage. The experience \nwas both; thoroughly enjoyable and one from which I learned a lot. \nExperience: \nSSE \nData and Analytics GraduateData and Analytics Graduate'), Document(id='2545a0cc-d8de-4aa0-8ed9-c8cc4c1a231e', metadata={'source': 'docs/LinkedIn Profile as of 20-25.pdf', 'page': 1, 'page_label': '2'}, page_content='Playing for SUFC was incredibly enjoyable and offered me an outlook to make many friends outside \nof my course.  \n \nI’m deeply grateful to the University of Southampton and ECS for their incredible support, and 

In [None]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_huggingface.llms import HuggingFacePipeline
from langchain_core.prompts import PromptTemplate

# Embedding model used to encode queries against the saved index
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)

# Read the FAISS index from the "faiss_index" folder.
# NOTE(review): the flag below opts in to deserializing the stored index — only load
# an index file you trust.
vector_store = FAISS.load_local(
    folder_path="faiss_index",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

# Prompt with two placeholders: {question} and the retrieved {context}
prompt_template = PromptTemplate.from_template(
    template="""
    You are an assistant for answering questions about Oliver Simmonds (Oli). 
    Use the following pieces of retrieved context to answer the question. 
    If you don't know the answer, just say that you don't know. 
    Use three sentences maximum and keep the answer concise.
    Question: {question} 
    Context: {context} 
    Answer:
    """
)

# GPT-2 text-generation pipeline, capped at 10 newly generated tokens
llm = HuggingFacePipeline.from_model_id(
    model_id="gpt2", task="text-generation", pipeline_kwargs={"max_new_tokens": 10}
)
def ai_response(question):
    """Answer ``question`` from the indexed documents and return only the generated text.

    Retrieves the most similar chunks from ``vector_store``, fills
    ``prompt_template`` with them, runs ``llm`` on the resulting prompt and
    removes the prompt from the model output.

    Args:
        question: Natural-language question to answer.

    Returns:
        The generated answer with leading/trailing whitespace removed.
    """
    context = "\n\n".join(doc.page_content for doc in vector_store.similarity_search(question))
    messages = prompt_template.invoke({"question": question, "context": context})
    response = llm.invoke(messages)

    # A text-generation pipeline may echo the prompt in front of the completion.
    # Cut off the exact prompt when it is present: unlike splitting on the first
    # "Answer:", this stays correct if the question or the retrieved context
    # happens to contain that marker (both appear before it in the template).
    prompt_text = messages.to_string()
    if response.startswith(prompt_text):
        return response[len(prompt_text):].strip()

    # Prompt not echoed verbatim: keep what follows the last "Answer:" marker, or the
    # whole response when there is none (split("Answer:")[1] raised IndexError here).
    return response.rpartition("Answer:")[2].strip()

# Run the chain once on a sample question and keep the answer for inspection.
response = ai_response('what university did oli go to')

In [13]:
# Bare expression so the notebook renders the stored answer as the cell output.
response

'Question: • What degree of computer science was'

In [5]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_huggingface.llms import HuggingFacePipeline
from langchain_core.prompts import PromptTemplate
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Query-side embedding model, then the FAISS index stored in the "faiss_index" folder.
# NOTE(review): allow_dangerous_deserialization opts in to deserializing the stored
# index — only load an index file you trust.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)
vector_store = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,
)

# Prompt filled with the user's {question} and the retrieved {context}
prompt_template = PromptTemplate.from_template(
    template="""
    You are an assistant for answering questions about Oliver Simmonds (Oli). 
    Use the following pieces of retrieved context to answer the question. 
    If you don't know the answer, just say that you don't know. 
    Use three sentences maximum and keep the answer concise.
    Question: {question} 
    Context: {context} 
    Answer:
    """
)

# Wrap a transformers text2text pipeline (flan-t5-large) as a LangChain LLM
pipe = pipeline(task="text2text-generation", model="google/flan-t5-large")
llm = HuggingFacePipeline(pipeline=pipe)

def ai_response(question):
    """Answer ``question`` using chunks retrieved from ``vector_store`` as context.

    The retrieved chunks' text is joined with blank lines, substituted into
    ``prompt_template`` together with the question, and the filled prompt is
    passed to ``llm``; whatever ``llm`` returns is handed back unchanged.
    """
    hits = vector_store.similarity_search(question)
    context_block = "\n\n".join([hit.page_content for hit in hits])
    prompt = prompt_template.invoke({"question": question, "context": context_block})
    return llm.invoke(prompt)

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")


In [6]:
# Ask the sample question through ai_response, then echo the returned text as the cell output.
response = ai_response("What university did oli go to")
response



'University of Bristol'