## Load savas.me web pages

In [None]:
from langchain_community.document_loaders import WebBaseLoader

# Pages of the savas.me site to fetch (home page plus four sub-pages).
savas_urls = [
    "https://savas.me",
    "https://savas.me/about",
    "https://savas.me/cv",
    "https://savas.me/contact",
    "https://savas.me/publications",
]
loader = WebBaseLoader(savas_urls)
docs = loader.load()

## Load LinkedIn profile data from CSV files

In [None]:
import glob

from langchain_community.document_loaders import CSVLoader

# Start from an empty list and append the documents loaded from each
# CSV file matched under ../savas.
docs = []
for csv_path in glob.glob("../savas/*.csv"):
    loader = CSVLoader(
        file_path=csv_path,
        encoding="utf-8",
        csv_args={"delimiter": ","},
    )
    docs.extend(loader.load())

## Load the JSON documents

In [None]:
import glob

from langchain_community.document_loaders import JSONLoader

# The JSON files are listed explicitly rather than globbing the directory.
docs = []
filenames = [
    f"../savas/{stem}.json"
    for stem in ("Profile", "Positions", "Education", "Email Addresses")
]
for json_path in filenames:
    # Each file is read with the ".[]" jq schema and text_content=False.
    loader = JSONLoader(file_path=json_path, jq_schema=".[]", text_content=False)
    docs.extend(loader.load())

## Set up the LLM

In [None]:
# Name of the Ollama model that the LLM wrapper is created with.
model = "digital_twin"

# Persona instructions for answering as Savas in the first person.
# NOTE(review): this string is not referenced by any code visible in this
# notebook section -- confirm whether it should be passed to the model.
system_template = """
You are Savas Parastatidis.
You always respond in the first person as if all the questions were about Savas.
Use one sentence answers when possible. Be brief.
It is ok to say "I don't know" if you don't know the answer.
Only use information that you find in the context.
For example, if you are asked for Savas' email address, look into the context for the answer.
If you can't find it there, then say "I don't know".
"""

# Per-question prompt with two placeholders: {context} and {input}.
prompt_template = """
<context>
{context}
</context>

Question: {input}
"""

from os import system
from typing import Any, Dict, List
from langchain.callbacks.base import BaseCallbackHandler
from langchain.llms import Ollama
from langchain.prompts import PromptTemplate

class MyPromptHandler(BaseCallbackHandler):
    """Callback handler that echoes the prompts handed to the LLM."""

    def on_llm_start(
        self,
        serialized: Dict[str, Any],
        prompts: List[str],
        **kwargs: Any,
    ) -> Any:
        """Print the list of prompts received when the LLM starts."""
        print(prompts)

# Prompt object built from the template text and its two placeholders.
prompt = PromptTemplate(
    input_variables=["context", "input"],
    template=prompt_template,
)

# The handler is registered as a callback so prompts are printed on LLM start.
prompt_handler = MyPromptHandler()
llm = Ollama(callbacks=[prompt_handler], model=model)

## Set up a retrieval-based chain with a vector db

In [None]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents and index the pieces in a FAISS store,
# embedding them with the "mistral" Ollama model.
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)
embeddings = OllamaEmbeddings(model="mistral")
vector = FAISS.from_documents(documents, embeddings)

# set up retrieval
from langchain.chains import create_retrieval_chain

# The retriever supplies documents to the stuff-documents chain.
retriever = vector.as_retriever()
doc_chain = create_stuff_documents_chain(llm, prompt)
chain = create_retrieval_chain(retriever, doc_chain)


## Set up a chain with direct context

In [None]:
from langchain.chains.combine_documents import create_stuff_documents_chain

# Stuff-documents chain built from the LLM and prompt, with no retriever;
# note that this rebinds the global `chain`.
chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

In [None]:
# Hand-written questions about basic facts.
_basic_questions = [
    "What is your current job?",
    "What is your email address?",
    "What is your phone number?",
    "What is your date of birth?",
    "When did you work at microsoft?",
    "What is your last name?",
    "Where do you work now?",
    "What do you currently do at Meta?",
    "How can I contact you?",
]

# Microsoft copilot-generated questions (prompt: "list 10 questions that you would ask to a person about their professional life")
_professional_life_questions = [
    "What is your current role and how long have you been in this position?",
    "Can you describe a typical day at work for you?",
    "What motivated you to choose your current career path?",
    "What are the most rewarding aspects of your job?",
    "What challenges have you faced in your profession and how did you overcome them?",
    "How do you stay updated with the trends and changes in your industry?",
    "Can you share a significant achievement or project you’ve worked on?",
    "How do you balance your professional and personal life?",
    "Where do you see yourself in the next five years professionally?",
    "What advice would you give to someone starting out in your field?",
]

# Microsoft copilot-generated questions (prompt: "list 10 questions that you would ask a person about their professional profile on LinkedIn")
_linkedin_profile_questions = [
    "How does your LinkedIn headline reflect your current professional role?",
    "What key skills and endorsements can be found on your profile?",
    "Can you walk me through your work experience as listed on LinkedIn?",
    "How do you approach networking and building connections on LinkedIn?",
    "What type of content do you typically share or engage with on LinkedIn?",
    "How has LinkedIn helped you in your professional development?",
    "What strategies do you use to make your LinkedIn profile stand out?",
    "Have you received any recommendations on LinkedIn, and what do they say about your work ethic and professional contributions?",
    "How do you utilize LinkedIn’s features, such as groups or LinkedIn Learning, for career growth?",
    "What advice would you give to someone optimizing their LinkedIn profile for job searching or networking?",
]

# Microsoft copilot-generated questions (prompt: "list 10 questions that you would expect a person to answer based on information found in their CV or LinkedIn profile")
_cv_questions = [
    "What are your main skills and qualifications for the position you are applying for?",
    "How do you measure your performance and achievements in your previous or current roles?",
    "What are some of the projects or tasks that you are most proud of and why?",
    "How do you approach problem-solving and decision-making in your work?",
    "How do you communicate and collaborate with others in your team or organization?",
    "How do you handle feedback and criticism in your work?",
    "How do you balance competing priorities and manage your time effectively?",
    "What are some of the professional goals that you have set for yourself and how are you pursuing them?",
    "What are some of the challenges or difficulties that you have faced or overcome in your work?",
    "How do you cope with stress and pressure in your work environment?",
]

# Full question list, in the same order as the groups above.
questions = (
    _basic_questions
    + _professional_life_questions
    + _linkedin_profile_questions
    + _cv_questions
)

In [None]:
# retrieval-based
from typing import Any, List, Dict
# Build the retrieval chain explicitly instead of relying on the global
# `chain`: the "direct context" cell rebinds `chain` to a stuff-documents
# chain, which needs a "context" input and whose result is not indexed by
# "answer". `retriever` and `doc_chain` come from the retrieval setup cell.
retrieval_chain = create_retrieval_chain(retriever, doc_chain)

# Only the first ten questions are asked in the retrieval-based run.
for q in questions[:10]:
    response = retrieval_chain.invoke({"input": q})
    print(q)
    print(response["answer"])
    print()

In [None]:
from langchain_core.output_parsers import StrOutputParser

# Stuff-documents chain with an explicit string output parser.
chain = create_stuff_documents_chain(
    llm,
    prompt,
    output_parser=StrOutputParser(),
)

# in-context-based: the whole `docs` list is passed as context for every question
for q in questions:
    response = chain.invoke({"context": docs, "input": q})
    # Question, answer, then a blank separator line.
    print(q, response, "", sep="\n")