In [1]:
ollama_model_name = "llama3.1"

In [2]:
from langchain_community.llms import Ollama

llm = Ollama(model=ollama_model_name, temperature=0.1)

In [3]:
llm.invoke("why is the sky blue?")

"The sky appears blue to us because of a phenomenon called scattering, which occurs when sunlight interacts with the tiny molecules of gases in the atmosphere. Here's a simplified explanation:\n\n1. **Sunlight enters Earth's atmosphere**: When the sun shines, it emits a wide range of electromagnetic radiation, including visible light.\n2. **Light scatters off gas molecules**: As sunlight travels through the atmosphere, it encounters tiny molecules of gases like nitrogen (N2) and oxygen (O2). These molecules are much smaller than the wavelength of light, so they scatter the light in all directions.\n3. **Short wavelengths scattered more**: The shorter wavelengths of visible light, such as blue and violet, are scattered more than the longer wavelengths, like red and orange. This is because the smaller gas molecules are more effective at scattering the shorter wavelengths.\n4. **Blue light reaches our eyes**: As a result of this scattering, the blue light is distributed throughout the atm

### Loading the data

In [32]:
## Loading the entire directory

from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders import TextLoader

loader = DirectoryLoader('../data/', glob="**/*.txt", loader_cls=TextLoader)

In [33]:
docs = loader.load()

In [34]:
## Loading by pointing the document

from langchain_community.document_loaders import TextLoader

loader = TextLoader("../data/data.txt")
docs = loader.load()

In [35]:
len(docs)

1

In [22]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)

In [None]:
from langchain_community.embeddings import OllamaEmbeddings

embeddings = OllamaEmbeddings(model=ollama_model_name)

In [23]:
from langchain_community.vectorstores import FAISS
import faiss

vector = FAISS.from_documents(documents, embeddings)

In [24]:

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

if ollama_model_name == "phi" or ollama_model_name == "phi:chat":
    # Phi-2 prompt is less flexible
    prompt_template = """Instruct: With this context\n\n{context}\n\nQuestion: {input}\nOutput:"""

else:
    prompt_template = """ Answer the question based on the context below. If the question cannot be answered using the information provided answer with "I don't know
    <context>
    {context}
    </context>

    Question: {input}"""

prompt = ChatPromptTemplate.from_template(prompt_template)
document_chain = create_stuff_documents_chain(llm, prompt)

In [25]:
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), config={'run_name': 'format_inputs'})
| ChatPromptTemplate(input_variables=['context', 'input'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], template=' Answer the question based on the context below. If the question cannot be answered using the information provided answer with "I don\'t know\n    <context>\n    {context}\n    </context>\n\n    Question: {input}'))])
| Ollama(model='llama3.1', temperature=0.1)
| StrOutputParser(), config={'run_name': 'stuff_documents_chain'})

In [26]:
from langchain.chains import create_retrieval_chain

retriever = vector.as_retriever()
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [27]:
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x121eace10>), config={'run_name': 'retrieve_documents'})
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), config={'run_name': 'format_inputs'})
            | ChatPromptTemplate(input_variables=['context', 'input'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], template=' Answer the question based on the context below. If the question cannot be answered using the information provided answer with "I don\'t know\n    <context>\n    {context}\n    </context>\n\n    Question: {input}'))])
            | Ollama(model='llama3.1', temperature=0.1)
            | StrOutpu

In [28]:
TestQuestions = [
    "What did the author do growing up",
    "What computer did he convince his father to buy?",
    "What did he do for his undergraduate thesis ?"
]

In [29]:
from langchain.globals import set_debug

# set_debug(True)

In [30]:
qa_pairs = []

for index, question in enumerate(TestQuestions, start=1):
    question = question.strip() # Clean up

    print(f"\n{index}/{len(TestQuestions)}: {question}")

    response = retrieval_chain.invoke({"input": question})

    qa_pairs.append((question.strip(), response["answer"])) # Add to our output array


1/3: What did the author do growing up

2/3: What computer did he convince his father to buy?

3/3: What did he do for his undergraduate thesis ?


In [31]:
for index, (question, answer) in enumerate(qa_pairs, start=1):
    print(f"{index}/{len(qa_pairs)} {question}\n\n{answer}\n\n--------\n")

1/3 What did the author do growing up

The text doesn't explicitly state what the author did growing up, but based on the context and the mention of "hack" as one of the things they wanted to do when starting YC, it can be inferred that the author was interested in programming or computer-related activities from a young age. However, there is no specific information about their childhood or teenage years provided in the text.

--------

2/3 What computer did he convince his father to buy?

Unfortunately, the text doesn't mention what computer he convinced his father to buy. It does mention that he was a graduate student at the time and had a "grad student lifestyle", but it doesn't specify what kind of computer or technology he was using.

--------

3/3 What did he do for his undergraduate thesis ?

Unfortunately, the provided text does not mention what he did for his undergraduate thesis. However, it is mentioned that he later became a graduate student (presumably at Stanford) and sta