In [1]:
# Import deps
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter

In [2]:
# Local GGUF weights; the same file backs both the embedding model and the LLM
model_path = "../models/llama-2-7b.Q5_K_M.gguf"
# Question the retrieval pipeline will try to answer
query = "Who is the CEO of OpenAI?"

In [3]:
# Read the raw text file into a list of LangChain Document objects
loader = TextLoader(file_path='../docs/raw.txt')
docs = loader.load()

In [4]:
# Split the loaded document into chunks that will later be embedded.
# NOTE(review): the recorded output of this cell warns that every chunk is longer
# than the requested chunk_size=100, so the size limit is evidently not enforced
# here; presumably the splitter only cuts on its separator — confirm, and consider
# a realistic chunk_size or a different splitter if bounded chunks are required.
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
texts = text_splitter.split_documents(docs)

Created a chunk of size 637, which is longer than the specified 100
Created a chunk of size 412, which is longer than the specified 100
Created a chunk of size 316, which is longer than the specified 100
Created a chunk of size 748, which is longer than the specified 100
Created a chunk of size 411, which is longer than the specified 100
Created a chunk of size 648, which is longer than the specified 100
Created a chunk of size 527, which is longer than the specified 100
Created a chunk of size 308, which is longer than the specified 100
Created a chunk of size 458, which is longer than the specified 100
Created a chunk of size 116, which is longer than the specified 100
Created a chunk of size 595, which is longer than the specified 100
Created a chunk of size 257, which is longer than the specified 100
Created a chunk of size 393, which is longer than the specified 100
Created a chunk of size 434, which is longer than the specified 100
Created a chunk of size 584, which is longer tha

In [5]:
# Number of Document objects returned by the loader (compare with len(texts) in the next cell)
len(docs)

1

In [6]:
# Number of chunks produced by the text splitter
len(texts)

31

In [7]:
# Print the whole list of loaded documents, to compare against a single chunk in the next cell.
# NOTE(review): this dumps the full document text; a truncated preview would keep the output readable.
print(docs)

[Document(page_content='OpenAI is an American artificial intelligence (AI) research organization consisting of the non-profit OpenAI, Inc.[4] registered in Delaware and its for-profit subsidiary OpenAI Global, LLC.[5] OpenAI researches artificial intelligence with the declared intention of developing "safe and beneficial" artificial general intelligence, which it defines as "highly autonomous systems that outperform humans at most economically valuable work".[6] OpenAI has also developed several large language models, such as ChatGPT and GPT-4, as well as advanced image generation models like DALL-E 3,[7] and in the past published open-source models.[8]\n\nThe organization was founded in December 2015 by Ilya Sutskever, Greg Brockman, Trevor Blackwell, Vicki Cheung, Andrej Karpathy, Durk Kingma, Jessica Livingston, John Schulman, Pamela Vagata, and Wojciech Zaremba, with Sam Altman and Elon Musk serving as the initial board members.[9][10][11] Microsoft provided OpenAI Global LLC with 

In [8]:
# Print the first chunk (its page_content followed by its metadata)
print(texts[0])

page_content='OpenAI is an American artificial intelligence (AI) research organization consisting of the non-profit OpenAI, Inc.[4] registered in Delaware and its for-profit subsidiary OpenAI Global, LLC.[5] OpenAI researches artificial intelligence with the declared intention of developing "safe and beneficial" artificial general intelligence, which it defines as "highly autonomous systems that outperform humans at most economically valuable work".[6] OpenAI has also developed several large language models, such as ChatGPT and GPT-4, as well as advanced image generation models like DALL-E 3,[7] and in the past published open-source models.[8]' metadata={'source': './docs/raw.txt'}


In [9]:
# Import deps
from langchain.embeddings import LlamaCppEmbeddings

In [10]:
# Build the llama.cpp embedding model from the GGUF file at model_path.
# NOTE(review): the recorded output is a ValidationError for "./models/..." while the
# config cell sets model_path to "../models/..." — the output looks stale; re-run to confirm.
embeddings = LlamaCppEmbeddings(model_path=model_path)

ValidationError: 1 validation error for LlamaCppEmbeddings
__root__
  Could not load Llama model from path: ./models/llama-2-7b.Q5_K_M.gguf. Received error Model path does not exist: ./models/llama-2-7b.Q5_K_M.gguf (type=value_error)

In [None]:
# Collect the page_content string of every chunk; this plain-string list is
# what gets handed to embeddings.embed_documents further down.
_texts = [chunk.page_content for chunk in texts]

In [None]:
# First chunk as a LangChain Document (page_content plus metadata)
texts[0]

Document(page_content='OpenAI is an American artificial intelligence (AI) research organization consisting of the non-profit OpenAI, Inc.[4] registered in Delaware and its for-profit subsidiary OpenAI Global, LLC.[5] OpenAI researches artificial intelligence with the declared intention of developing "safe and beneficial" artificial general intelligence, which it defines as "highly autonomous systems that outperform humans at most economically valuable work".[6] OpenAI has also developed several large language models, such as ChatGPT and GPT-4, as well as advanced image generation models like DALL-E 3,[7] and in the past published open-source models.[8]', metadata={'source': './docs/raw.txt'})

In [None]:
# The same chunk after conversion: only the page_content string remains
_texts[0]

'OpenAI is an American artificial intelligence (AI) research organization consisting of the non-profit OpenAI, Inc.[4] registered in Delaware and its for-profit subsidiary OpenAI Global, LLC.[5] OpenAI researches artificial intelligence with the declared intention of developing "safe and beneficial" artificial general intelligence, which it defines as "highly autonomous systems that outperform humans at most economically valuable work".[6] OpenAI has also developed several large language models, such as ChatGPT and GPT-4, as well as advanced image generation models like DALL-E 3,[7] and in the past published open-source models.[8]'

In [None]:
# Embed the list of chunk strings with the llama.cpp embedding model.
# NOTE(review): embedded_texts is not referenced by any later cell visible here;
# the Chroma store is built from texts and the embeddings object instead.
embedded_texts = embeddings.embed_documents(_texts)

: 

In [None]:
# Import deps
from langchain.vectorstores import Chroma

In [None]:
# Index the chunks in a Chroma vector store, embedding them with the llama.cpp model
db = Chroma.from_documents(documents=texts, embedding=embeddings)

In [None]:
# Retrieve the single chunk most similar to the query.
# The result is stored under its own name so that `docs` (the loaded source
# document, which the splitting cell reads) is not overwritten; earlier cells
# therefore stay safe to re-run.
similar_docs = db.similarity_search(query, k=1)
similar_docs

[Document(page_content='The company then distributed equity to its employees and partnered with Microsoft,[37] announcing an investment package of $1 billion into the company. Since then, OpenAI systems have run on an Azure-based supercomputing platform from Microsoft.[38][39][40]', metadata={'source': './docs/raw.txt'})]

In [None]:
# Same lookup, but embedding the query explicitly and searching by vector.
# The result is stored under its own name so that `docs` (the loaded source
# document, which the splitting cell reads) is not overwritten.
query_vector = embeddings.embed_query(query)
docs_by_vector = db.similarity_search_by_vector(query_vector, k=1)
docs_by_vector

[Document(page_content='The company then distributed equity to its employees and partnered with Microsoft,[37] announcing an investment package of $1 billion into the company. Since then, OpenAI systems have run on an Azure-based supercomputing platform from Microsoft.[38][39][40]', metadata={'source': './docs/raw.txt'})]

In [None]:
# Import deps
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

In [None]:
# Craft prompt template that works best for our LLM
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say you don't know, don't try to make up an answer.
{context}
Question: {question}
Answer:"""

In [None]:
# Build the prompt object: {context} will receive the chunk retrieved from the
# vector store and {question} will receive the query.
prompt = PromptTemplate.from_template(template)
prompt.input_variables

['context', 'question']

In [None]:
# Fetch the best-matching chunk for the query; its text fills the prompt's {context}.
# NOTE(review): the recorded output for this cell is a passage about the Microsoft
# partnership that names no CEO, so the retrieved context may not support the
# answer — consider retrieving more than one chunk.
similar_doc = db.similarity_search(query=query, k=1)
context = similar_doc[0].page_content
context

'The company then distributed equity to its employees and partnered with Microsoft,[37] announcing an investment package of $1 billion into the company. Since then, OpenAI systems have run on an Azure-based supercomputing platform from Microsoft.[38][39][40]'

In [None]:
# Import deps
from langchain.llms import LlamaCpp

In [None]:
# Load the text-generation model through llama.cpp, from the same GGUF file
# (model_path) that was given to LlamaCppEmbeddings
llm = LlamaCpp(model_path=model_path)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


In [None]:
# Fill the prompt with the retrieved context and the question, then let the LLM
# complete the text after "Answer:"
query_llm = LLMChain(llm=llm, prompt=prompt)
response = query_llm.run({"context": context, "question": query})
response

' Sam Altman (Co-founder and chairman)[41]\nSamuel Gregg Altman is an American venture capitalist, entrepreneur, and businessperson who has been Chairman at OpenAI since April 2015. He has over 10 years of experience in the private equity industry and has worked at a number of companies including Lightspeed Ventures, Sequoia Capital, Y Combinator and StartX.[42]\nSam is also an active investor with the firm. He was previously a co-founder of PayPal and has led investments into companies such as Reddit Inc., Airbnb Inc., Twitter Inc., Pandora Media Inc., Facebook Inc., Slack Technologies Inc., Stripe, Uber Technologies Inc., Square Inc., Palantir Technologies Inc.\nOpenAI is a research company that develops open-source artificial intelligence (AI) technologies with the goal of creating advanced AI systems and enabling their deployment in real-world applications.[4] The OpenAI project was founded in December 2015 by Sam Altman, Elon Musk, Peter Thiel and other entrepreneurs,[5][6'