In [None]:
# Install the libraries this notebook imports (langchain, chromadb) together
# with sentence-transformers/transformers.
# NOTE(review): presumably the latter two back HuggingFaceEmbeddings — confirm.
!pip install langchain chromadb sentence-transformers transformers
# Force a from-source CMake build of llama-cpp-python with the cuBLAS option
# switched on.
# NOTE(review): presumably this is what allows `n_gpu_layers` in the LlamaCpp
# cell to offload work to the GPU — confirm the flag name against the
# llama-cpp-python version that actually gets installed.
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python

In [6]:
import json

# Load the course catalogue. Each entry is a dict whose "doc" key holds one
# course record serialised as a JSON string (see the printed sample).
# The encoding is passed explicitly so decoding does not depend on the
# platform's default locale.
with open("./data/courses.json", encoding="utf-8") as f:
    data = json.load(f)

# Print the first record as a quick sanity check of the file's structure.
print(data[0])

{'doc': '{"code": "AE102", "title": "Data Analysis and Interpretation", "department": {"name": "Aerospace Engineering", "slug": "aerospace-engineering"}, "description": "", "credits": 6, "semester": [{"year": 2023, "season": "autumn", "timetable": []}, {"year": 2023, "season": "spring", "timetable": []}], "tags": ["Theory"]}'}


In [None]:
from langchain.embeddings import HuggingFaceEmbeddings

# Embedding model used to vectorise the course documents.
EMBEDDING_MODEL_NAME = "jinaai/jina-embeddings-v2-base-en"
# Cap the maximum input sequence length at 2048; the original author notes
# that 8192 requires a ton of space/memory.
MAX_SEQ_LENGTH = 2048

embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
embeddings.client.max_seq_length = MAX_SEQ_LENGTH

In [None]:
import chromadb
from langchain.vectorstores import Chroma

# The "doc" field of every record is the raw text that gets embedded and
# indexed.
texts = [item["doc"] for item in data]

# Build the Chroma vector store from the texts, then expose it as a retriever
# configured with k=5 (five documents fetched per query).
vectorstore = Chroma.from_texts(texts=texts, embedding=embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

In [None]:
!wget https://huggingface.co/TheBloke/openchat_3.5-GGUF/resolve/main/openchat_3.5.Q5_0.gguf
# !wget https://huggingface.co/TheBloke/Mistral-7B-v0.1-GGUF/resolve/main/mistral-7b-v0.1.Q5_0.gguf
# !wget https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q5_K_M.gguf

In [None]:
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp

# Stream generated tokens to stdout as they are produced.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Must point at the GGUF file fetched by the download cell. The active
# download is openchat_3.5.Q5_0.gguf, and wget saves it into the working
# directory, so the path is relative instead of hard-coding Colab's "/content".
# Change this together with the wget line when switching models.
MODEL_PATH = "./openchat_3.5.Q5_0.gguf"

llm = LlamaCpp(
    model_path=MODEL_PATH,
    temperature=0.5,
    n_gpu_layers=40,  # layers to offload to the GPU (needs the cuBLAS build)
    n_batch=512,  # prompt tokens processed per batch
    n_ctx=8192,  # context window size in tokens
    callback_manager=callback_manager,
    verbose=True,  # Verbose is required to pass to the callback manager
)

In [None]:
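# Optional custom prompt templates (one per model family) which are supposed to give better results than the default RetrievalQA prompt. To use one, uncomment it together with the custom-chain code in the RetrievalQA cell.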
# llama_prompt = """
# [INST]<<SYS>> You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.<</SYS>>

# Question: {question}

# Context: {context}

# Answer: [/INST]
# """

# mistral_prompt = """
# <s> [INST] You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise. [/INST] </s>
# [INST] Question: {question}
# Context: {context}
# Answer: [/INST]
# """

# prompt = mistral_prompt

In [None]:
from langchain.chains import RetrievalQA

# Default question-answering chain: built from `llm` and `retriever` with
# LangChain's stock settings.
chain = RetrievalQA.from_llm(
    llm=llm,
    retriever=retriever,
)

# Alternative: assemble the chain by hand around one of the custom prompts,
# which are supposed to give better results (requires `prompt` to be defined
# in the prompt cell).
# from langchain.chains import LLMChain, StuffDocumentsChain
# from langchain.prompts import PromptTemplate
# llm_chain = LLMChain(llm=llm, prompt=PromptTemplate.from_template(prompt))
# combine_documents_chain = StuffDocumentsChain(
#       llm_chain=llm_chain,
#       document_variable_name="context",
#       document_prompt=PromptTemplate.from_template("{page_content}"),
#     )
# chain = RetrievalQA(combine_documents_chain=combine_documents_chain, retriever=retriever)

In [None]:
# Smoke-test the chain on a single question before the batch run.
sample_query = "What is the code of the fluid mechanics course?"
chain.run({"query": sample_query})

In [None]:
# Load the evaluation questions.
# NOTE(review): the batch cell passes each element straight to the chain as
# the query, so this is presumably a JSON list of strings — confirm.
# The encoding is passed explicitly so decoding does not depend on the
# platform's default locale.
with open("./data/questions.json", encoding="utf-8") as f:
    questions = json.load(f)

In [None]:
# Run every evaluation question through the chain and pair it with the
# model's reply. Results are appended one at a time, so whatever has been
# answered so far stays in `answers` if the run is interrupted.
answers = []
for query_text in questions:
    reply = chain.run({"query": query_text})
    answers.append({"question": query_text, "answer": reply})

In [None]:
from pathlib import Path

# Name the results file after the model file that was actually loaded, rather
# than writing to a literal "<MODEL-NAME>" placeholder that has to be edited by
# hand (and is not a valid file name on Windows).
results_path = Path("./results") / f"{Path(llm.model_path).stem}.json"
# Create the output directory on first run; opening a file for writing inside
# a missing directory raises FileNotFoundError.
results_path.parent.mkdir(parents=True, exist_ok=True)

with open(results_path, "w", encoding="utf-8") as f:
    json.dump(answers, f)