## Load Data

In [None]:
from langchain.document_loaders import JSONLoader
from pathlib import Path


# Absolute, machine-specific path to the articles JSON file.
data_path = Path(
    "/mnt/c/Users/davis/OneDrive/Documents/grabeklis/data/lsm_articles_all_20230924.json"
)

# The jq expression `.[]` emits every element of the top-level JSON value as
# its own record; text_content=False means records need not be plain strings.
loader = JSONLoader(file_path=data_path, jq_schema=".[]", text_content=False)

# Read the file and build the document list consumed by the later cells.
documents = loader.load()

In [None]:
# Display the first loaded document as a quick check of the loader output.
documents[0]

## Embed Data

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings

# Sentence-transformers checkpoint used to embed the documents.
model_name = "sentence-transformers/all-mpnet-base-v2"

# Load the model on the CPU.
model_kwargs = {"device": "cpu"}

# Options forwarded when encoding text; embeddings are not normalized.
encode_kwargs = {"normalize_embeddings": False}

# Embedding wrapper shared by the vector-store cells below.
hf_embedder = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

In [None]:
from langchain.vectorstores import FAISS

# Embed and index only the first 10 documents in a FAISS vector store.
# NOTE(review): presumably a small sample for quick experimentation --
# widen or drop the slice to index the full data set.
db = FAISS.from_documents(
    documents=documents[:10],
    embedding=hf_embedder,
)

In [None]:
# Display the vector store object to confirm it was created.
db

## Retriever

Adapted from the LangChain retrievers documentation: https://python.langchain.com/docs/modules/data_connection/retrievers/

In [None]:
# Wrap the vector store in a retriever using its default settings.
retriever = db.as_retriever()

In [None]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

# TODO: Resolve API key stuff
# NOTE(review): OpenAI() is constructed without an explicit key here, so the
# credentials presumably have to come from the environment -- confirm.

# Question-answering chain: the "stuff" chain type places the retrieved
# documents together into a single prompt for the LLM.
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=retriever,
)