In [1]:
from langchain_community.document_loaders import JSONLoader

# Read the local JSON file as a whole, without any jq filtering.
# - jq_schema="." is the identity filter, so the entire JSON value is selected.
# - text_content=False is needed because the selected value is a JSON array
#   rather than a plain string; the loader stores it JSON-serialized in
#   page_content (see the recorded output of this cell).
loader = JSONLoader(
    file_path="projects_data.json",
    jq_schema=".",
    text_content=False,
)

# load() yields langchain Document objects, not raw dicts or lists.
docs = loader.load()

# Show the type of the first document, then the document itself; its
# page_content is the whole project array serialized as a single string.
print(type(docs[0]), docs[0], sep="\n")


<class 'langchain_core.documents.base.Document'>
page_content='[{"title": "Personal Portfolio Website", "start_date": "Jan 2025", "end_date": "Present", "association": "Associated with University of California, Davis", "description": "N/A", "skills": ["React.js", "Next.js", "TypeScript", "Tailwind CSS"]}, {"title": "GitHub Issue Ticket Bot", "start_date": "Feb 2025", "end_date": "Feb 2025", "association": "Associated with University of California, Davis", "description": "Created a bot that helps match users' preferences to Issue Tickets they want using LLMs", "skills": "N/A"}, {"title": "Movie Recommendation System", "start_date": "Oct 2024", "end_date": "Dec 2024", "association": "Associated with University of California, Davis", "description": "N/A", "skills": ["PyTorch", "Variational Autoencoders (VAEs)", "Python (Programming Language)"]}, {"title": "Dog Breed Image Classification", "start_date": "Mar 2024", "end_date": "Mar 2024", "association": "Associated with University of Calif

In [2]:
import json
from langchain_core.documents import Document

# The first document's page_content is the JSON text of the whole file
# (a serialized list of project objects); parse it back into Python objects.
serialized_projects = docs[0].page_content
raw_data = json.loads(serialized_projects)


In [3]:
# Option 1: turn every project entry into one JSON string of its own.
# ensure_ascii=False leaves non-ASCII characters unescaped in the output.
doc_strings = [
    json.dumps(project, ensure_ascii=False)
    for project in raw_data
]


In [4]:
# Inspect the result: prints the entire list as one Python list repr,
# with one JSON string per project.
print(doc_strings)

['{"title": "Personal Portfolio Website", "start_date": "Jan 2025", "end_date": "Present", "association": "Associated with University of California, Davis", "description": "N/A", "skills": ["React.js", "Next.js", "TypeScript", "Tailwind CSS"]}', '{"title": "GitHub Issue Ticket Bot", "start_date": "Feb 2025", "end_date": "Feb 2025", "association": "Associated with University of California, Davis", "description": "Created a bot that helps match users\' preferences to Issue Tickets they want using LLMs", "skills": "N/A"}', '{"title": "Movie Recommendation System", "start_date": "Oct 2024", "end_date": "Dec 2024", "association": "Associated with University of California, Davis", "description": "N/A", "skills": ["PyTorch", "Variational Autoencoders (VAEs)", "Python (Programming Language)"]}', '{"title": "Dog Breed Image Classification", "start_date": "Mar 2024", "end_date": "Mar 2024", "association": "Associated with University of California, Davis", "description": ">= 90% Model Accuracy", 

In [5]:
from langchain_core.documents import Document

# Wrap every serialized project in a Document that carries text only
# (no metadata). Note: In [7] rebinds `documents` to a list of Documents
# that also carry metadata, replacing this one.
documents = [
    Document(page_content=project_json)
    for project_json in doc_strings
]


In [6]:
# Sanity check: the list elements are langchain Document objects.
# Left as a bare expression so the notebook displays the result.
type(documents[0])

langchain_core.documents.base.Document

In [7]:
# Project fields copied into Document.metadata; a key missing from a project
# falls back to the empty string.
metadata_fields = ("title", "start_date", "end_date", "association")

# One Document per project: the full project serialized as JSON for the text,
# plus the selected fields as metadata. This rebinds `documents`.
documents = [
    Document(
        page_content=json.dumps(project, ensure_ascii=False),
        metadata={field: project.get(field, "") for field in metadata_fields},
    )
    for project in raw_data
]


In [8]:
from dotenv import load_dotenv
import os

# Load environment variables through python-dotenv. The call is left as the
# cell's last expression, so its return value is displayed.
# NOTE(review): the recorded output is False — presumably no .env file or
# variables were found; confirm whether this cell is still needed.
# NOTE(review): `os` is not used by any cell visible in this section.
load_dotenv()

False

In [9]:
from langchain_ollama import ChatOllama

# Chat model wrapper configured for the "llama3.1" model.
# NOTE(review): presumably requires a running Ollama server with this model
# pulled — confirm in the setup instructions.
llm = ChatOllama(model="llama3.1")

In [10]:
from langchain_ollama import OllamaEmbeddings

# Embedding wrapper configured with the same "llama3.1" model name that the
# chat model uses.
# NOTE(review): llama3.1 is primarily a generative chat model; a dedicated
# embedding model may give better retrieval quality — confirm this choice.
embeddings = OllamaEmbeddings(model="llama3.1")

In [11]:
from langchain_core.vectorstores import InMemoryVectorStore

# Create an in-memory vector store that uses `embeddings`.
# No documents are added to the store in this cell.
vector_store = InMemoryVectorStore(embeddings)

In [12]:
# Confirms the store object exists. This prints only the default object repr
# (class path and memory address), not the store's contents.
print(vector_store)

<langchain_core.vectorstores.in_memory.InMemoryVectorStore object at 0x10962b500>
