## Defining imports and environment variables

In [1]:
import os
from dotenv import load_dotenv

# Load key/value pairs from a local .env file into the process environment.
# Presumably this supplies the API key for the OpenAI-backed chat model and
# embeddings created in the following cells -- confirm against the .env file.
load_dotenv()

True

## Selecting model

In [2]:
from langchain.chat_models import init_chat_model

# Chat model used by the answer-generation step of the RAG graph.
llm = init_chat_model("gpt-5-nano", model_provider="openai")

## Selecting embedding model

In [3]:
from langchain_openai import OpenAIEmbeddings

# Embedding model shared by indexing (documents) and retrieval (questions).
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large",
)

## Selecting vector store

In [4]:
from langchain_core.vectorstores import InMemoryVectorStore

# In-memory store: contents live only as long as this kernel session.
vector_store = InMemoryVectorStore(embeddings)

### CV RAG

In [5]:
from langchain import hub
from langchain_community.document_loaders import UnstructuredMarkdownLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

In [6]:
import uuid

# Source document for the RAG index.
markdown_path = "./sources/cv.md"
loader = UnstructuredMarkdownLoader(markdown_path)

data = loader.load()
print(f"Total documents: {len(data)}")

# Overlapping chunks so sentences cut at a boundary still appear whole in one chunk.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(data)
print(f"Total chunks: {len(all_splits)}")

# Index chunks under deterministic ids derived from the source path and the
# chunk position. Without explicit ids every run of this cell stores the same
# chunks again under fresh random ids, so re-running (for example after an
# interrupted execution) fills the store with duplicates that then crowd the
# similarity-search results.
chunk_ids = [
    str(uuid.uuid5(uuid.NAMESPACE_URL, f"{markdown_path}#chunk-{position}"))
    for position in range(len(all_splits))
]
_ = vector_store.add_documents(documents=all_splits, ids=chunk_ids)

# Define prompt for question-answering (fetched from the LangChain hub, needs network access)
prompt = hub.pull("rlm/rag-prompt")

class State(TypedDict):
    """State passed between the nodes of the RAG graph."""

    # The user's question; supplied by the caller when the graph is invoked.
    question: str
    # Documents returned by the `retrieve` step and consumed by `generate`.
    context: List[Document]
    # Final model answer, written by the `generate` step.
    answer: str

# Define application steps
def retrieve(state: State):
    """Fetch the stored chunks most similar to the question.

    Reads ``state["question"]`` and returns a partial state update that
    places the matching documents under ``"context"``.
    """
    query = state["question"]
    hits = vector_store.similarity_search(query)
    return {"context": hits}

def generate(state: State):
    """Answer the question using the retrieved documents as context.

    Joins the page content of every document in ``state["context"]`` with
    blank lines, fills the RAG prompt with it and the question, and returns
    the model's reply content under ``"answer"``.
    """
    context_text = "\n\n".join([chunk.page_content for chunk in state["context"]])
    rag_input = {"question": state["question"], "context": context_text}
    reply = llm.invoke(prompt.invoke(rag_input))
    return {"answer": reply.content}

# Assemble the two-step pipeline: START -> retrieve -> generate.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
# The sequence has no entry point of its own; wire START to its first node.
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

KeyboardInterrupt: 

## Testing of the graph

In [None]:
# Smoke test: run the whole retrieve -> generate pipeline on one question and
# print the entire returned state (question, context documents and answer).
question = "Who is Jose Fernando Gonzales?"
response = graph.invoke({"question": question})
print(response)

In [None]:
# Show only the generated answer from the previous invocation.
print(response["answer"])

In [None]:
# Each invocation starts from a fresh state containing only the question, so
# "his" is resolved solely by whatever chunks retrieval returns for this text.
question = "How long is his experience?"
response = graph.invoke({"question": question})
print(response["answer"])

In [None]:
# Narrower variant of the experience question, scoped to a single job title.
question = "How long is his experience as a Technical Solutions Engineer?"
response = graph.invoke({"question": question})
print(response["answer"])

In [None]:
# Probe how the pipeline answers a personal question unrelated to work history.
question = "Is he married?"
response = graph.invoke({"question": question})
print(response["answer"])

In [None]:
# Ask for a summary of recent project work.
question = "What his biggest projects in the recent years?"
response = graph.invoke({"question": question})
print(response["answer"])

In [None]:
# Ask for the list of employers.
question = "What companies has he worked for?"
response = graph.invoke({"question": question})
print(response["answer"])

In [None]:
# Display the full state returned by the most recent invocation (question,
# retrieved context documents and answer) for inspection.
response

---

# Part-by-part testing of modules and adapters

In [1]:
from core.config import Settings

# Instantiate the project settings object; the next cell reads the database
# URL from it via get_database_url().
settings = Settings()

In [2]:
# Verify that the database URL is correctly set from the environment variable.
# The password is masked before printing so that credentials never end up in
# the notebook's saved output.
from urllib.parse import urlsplit, urlunsplit


def _mask_url_password(url: str) -> str:
    """Return ``url`` with the password part of its userinfo replaced by ``***``.

    URLs that carry no password are returned unchanged.
    """
    parts = urlsplit(url)
    if parts.password is None:
        return url
    # netloc is "user:password@host:port"; keep everything except the secret.
    userinfo, _, hostinfo = parts.netloc.rpartition("@")
    username = userinfo.partition(":")[0]
    return urlunsplit(parts._replace(netloc=f"{username}:***@{hostinfo}"))


print(f"Using database URL: {_mask_url_password(str(settings.get_database_url()))}")

Using database URL: postgresql+psycopg://solvin:***@localhost:5432/rag


In [3]:
from modules.rag.adapters import get_llm, get_embeddings, get_vector_store

In [4]:
# Drop any cached result of get_vector_store so the next call builds a new
# store. Presumably the factory is wrapped in functools.lru_cache / cache --
# confirm in modules.rag.adapters.
get_vector_store.cache_clear()

In [6]:
# Obtain the chat model, embedding model and vector store from the project's
# adapter factories; the cells below use these three names.
llm = get_llm()
embeddings = get_embeddings()
vector_store = get_vector_store()

In [None]:
from langchain import hub
from langchain_community.document_loaders import UnstructuredMarkdownLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

In [None]:
import uuid

# Source document for the RAG index.
markdown_path = "./sources/cv.md"
loader = UnstructuredMarkdownLoader(markdown_path)

data = loader.load()
print(f"Total documents: {len(data)}")

# Overlapping chunks so sentences cut at a boundary still appear whole in one chunk.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(data)
print(f"Total chunks: {len(all_splits)}")

# Index chunks under deterministic ids derived from the source path and the
# chunk position, so running this cell again replaces the existing entries
# rather than appending another copy of every chunk.
# NOTE(review): assumes the store returned by get_vector_store() upserts on
# matching ids -- confirm for the configured backend.
chunk_ids = [
    str(uuid.uuid5(uuid.NAMESPACE_URL, f"{markdown_path}#chunk-{position}"))
    for position in range(len(all_splits))
]
_ = vector_store.add_documents(documents=all_splits, ids=chunk_ids)

# Define prompt for question-answering (fetched from the LangChain hub, needs network access)
prompt = hub.pull("rlm/rag-prompt")

class State(TypedDict):
    """State passed between the nodes of the RAG graph."""

    # The user's question; supplied by the caller when the graph is invoked.
    question: str
    # Documents returned by the `retrieve` step and consumed by `generate`.
    context: List[Document]
    # Final model answer, written by the `generate` step.
    answer: str

# Define application steps
def retrieve(state: State):
    """Run a similarity search for the question against ``vector_store``.

    Returns a partial state update with the matching documents stored
    under ``"context"``.
    """
    user_question = state["question"]
    matching_docs = vector_store.similarity_search(user_question)
    return {"context": matching_docs}

def generate(state: State):
    """Produce an answer from the question and the retrieved documents.

    The page contents of ``state["context"]`` are joined with blank lines,
    passed to the RAG prompt together with the question, and the content of
    the model's reply is returned under ``"answer"``.
    """
    passages = [doc.page_content for doc in state["context"]]
    prompt_input = {"question": state["question"], "context": "\n\n".join(passages)}
    llm_reply = llm.invoke(prompt.invoke(prompt_input))
    return {"answer": llm_reply.content}

# Build the pipeline START -> retrieve -> generate and compile it.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
# Connect the graph's entry point to the first step of the sequence.
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

Total documents: 1
Total chunks: 22


In [None]:
# Smoke test of the adapter-backed pipeline: ask one question and print only
# the generated answer, not the full state with its retrieved documents.
question = "Who is Jose Fernando Gonzales?"
response = graph.invoke({"question": question})
print(response["answer"])

{'question': 'Who is Jose Fernando Gonzales?', 'context': [Document(id='f99960e2-133d-46c7-bf03-211aef37f470', metadata={'source': './sources/cv.md'}, page_content='Jose Fernando A. Gonzales\n\nTechnical Solutions Engineer\n\nContact Information\n\n📞 +63 936 505 5435\n\n📧 josefernando.a.gonzales@gmail.com\n\n🌐 LinkedIn\n\n💻 Github\n\n📍 Makati City, Philippines\n\nProfessional Summary'), Document(id='2e44f2a4-59be-4c0b-80a0-58cee7cde8e9', metadata={'source': './sources/cv.md'}, page_content='Collaborated with developers to ensure system requirements were met, enhancing system functionality and user satisfaction\n\nMain Projects in EightD Corporation\n\nNepal AFCS Implementation\n\nTechnical Business Analyst\n\nCollaborated with stakeholders to elicit requirements, resulting in comprehensive functional and technical specifications for system developers\n\nActively participated in Sprint Planning, creating Jira Tickets that accurately reflected user stories\n\nServed as the Project Owner 