# Inspirational Developer Helper: A RAG System

In [8]:

import os
from pprint import pprint
from typing import List
import faiss
from huggingface_hub import InferenceClient
from langchain_core.documents import Document
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter


from IPython.core.formatters import BaseFormatter
from IPython.display import display, HTML

class MultilineStringFormatter(BaseFormatter):
    """HTML formatter that wraps multi-line strings in a ``<pre>`` element."""

    def __call__(self, obj):
        """Return ``<pre>``-wrapped HTML for multi-line strings.

        Args:
            obj: The object about to be displayed.

        Returns:
            An HTML string when *obj* is a ``str`` containing a newline,
            otherwise ``None``.
        """
        # Local import keeps this cell self-contained.
        from html import escape

        if isinstance(obj, str) and '\n' in obj:
            # Escape markup so that text containing tags (for example the
            # <context>/<question> tags used in the prompt) is shown
            # literally instead of being interpreted as HTML.
            return f'<pre>{escape(obj, quote=False)}</pre>'
        return None

# Register the custom formatter for ``str`` on the text/html display formatter.
# NOTE(review): get_ipython is not imported in this file; presumably it is
# injected by the IPython/Jupyter runtime - confirm before running as a script.
ip = get_ipython()
ip.display_formatter.formatters['text/html'].for_type(str, MultilineStringFormatter())



<__main__.MultilineStringFormatter at 0x780c954e3770>

In [10]:
# set up FAISS index

def prep_documents(
    pdf_path: str,
    *,
    chunk_size: int = 1000,
    chunk_overlap: int = 200,
) -> List[Document]:
    """Load a PDF and split it into overlapping chunks.

    Args:
        pdf_path: Path of the PDF file to load.
        chunk_size: Passed through to ``RecursiveCharacterTextSplitter`` as
            its ``chunk_size``.
        chunk_overlap: Passed through to ``RecursiveCharacterTextSplitter``
            as its ``chunk_overlap``.

    Returns:
        The chunked documents returned by ``split_documents``.
    """
    pages = PyPDFLoader(pdf_path).load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(pages)
    
# Input PDF and output index locations, relative to this notebook.
# Adjust both paths to match where the data lives on your machine.
pdf_path = "../data/raw_documents/The Pragmatic Programmer, 20th Anniversary Edition.pdf"
index_path = "../data/faiss_index"

# Chunk the PDF and embed the chunks into a FAISS vector store.
embedding_function = HuggingFaceEmbeddings()
docs = prep_documents(pdf_path)
vectorstore = FAISS.from_documents(documents=docs, embedding=embedding_function)

# Persist the vector store so it can be reloaded from index_path.
vectorstore.save_local(folder_path=index_path)


In [11]:
# Reload the FAISS index previously saved under index_path.
# NOTE: allow_dangerous_deserialization=True opts in to pickle-based loading;
# only point this at index files you created yourself or otherwise trust.
vectorstore = FAISS.load_local(
    folder_path=index_path,
    embeddings=embedding_function,
    allow_dangerous_deserialization=True,
)


In [13]:

# Prompt skeleton: the retrieved text goes inside <context>, the user's
# question inside <question>.
PROMPT_TEMPLATE = """
Use the following pieces of information enclosed in <context> tags to provide an answer to the question enclosed in <question> tags. 
<context>
{context}
</context>
<question>
{question}
</question>
"""

q = "What is the best choice for a developer to get reinspired in their career"

# Retrieve the five chunks most similar to the question, then join their
# text with newlines to form the context inserted into the prompt.
retrieved_docs = vectorstore.similarity_search(query=q, k=5)
retrieved_content = [d.page_content for d in retrieved_docs]
context = "\n".join(retrieved_content)
prompt = PROMPT_TEMPLATE.format(context=context, question=q)


In [17]:
# Remote LLM: send the assembled prompt as a single user message through
# the Hugging Face InferenceClient and pretty-print the reply text.
remote_chat_completion_llm = "meta-llama/Llama-3.1-8B-Instruct"
llm_client = InferenceClient(model=remote_chat_completion_llm)

response = llm_client.chat.completions.create(
    messages=[{"role": "user", "content": prompt}],
)

pprint(response.choices[0].message.content)

('According to the text, the best choice for a developer who feels stuck in '
 'their career is to "try to fix it" by making a change, rather than simply '
 'hoping things will get better. This can involve changing their organization, '
 'learning new skills, or seeking out new opportunities. The text suggests '
 'that developers have agency and control over their careers, and that they '
 'should take proactive steps to address their frustrations and create the '
 'career they want.\n'
 '\n'
 'Specifically, the text suggests the following options:\n'
 '\n'
 "* Try a sophisticated IDE with cutting-edge features if you've only used "
 'makefiles and an editor\n'
 "* Read news and posts online on technology different from what you're "
 'currently working with\n'
 '* Take classes or participate in local user groups and meetups to learn new '
 'skills and stay current\n'
 '* Experiment with different environments, such as switching from Windows to '
 'Linux\n'
 '* Seek out new opportuniti

In [15]:
# Call the model with the retrieved context: local server reached through
# the OpenAI-compatible client.
LOCAL_LLM_MODEL = os.getenv('LLM_MODEL', 'gemma3n')
local_OLLAMA_HOST = os.getenv('OLLAMA_HOST', 'http://localhost:11434')

from openai import OpenAI

llm_client = OpenAI(
    # Build the endpoint from OLLAMA_HOST so the environment override is
    # actually honoured (it was previously read but never used).
    # NOTE(review): assumes OLLAMA_HOST includes the scheme ("http://...");
    # a bare host:port value would need a scheme prepended - confirm.
    base_url=f"{local_OLLAMA_HOST.rstrip('/')}/v1",
    api_key='ollama',  # required, but unused
)

# Send the assembled prompt as a single user message.
response = llm_client.chat.completions.create(
    model=LOCAL_LLM_MODEL,
    messages=[{"role": "user", "content": prompt}],
)
pprint(response.choices[0].message.content)


('According to the provided text, when a developer feels stuck in their '
 'career, the best choice is to **"Why can’t you change it?"** This means '
 'being proactive and taking control of the situation. The text suggests '
 'several ways to do this:\n'
 '\n'
 '*   **Fix it:** If your work environment is unsatisfactory, try to improve '
 'it.\n'
 '*   **Invest in yourself:** Dedicate time to learn new technologies and stay '
 'current, even outside of work hours.\n'
 '*   **Seek new opportunities:** If necessary, find a new job with a better '
 'environment or explore remote work options.\n'
 '*   **Take courses:** Enroll in classes to gain new skills.\n'
 '*   **Participate in user groups and meetups:** Network with other '
 'developers to stay informed and avoid isolation.\n'
 '*   **Experiment with different environments:** Try new technologies and '
 'approaches to broaden your skillset.\n'
 '\n'
 "The core message is that developers have agency and shouldn't passively "
 'accept 

# Potential Next Steps
1. Evaluation

    a. Build an evaluation dataset for what answers are good enough

   
    b. Set up tracking on runtime and memory usage

   
    c. Evaluate other model and parameter choices that maintain baseline performance while reducing memory and time costs

2. In particular I would like to play around with CoRAG

3. Move from a notebook interface into files and functions and a flask app

4. Wrap everything using smolagents and add some tooling and routing to build out a little agent

1. Evaluation

    a. Build an evaluation dataset for what answers are good enough

   
    b. Set up tracking on runtime and memory usage

   
    c. Evaluate other model and parameter choices that maintain baseline performance while reducing memory and time costs

         In particular, I would like to experiment with CoRAG and some more complicated asks

2. Move from a notebook interface into files and functions and a Flask app

3. Wrap everything using smolagents and add some tooling and routing to build out a little agent

4. Use LangChain to clean up the manual steps

5. Create a security harness