In [1]:
import os
from getpass import getpass

# Prefer a key already exported as GOOGLE_API_KEY so "Restart & Run All" can run
# unattended; otherwise prompt for it without echoing it into the notebook output.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY") or getpass("Google API key: ")

# Blog posts to index: each slug is appended to BASE_URL to form the page URL.
BASE_URL = 'https://www.noobscience.in/blog/'
BLOGS = ['awesome-functions', 'letting-things-sit', 'stability', 'vim', 'consistency']

 ········


In [2]:
!pip install langchain-community tqdm langchain-google-genai ipywidgets langchain-text-splitters lxml



In [3]:
from langchain_community.document_loaders import WebBaseLoader

# One absolute URL per blog slug listed in BLOGS.
blog_urls = [f"{BASE_URL}{slug}" for slug in BLOGS]

# Configure the loader with every URL; iterating it happens in a later cell.
loader = WebBaseLoader(web_paths=blog_urls)

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [4]:
# Collect the documents yielded asynchronously by the loader.
# NOTE: a top-level `async for` is only valid where top-level await is enabled
# (as in an IPython/Jupyter kernel); it is a SyntaxError in a plain Python script.
docs = []
async for doc in loader.alazy_load():
    docs.append(doc)

# Sanity check: exactly one document came back per requested URL.
assert len(docs) == len(blog_urls)

Fetching pages: 100%|#####################################################################################| 5/5 [00:02<00:00,  2.20it/s]


In [5]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embedding client shared by the vector store and the ad-hoc query checks below.
embeddings = GoogleGenerativeAIEmbeddings(
    google_api_key=GOOGLE_API_KEY,
    model="models/text-embedding-004",
)

In [6]:
# Smoke test: embed one sample query and display the length of the returned vector.
len(embeddings.embed_query("What are some awesome fish shell functions?"))

768

In [7]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Break each loaded page into overlapping chunks (size 1000, overlap 200) so that
# each chunk can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
all_splits = text_splitter.split_documents(docs)

In [8]:
# Peek at the first chunk to inspect its metadata and page_content.
all_splits[0]

Document(metadata={'source': 'https://www.noobscience.in/blog/awesome-functions', 'title': '\n            Awesome Fish functions\n        ', 'description': 'Hi! I am Ishan. I am a Student and Open Source Enthusiast. This is my personal portfolio website. I Like to Code Stuff. Check out my work!', 'language': 'en'}, page_content='Awesome Fish functions\n        \n\n\n    <><NoobScience%projects?blog!now+stuff@contact    Awesome Fish functions   Some awesome fish functions that I have accumalated over the years.  \nPosted on Fri Jan 03 2025       I use fish shell as my default shell and I have recently started writing and accumulating some fish shell functions that you might like\nfor your own config.\nSo here are some of them:\nAutomagical Tmux Session\nfunction tm\n    set session_name (basename (pwd))\n\n    if tmux has-session -t $session_name\n        echo "Tmux session \'$session_name\' already exists."\n        tmux attach-session -t $session_name\n    else\n        tmux new-sessi

In [9]:
from langchain_core.vectorstores import InMemoryVectorStore

# In-memory store that embeds documents and queries with the client created above.
vector_store = InMemoryVectorStore(embedding=embeddings)

In [10]:
# Add every chunk to the store; the value displayed below is the list of IDs of the
# stored documents.
# NOTE(review): re-running this cell presumably inserts the same chunks again under
# new IDs — confirm, and rebuild `vector_store` first if duplicates matter.
vector_store.add_documents(documents=all_splits)

['d760bbc4-953f-4555-94a6-d77ad8a2e604',
 '22cddee7-c56a-4fc8-bd2f-35669c780366',
 '9ee4f638-2396-4b56-9e1e-fae426704853',
 '4d6d5c0d-d318-4b75-aa80-7ec55126689d',
 '53691820-1e46-4905-ba8e-7ca287b029e5',
 '11b0321e-317a-432a-b2e3-e31388ba506f',
 '1b700b45-6e55-4e15-9490-30cb857c9958',
 '2f5971b2-4a53-4d11-97fc-1c49f316b266',
 'e36c8db9-2040-4ee6-9278-74d491822180',
 '29c3d90a-bd7f-4def-b0a9-d5cc46975811',
 'bf0a0f46-5a06-408c-ae3a-76833bb460b9',
 'c51b3962-6cb1-44d8-8c03-c856c98ede96',
 '39ec22db-cb43-4d94-ade0-92a9e50b92a1',
 'ff49f70a-7284-4000-b007-82a10a88129f',
 '06cc379b-7da5-4afd-9c8e-647a529c1665',
 'f52869ce-bf86-4af4-9f66-0216b76f826d',
 '53eafe68-6085-4dc3-afaa-7a911113f264',
 '2578f49c-fef9-42d6-bcc8-a6a3a00d48bf',
 'ca342159-89b9-4f09-9c1e-4e5bfb67b7a5',
 '4b2ab3ff-9a10-4fc2-acc2-ebe6ef6d459f',
 '0abcf6f4-67fd-4d3c-aa7f-1da3b9a4787d']

In [21]:
# Show the single best match for the query together with its score.
results = vector_store.similarity_search_with_score(query="My 2025 resolutions", k=1)
for doc, score in results:
    # `.3f` prints three decimal places. The earlier `3f` only requested a minimum
    # field width of 3, so the score was printed with the default six decimals.
    print(f"* [SIM={score:.3f}] {doc.page_content} [{doc.metadata}]")

* [SIM=0.656170] 🪑NoobScience%projects?blog!now+stuff@contact    Letting Things Sit   My resolution for 2025  
Posted on Thu Jan 02 2025       I have a very bad habit of not letting things sitting that I want to try getting rid of in 2025.
By letting things sit I mean consistently sticking to the things that I decide to do and not change it.
It means actually having patience and giving it time.
The most annoying example I can think of that comes under this category is my habit of changing my neovim config way too often.
So often that I feel that the very purpose of customizing my neovim config is lost.
Something feels like ours only when you give it time.
This applies not only to my editor example, but also to relationships, electronics and most importantly routines.
The art of maintaining a routine has been ever elusive to me and I believe that I finally know why: Consistency and Predictability.
I think I am finally at a stage where I can stop experimenting and stick to something. [{'

In [12]:
# Wrap the store as a retriever that uses the "mmr" search type.
# Per the LangChain retriever docs (verify against the installed version): `k` is the
# number of documents returned, `fetch_k` the number of candidates handed to MMR, and
# `lambda_mult` trades relevance against diversity (1 = minimum diversity, 0 = maximum).
retriever = vector_store.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 1, "fetch_k": 2, "lambda_mult": 0.5},
)
retriever.invoke("My 2025 resolution")

[Document(id='11b0321e-317a-432a-b2e3-e31388ba506f', metadata={'source': 'https://www.noobscience.in/blog/letting-things-sit', 'title': '\n            Letting Things Sit\n        ', 'description': 'Hi! I am Ishan. I am a Student and Open Source Enthusiast. This is my personal portfolio website. I Like to Code Stuff. Check out my work!', 'language': 'en'}, page_content='🪑NoobScience%projects?blog!now+stuff@contact    Letting Things Sit   My resolution for 2025  \nPosted on Thu Jan 02 2025       I have a very bad habit of not letting things sitting that I want to try getting rid of in 2025.\nBy letting things sit I mean consistently sticking to the things that I decide to do and not change it.\nIt means actually having patience and giving it time.\nThe most annoying example I can think of that comes under this category is my habit of changing my neovim config way too often.\nSo often that I feel that the very purpose of customizing my neovim config is lost.\nSomething feels like ours onl

In [13]:
from langchain_core.caches import InMemoryCache
from langchain_core.globals import set_llm_cache
from langchain_google_genai import GoogleGenerativeAI

# Text-completion client for the Gemini model used to answer questions.
llm = GoogleGenerativeAI(
    google_api_key=GOOGLE_API_KEY,
    model="gemini-2.0-flash",
)

# Register an in-memory cache as the global LLM cache.
set_llm_cache(InMemoryCache())

In [14]:
# Smoke test: confirm the model and API key work before wiring up the RAG pipeline.
llm.invoke("Hello! Who are you?")

'I am a large language model, trained by Google.\n'

In [15]:
from langchain_core.prompts import PromptTemplate
# Prompt text with two placeholders that are filled in at call time:
# {question} (the user's question) and {context} (the retrieved blog text).
template = """Answer the following question to a user {question} 
with the following context {context}"""

# Build a reusable prompt object from the template string.
prompt = PromptTemplate.from_template(template)

In [16]:
from typing import List, Any

class CustomBlogRAG:
    """
    Minimal retrieval-augmented generation pipeline over the indexed blog posts.

    `search` retrieves the chunks most similar to a query from the vector store, and
    `ask` feeds those chunks to the LLM as context for answering a question.
    """
    def __init__(self, embeddings, vector_store, llm, prompt):
        """
        Store the pipeline components.

        Args:
            embeddings: Embedding client; kept for reference, not used by the methods below.
            vector_store: Object exposing `similarity_search_with_score(query=..., k=...)`
                that returns `(document, score)` pairs.
            llm: Object exposing `invoke(prompt_value)` that returns the model's answer.
            prompt: Object exposing `invoke({"question": ..., "context": ...})` that
                returns the final prompt passed to `llm`.
        """
        self.embeddings = embeddings
        self.vector_store = vector_store
        self.llm = llm
        self.prompt = prompt

    def search(self, query: str, k: int = 1, min_score: float = 0.5) -> List[Any]:
        """
        Return the documents whose score for `query` is strictly above `min_score`.

        Args:
            query: Text to search for.
            k: Maximum number of candidates requested from the vector store.
            min_score: Candidates scoring at or below this value are dropped.
                NOTE(review): this assumes higher scores mean more similar, which
                matches the in-memory store used in this notebook — confirm before
                swapping in a store that reports distances instead.

        Returns:
            The matching documents in the order the store returned them (possibly empty).
        """
        scored_docs = self.vector_store.similarity_search_with_score(query=query, k=k)
        return [doc for doc, score in scored_docs if score > min_score]

    def ask(self, question: str) -> str:
        """
        Answer `question` using the retrieved blog content as context.

        Prints the source URLs of the chunks used, then returns the LLM's response.
        When no chunk passes the score threshold the context is an empty string and
        the LLM is still queried.
        """
        rel_docs = self.search(question)
        context = "\n\n".join(doc.page_content for doc in rel_docs)
        # Build the source list outside the f-string: reusing the f-string's own quote
        # character inside a replacement field is a SyntaxError before Python 3.12.
        sources = ",".join(doc.metadata["source"] for doc in rel_docs)
        print(f"Using content of {sources} ")
        final_prompt = self.prompt.invoke({"question": question, "context": context})
        response = self.llm.invoke(final_prompt)

        return response

In [17]:
# Assemble the pipeline from the components built in the earlier cells.
rag = CustomBlogRAG(embeddings, vector_store, llm, prompt)

In [19]:
# End-to-end check: retrieve context for the question and print the model's answer.
print(rag.ask("What is my 2025 resolution"))

Using content of https://www.noobscience.in/blog/letting-things-sit 
Your 2025 resolution is to **stop changing things so often and to be more consistent, especially with routines, relationships, electronics, and your neovim configuration.** You want to develop patience and "let things sit."

