In [1]:
import os
from dotenv import load_dotenv

In [2]:
load_dotenv()
API_KEY = os.getenv('GOOGLE_API_KEY')
os.environ["http_proxy"] = "http://127.0.0.1:2081"

In [3]:
# from langchain.chat_models import init_chat_model
# llm = init_chat_model("gemini-2.5-flash", model_provider='google_genai')

In [3]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    api_key=API_KEY
)

In [5]:
from langchain_core.messages import HumanMessage
llm.invoke([HumanMessage('hello there')])

AIMessage(content='Hello there! How can I help you today?', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': []}, id='run--33e5f36a-92e7-472f-9094-8c54e88a2741-0', usage_metadata={'input_tokens': 3, 'output_tokens': 35, 'total_tokens': 38, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 25}})

In [4]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

  from .autonotebook import tqdm as notebook_tqdm
  return torch._C._cuda_getDeviceCount() > 0


In [5]:
from langchain_chroma import Chroma
vector_store = Chroma(
    collection_name='rag_rl',
    embedding_function=embeddings,
    persist_directory='../storage'
)

In [7]:
from langchain_community.document_loaders import WebBaseLoader, OnlinePDFLoader
import bs4

blog_urls = [
    "https://medium.com/@cedric.vandelaer/reinforcement-learning-an-introduction-part-1-4-866695deb4d1",
    "https://arjun-sarkar786.medium.com/reinforcement-learning-for-beginners-introduction-concepts-algorithms-and-applications-3f805cbd7f92",
    "https://thomassimonini.medium.com/an-introduction-to-deep-reinforcement-learning-17a565999c0c",
    "https://medium.com/analytics-vidhya/introduction-to-reinforcement-learning-rl-in-pytorch-c0862989cc0e"
]

pdf_urls = [
    "https://arxiv.org/pdf/cs/9605103.pdf",  # Reinforcement Learning: A Survey
    "https://arxiv.org/pdf/2312.14925.pdf",   # A Survey of Reinforcement Learning from Human Feedback
    "https://arxiv.org/pdf/2308.14328.pdf",   # Reinforcement Learning for Generative AI: A Survey
    "https://arxiv.org/pdf/2312.10256.pdf"    # Multi-agent Reinforcement Learning: A Comprehensive Survey
]

In [8]:
loader = WebBaseLoader(
    web_paths=(blog_urls[0],),
)
loader.load()[0].page_content

'Reinforcement Learning: An introduction (Part 1/4) | by Cédric Vandelaer | MediumSitemapOpen in appSign upSign inMedium LogoWriteSign upSign inReinforcement Learning: An introduction (Part 1/4)Cédric Vandelaer10 min read·Aug 20, 2022--2ListenShareHi and welcome to the first part of a series on Reinforcement Learning.Press enter or click to view image in full sizeIf you somehow ended up here without having heard of Reinforcement Learning (RL) before, then let me summarize it as follows: “RL is a general framework for training an artificial intelligence model to solve a certain task or goal” … or in layman’s terms, we make AI do cool things!The goal of this blog series is to learn about RL and simultaneously explore some of the more recent research later on. We will start from the very basics and work our way towards more advanced topics. Even if you have almost no prior programming and/or mathematics knowledge, you should be able to follow along pretty smoothly.The first mini-series wi

In [9]:
loader = OnlinePDFLoader(pdf_urls[0])
loader.load()[0].page_content



'Journal of Artificial Intelligence Research 4 (1996) 237-285 Submitted 9/95; published 5/96\n\nReinforcement Learning: A Survey\n\nLeslie Pack Kaelbling LPK@CS.BROWN.EDU Michael L. Littman MLITTMAN @Cs .BROWN.EDU Computer Science Department, Box 1910, Brown University\n\nProvidence, RI 02912-1910 USA\n\nAndrew W. Moore AWM@CS.CMU.EDU Smith Hall 221, Carnegie Mellon University, 5000 Forbes Avenue Pittsburgh, PA 15213 USA\n\nAbstract\n\nThis paper surveys the field of reinforcement learning from a computer-science per- spective. It is written to be accessible to researchers familiar with machine learning. Both the historical basis of the field and a broad selection of current work are summarized. Reinforcement learning is the problem faced by an agent that learns behavior through trial-and-error interactions with a dynamic environment. The work described here has a resemblance to work in psychology, but differs considerably in the details and in the use of the word “reinforcement.” The 

In [8]:
# Step: Ingesting data from Medium blogs and PDF papers on Reinforcement Learning
blog_docs = []
for url in blog_urls:
    loader = WebBaseLoader(url)
    blog_docs.extend(loader.load())

pdf_docs = []
for url in pdf_urls:
    loader = OnlinePDFLoader(url)
    pdf_docs.extend(loader.load())



In [9]:
# Preprocess medium blogs
from langchain.schema import Document
import re

def clean_medium_content(doc: Document) -> Document:
    text = doc.page_content
    # Remove common Medium UI/boilerplate
    lines = text.split('\n')
    cleaned_lines = []
    skip_patterns = [
        r'Sitemap|Open in app|Sign up|Sign in|Medium Logo|Write|Listen|Share',
        r'Press enter or click to view image in full size',
        r'followers|following|Responses \(\d+\)|See all responses|Help|Status|About|Careers|Press|Blog|Privacy|Rules|Terms|Text to speech',
        r'Written by .*?Medium',  # Author footer
        r'^\s*$'  # Empty lines
    ]
    for line in lines:
        for pattern in skip_patterns:
            line = re.sub(pattern, '', line, flags=re.IGNORECASE)
        # Then apply the length check and append if it passes
        if len(line.strip()) > 20 and not re.match(r'^--?\d+$', line.strip()):
            cleaned_lines.append(line.strip())

    cleaned = ' '.join(cleaned_lines)
    # Remove extra spaces/multiple newlines
    cleaned = re.sub(r'\s+', ' ', cleaned)

    return Document(page_content=cleaned.strip(), metadata=doc.metadata)

cleaned_blog_docs = []
for doc in blog_docs:
    cleaned_blog_docs.append(clean_medium_content(doc))

In [10]:
cleaned_blog_docs[0]

Document(metadata={'source': 'https://medium.com/@cedric.vandelaer/reinforcement-learning-an-introduction-part-1-4-866695deb4d1', 'title': 'Reinforcement Learning: An introduction (Part 1/4) | by Cédric Vandelaer | Medium', 'description': 'Reinforcement Learning: An introduction (Part 1/4) Hi and welcome to the first part of a series on Reinforcement Learning. If you somehow ended up here without having heard of Reinforcement Learning …', 'language': 'en'}, page_content='Reinforcement Learning: An introduction (Part 1/4) | by Cédric Vandelaer | MediumReinforcement Learning: An introduction (Part 1/4)Cédric Vandelaer10 min read·Aug 20, 2022--2Hi and welcome to the first part of a series on Reinforcement Learning.If you somehow ended up here without having heard of Reinforcement Learning (RL) before, then let me summarize it as follows: “RL is a general framework for training an artificial intelligence model to solve a certain task or goal” … or in layman’s , we make AI do cool things!Th

In [11]:
# Preprocessing function
def clean_pdf_content(doc: Document) -> Document:
    text = doc.page_content

    # Remove metadata (journal, authors, copyright)
    text = re.sub(r"Journal of Artificial Intelligence Research.*?\n", "", text, flags=re.DOTALL)
    text = re.sub(r"Leslie Pack Kaelbling.*?(?=\nAbstract)", "", text, flags=re.DOTALL)
    text = re.sub(r"©1996 AI Access Foundation.*?\n", "", text)

    # Remove references section
    text = re.sub(r"References\n.*", "", text, flags=re.DOTALL)

    # Remove figure and table captions
    text = re.sub(r"Figure \d+:.*?\n", "", text)
    text = re.sub(r"Table \d+:.*?\n", "", text)

    # Remove inline citations
    text = re.sub(r"\(\w+ et al., \d{4}\)", "", text)
    text = re.sub(r"\(\w+, \d{4}\)", "", text)

    # Remove footnotes
    text = re.sub(r"\d+\.\s.*?\n", "", text)

    # Normalize special characters and line breaks
    text = re.sub(r"\s+", " ", text).strip()

    # Return a new Document object with the cleaned text and original metadata
    return Document(page_content=text, metadata=doc.metadata)

cleaned_pdf_docs = []
for doc in pdf_docs:
    cleaned_pdf_docs.append(clean_pdf_content(doc))

In [12]:
cleaned_pdf_docs[0]

Document(metadata={'source': '/tmp/tmp5wv6xw6u/tmp.pdf'}, page_content='Reinforcement Learning: A Survey Abstract This paper surveys the field of reinforcement learning from a computer-science per- spective. It is written to be accessible to researchers familiar with machine learning. Both the historical basis of the field and a broad selection of current work are summarized. Reinforcement learning is the problem faced by an agent that learns behavior through trial-and-error interactions with a dynamic environment. The work described here has a resemblance to work in psychology, but differs considerably in the details and in the use of the word “reinforcement.” The paper discusses central issues of reinforcement learning, including trading off exploration and exploitation, establishing the foundations of the field via Markov decision theory, learning from delayed reinforcement, constructing empirical models to accelerate learning, making use of generalization and hierarchy, and coping 

In [13]:
# Combine all documents
all_documents = cleaned_blog_docs + cleaned_pdf_docs

# Print summary to verify
print(f"Loaded {len(all_documents)} documents.")

Loaded 8 documents.


In [14]:
all_documents

[Document(metadata={'source': 'https://medium.com/@cedric.vandelaer/reinforcement-learning-an-introduction-part-1-4-866695deb4d1', 'title': 'Reinforcement Learning: An introduction (Part 1/4) | by Cédric Vandelaer | Medium', 'description': 'Reinforcement Learning: An introduction (Part 1/4) Hi and welcome to the first part of a series on Reinforcement Learning. If you somehow ended up here without having heard of Reinforcement Learning …', 'language': 'en'}, page_content='Reinforcement Learning: An introduction (Part 1/4) | by Cédric Vandelaer | MediumReinforcement Learning: An introduction (Part 1/4)Cédric Vandelaer10 min read·Aug 20, 2022--2Hi and welcome to the first part of a series on Reinforcement Learning.If you somehow ended up here without having heard of Reinforcement Learning (RL) before, then let me summarize it as follows: “RL is a general framework for training an artificial intelligence model to solve a certain task or goal” … or in layman’s , we make AI do cool things!T

In [15]:
# Chunk contents of the blog
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,  # Increased to capture more context in academic text
    chunk_overlap=300,  # Increased overlap to preserve technical terms and context
    separators=["\n\n", "\n", ".", " ", ""],  # Respect paragraph and sentence boundaries
    keep_separator=True  # Retain separators to maintain readability
)

# Split the preprocessed documents
all_splits = text_splitter.split_documents(all_documents)

In [16]:
# Index chunks
_ = vector_store.add_documents(documents=all_splits)

In [17]:
vector_store.get()

{'ids': ['6cf7cc01-3426-424c-a081-21682395d956',
  '821a0748-6b34-45b1-91ca-86b2f7e7d5cd',
  '27c32eed-f4a5-4c93-af59-ddf42cb4c154',
  '32f0bb12-13f9-4f24-ba26-7ce70ce60447',
  '0248c9ff-77d7-4063-8f70-13852a89664c',
  'ac024acd-bea8-4222-9ccd-dce1e1b8d00d',
  'a9477599-843f-45b8-92ed-1f71968f56a3',
  '88f20146-27b3-4be4-bc21-30816a8bd250',
  'faf595c5-6cb6-40c7-97f8-adddfa29dc58',
  'caedd48b-ee44-4f9e-ad75-9830d7061699',
  '334972fa-053d-4a76-a0b4-256f86c3356f',
  'a2c33f88-f122-4e98-b641-1a4caaef01fc',
  'cdf24552-5528-472b-bb2b-daaa621f83ef',
  'fe37d719-2249-4349-b618-ce6c52ec3675',
  '444ab40c-752d-4852-853e-a75cf7ee01d9',
  'a4a285f5-78d4-41cc-86c2-c8d33c43779a',
  '72b97d1f-25b1-4526-af0c-d96fe8dcfc49',
  '52da0d24-fa98-4ce3-add0-195fdc10f46e',
  'cdf61d39-f105-4ab5-936c-af33dc5c8534',
  '12ee80e6-2969-407a-85ac-15155eb208c8',
  '9b7a67b1-a6f3-4da7-bea1-2f5904f6d875',
  '6936a3db-60a5-4d25-a942-0d00067b3a3f',
  '46df795a-a0a7-4a82-9c9d-1ea68d4bfd8f',
  'cf634165-cf42-46ff-b187-