# Document Loaders

## CSV Loader

In [1]:
from langchain.document_loaders import CSVLoader

In [2]:
csv_loader = CSVLoader(file_path = "HP_data/titanic.csv")

In [3]:
csv_data = csv_loader.load()

In [4]:
csv_data[0]

Document(page_content='survived: 0\npclass: 3\nsex: male\nage: 22.0\nsibsp: 1\nparch: 0\nfare: 7.25\nembarked: S\nclass: Third\nwho: man\nadult_male: True\ndeck: \nembark_town: Southampton\nalive: no\nalone: False', metadata={'source': 'HP_data/titanic.csv', 'row': 0})

## HTML Loader

In [5]:
from langchain.document_loaders import UnstructuredHTMLLoader

In [6]:
loader = UnstructuredHTMLLoader(file_path = "HP_data/harry_potter_html/001.html")

In [7]:
data = loader.load()

## Markdown Loader

In [8]:
from langchain.document_loaders import UnstructuredMarkdownLoader

In [9]:
md_filepath = 'HP_data/harry_potter_md/001.md'

In [10]:
md_loader = UnstructuredMarkdownLoader(file_path=md_filepath)

In [11]:
md_data = loader.load()
md_data

[Document(page_content='A Day of Very Low Probability\n\nBeneath the moonlight glints a tiny fragment of silver, a fraction of a line…\n\n(black robes, falling)\n\n…blood spills out in litres, and someone screams a word.\n\nEvery inch of wall space is covered by a bookcase. Each bookcase has six shelves, going almost to the ceiling. Some bookshelves are stacked to the brim with hardback books: science, maths, history, and everything else. Other shelves have two layers of paperback science fiction, with the back layer of books propped up on old tissue boxes or lengths of wood, so that you can see the back layer of books above the books in front. And it still isn’t enough. Books are overflowing onto the tables and the sofas and making little heaps under the windows.\n\nThis is the living-room of the house occupied by the eminent Professor Michael Verres-Evans, and his wife, Mrs. Petunia Evans-Verres, and their adopted son, Harry James Potter-Evans-Verres.\n\nThere is a letter lying on th

In [12]:
len(md_data)

1

In [13]:
print(md_data[0])

page_content='A Day of Very Low Probability\n\nBeneath the moonlight glints a tiny fragment of silver, a fraction of a line…\n\n(black robes, falling)\n\n…blood spills out in litres, and someone screams a word.\n\nEvery inch of wall space is covered by a bookcase. Each bookcase has six shelves, going almost to the ceiling. Some bookshelves are stacked to the brim with hardback books: science, maths, history, and everything else. Other shelves have two layers of paperback science fiction, with the back layer of books propped up on old tissue boxes or lengths of wood, so that you can see the back layer of books above the books in front. And it still isn’t enough. Books are overflowing onto the tables and the sofas and making little heaps under the windows.\n\nThis is the living-room of the house occupied by the eminent Professor Michael Verres-Evans, and his wife, Mrs. Petunia Evans-Verres, and their adopted son, Harry James Potter-Evans-Verres.\n\nThere is a letter lying on the living-r

## PDF Loader

In [14]:
from langchain.document_loaders import PyPDFLoader

In [15]:
pdf_filepath = "HP_data/harry_potter_pdf/hpmor-trade-classic.pdf"

In [16]:
pdf_loader = PyPDFLoader(file_path = pdf_filepath)

In [17]:
data = pdf_loader.load()
data

[Document(page_content='Harry Potter and the Methods of Rationality', metadata={'source': 'HP_data/harry_potter_pdf/hpmor-trade-classic.pdf', 'page': 0}),
 Document(page_content='', metadata={'source': 'HP_data/harry_potter_pdf/hpmor-trade-classic.pdf', 'page': 1}),
 Document(page_content='Harry Potter and the Methods of Rationality\nBy Eliezer Ydkowsky\niii', metadata={'source': 'HP_data/harry_potter_pdf/hpmor-trade-classic.pdf', 'page': 2}),
 Document(page_content='', metadata={'source': 'HP_data/harry_potter_pdf/hpmor-trade-classic.pdf', 'page': 3}),
 Document(page_content='Contents\n1 A Day of Very Low Probability 1\n2 Everything I Believe Is False 9\n3 Comparing Reality To Its Alternatives 15\n4 The Efﬁcient Market Hypothesis 21\n5 The Fundamental Attribution Error 27\n6 The Planning Fallacy 37\n7 Reciprocation 61\n8 Positive Bias 87\n9 Title Redacted, Part I 101\n10 Self-Awareness, Part II 107\n11 Omake Files 1, 2, 3 117\n12 Impulse Control 127\n13 Asking the Wrong Questions 137\

In [18]:
len(data)

1357

In [19]:
print(data[0].page_content)

Harry Potter and the Methods of Rationality


In [20]:
data[0].metadata

{'source': 'HP_data/harry_potter_pdf/hpmor-trade-classic.pdf', 'page': 0}

## Wikipedia Loader

In [21]:
from langchain.document_loaders import WikipediaLoader

In [22]:
loader = WikipediaLoader(query = 'India', load_max_docs =1)

In [23]:
wiki_data = loader.load()

In [24]:
print(wiki_data)

[Document(page_content="India, officially the Republic of India (ISO: Bhārat Gaṇarājya), is a country in South Asia.  It is the seventh-largest country by area; the most populous country as of June 2023; and from the time of its independence in 1947, the world's most populous democracy. Bounded by the Indian Ocean on the south, the Arabian Sea on the southwest, and the Bay of Bengal on the southeast, it shares land borders with Pakistan to the west; China, Nepal, and Bhutan to the north; and Bangladesh and Myanmar to the east. In the Indian Ocean, India is in the vicinity of Sri Lanka and the Maldives; its Andaman and Nicobar Islands share a maritime border with Thailand, Myanmar, and Indonesia.\nModern humans arrived on the Indian subcontinent from Africa no later than 55,000 years ago.\nTheir long occupation, initially in varying forms of isolation as hunter-gatherers, has made the region highly diverse, second only to Africa in human genetic diversity. Settled life emerged on the su

In [25]:
print(wiki_data[0])

page_content="India, officially the Republic of India (ISO: Bhārat Gaṇarājya), is a country in South Asia.  It is the seventh-largest country by area; the most populous country as of June 2023; and from the time of its independence in 1947, the world's most populous democracy. Bounded by the Indian Ocean on the south, the Arabian Sea on the southwest, and the Bay of Bengal on the southeast, it shares land borders with Pakistan to the west; China, Nepal, and Bhutan to the north; and Bangladesh and Myanmar to the east. In the Indian Ocean, India is in the vicinity of Sri Lanka and the Maldives; its Andaman and Nicobar Islands share a maritime border with Thailand, Myanmar, and Indonesia.\nModern humans arrived on the Indian subcontinent from Africa no later than 55,000 years ago.\nTheir long occupation, initially in varying forms of isolation as hunter-gatherers, has made the region highly diverse, second only to Africa in human genetic diversity. Settled life emerged on the subcontinent

In [26]:
import ssl
ssl._create_default_https_context = ssl._create_stdlib_context

In [27]:
from langchain_community.document_loaders import ArxivLoader

In [28]:
arxiv_loader = ArxivLoader(query='2402.14483', load_max_docs = 1)

In [29]:
data = arxiv_loader.load()
data

[Document(page_content='1\nMR-ARL: Model Reference Adaptive Reinforcement Learning for\nRobustly Stable On-Policy Data-Driven LQR\nMarco Borghesi, Alessandro Bosso, Giuseppe Notarstefano\nAbstract\nThis article introduces a novel framework for data-driven linear quadratic regulator (LQR) design. First, we\nintroduce a reinforcement learning paradigm for on-policy data-driven LQR, where exploration and exploitation\nare simultaneously performed while guaranteeing robust stability of the whole closed-loop system encompassing\nthe plant and the control/learning dynamics. Then, we propose Model Reference Adaptive Reinforcement Learning\n(MR-ARL), a control architecture integrating tools from reinforcement learning and model reference adaptive\ncontrol. The approach stands on a variable reference model containing the currently identified value function.\nThen, an adaptive stabilizer is used to ensure convergence of the applied policy to the optimal one, convergence of\nthe plant to the opti

In [30]:
len(data)

1

In [31]:
data[0]

Document(page_content='1\nMR-ARL: Model Reference Adaptive Reinforcement Learning for\nRobustly Stable On-Policy Data-Driven LQR\nMarco Borghesi, Alessandro Bosso, Giuseppe Notarstefano\nAbstract\nThis article introduces a novel framework for data-driven linear quadratic regulator (LQR) design. First, we\nintroduce a reinforcement learning paradigm for on-policy data-driven LQR, where exploration and exploitation\nare simultaneously performed while guaranteeing robust stability of the whole closed-loop system encompassing\nthe plant and the control/learning dynamics. Then, we propose Model Reference Adaptive Reinforcement Learning\n(MR-ARL), a control architecture integrating tools from reinforcement learning and model reference adaptive\ncontrol. The approach stands on a variable reference model containing the currently identified value function.\nThen, an adaptive stabilizer is used to ensure convergence of the applied policy to the optimal one, convergence of\nthe plant to the optim

## Load the Chat Model

In [32]:
import os 
from langchain_openai import ChatOpenAI
from langchain.globals import set_llm_cache
from langchain.cache import InMemoryCache

In [33]:
with open('openai_api_key.txt', 'r') as f:
    api_key = f.read()

os.environ['OPENAI_API_KEY'] = api_key

In [34]:
chat = ChatOpenAI()
set_llm_cache(InMemoryCache())

In [35]:
from langchain.prompts.chat import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate

In [36]:
system_template = "you are a peer reviewer"
human_template = "Read the paper with the title: '{title}'\n\nAnd Content: {content} and critically list down all the issues in the paper"

system_message_prompt = SystemMessagePromptTemplate.from_template(system_template)
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
prompt = chat_prompt.format_prompt(title=data[0].metadata['Title'], content = data[0].metadata['Summary'])

response = chat(messages = prompt.to_messages())
print(response.content)

  warn_deprecated(


Based on the content provided, here are some critical issues in the paper:

1. Lack of Clarity in the Presentation: The paper lacks clarity in explaining the proposed MR-ARL framework. The description of the framework is vague and can be difficult to understand for readers who are not well-versed in the subject matter.

2. Theoretical Robustness Certificates: The paper claims to provide theoretical robustness certificates against real-world perturbations, but it does not provide sufficient details or evidence to support this claim. It is important to provide a clear and detailed explanation of how these certificates are derived and validated.

3. Limited Discussion on Implementation: The paper focuses on the theoretical aspects of the proposed framework but lacks discussion on the practical implementation challenges. It would be valuable to include a section discussing potential implementation issues and how they can be addressed.

4. Validation Methodology: While the paper mentions th

#### Create a bot that can answer questions based on wikipedia articles

In [37]:
def qna_article(topic, question):
    chat = ChatOpenAI(max_tokens = 500)
    loader = WikipediaLoader(query=topic, load_max_docs = 1)
    data = loader.load()
    first_record = data[0]
    page_content = first_record.page_content
    title = first_record.metadata['title']
    summary = first_record.metadata['summary']
    user_question = question

    human_template = "Read the paper with the title: '{title}'\n\n And Content: {content}  and answer the questions {user_question} related to the article"

    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

    chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])
    prompt = chat_prompt.format_prompt(title = title, content = summary, user_question = question)

    response = chat(messages = prompt.to_messages())

    return response.content


In [38]:
qna_article('India', 'How many languages are being spoken in India?')

'India is a multilingual country with hundreds of languages spoken across its diverse population. There are 22 officially recognized languages in India, including Hindi, Bengali, Telugu, Marathi, Tamil, Urdu, Gujarati, Kannada, Odia, Malayalam, Punjabi, Assamese, Maithili, Santali, Kashmiri, Nepali, Konkani, Sindhi, Dogri, Manipuri, Bodo, and Sanskrit. Additionally, there are numerous regional languages and dialects spoken throughout the country, making India one of the most linguistically diverse nations in the world.'