In [None]:
# Install pkgs
!pip install sentence_transformers openai unstructured
!pip install plotly
!pip install langchain
!pip install tiktoken
!pip install matplotlib
%pip install -Uqqq rich openai tiktoken wandb langchain unstructured tabulate pdf2image chromadb gradio faiss-gpu

Collecting sentence_transformers
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting openai
  Downloading openai-1.3.6-py3-none-any.whl (220 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m220.9/220.9 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting unstructured
  Downloading unstructured-0.11.1-py3-none-any.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
Collecting sentencepiece (from sentence_transformers)
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m15.3 MB/s[0m eta [36m0:00:00[0m
Collecting httpx<1,>=0.23.0 (from openai)
  Downloadi

In [None]:
import os

# Turn on the Weights & Biases tracer so the LangChain stages (query,
# response, etc.) are recorded under the project named below.
os.environ.update(
    {
        "LANGCHAIN_WANDB_TRACING": "true",
        "WANDB_PROJECT": "arxiv-paper-search",
    }
)

In [None]:
import pandas as pd

from langchain.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS

path = 'language_papers.csv'

# Peek at the column names with pandas before handing the file to LangChain.
df = pd.read_csv(path)
print(df.columns)

# Load the same CSV through LangChain's CSVLoader; the "title" column is
# passed as source_column.
loader = CSVLoader(file_path=path, source_column="title")
data = loader.load()

first_doc = data[0]
print(first_doc)

# Get a sense of the data: document count and the size of the first document.
print(f'Number of docs = {len(data)}')
print(f'Number of characters in doc {first_doc!s} = {len(first_doc.page_content)}')

Index(['id', 'title', 'abstract'], dtype='object')
page_content='id: 2311.10932\ntitle: Cognitive bias in large language models: Cautious optimism meets\n  anti-Panglossian meliorism\nabstract: Traditional discussions of bias in large language models focus on a conception of bias closely tied to unfairness, especially as affecting marginalized groups. Recent work raises the novel possibility of assessing the outputs of large language models for a range of cognitive biases familiar from research in judgment and decisionmaking. My aim in this paper is to draw two lessons from recent discussions of cognitive bias in large language models: cautious optimism about the prevalence of bias in current models coupled with an anti-Panglossian willingness to concede the existence of some genuine biases and work to reduce them. I draw out philosophical implications of this discussion for the rationality of human cognitive biases as well as the role of unrepresentative data in driving model biases.'

In [None]:
# Split the loaded documents into overlapping chunks
# (chunk_size=1000, chunk_overlap=100).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
texts = text_splitter.split_documents(data)

# Get a sense of the chunked data: chunk count and the size of the first chunk.
first_chunk = texts[0]
print(f'Number of docs = {len(texts)}')
print(f'Number of characters in doc {first_chunk!s} = {len(first_chunk.page_content)}')

Number of docs = 514
Number of characters in doc page_content='id: 2311.10932\ntitle: Cognitive bias in large language models: Cautious optimism meets\n  anti-Panglossian meliorism\nabstract: Traditional discussions of bias in large language models focus on a conception of bias closely tied to unfairness, especially as affecting marginalized groups. Recent work raises the novel possibility of assessing the outputs of large language models for a range of cognitive biases familiar from research in judgment and decisionmaking. My aim in this paper is to draw two lessons from recent discussions of cognitive bias in large language models: cautious optimism about the prevalence of bias in current models coupled with an anti-Panglossian willingness to concede the existence of some genuine biases and work to reduce them. I draw out philosophical implications of this discussion for the rationality of human cognitive biases as well as the role of unrepresentative data in driving model biases.' m

In [None]:
# Time for embeddings
# Using instructor-xl from HF rather than OpenAI embedding model.
! pip install InstructorEmbedding
embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
db = FAISS.from_documents(texts, embeddings)

Collecting InstructorEmbedding
  Downloading InstructorEmbedding-1.0.1-py2.py3-none-any.whl (19 kB)
Installing collected packages: InstructorEmbedding
Successfully installed InstructorEmbedding-1.0.1


  from tqdm.autonotebook import trange


.gitattributes:   0%|          | 0.00/1.48k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/270 [00:00<?, ?B/s]

2_Dense/config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/3.15M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/66.3k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/4.96G [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.40k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/461 [00:00<?, ?B/s]

load INSTRUCTOR_Transformer
max_seq_length  512


In [None]:
# Question sent to the retrieval + LLM pipeline.
query = "What are recent studies related to RAG"

In [None]:
# SECURITY: the API key must never be written into the notebook. A key was
# previously hard-coded in this cell; treat it as compromised and revoke it.
# The key is now taken from the environment, or prompted for without echo.
from getpass import getpass  # was used below without ever being imported

# `not os.getenv(...)` also treats an empty variable as "not configured".
if not os.getenv("OPENAI_API_KEY"):
  if any('VSCODE' in name for name in os.environ):
    print('Please enter password in the VS Code prompt at the top of your VS Code window!')
  os.environ["OPENAI_API_KEY"] = getpass("").strip()

# Cheap sanity check on the key format before any API call is attempted.
assert os.getenv("OPENAI_API_KEY", "").startswith("sk-"), "This doesn't look like a valid OpenAI API key"
print("OpenAI API key configured")

OpenAI API key configured


In [None]:
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.llms import OpenAI

def get_response_from_query(db, query, k=10):
    """
    Answer `query` from the `k` most similar chunks in `db`, then ask the same
    LLM to judge whether that answer agrees with the retrieved chunks.

    Args:
        db: Vector store exposing `similarity_search(query, k=...)` that
            returns documents with a `page_content` attribute.
        query: The question to answer.
        k: Number of chunks to retrieve (default 10). All retrieved chunks are
            concatenated into the prompt, so a larger `k` sends more tokens to
            the model.

    Returns:
        A tuple `(response, docs, evals)`: the answer produced by the first
        chain, the list of retrieved documents, and the output of the
        evaluation chain.
    """

    # Retrieval step: everything the LLM sees comes from these k chunks.
    docs = db.similarity_search(query, k=k)

    docs_page_content = " ".join([d.page_content for d in docs])

    llm = ChatOpenAI(model_name="gpt-3.5-turbo-16k",temperature=0)

    prompt = PromptTemplate(
        input_variables=["question", "docs"],
        template="""
        You are a bot that is open to discussions about research papers related to language models. Use the synopsis of research papers provided by me. Stay truthful and if you weren't provided any resources give your opinion only.
        Answer the following question: {question}
        By searching the following articles: {docs}

        Only use the factual information from the documents. Make sure to mention key phrases from the articles.

        If you feel like you don't have enough information to answer the question, say "I don't know". Keep answer to only four bullet points.

        """,
    )

    chain = LLMChain(llm=llm, prompt=prompt)

    # Only the prompt's own input variables are passed. The former
    # `return_source_documents=True` keyword was forwarded as an extra chain
    # input, not an option; the retrieved documents are returned below anyway.
    response = chain.run(question=query, docs=docs_page_content)

    ##evaluation part

    # NOTE: in this template the `{question}` slot is filled with the answer
    # generated above, not with the user's original question.
    prompt_eval = PromptTemplate(
        input_variables=["question", "docs"],
        template="""
        Your job is to evaluate if the response to a question is similar to the source given.

        for the following: {question}
        By searching the following article: {docs}

       Give a reason why they are similar or not, start with a Yes or a No.

        """,
    )

    chain_part_2 = LLMChain(llm=llm, prompt=prompt_eval)

    evals = chain_part_2.run(question=str(response), docs=docs_page_content)

    return response, docs, evals

In [None]:
# Run the pipeline for `query`, retrieving the 15 most similar chunks.
answer, sources, evals = get_response_from_query(db, query, k=15)

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Streaming LangChain activity to W&B at https://wandb.ai/sharatsv-stanford/arxiv-paper-search/runs/s526m51m
[34m[1mwandb[0m: `WandbTracer` is currently in beta.
[34m[1mwandb[0m: Please report any issues to https://github.com/wandb/wandb/issues with the tag `langchain`.


In [None]:
# Show the question, the generated answer and the self-evaluation in turn.
for heading, body in (
    ("\n\n> Question:", query),
    ("\n> Answer:", answer),
    ("\n> Eval:", evals),
):
    print(heading)
    print(body)

# Print the relevant sources used for the answer, framed by a banner line.
print("----------------------------------SOURCE DOCUMENTS---------------------------")
for source_doc in sources:
    print("\n> " + source_doc.metadata["source"])
    # Only the first 1000 characters of each source are shown.
    print(source_doc.page_content[:1000])
print("----------------------------------SOURCE DOCUMENTS---------------------------")




> Question:
What are recent studies related to RAG

> Answer:
Recent studies related to RAG (Retrieval Augmentation in Large Language Models) include:

1. "Utilizing Large Language Models for Questionnaire Generation": This study proposes using large language models to generate diverse questionnaire versions in longitudinal studies. The psychometric testing showed consistent covariation between the external criterion and the LLM-generated questionnaire variants, demonstrating their reliability and validity.

2. "ChatGPT's Ability for Medical Diagnostic Reasoning": This research probes ChatGPT's ability to perform formal, probabilistic medical diagnostic reasoning using Bayes rule. The study shows that the introduction of medical variable names leads to an increase in errors made by ChatGPT, highlighting areas for future research.

3. "Overview of Current Applications of Large Language Models in Various Medical Specialties": This paper provides an overview of the latest applications o

In [None]:
# Finally use gradio to build a GUI for search
import gradio as gr

def greet(query):
    """Gradio callback: run `query` through the pipeline using 2 retrieved chunks.

    Returns the `(answer, sources, evals)` tuple from
    `get_response_from_query` unchanged, one value per output component.
    """
    return get_response_from_query(db, query, 2)

# Example queries offered in the UI; each entry is a one-element list because
# the interface has a single input.
examples = [
    [prompt]
    for prompt in (
        "Large Language Models research",
        "PEFT/Fine-tuning",
        "Langchain",
        "Low ranking adaptation/LoRA",
        "Security related to LLMs",
    )
]

# One three-line text box per value returned by `greet`, in the same order.
output_boxes = [
    gr.components.Textbox(lines=3, label=label)
    for label in ("Response", "Source", "Evaluation")
]

demo = gr.Interface(
    fn=greet,
    title="arxiv-search (LLMs)",
    inputs="text",
    outputs=output_boxes,
    examples=examples,
)

# NOTE(review): share=True requests a public link; anyone with the URL can
# trigger queries that run with this notebook's OpenAI key.
demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://dff3b65a0f13066ca8.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


