In [2]:
# Load (or reload) IPython's autoreload extension and switch it to mode 2 so
# that edits to imported modules are picked up live.
%reload_ext autoreload
%autoreload 2

In [3]:
import json
import os
import re
from urllib.parse import quote

from pydantic import BaseModel
import cohere

from langchain.callbacks import get_openai_callback
from langchain.embeddings.huggingface import HuggingFaceEmbeddings, HuggingFaceInferenceAPIEmbeddings
from langchain.vectorstores.pgvector import PGVector
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain import hub
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CohereRerank

from dotenv import load_dotenv
load_dotenv()

import utils.paper_utils as pu

## Load Embedding Store

In [11]:
# SQLAlchemy-style URL for the Postgres database that backs the vector store.
# The user name and password are percent-encoded first: a raw "@", ":", "/",
# "+" or "%" inside a credential would otherwise be read as URL syntax and the
# connection would fail or target the wrong host. Purely alphanumeric
# credentials are unchanged by the encoding.
_db_user = quote(str(pu.db_params['user']), safe="")
_db_password = quote(str(pu.db_params['password']), safe="")
CONNECTION_STRING = (
    f"postgresql+psycopg2://{_db_user}:{_db_password}"
    f"@{pu.db_params['host']}:{pu.db_params['port']}/{pu.db_params['dbname']}"
)
# Collection name handed to PGVector when the store is created.
COLLECTION_NAME = 'arxiv_vectors'
# Hugging Face token read from the environment; None when the variable is unset.
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")


In [None]:
# Embedding function backed by HuggingFaceInferenceAPIEmbeddings.
# Alternative kept for reference:
#   HuggingFaceEmbeddings(model_name="thenlper/gte-large")
embeddings = HuggingFaceInferenceAPIEmbeddings(
    model_name="thenlper/gte-large",
    api_key=HUGGINGFACE_API_KEY,
)

# Vector store bound to COLLECTION_NAME at CONNECTION_STRING, using the
# embedding function defined just above.
store = PGVector(
    connection_string=CONNECTION_STRING,
    collection_name=COLLECTION_NAME,
    embedding_function=embeddings,
)

In [27]:
# Base retriever over the vector store: "mmr" search type, k=10.
mmr_search_kwargs = {"k": 10}
retriever = store.as_retriever(search_kwargs=mmr_search_kwargs, search_type="mmr")

In [7]:

class CustomCohereRerank(CohereRerank):
    """CohereRerank subclass whose model config sets `arbitrary_types_allowed`."""

    # NOTE(review): presumably required so the `client` field accepts the raw
    # cohere.Client instance passed when the compressor is constructed in this
    # cell -- confirm against the installed langchain / pydantic versions.
    class Config(BaseModel.Config):
        arbitrary_types_allowed = True

# Resolve forward references on the subclass before it is instantiated.
CustomCohereRerank.update_forward_refs()

# Cohere client built from the API key found in the environment.
key = os.getenv("COHERE_API_KEY")
co = cohere.Client(key)

# Wrap the base retriever with the Cohere reranker (top_n=3).
compressor = CustomCohereRerank(top_n=3, client=co)
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)

NameError: name 'retriever' is not defined

In [4]:
# Prompt pulled from the LangChain hub. The chain assembled in this cell is
# built from `rag_prompt_custom`, not from this object.
rag_prompt = hub.pull("rlm/rag-prompt")

# Chat model that produces the final answer.
llm = ChatOpenAI(
    max_tokens=900,
    temperature=0.1,
    model_name="gpt-3.5-turbo",
)

# Custom RAG prompt. It takes two inputs, `context` and `question`, and asks
# the model to answer from the context only and to cite arxiv codes inline.
template = """Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Provide a thorough, complete but concise answer. Try to be practical and reference any existing libraries or implementations mentioned on the documents if possible.
When providing your answer add citations referencing the relevant arxiv_codes (e.g.: *reference content* (arxiv:1234.5678)).
{context}
Question: {question}
Helpful Answer:"""
rag_prompt_custom = PromptTemplate.from_template(template)

# Input mapping for the chain: the reranking retriever fills "context", while
# the incoming question is forwarded unchanged under "question".
chain_inputs = {"context": compression_retriever, "question": RunnablePassthrough()}

# inputs -> custom prompt -> chat model
rag_chain = chain_inputs | rag_prompt_custom | llm

# Run one sample question through the chain inside the OpenAI callback
# context, then print the callback object it yielded.
context_length_question = "Is there a way to extend the context length of a pre-trained LLM?"
with get_openai_callback() as cb:
    res = rag_chain.invoke(context_length_question)

print(cb)

NameError: name 'compression_retriever' is not defined

In [5]:
import utils.vector_store as vs

# Send a sample question to the project's `query_llmpedia` helper. The call is
# the cell's last expression, so its return value is displayed as the output.
codellama_question = "What is the main contribution of the CodeLlama model?"
vs.query_llmpedia(codellama_question)

'The main contribution of the CodeLlama model is that it provides a family of large language models for code, including foundation models, Python specializations, and instruction-following models, with different parameter sizes (7B, 13B, and 34B). These models have state-of-the-art performance among open models, support large input contexts, and have the ability to perform tasks such as code generation, code completion, code translation, bug fixing, code refinement, and code question answering. The CodeLlama models also show improvements on inputs with up to 100k tokens and outperform other publicly available models on benchmarks such as HumanEval, MBPP, and MultiPL-E (arxiv:2308.12950).'