In [1]:
from llama_index.llms.openai import OpenAI
from llama_index.query_engine import CitationQueryEngine
from llama_index import VectorStoreIndex, ServiceContext, StorageContext
from llama_index.response.notebook_utils import display_response
from llama_hub.semanticscholar.base import SemanticScholarReader
import openai
from IPython.display import Markdown
import os


In [2]:
from langchain.embeddings import DeepInfraEmbeddings
from langchain.llms import DeepInfra

DEEPINFRA_API_TOKEN = os.environ["DEEPINFRA_API_TOKEN"] 
embed_model = DeepInfraEmbeddings(
    model_id="sentence-transformers/multi-qa-mpnet-base-dot-v1",
    deepinfra_api_token=DEEPINFRA_API_TOKEN
)

llm = DeepInfra(model_id="meta-llama/Llama-2-70b-chat-hf")


In [3]:
service_context = ServiceContext.from_defaults(
        llm = llm,
        embed_model=embed_model
)
s2reader = SemanticScholarReader()

# narrow down the search space
query_space = "biases in large language models"

# increase limit to get more documents
documents = s2reader.load_data(query=query_space, limit=10)

index = VectorStoreIndex.from_documents(documents, service_context=service_context)

query_engine = CitationQueryEngine.from_args(
    index,
    similarity_top_k=3,
    citation_chunk_size=512,
    streaming=True,
)


In [4]:

# query the index
query_string = "explain all the biases in large language models in a markdown table"
# query the citation query engine
response = query_engine.query(query_string)


ValueError: LLM must support streaming.