# LlamaIndex / Gradient Integration

In [None]:
%pip install llama-index --quiet
%pip install gradientai --quiet

In [None]:
import os

# Register both Gradient settings in the process environment in one call.
# GRADIENT_ACCESS_TOKEN and GRADIENT_WORKSPACE_ID are read back later in this
# notebook when the embedding model is configured.
# NOTE(review): the brace-wrapped values are literal strings, presumably
# template placeholders to be replaced with real credentials before running.
os.environ.update(
    {
        "GRADIENT_ACCESS_TOKEN": "{GRADIENT_ACCESS_TOKEN}",
        "GRADIENT_WORKSPACE_ID": "{GRADIENT_WORKSPACE_ID}",
    }
)

## Example 1: Query Gradient LLM directly

In [None]:
from llama_index.llms import GradientBaseModelLLM

# Create the LLM handle used by the rest of the notebook, backed by the
# "llama2-7b-chat" base model slug.
# You can also use a model adapter you've trained with GradientModelAdapterLLM
# NOTE(review): max_tokens=400 presumably caps the length of each generated
# completion; confirm against the GradientBaseModelLLM documentation.
llm = GradientBaseModelLLM(
    base_model_slug="llama2-7b-chat",
    max_tokens=400,
)

In [None]:
# Send one free-form prompt straight to `llm` (no retrieval involved) and
# print whatever the completion call returns.
result = llm.complete("Can you tell me about large language models?")
print(result)

## Example 2: Retrieval Augmented Generation (RAG) with Gradient embeddings

In [None]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.embeddings import GradientEmbedding

### Load Documents

In [None]:
# Load the contents of the essay data directory into `documents`.
# The path is relative, so it resolves against the notebook's current working
# directory. NOTE(review): confirm the data exists there before running.
documents = SimpleDirectoryReader("../paul_graham_essay/data").load_data()
# Report how many documents were loaded as a quick sanity check.
print(f"Loaded {len(documents)} document(s).")

### Configure Gradient embeddings

In [None]:
# Embedding model backed by the "bge-large" Gradient model slug. The
# credentials are read from the environment; indexing os.environ directly
# raises KeyError if either variable has not been set.
embed_model = GradientEmbedding(
    gradient_access_token=os.environ["GRADIENT_ACCESS_TOKEN"],
    gradient_workspace_id=os.environ["GRADIENT_WORKSPACE_ID"],
    gradient_model_slug="bge-large",
)

# Bundle the LLM and the embedding model into one service context, which is
# handed to the index when it is built.
# NOTE(review): chunk_size=1024 is presumably the size used when splitting
# documents into chunks; confirm against the ServiceContext docs.
service_context = ServiceContext.from_defaults(
    chunk_size=1024, llm=llm, embed_model=embed_model
)

### Set Up and Query the Index

In [None]:
# Build a vector store index over the loaded documents, using the service
# context configured earlier (Gradient LLM + Gradient embeddings).
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
# Obtain a query engine from the index; no arguments are passed, so the
# library defaults apply.
query_engine = index.as_query_engine()

In [None]:
# Ask a question about the loaded documents through the query engine and
# print the response it returns.
response = query_engine.query("What did the author do after his time at Y Combinator?")
print(response)