## Building a RAG Application with Databricks

![rag_app](./Assets/rag_app.png)

### Installing Utilities and Libraries

In [None]:
%pip install databricks-vectorsearch==0.63 openai==1.69.0 mlflow==3.0.1

### Restarting our Python Kernel

In [None]:
# Restart the Python process before importing anything below.
# NOTE(review): presumably required so the versions pinned in the %pip cell
# are the ones that get imported — confirm for your runtime.
dbutils.library.restartPython()

### Creating the RAG Model using MLflow

In [None]:
import mlflow
from mlflow import pyfunc
from openai import OpenAI

class RAGModel(pyfunc.PythonModel):
    """Retrieval-augmented generation model packaged as an MLflow pyfunc.

    For each request the model runs a similarity search against the vector
    index it was constructed with, then sends the question together with the
    raw search result to a chat-completions endpoint.
    """

    def __init__(self, vector_index):
        """Store the vector index handle used by :meth:`retrieve`."""
        self.vector_index = vector_index

    def retrieve(self, query):
        """Run a similarity search for ``query`` and return the raw result.

        The search asks for the ``id``, ``content_path`` and ``chunk``
        columns and for 10 results; the response is returned unmodified.
        """
        wanted_columns = ["id", "content_path", "chunk"]
        return self.vector_index.similarity_search(
            query_text=query,
            columns=wanted_columns,
            num_results=10,
        )

    def chatCompletionsAPI(self, user_query, supporting_knowledge):
        """Ask the chat model to answer ``user_query``.

        ``supporting_knowledge`` is interpolated into the user message as-is.
        Returns the message content of the first choice in the response.
        """
        # Both values are placeholders that must be replaced before running.
        # NOTE(review): these literals live inside the class that is later
        # saved with the model — consider whether a real token belongs here.
        client = OpenAI(
            api_key="YOUR_DATABRICKS_ACCESS_TOKEN",
            base_url="YOUR_DATABRICKS_WORKSPACE_HOSTNAME/serving-endpoints",
        )

        system_message = {
            "role": "system",
            "content": [
                {
                    "type": "text",
                    "text": "You are a helpful assistant. You will be passed the user query and the supporting knowledge that can be used to answer the user_query",
                },
            ],
        }
        user_message = {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": f"user query : {user_query} and supporting knowledge: {supporting_knowledge}",
                },
            ],
        }

        response = client.chat.completions.create(
            model="databricks-claude-haiku-4-5",
            messages=[system_message, user_message],
        )
        first_choice = response.choices[0]
        return first_choice.message.content

    def predict(self, context, data):
        """Answer the question in the first row of ``data["user_query"]``.

        Only the first row is read; ``context`` is not used.
        """
        question = data["user_query"].iloc[0]
        retrieved = self.retrieve(question)
        return self.chatCompletionsAPI(question, retrieved)


### Fetching our Vector Index with the Mosaic AI Vector Search Client

In [None]:
from databricks.vector_search.client import VectorSearchClient

# Client used to look up Vector Search indexes; constructed with no arguments here.
vector_client = VectorSearchClient()

# Use fully qualified index name: catalog.schema.index_name
# (YOUR_UNITY_CATALOG_NAME is a placeholder and must be replaced.)
vector_index = vector_client.get_index(
    index_name="YOUR_UNITY_CATALOG_NAME.rag.rag_vector_index" # make sure this matches your vector index in Unity Catalog
)

### Saving Our Model

In [None]:
# Wrap the fetched vector index in the RAG model; this instance is the one
# that gets saved with mlflow.pyfunc.save_model further down.
test_model = RAGModel(vector_index=vector_index)

In [None]:
from mlflow.models import infer_signature
import pandas as pd

# Example request: a single row whose "user_query" column holds the question.
input_example = pd.DataFrame({"user_query": ["Hi How are you?"]})

# Example response, used only to derive the output side of the signature.
output_example = pd.DataFrame({"predictions": ["I am good thank you!"]})

# Derive the input and output schema from the two example frames.
signature = infer_signature(input_example, output_example)

# Local directory the pyfunc model is written to.
model_path = "rag-model"

mlflow.pyfunc.save_model(
    path=model_path,
    python_model=test_model,
    signature=signature,
)

### Loading our Saved Model

In [None]:
# Load the custom pyfunc model back from the local directory (model_path)
# it was saved to above.
loaded_pyfunc_model = mlflow.pyfunc.load_model(model_path)

### Testing our Loaded/Saved Model

In [None]:
# Single-row frame with the "user_query" column that RAGModel.predict reads.
model_input = pd.DataFrame([{"user_query": "what is the carbonops ESG Intelligence Model? Give Citations too"}])

# Run the question through the reloaded model.
model_response = loaded_pyfunc_model.predict(model_input)

print(model_response)

### Logging our Saved Model as an Artifact

In [None]:
import mlflow

# Filled in inside the run below; later cells read it to build the model URI.
run_id = None

# Upload the saved model directory to a new run under the "rag-model" path.
with mlflow.start_run() as active_run:
    mlflow.log_artifacts(local_dir=model_path, artifact_path="rag-model")
    run_id = active_run.info.run_id
    print(f"Model logged with run ID: {run_id}")
     

### Registering our RAG Model in Unity Catalog

In [None]:
# Unity Catalog addresses registered models by a three-level name
# (catalog.schema.model), so the bare name "rag-model" cannot be registered
# there. Target the Unity Catalog registry explicitly and use a fully
# qualified name in the same catalog/schema as the vector index.
mlflow.set_registry_uri("databricks-uc")

mlflow.register_model(
    model_uri=f"runs:/{run_id}/rag-model",
    name="YOUR_UNITY_CATALOG_NAME.rag.rag_model",  # replace the catalog placeholder
)

### Inferencing the Deployed Real-Time Endpoint
Use the sample input below when testing your endpoint from the UI.

In [None]:

{
  "dataframe_split": {
    "columns": [
      "user_query"
    ],
    "data": [
      [
        "tell me something about carbonops ESG intelligence model. Also state the citations"
      ]
    ]
  }
}