In [0]:
# Install the LangChain integration for Databricks and the Vector Search
# client that the cells below import.
# NOTE(review): no versions are pinned, so a fresh run may resolve to newer
# releases than the ones this notebook was written against — consider pinning.
%pip install databricks-langchain databricks-vectorsearch
# Restart the Python process after the install.
# NOTE(review): presumably required so the newly installed packages become
# importable in this session — confirm against the Databricks documentation.
dbutils.library.restartPython()


In [0]:
from yaml import safe_load
from pyspark.sql.functions import col, lit, concat

# Parse the parameter file found one directory above the current working
# directory.
with open('../params.yml', 'r') as params_file:
    params = safe_load(params_file)

# Catalog and schema names; the cells below combine them into
# fully-qualified table and index names.
_data_params = params.get('data_params')
CATALOG = _data_params['catalog']
SCHEMA = _data_params['schema']

# Read the device status triage table with the change-data reader options set
# (starting at version 1) and derive the text that is later sent to the LLM:
# "<device> message: <telemetry_error>".
# NOTE(review): if either source column can be NULL the concatenated value may
# be NULL as well — confirm how such rows should be handled downstream.
df = (
    spark.read
    .option("readChangeData", "true")
    .option('startingVersion', '1')
    .table(f"{CATALOG}.{SCHEMA}.device_status_triage")
    .withColumn(
        'query_column',
        concat(col('device'), lit(' message: '), col('telemetry_error')),
    )
)

display(df)


In [0]:
from databricks.vector_search.client import VectorSearchClient

# Client for the Vector Search API. No token or other argument is passed
# explicitly, so the client falls back to its own defaults.
vector_search_client = VectorSearchClient()

# Look up the existing index on the shared endpoint (this fetches a handle to
# an index by name; nothing is created here).
vs_index = vector_search_client.get_index(
    endpoint_name="one-env-shared-endpoint-9",
    index_name=f"{CATALOG}.{SCHEMA}.shn_device_docs_vs",
)

# Mapping from logical field names to the index's column names.
# NOTE(review): this dict is not referenced by any other visible cell.
vector_search_schema = dict(
    primary_key="chunk_id",
    chunk_text="content_chunked",
    document_uri="doc_uri",
)

In [0]:
from databricks_langchain.vectorstores import DatabricksVectorSearch

# Wrap the index in a LangChain retriever that returns up to 5 results per
# query (k=5). `text_column` is intentionally left unset, as in the original
# cell where it was commented out.
vector_store = DatabricksVectorSearch(
    vs_index.name,
    columns=["chunk_id", "content_chunked", "doc_uri"],
).as_retriever(search_kwargs={"k": 5})

# Run a sample retrieval and print what came back.
# NOTE(review): this query looks like a leftover placeholder unrelated to the
# device documentation — confirm whether a device/error example was intended.
query = "Explain the expansion of the universe."

# `invoke` is the supported retriever entry point; `get_relevant_documents`
# is deprecated in langchain-core and scheduled for removal in 1.0, which an
# unpinned install can pull in.
retrieved_docs = vector_store.invoke(query)
for doc in retrieved_docs:
    print(f"Relevant document: {doc.metadata['doc_uri']} - {doc.page_content}")

In [0]:
def format_context(docs):
    """Render documents as ``Passage: <text>`` entries separated by blank lines.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        One string in which every entry is ``"Passage: <page_content>\\n"`` and
        consecutive entries are joined with an extra newline. An empty
        iterable yields an empty string.
    """
    passages = []
    for document in docs:
        passages.append(
            "Passage: {chunk_text}\n".format(chunk_text=document.page_content)
        )
    return "\n".join(passages)

# Flatten the retrieved documents into a single text block and print it for
# inspection.
retrieved_context = format_context(docs=retrieved_docs)
print(retrieved_context)

In [0]:
# Preview one row of the triage source table. The table name is built from the
# CATALOG / SCHEMA parameters so this preview follows params.yml like the other
# cells, instead of pointing at a hard-coded catalog and schema.
display(spark.table(f"{CATALOG}.{SCHEMA}.device_status_triage").limit(1))

In [0]:
from langchain_core.prompts import PromptTemplate
from pyspark.sql.functions import pandas_udf
from pyspark.sql.types import StringType

# Example device/error text.
# NOTE(review): no other visible line reads this variable — inside
# `process_text` the parameter of the same name shadows it.
question = "Delta D750 DC Power System Message: Battery voltage below critical threshold at 10.8V triggering low power alarm."


def process_text(question):
    """Suggest a remediation for one device/error message via retrieval + LLM.

    Passages matching ``question`` are fetched from the
    ``shn_device_docs_vs`` vector search index, formatted with
    ``format_context`` and placed in the prompt as context; the prompt is then
    sent to the ``databricks-llama-4-maverick`` chat endpoint.

    Args:
        question: Device name and error message as a single string.

    Returns:
        The text of the model's reply, or ``None`` when ``question`` is
        missing (``None``/NaN) or blank.
    """
    # Null or blank inputs carry nothing to triage; skip the retrieval and the
    # LLM call instead of sending the literal text "None" to the model.
    if not isinstance(question, str) or not question.strip():
        return None

    # Imported inside the function so the names resolve wherever it executes.
    from databricks_langchain import ChatDatabricks, DatabricksVectorSearch

    # Retrieve context for this specific question rather than relying on a
    # notebook-level context string fetched once for a single fixed query.
    retriever = DatabricksVectorSearch(
        f"{CATALOG}.{SCHEMA}.shn_device_docs_vs",
        columns=["chunk_id", "content_chunked", "doc_uri"],
    ).as_retriever(search_kwargs={"k": 5})
    context = format_context(retriever.invoke(question))

    llm_prompt_template = PromptTemplate.from_template("""
        Please find an automated and remote fix for this device and error message: {question}. Be as concise as possible, preferably using fewer than 20 words. If such a solution exists without requiring human intervention, start the response with the phrase AUTOMATED FIX followed by a 1-sentence overview. If human intervention is necessary, start the response with the phrase HUMAN INTERVENTION REQUIRED and state a single phrase overview for the course of action needed.
        Context: {retrieved_context}
        Question: {question}
    """)

    prompt = llm_prompt_template.format(retrieved_context=context, question=question)

    chat_model = ChatDatabricks(endpoint="databricks-llama-4-maverick")
    response = chat_model.invoke(prompt)
    return response.content


@pandas_udf(StringType())
def process_text_pandas_udf(dataframe):
    """Run ``process_text`` on every value of an incoming batch.

    Args:
        dataframe: The batch handed over by Spark. NOTE(review): despite the
            name this is presumably a pandas Series of query strings — confirm.

    Returns:
        The result of applying ``process_text`` element-wise; the UDF's
        declared Spark return type is string.
    """
    solutions = dataframe.apply(process_text)
    return solutions

In [0]:
from pyspark.sql.functions import current_timestamp

# Ask the LLM-backed UDF for a solution per row, keep the identifying columns
# plus the solution, and stamp each row with the current timestamp.
# NOTE(review): the DataFrame is lazy, so every action on it (the display here,
# any later write) presumably re-evaluates the UDF — confirm this is acceptable.
triage_df = (
    df
    .withColumn('solution', process_text_pandas_udf('query_column'))
    .select('tower_id', 'device_id', 'device', 'solution')
    .withColumn('event_timestamp', current_timestamp())
)

triage_df.display()

In [0]:
# Append the triage results to the triaged_devices table in the configured
# catalog and schema.
# NOTE(review): append mode adds rows on every run — confirm that re-running
# the notebook should not be idempotent.
target_table = f"{CATALOG}.{SCHEMA}.triaged_devices"
append_writer = triage_df.write.mode('append')
append_writer.saveAsTable(target_table)