In [0]:
%pip install -U -qqqq mlflow-skinny langchain==0.2.16 langgraph-checkpoint==1.0.12 langchain_core langchain-community==0.2.16 langgraph==0.2.16 pydantic langchain_databricks faker unitycatalog-ai[databricks] unitycatalog-langchain[databricks] 

# Restart the Python process so the packages installed above are the ones imported below.
# NOTE(review): several packages (mlflow-skinny, langchain_core, pydantic, ...) are unpinned, so
# results may drift between runs.
dbutils.library.restartPython()

In [0]:
import yaml

# Read the notebook parameters from params.yml (path is relative to the working directory).
# The parsed result is kept in `params` and read by the following cell via params.get('data_params').
with open('params.yml', 'r') as file:
    params = yaml.safe_load(file)

In [0]:
# Disabled setup, kept for reference:
# from pyspark.sql.functions import col, sqrt, pow, current_date
# from unitycatalog.ai.core.databricks import DatabricksFunctionClient
# from typing import Dict

# client = DatabricksFunctionClient()
# NOTE(review): later cells call client.create_python_function(...) and client.execute_function(...);
# with the two DatabricksFunctionClient lines above disabled, `client` is undefined at that point
# on a fresh kernel. Confirm whether those cells are still meant to run.

# Pull the target Unity Catalog location out of the loaded parameters.
# NOTE(review): .get() returns None for a missing key, so a params.yml without 'data_params'
# fails on the next line with AttributeError rather than a clear message.
data_params = params.get('data_params')

CATALOG = data_params.get('catalog')
SCHEMA = data_params.get('schema')


In [0]:
# Create (or replace) the Unity Catalog table function that returns the lat/lon of one tower.
#
# This runs through spark.sql with an f-string instead of a %sql cell because `${...}` in a SQL
# cell is a widget/config substitution, not a Python expression: `${data_params.get('catalog')}`
# is never evaluated, so the function name would not resolve to the configured catalog/schema.
# CATALOG and SCHEMA come from the parameter cell above.
#
# NOTE(review): the function reads {CATALOG}.{SCHEMA}.sf_towers and its `towerid` column, which
# are only written by cells further down this notebook; on a fresh workspace those cells must
# have run before this one.
spark.sql(f"""
CREATE OR REPLACE FUNCTION {CATALOG}.{SCHEMA}.get_tower_location(tower_id STRING)
RETURNS TABLE (lat DOUBLE, lon DOUBLE)
RETURN SELECT lat, lon FROM {CATALOG}.{SCHEMA}.sf_towers WHERE towerid = tower_id
""")

In [0]:
%sql
-- Rebuild the devices lookup table: one row per device with its type and a man_location path.
-- The table is replaced on every run, so earlier contents are discarded.
create or replace table telco_demos.self_healing_networks.devices (
  device_name STRING,
  device_type STRING,
  man_location STRING,
  last_updated TIMESTAMP
);
-- Seed the ten demo devices; last_updated is stamped with the insert time.
-- NOTE(review): every man_location starts with 'Volumes/...' (no leading '/'), whereas Unity
-- Catalog volume paths are normally written '/Volumes/...'. Confirm how consumers resolve these.
insert into telco_demos.self_healing_networks.devices (device_name, device_type, man_location, last_updated)
values 

        ('Device1', '5G Installation', 'Volumes/telco_demos/self_healing_networks/device_mans/5G Installation Maintenance Solutions Brochure.pdf', current_timestamp()),
        ('Device2', '5G Gateway 500G', 'Volumes/telco_demos/self_healing_networks/device_mans/5G-gateway-500G-User-manual.pdf', current_timestamp()),
        ('Device3', '5G fwa02', 'Volumes/telco_demos/self_healing_networks/device_mans/5g-fwa02-product-quick-guide.pdf', current_timestamp()),
        ('Device4', '5G Network', 'Volumes/telco_demos/self_healing_networks/device_mans/5g-network-installation-maintenance-solutions-brochures-en.pdf', current_timestamp()),
        ('Device5', 'Cisco Aironet 2.4 GHz: 5 GHz MIMO 4 Element Patch Antenna', 'Volumes/telco_demos/self_healing_networks/device_mans/CiscoAironet-2.4-GHz:5-GHz-MIMO-4-Element-Patch-Antenna.pdf', current_timestamp()),
        ('Device6', 'InHandNetwoks 5G ODU2000', 'Volumes/telco_demos/self_healing_networks/device_mans/InHandNetwoks 5G ODU2000 User Manual V1.1 202305.pdf', current_timestamp()),
        ('Device7', 'InHandNetwoks ODU2002 Install', 'Volumes/telco_demos/self_healing_networks/device_mans/InHandNetwoks ODU2002 Quick Installation Guide V1.3 202305.pdf', current_timestamp()),
        ('Device8', 'RxxxV3 NG-700800 Public Safety BDA Fiber DAS QE', 'Volumes/telco_demos/self_healing_networks/device_mans/User-Manual-RxxxV3-NG-700800-Public-Safety-BDA-Fiber-DAS-QE-1-0-2-.pdf', current_timestamp()),
        ('Device9', 'Air 6449 Radio Description', 'Volumes/telco_demos/self_healing_networks/device_mans/air-6449-radio-description_compress.pdf', current_timestamp()),
        ('Device10', 'ATT Internet Air', 'Volumes/telco_demos/self_healing_networks/device_mans/att-internet-air-en-ug-v5.pdf', current_timestamp());

        -- OneAdvisor-800

In [0]:
# Tools required: OneAdvisor-800, which helps diagnose:
# Fiber connections (using OTDR modules)
# Cable and Antenna systems (using CAA modules)
# Over-the-air testing (using spectrum analyzer modules)
# RFoCPRI, PIM detection, and interference testing

In [0]:
%sql
-- Table function: given a point (tech_lat, tech_lon), find the field technician whose stop
-- scheduled for today lies closest to it, and return all of that technician's stops for today.
-- "Closest" is the straight-line distance computed directly on the lat/lon values, so
-- `distance` is expressed in coordinate units rather than a physical length.
CREATE OR REPLACE FUNCTION telco_demos.self_healing_networks.get_nearest_fieldtech(tech_lat DOUBLE, tech_lon DOUBLE)
RETURNS TABLE (name STRING, stop_number INT, date DATE, lat DOUBLE, lon DOUBLE, distance DOUBLE)
RETURN
WITH stops_today AS (
  -- Today's route stops, excluding rows flagged 'not scheduled', each with its distance to the point.
  SELECT
    *,
    SQRT(POW(lat - tech_lat, 2) + POW(lon - tech_lon, 2)) AS distance
  FROM telco_demos.self_healing_networks.fieldtech_route
  WHERE date = current_date()
    AND random_text <> 'not scheduled'
)
SELECT name, stop_number, date, lat, lon, distance
FROM stops_today
-- Keep every stop belonging to the technician who owns the single nearest stop.
WHERE name = (SELECT name FROM stops_today ORDER BY distance ASC LIMIT 1);


In [0]:
%sql
-- Smoke test: call the table function with a sample coordinate pair.
SELECT *
FROM telco_demos.self_healing_networks.get_nearest_fieldtech(37.7749, -122.4194)

In [0]:

# Register the Python callable `get_closest_fieldtech` as a function in CATALOG.SCHEMA,
# replacing any existing version, then call the local callable once with sample coordinates.
# NOTE(review): no cell visible in this notebook defines `get_closest_fieldtech`, so this cell
# raises NameError on a fresh kernel; it also needs `client` to be an object that provides
# create_python_function(). Confirm both exist before this cell runs, or remove the cell.
result = client.create_python_function(
  func=get_closest_fieldtech,
  catalog=CATALOG,
  schema=SCHEMA,
  replace=True
)

# print(result)
# Example usage
lat, lon = 37.7749, -122.4194  # Example coordinates
closest_fieldtech = get_closest_fieldtech(lat, lon)
display(closest_fieldtech)

In [0]:
# Invoke the registered function by its fully qualified name with sample coordinates.
# NOTE(review): the longitude here (-122.416) differs from the -122.4194 used in the other
# example calls in this notebook; confirm whether that is intentional.
result = client.execute_function(
    function_name=f"{CATALOG}.{SCHEMA}.get_closest_fieldtech",
    parameters={"lat": 37.7749, "lon": -122.416}
)
# lat, lon = 37.7749, -122.4194  # Example coordinates

print(result)  # NOTE(review): an earlier note here recorded "45861.4" as the output; unverified


In [0]:
import mlflow
from mlflow.models import ModelConfig

# Turn on MLflow autologging for LangChain calls made from this notebook.
mlflow.langchain.autolog()
# Load the agent settings from config.yml; the next cell reads "llm_endpoint" from it.
config = ModelConfig(development_config="config.yml")

In [0]:
from langchain_community.chat_models import ChatDatabricks
# NOTE(review): UCFunctionToolkit and WorkspaceClient are imported but not used in any visible cell.
from langchain_community.tools.databricks import UCFunctionToolkit
from databricks.sdk import WorkspaceClient

# Create the chat model, pointing it at the serving endpoint named by "llm_endpoint" in config.yml.
llm = ChatDatabricks(endpoint=config.get("llm_endpoint"))

In [0]:
# Ask the LLM to draft the message for the field technician.
# The prompt interpolates FIELDTECH, TELEMETRY, DEVICEFAILURE, IMPACT and RESOLUTION, which must
# already be defined in the kernel; no visible cell assigns them.
llm.invoke(
    f"You are a mobile provider support agent reaching out to field technician named {FIELDTECH} to resolve an issue. "
    f"The telemetry says {TELEMETRY} which can cause an outage for {DEVICEFAILURE}. "
    f"If the network suffers an outage that would impact up to {IMPACT} people. "
    f"The documentation says the solution is {RESOLUTION}. "
    f"Please communicate this to {FIELDTECH}."
)

In [0]:

#     mobilelocations = mobilelocations.withColumn(col, mobilelocations[col].alias(col).comment(comment))
# <radio>	string	Network type. One of the strings GSM, UMTS, LTE or CDMA.
# <mcc>	integer	Mobile Country Code, for example 260 for Poland.
# <net>	integer	For GSM, UMTS and LTE networks, this is the Mobile Network Code (MNC).
# For CDMA networks, this is the System IDentification number (SID).
# <area>	integer	Location Area Code (LAC) for GSM and UMTS networks.
# Tracking Area Code (TAC) for LTE networks.
# Network IDentification number (NID) for CDMA networks.
# <cell>	integer	Cell ID (CID) for GSM and LTE networks.
# UTRAN Cell ID / LCID for UMTS networks, which is the concatenation of 2 or 4 bytes of Radio Network Controller (RNC) code and 4 bytes of Cell ID.
# Base station IDentifier number (BID) for CDMA networks.
# <unit>	integer	Primary Scrambling Code (PSC) for UMTS networks.
# Physical Cell ID (PCI) for LTE networks. An empty value for GSM and CDMA networks.
# <lon>	double	Longitude in degrees between -180.0 and 180.0
# changeable=1: average of longitude values of all related measurements
# changeable=0: exact GPS position of the cell tower
# <lat>	double	Latitude in degrees between -90.0 and 90.0
# changeable=1: average of latitude values of all related measurements
# changeable=0: exact GPS position of the tower
# <range>	integer	Estimate of cell range, in meters.
# <samples>	integer	Total number of measurements assigned to the cell tower
# <changeable>	integer	Defines if coordinates of the cell tower are exact or approximate.
# changeable=1: the GPS position of the cell tower has been calculated from all available measurements
# changeable=0: the GPS position of the cell tower is precise - no measurements have been used to calculate it.
# <created>	integer	The first time when the cell tower was seen and added to the OpenCellID database.
# A date in timestamp format:
# number of seconds since the UTC Unix Epoch of 1970-01-01T00:00:00Z
# For example 1409522613 is the timestamp for 2014-08-31T22:03:33Z.
# <updated>	integer	The last time when the cell tower was seen and updated.
# A date in timestamp format:
# number of seconds since the UTC Unix Epoch of 1970-01-01T00:00:00Z
# For example 1409522613 is the timestamp for 2014-08-31T22:03:33Z.
# <averageSignal>

In [0]:
from pyspark.sql.functions import col, from_unixtime

# Read the raw cell-tower table and turn its two epoch-second columns into timestamps.
# `created` and `updated` are overwritten in place; all other columns pass through unchanged.
mobilelocations = (
    spark.table("telco_demos.self_healing_networks.mobilelocations")
    .withColumn("created", from_unixtime(col("created")).cast("timestamp"))
    .withColumn("updated", from_unixtime(col("updated")).cast("timestamp"))
)

display(mobilelocations)

In [0]:
from pyspark.sql.functions import monotonically_increasing_id, row_number
from pyspark.sql.window import Window

# Take 10 rows, number them 1..10 as `deviceID`, and save them as the sf_towers table.
# NOTE(review): `sf_mobilelocations` is only assigned in the next cell (by reading sf_towers,
# the table this cell writes), so on a fresh kernel this line raises NameError. The cell that
# derived it from `mobilelocations` appears to be missing; confirm the intended source.
sf_sampled = sf_mobilelocations.orderBy(monotonically_increasing_id()).limit(10)
# The window has no partitionBy, so row_number() is computed over the whole (10-row) frame.
windowSpec = Window.orderBy(monotonically_increasing_id())
sf_sampled = sf_sampled.withColumn("deviceID", row_number().over(windowSpec))
# No write mode is given, so this uses the default mode; re-running after the table exists
# is expected to fail. TODO confirm whether overwrite is intended.
sf_sampled.write.saveAsTable("telco_demos.self_healing_networks.sf_towers")
display(sf_sampled)

Databricks visualization. Run in Databricks to view.

In [0]:
# Load the saved tower sample back from the sf_towers table and show it.
sf_mobilelocations = spark.table("telco_demos.self_healing_networks.sf_towers")
display(sf_mobilelocations)

In [0]:
from pyspark.sql.functions import expr

# Give every tower row a generated UUID in a new `towerid` column, then overwrite sf_towers
# with the widened frame (mergeSchema lets the table accept the added column).
sf_mobilelocations_with_id = sf_mobilelocations.withColumn("towerid", expr("uuid()"))
(
    sf_mobilelocations_with_id.write
    .mode('overwrite')
    .option('mergeSchema', 'true')
    .saveAsTable("telco_demos.self_healing_networks.sf_towers")
)

In [0]:
from yaml import safe_load
# Parse scenarios.yml (path relative to the working directory) into `scenarios_data`;
# a later cell reads its 'prompts' entry.
with open('scenarios.yml', 'r') as scenarios:
    scenarios_data = safe_load(scenarios)

In [0]:
%sql
-- Remove any previous scenarios table so the next cell recreates it from scratch.
drop table if exists telco_demos.self_healing_networks.scenarios

In [0]:
# Turn the 'prompts' entries from scenarios.yml into a DataFrame, naming column `_1` as
# `scenario`, and save it as the scenarios table.
# NOTE(review): the rename only has an effect if the inferred column is called `_1`
# (e.g. one-element tuples/lists); verify the shape of scenarios_data['prompts'].
scens = spark.createDataFrame(scenarios_data['prompts']).withColumnRenamed('_1', 'scenario')
scens.write.mode('overwrite').option('mergeSchema', 'true').saveAsTable("telco_demos.self_healing_networks.scenarios")

In [0]:
%sql
-- Scalar function: return one scenario string picked at random from the scenarios table.
CREATE OR REPLACE FUNCTION telco_demos.self_healing_networks.get_scenario()
RETURNS STRING
RETURN
  SELECT scenario
  FROM telco_demos.self_healing_networks.scenarios
  ORDER BY rand()
  LIMIT 1

In [0]:
%sql
-- Smoke test: each call should return one scenario drawn at random.
SELECT telco_demos.self_healing_networks.get_scenario()

For Keerthi
Each scenario should name a device and include telemetry that indicates a failure.

Goal: 
This should retrieve the information to fix the issue from the VS




In [0]:
%pip install -U -qqqq databricks-agents mlflow langchain==0.2.16 langgraph-checkpoint==1.0.12  langchain_core langchain-community==0.2.16 langgraph==0.2.16 pydantic unitycatalog-ai[databricks] unitycatalog-langchain[databricks] langchain_databricks

# Restart the Python process so the packages installed above take effect.
# This clears all Python variables defined by earlier cells.
dbutils.library.restartPython()

In [0]:
from databricks.agents.evals import generate_evals_df
from pyspark.sql.functions import concat, lit, col, monotonically_increasing_id

# Load the chunked device manuals as the source documents for synthetic evaluation data.
# Only two columns are kept: the chunk text (renamed to `content`) and `doc_uri`.
# No sampling is applied here; every chunk in the table is passed on.
device_mans = (
    spark.table('telco_demos.self_healing_networks.device_manuals_chunked_docs')
    .select(
        col('content_chunked').alias('content'),
        col('doc_uri'),
    )
)
display(device_mans)

In [0]:
# Define the synthetic data generation

# Free-text guidance passed to generate_evals_df as `question_guidelines`: who is asking,
# example statements, and style constraints for the generated requests.
question_guidelines = """
# User Persona
- A mobile service provider field technician who finds device failures and is looking at the device manual for a fix
- An engineer who is designing protocols for preventative maintenance and faster device repair for better network uptime

# Example statements
- Dirty fiber connectors or end-faces can cause signal impairment. Use a fiber microscope to inspect end-faces. Tools like VIAVI P5000i and Fiberchek probe integrate with OneAdvisor-800 to inspect and certify that connections are clean.
- Antenna alignment can prevent RF coverage from meeting design requirements; the RF Vision tool can help you accurately perform antenna alignment.
- Oscillation Gain Reduction or Shutdown Alarms do not clear themselves and require manual reset and troubleshooting of the underlying issue
- Common Baseband Connection Problems include improper port pairing (ports must be connected in pairs: 1-2, 3-4), mixed mode within port pairs not supported, connection issues between LTE and NR baseband units, and eCPRI interface failures (25 Gbps connection)

# Additional Guidelines
- Statement should be succinct with the goal of expediting the field technician's work.
- The question should be generic, use the documents as a generalized framework to ask questions about maintenance and repairs.
- Don't talk about installations or make references to calling a vendor's sales rep.
- The question should be written in the first person
"""

# Short description of the agent under evaluation, passed as `agent_description`.
agent_description = """
The Agent is generating error messages for something that has gone wrong with a mobile service provider's tower devices. The primary job is to identify what telemetry might indicate device failure and what steps need to be taken to diagnose and make the repair. 
"""

# Generate 45 synthetic evaluation rows from the device-manual chunks loaded above.
# This calls an external service, so the result can differ between runs.
eval_df = generate_evals_df(docs=device_mans, num_evals=45, agent_description=agent_description, question_guidelines=question_guidelines)
display(eval_df)

In [0]:
# Show the generated evaluation set.
# `eval_df` is used as a pandas DataFrame in this notebook (see eval_df["request"].tolist()
# below); pandas frames have no .display() method, so the display() helper is used instead,
# matching the cell that creates eval_df.
display(eval_df)

In [0]:
%sql
-- Drop any previous evaluation dataset table; a later cell calls
-- datasets.create_dataset() with this same name.
drop table if exists telco_demos.self_healing_networks.review_app_dataset

In [0]:
import mlflow
from mlflow.deployments import get_deploy_client
from databricks.agents import datasets

# Name of the serving endpoint that answers the generated requests.
agent_endpoint = 'databricks-claude-sonnet-4'

# Guidelines attached to every evaluation row in the cells below.
guidelines = [
    'The retrieved content from the vendor documents must be contextually relevant to the user request.',
    'The retrieved content must be relevant to conducting device maintenance, diagnostics and repair.',
    'Avoid any talk about weapons or anything that might trigger a guardrail.'
]

# Extract the text of the first message from each generated request.
request_list = [
    cont['messages'][0]['content'] for cont in eval_df["request"].tolist()
]

# NOTE: `client` here is an MLflow deployments client, assigned at notebook top level.
client = get_deploy_client()
endpoint = agent_endpoint
# Filled by the next cell with one {"request", "response"} dict per successful call.
agent_output_list = []


In [0]:
# Send every generated request to the serving endpoint and collect the answers, then build
# (a) `evals`: request/response/guidelines rows for mlflow.evaluate, and
# (b) a Unity Catalog evaluation dataset holding the requests for the review app.

# Start from an empty list so re-running this cell does not append duplicate rows.
agent_output_list = []
failed_requests = []

for request in request_list:
    try:
        output = client.predict(
            endpoint=endpoint,
            inputs={"messages": [{"role": "user", "content": request}]}
        )
        agent_output_list.append({
            "request": request,
            "response": output['choices'][0]['message']['content']
        })
    except Exception as e:
        # Best-effort: one failing request must not abort the batch, but it is reported
        # rather than silently dropped so a shrunken evaluation set is noticed.
        failed_requests.append(request)
        print(f"Skipping request after endpoint error ({type(e).__name__}: {e}): {str(request)[:80]!r}")

if failed_requests:
    print(f"{len(failed_requests)} of {len(request_list)} requests failed and were excluded from the evaluation set.")

# Rows consumed by mlflow.evaluate in the next cell.
evals = [{
    "request": output['request'],
    "response": output['response'],
    "guidelines": guidelines
} for output in agent_output_list]

# Create the evaluation dataset table and insert the requests in chat-message form.
# Only requests that received a response are inserted.
uc_eval_dataset = 'telco_demos.self_healing_networks.review_app_dataset'
dataset = datasets.create_dataset(uc_eval_dataset)
evals_app = [{
    "request": {'messages': [{"role": "user", "content": output['request']}]},
    "guidelines": guidelines
} for output in agent_output_list]

dataset.insert(evals_app)

In [0]:
# Score the collected request/response rows with the "databricks-agent" evaluator inside a
# named MLflow run, then show the per-row results table.
mlflow.langchain.autolog()

with mlflow.start_run(run_name="Field-Tech-Instruction"):
    eval_results = mlflow.evaluate(
        data=evals,
        model_type="databricks-agent",
        evaluator_config={
            "databricks-agent": {
                # "groundedness" and custom metrics are not enabled for this run.
                "metrics": [
                    "guideline_adherence",  # check responses against the `guidelines` on each row
                    "relevance_to_query",  # check that the response is relevant to the user's query
                ],
            },
        },
    )
    display(eval_results.tables['eval_results'])

In [0]:
from databricks import agents

# Register the logged agent under a Unity Catalog model name and deploy that version.
# NOTE(review): `logged_agent_info` is not assigned in any visible cell of this notebook, so
# this raises NameError on a fresh kernel; confirm where the agent is logged.
model_name = "telco_demos.self_healing_networks.field_tech_deployment_agent" # Change to a different model name if desired
uc_registered_model_info = mlflow.register_model(model_uri=logged_agent_info.model_uri, name=model_name)
# UC_MODEL_NAME = f"{catalog_name}.{schema_name}.{model_name}"
agents.deploy(model_name, uc_registered_model_info.version, tags = {"endpointSource": "playground"})

In [0]:
# Fetch the review app and attach the serving endpoint to it under the name 'databricks-agent'.
# NOTE(review): the following cell performs the same two calls through an explicit
# `review_app` import; one of the two cells is probably redundant.
my_app = agents.review_app.get_review_app()
my_app.add_agent(agent_name = 'databricks-agent',
                 model_serving_endpoint='databricks-claude-sonnet-4')

In [0]:
from databricks.agents import review_app

# Fetch the review app and attach the serving endpoint to it under the name 'databricks-agent'.
my_app = review_app.get_review_app()
my_app.add_agent(agent_name = 'databricks-agent',
                 model_serving_endpoint='databricks-claude-sonnet-4')

In [0]:
# Print the review app's chat page address (the app URL with "/chat" appended).
print(my_app.url + "/chat") 

In [0]:
# Select the MLflow experiment named "Field-Tech-Instruction".
# NOTE(review): this runs after the evaluation run above was already started, so it does not
# affect that run; if the intent is to scope the evaluation, it should run first. Confirm.
mlflow.set_experiment("Field-Tech-Instruction")
