In [0]:
%pip install -U -qqq langchain_core langchain_databricks langchain_community
%restart_python

In [0]:

import pandas as pd
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_databricks import ChatDatabricks
from databricks.sdk import WorkspaceClient
import os


In [0]:
# Read the CSV stored in the workspace into a pandas DataFrame.
df = pd.read_csv("/Workspace/data/stratified_upsample.csv")

In [0]:
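# Disabled by default. Uncomment to (over)write the DataFrame to the Unity Catalog
# table `workspace.default.stratified_upsample` as a Delta table:
# spark_df = spark.createDataFrame(df)
# spark_df.write.format("delta").mode("overwrite").saveAsTable("workspace.default.stratified_upsample")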

In [0]:
%sql
-- Preview five rows of the table.
SELECT * FROM workspace.default.stratified_upsample
-- NOTE(review): the disabled filter below refers to `location`,
-- `host_number_of_reviews` and `reviews`; it looks left over from a different
-- example. Confirm these columns exist in this table before re-enabling it,
-- otherwise remove it.
-- WHERE
--   location = 'Chicago, Illinois, United States'
--   AND host_number_of_reviews > 1000
--   AND EXISTS(reviews, review -> review ILIKE '%wheelchair%')
LIMIT 5;

In [0]:
# Export the workspace URL and a short-lived access token as environment
# variables (presumably what ChatDatabricks uses to authenticate; confirm).
w = WorkspaceClient()

workspace_host = w.config.host
os.environ["DATABRICKS_HOST"] = workspace_host

# A new token is minted on every run of this cell; it expires after
# 1200 seconds (20 minutes).
serving_token = w.tokens.create(
    comment="for model serving",
    lifetime_seconds=1200,
)
os.environ["DATABRICKS_TOKEN"] = serving_token.token_value

# Chat model client bound to the serving endpoint named below.
llm = ChatDatabricks(endpoint="databricks-llama-4-maverick")

def format_context(df: pd.DataFrame) -> str:
    """
    Serialise a DataFrame as a pretty-printed JSON array, one object per row.

    The whole frame is emitted (nothing is truncated the way a printed
    DataFrame repr would be), so every value reaches the model prompt.

    Args:
        df: Frame whose rows should be serialised.

    Returns:
        JSON text in ``records`` orientation, indented by two spaces.
    """
    json_options = {"orient": "records", "indent": 2}
    return df.to_json(**json_options)

def find_accessible_airbnb_properties(location: str) -> str:
  """
  Fetch up to five `llm_data` values from the table and return them as JSON text.

  Runs a fixed Spark SQL query against `workspace.default.stratified_upsample`
  and serialises the resulting rows with `format_context`. The query has no
  WHERE or ORDER BY clause, so it does no filtering and which rows come back
  is unspecified.

  Args:
    location: Accepted for interface compatibility but NOT used by the query;
      the same query runs whatever value is passed.

  Returns:
    A JSON string (array of `{"llm_data": ...}` records), not a DataFrame.
  """
  # NOTE(review): the function name and the `location` parameter look left over
  # from an earlier example; the name is kept so existing callers keep working.
  # Plain string: the query contains no placeholders, so no f-string is needed.
  query = """
    SELECT
      llm_data
    FROM workspace.default.stratified_upsample
    LIMIT 5
  """
  return format_context(spark.sql(query).toPandas())

# Instruction text for the model. `{context}` is the only placeholder and is
# filled with the patient data when the chain runs.
OSTEOPOROSIS_RISK_PROMPT = """
  You are a medical assistant AI. Read the following information about ONE patient and classify the patient's osteoporosis risk level as exactly ONE of the following: Low, Medium, or High.

Important rules:
- Respond with ONE WORD ONLY: Low, Medium, or High.
- Do NOT provide any explanation, reasoning, or extra text.
- There is only ONE patient — do not describe multiple records.

The patient information includes these details:
- History of hip fracture (Yes/No)
- History of spine fracture (Yes/No)
- Other adult fractures (Yes/No)
- Age at fracture (number)
- Specific bones fractured (list)
- DEXA scan performed (Yes/No, with date and location if available)
- Fragility fracture after age 45 (Yes/No)
- Low-trauma fracture at younger age (Yes/No)
- Menopause status (Yes/No, age of onset, type if specified)
- Ovaries removed (Yes/No)
- Family history of osteoporosis (Yes/No)
- Parental history of hip fracture (Yes/No)
- Current smoking status (Yes/No)
- History of smoking (Yes/No)
- Excessive alcohol intake (Yes/No)
- History of falls (Yes/No)
- High-calcium diet (Yes/No)
- Calcium supplements (Yes/No)
- Vitamin D supplements (Yes/No)
- Long-term steroid use (Yes/No)
- Estrogen therapy (Yes/No or details)
- Osteoporosis medications taken (Yes/No)
- SSRI usage (Yes/No)
- PPI usage (Yes/No)
- Rheumatoid arthritis (Yes/No)
- Hyperthyroidism (Yes/No)
- Crohn’s or celiac disease (Yes/No)
- Kidney disease or dialysis (Yes/No)
- COPD (Yes/No)
- HIV/AIDS (Yes/No)
- Depression (Yes/No)
- Diabetes (Yes/No)
- Recent weight loss (Yes/No)
- Height loss (Yes/No)
- Spine surgery (Yes/No)
- Hip surgery (Yes/No)
- Gastric surgery (Yes/No)
- Currently pregnant (Yes/No or unspecified)

PATIENT INFORMATION:
  Here is the patient data:
  {context}
  """

# Build the reusable prompt object from the text above.
prompt_template = PromptTemplate.from_template(OSTEOPOROSIS_RISK_PROMPT)

# `llm` is already created earlier in this cell with the same endpoint, so it
# is not instantiated a second time here.

# Classification chain: patient description -> prompt -> model -> plain string.
#
# The string passed to `chain.invoke(...)` is the patient description. The
# prompt has a single input variable, so a bare string input is bound to
# `{context}`.
#
# The chain previously started with `find_accessible_airbnb_properties`, which
# ignores its argument and always returns five rows from the table. The patient
# text given to `invoke` was therefore discarded, and the model received five
# records under a prompt that states there is only ONE patient.
# NOTE(review): if classifying table rows is wanted, feed them one row at a
# time through this same chain rather than as a five-row batch.
chain = (
    prompt_template
    | llm
    | StrOutputParser()
)

In [0]:
# Classify one patient: the free-text description below is passed to the chain
# as its input, and the chain's string output is stored in `result`.
result = chain.invoke("The patient has reported a history of hip fracture Yes, spine fracture No, and other adult fractures No, with the fracture occurring at the age of 50. The specific bone that was fractured was the Hip (Proximal Femur). A DEXA scan has previously been performed Yes, with the most recent scan details noted as 2020 at General Hospital. The patient has experienced a fragility fracture after age 45 Yes and a low-trauma fracture at a younger age No. In terms of reproductive history, the patient is currently in menopause , which began at the age of , and is classified as  menopause. The ovaries have been removed . There is a family history of osteoporosis No and a parental history of hip fracture Yes. The patient's smoking status is currently No, with a history of smoking described as No. Alcohol intake is noted as excessive: No, and there is a history of falls Yes. The patient also has a history of No. Nutritional intake includes a high-calcium diet No, along with the use of calcium supplements Yes and vitamin D supplements No. Medically, the patient has a history of long-term steroid use Yes, estrogen therapy , and has taken osteoporosis medications No. The SSRI usage for the patient is No and PPI usage is Yes. Relevant medical conditions include rheumatoid arthritis No, hyperthyroidism No, Crohn’s or celiac disease No, kidney disease or dialysis No, COPD No, HIV/AIDS No, depression No, and diabetes No. The patient has recently experienced weight loss No and height loss No, and has undergone spine surgery No or hip surgery No or gastric surgery Yes. Lastly, it is noted whether the patient is currently pregnant .")

In [0]:
result

In [0]:
# Step 1: Install Git (if not already installed)
# `-y` answers the install prompt automatically so the cell does not block.
# NOTE(review): requires sudo rights on the cluster; confirm this is allowed.
!sudo apt-get install -y git

In [0]:
# Step 2: Configure Git
!git config --global user.name "sidak1701"
!git config --global user.email "ssidak52@gmail.com"

# Step 3: Initialize a Git repository
!git init

# Step 4: Add files to the repository
!git add .

# Step 5: Commit changes
!git commit -m "Initial commit of the notebook"

# Step 6: Push to GitHub
# Replace <GitHubRepoURL> with your GitHub repository URL
!git remote add origin https://github.com/sidak1701/DataBricks-Hackathon.git
!git branch -M main
!git push -u origin main