In [2]:
# Import necessary libraries
import warnings
warnings.simplefilter("ignore")
import os
import glob
import pandas as pd
import json
import gradio as gr
from langchain.schema import Document
from langchain_chroma import Chroma
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain.document_loaders import JSONLoader
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain


In [4]:
df = pd.read_csv('charge-capture-knowledge-base/PB_HB_CPT_gap_byCSN_my.csv')
#C:\Users\tazeb\OneDrive\AtomicHabit\LLM Engineering\LLM_Engineering\charge-capture-knowledge-base
df = df.dropna()
df.head(3)

Unnamed: 0,ReportDate,Location,Department,BillArea,PBDepartment,PBBillingProvider,PBFinClass,PBPayor,HARAcctType,HARPtClass,...,PBChgAmount,PB_CPT,HB_CPT,AttentionIndex,MatchLeft1Digit,MatchLeft2Digit,MatchLeft3Digit,MatchLeft4Digit,Expected_HB_CPT_Chgs,AvgPymt
2,9/22/2024,BELLEVUE HOSPITAL CENTER,BE ADULT BEHAV HEALTH,BE OP BEHAVIORAL HEALTH ADULT,BE ADULT BEHAV HEALTH,"LIU, YUAN, MD",Medicare Managed Care,HEALTHFIRST MCR,OP,Outpatient,...,$181,90832.0,90834,1.0,1.0,1.0,1.0,1.0,$687,$138
6,9/22/2024,BELLEVUE HOSPITAL CENTER,BE ADULT BEHAV HEALTH,BE OP BEHAVIORAL HEALTH ADULT,BE ADULT BEHAV HEALTH,"SCHLECHTER, ALAN, MD",Medicare Managed Care,UBHOPTUM HEALTH MCR,OP,Outpatient,...,$261,90846.0,90847,1.0,1.0,1.0,1.0,1.0,"$1,203",$131
15,9/22/2024,BELLEVUE HOSPITAL CENTER,BE ADULT BEHAV HEALTH,BE OP BEHAVIORAL HEALTH ADULT,BE ADULT BEHAV HEALTH,"SCHLECHTER, ALAN, MD",Medicare Managed Care,UBHOPTUM HEALTH MCR,OP,Outpatient,...,$272,90847.0,90846,1.0,1.0,1.0,1.0,1.0,"$1,213",$228


In [8]:
# Convert to JSON format (list of dictionaries)
json_data = df.to_dict(orient="records")
# Save as a single JSON file
json_file_path = "charge-capture-knowledge-base/charge_capture_data.json"
with open(json_file_path, "w", encoding="utf-8") as json_file:
    json.dump(json_data, json_file, indent=4)

print(f"Merged JSON file saved at: {json_file_path}")


Merged JSON file saved at: charge-capture-knowledge-base/charge_capture_data.json


In [12]:
###### Loading Clinical & Billing Data
##############################################################################

json_file_path = "charge-capture-knowledge-base/charge_capture_data.json"

with open(json_file_path, "r", encoding="utf-8") as file:
    json_data = json.load(file)

def create_document(entry, doc_type="charge_capture"):
    return Document(page_content=json.dumps(entry, indent=2), metadata={"doc_type": doc_type})

documents = [create_document(entry) for entry in json_data]


In [18]:
print(documents[2])

page_content='{
  "ReportDate": "9/22/2024",
  "Location": "BELLEVUE HOSPITAL CENTER",
  "Department": "BE ADULT BEHAV HEALTH",
  "BillArea": "BE OP BEHAVIORAL HEALTH ADULT",
  "PBDepartment": "BE ADULT BEHAV HEALTH",
  "PBBillingProvider": "SCHLECHTER, ALAN, MD",
  "PBFinClass": "Medicare Managed Care",
  "PBPayor": "UBHOPTUM HEALTH MCR",
  "HARAcctType": "OP",
  "HARPtClass": "Outpatient",
  "HARBillStatus": "Closed",
  "HAR": 211442508.0,
  "CSN": 139034464.0,
  "EMPI": 100470387.0,
  "MRN": 4043526.0,
  "PBServiceDate": "1/24/2024",
  "PBProcCategory": "PR MENTAL HEALTH SERVICES",
  "PBCPTDescription": "PR FAMILY PSYCHOTHERAPY W/PATIENT PRESENT 50 MINS",
  "PBChgQty": 1.0,
  " PBChgAmount ": " $272 ",
  "PB_CPT": 90847.0,
  "HB_CPT": "90846",
  "AttentionIndex": 1.0,
  "MatchLeft1Digit": 1.0,
  "MatchLeft2Digit": 1.0,
  "MatchLeft3Digit": 1.0,
  "MatchLeft4Digit": 1.0,
  " Expected_HB_CPT_Chgs ": " $1,213 ",
  " AvgPymt ": " $228 "
}' metadata={'doc_type': 'charge_capture'}


In [15]:
##### Intelligent Chunking for Clinical Notes
########################################################################
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=300, separators=["\n\n", ".", " "])
chunks = text_splitter.split_documents(documents)


In [19]:
print(chunks[2])

page_content='{
  "ReportDate": "9/22/2024",
  "Location": "BELLEVUE HOSPITAL CENTER",
  "Department": "BE ADULT BEHAV HEALTH",
  "BillArea": "BE OP BEHAVIORAL HEALTH ADULT",
  "PBDepartment": "BE ADULT BEHAV HEALTH",
  "PBBillingProvider": "SCHLECHTER, ALAN, MD",
  "PBFinClass": "Medicare Managed Care",
  "PBPayor": "UBHOPTUM HEALTH MCR",
  "HARAcctType": "OP",
  "HARPtClass": "Outpatient",
  "HARBillStatus": "Closed",
  "HAR": 211442508.0,
  "CSN": 139034464.0,
  "EMPI": 100470387.0,
  "MRN": 4043526.0,
  "PBServiceDate": "1/24/2024",
  "PBProcCategory": "PR MENTAL HEALTH SERVICES",
  "PBCPTDescription": "PR FAMILY PSYCHOTHERAPY W/PATIENT PRESENT 50 MINS",
  "PBChgQty": 1.0,
  " PBChgAmount ": " $272 ",
  "PB_CPT": 90847.0,
  "HB_CPT": "90846",
  "AttentionIndex": 1.0,
  "MatchLeft1Digit": 1.0,
  "MatchLeft2Digit": 1.0,
  "MatchLeft3Digit": 1.0,
  "MatchLeft4Digit": 1.0,
  " Expected_HB_CPT_Chgs ": " $1,213 ",
  " AvgPymt ": " $228 "
}' metadata={'doc_type': 'charge_capture'}


In [20]:
##### Embedding and Vector Store
######################################################################
embeddings = OpenAIEmbeddings()
db_name = "charge_capture_vector_db"

if os.path.exists(db_name):
    Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()

vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=db_name)

OpenAIError: The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable

In [9]:





##### Embedding and Vector Store
######################################################################
embeddings = OpenAIEmbeddings()
db_name = "charge_capture_vector_db"

if os.path.exists(db_name):
    Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()

vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=db_name)

##### Retrieval
####################################################################
llm = ChatOpenAI(temperature=0.2, model_name="gpt-4o")  # More accurate, reduced randomness
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# Configure retriever with metadata filtering (HB/PB, Procedure Dates, CPT Codes)
retriever = vectorstore.as_retriever(search_kwargs={"k": 15, "filters": {"doc_type": "billing"}})

conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

# Enhanced Prompt for Charge Capture AI
query = """
You are an expert in medical billing charge capture. Your task is to analyze hospital (HB) and professional (PB) billing records.
1. Identify procedures documented in clinical notes but not billed.
2. Detect discrepancies between HB and PB charges.
3. Verify CPT codes, modifiers, and diagnoses.

Provide structured output in this format:
- Missed Transactions: [Procedure Name] - [Suggested CPT]
- Billing Discrepancies: [HB Charge] vs [PB Charge]
- Incomplete Billing: [Incorrect/Missing CPT or Modifier]
"""

def chat(question, history):
    result = conversation_chain.invoke({"question": query + "\n" + question})
    return result["answer"]

view = gr.ChatInterface(chat, type="messages").launch(inbrowser=True)


TypeError: JSONLoader.__init__() got an unexpected keyword argument 'glob'