## Keys

In [None]:
import os
# Ask for any credential that is not already present in the environment, so
# that no secret ever has to be typed into (or saved with) the notebook.
from getpass import getpass

for _secret_name in ("AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT", "PINECONE_API_KEY"):
    if not os.environ.get(_secret_name):
        os.environ[_secret_name] = getpass(f"{_secret_name}: ")

## Extracting CPT data

In [None]:
import requests

url = 'https://www.cdc.gov/nhsn/xls/cpt-pcm-nhsn.xlsx'
target_path = 'CPT-cdc-2025.xlsx'

# Stream the workbook to disk. The context manager releases the connection,
# the timeout stops the cell from hanging forever, and raise_for_status()
# turns a failed download into an immediate, explicit error instead of
# letting a later cell fail on a missing (or stale) file.
with requests.get(url, stream=True, timeout=60) as response:
    response.raise_for_status()
    with open(target_path, 'wb') as f:
        # iter_content() applies any Content-Encoding (gzip/deflate) decoding,
        # which reading response.raw directly does not.
        for chunk in response.iter_content(chunk_size=64 * 1024):
            f.write(chunk)

In [None]:
# Load the 'ALL 2025 CPT Codes' worksheet of the downloaded Excel workbook
import pandas as pd

df = pd.read_excel(target_path, sheet_name='ALL 2025 CPT Codes')

In [None]:
# Keep only the code and its description, under shorter column names
df = df[['CPT Codes', 'Procedure Code Descriptions']].rename(
    columns={'CPT Codes': 'Code', 'Procedure Code Descriptions': 'Title'}
)

In [None]:
# Discard rows that carry no CPT code
df = df[df['Code'].notna()]

## Indexing data on Pinecone

In [None]:
!pip install langchain_community pinecone langchain_pinecone tiktoken --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m427.3/427.3 kB[0m [31m18.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m39.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m35.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m38.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m415.1/415.1 kB[0m [31m21.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.5/87.5 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[

In [None]:
from langchain.docstore.document import Document
from langchain_community.embeddings import (
    AzureOpenAIEmbeddings,
    HuggingFaceEmbeddings,
)

def embedding_func():
    """Return the Azure OpenAI embedding client used by this notebook.

    The client targets the "text-embedding-3-large" deployment through API
    version "2023-05-15".
    """
    # Alternative kept for reference: a local HuggingFaceEmbeddings model
    # (model_name="BAAI/bge-small-en-v1.5") running on cuda / mps / cpu.

    # NOTE(review): chunk_size was justified as "much larger than the longest
    # title"; in LangChain this setting appears to be the number of texts sent
    # per request rather than a text length -- confirm.
    azure_embeddings = AzureOpenAIEmbeddings(
        azure_deployment="text-embedding-3-large",
        openai_api_version="2023-05-15",
        chunk_size=2048,
    )
    return azure_embeddings


def create_langchain_documents(df, target_col):
    """Turn each row of ``df`` into a LangChain ``Document``.

    Args:
        df: DataFrame holding one record per row.
        target_col: Name of the column whose value becomes ``page_content``.

    Returns:
        A list with one ``Document`` per row, in row order. Every column of
        the row (including ``target_col``) is copied into the document's
        metadata. An empty ``df`` yields an empty list.
    """
    # Iterate the rows explicitly rather than via
    # ``df.apply(..., axis=1).tolist()``: on an empty frame ``apply`` hands
    # back a DataFrame, which has no ``tolist`` method.
    return [
        Document(page_content=row[target_col], metadata=row.to_dict())
        for _, row in df.iterrows()
    ]

In [None]:
# The "Title" column supplies each document's page_content; the whole row
# travels along as metadata.
target_col = "Title"
docs = create_langchain_documents(df=df, target_col=target_col)

In [None]:
#If error, check on Pinecone
from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore

# Pinecone client, authenticated with the key stored in the environment
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

# NOTE(original author): naming error, should say cpt, but the name is kept
# so that the existing index does not have to be recreated.
index_name = "cpt-cdc-2025-text-embedding-3-large"

# To rebuild the index from scratch, delete it first:
#     if index_name in pc.list_indexes().names():
#         pc.delete_index(index_name)

_embedding_func = embedding_func()

if index_name in pc.list_indexes().names():
    # The index already exists: attach to it without uploading `docs` again.
    vectorstore_from_docs = PineconeVectorStore(index_name=index_name, embedding=_embedding_func)
else:
    # No such index yet: create a 3072-dimensional cosine index on AWS
    # eu-west-1, then build the vector store from `docs`.
    pc.create_index(
        name=index_name,
        dimension=3072,
        metric="cosine",
        spec=ServerlessSpec(cloud='aws', region='eu-west-1'),
    )
    vectorstore_from_docs = PineconeVectorStore.from_documents(docs, index_name=index_name, embedding=_embedding_func)

  return AzureOpenAIEmbeddings(


## LLM Generation Setup and Prompt

In [None]:
from openai import AzureOpenAI

# Set up the Azure OpenAI client and specify the chat model.
# azure_endpoint must be the endpoint URL string itself, so it is read from
# the environment in the same way as the API key (a list literal holding the
# variable's name is not a valid endpoint).
openai_client = AzureOpenAI(
    api_version="2024-06-01",
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
)

# Name of the Azure deployment used for chat completions
deployment_name = "gpt-4o-datazone"

def get_completion(prompt, model=deployment_name):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Full prompt text, sent as the content of one "user" message.
        model: Deployment to query; the default is the value ``deployment_name``
            had when this function was defined.

    Returns:
        The message content of the first choice in the response.
    """
    completion = openai_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

pream= '''
You are an expert medical assistant who extracts a comprehensive list of all procedures, treatments and other services from patient discharge summaries.
The discharge summaries are in markdown and may contain tables and additional metadata. If the input does not look like a discharge summary or some form of patient data, you politely decline the request.
Give only procedures that can be inferred solely based on the information I have given - do not use any other information.
Bear in mind that the list of procedures you give will be used by medical coders later on so make the procedures as detailed and specific as possible given the available patient data.
Before finalising your answer check if you haven't missed any procedures, treatments and other services implied by the patient data. If you did, add them to your response.
List the procedures with supporting evidence in form of a quote from the discharge summary.
Do not generate any other text, notes or disclaimers apart from the list of procedures and one supporting quote for each.
Your list should be numbered and every element in the list should fit into one line.

For example, if the discharge summary mentions:
“
- Plan CT head Lumbar puncture
- Bloods show a raised WCC and CRP
- Decision to perform limited distal gastrectomy and convert to open. ... Antecolic gastrojejunostomy (30-40cm from DJ) TLC-75 side to side stapled anastomosis at dependent part of stomach on greater curve.
- Repeat CTAP was done which showed a leak at the anterior wall of the gastrojejunal anastomosis leading to significant leak of oral contrast into the peritoneal cavity. There was extensive pneumoperitoneum within the abdomen secondary to this anastamotic leak. A drain was inserted. After a scan, the drain was readjusted to be better situated. Repeat scan a few days later revealed ongoing leakage. He was re-scheduled for IR drainage for collection at site of gastrectomy on day 24 post-op. Collection sites considerably reduced after this.
- Following the procedure, he was admitted to ICU and his O2 requirements were managed.
”

then your answer may be:

1. Lumbar puncture for cerebrospinal fluid analysis and/or injection
"Lumbar puncture"

2. Computed tomography (CT) scan of the head
"CT head"

3. Complete blood count
"Bloods show a raised WCC and CRP"

4. Gastrectomy, partial; distal, with gastrojejunostomy
"limited distal gastrectomy and convert to open. ... Antecolic gastrojejunostomy (30-40cm from DJ)"

5. Computed tomography, abdomen; with contrast material(s)
"CTAP was done which showed a leak at the anterior wall of the gastrojejunal anastomosis leading to significant leak of oral contrast"

6. Drainage procedure for fluid collection
"significant leak of oral contrast into the peritoneal cavity. There was extensive pneumoperitoneum within the abdomen secondary to this anastamotic leak. A drain was inserted."

7. Image-guided drainage
"re-scheduled for IR drainage"

8. Emergency department visit for the evaluation and management of a patient
"he was admitted to ICU"
'''

## Example discharge summary

In [None]:
dc5 = '''
<figure>

![](figures/0)

<!-- FigureContent="\+ Indraprastha Apollo Hospitals touching lives DELHI -" -->

</figure>


DISCHARGE SUMMARY
===

|||
| - | - |
| NAME | : MASTER.ANKIT BISWASH |
| UHID | : APD1.0000807307 |

|||
| - | - |
| Age/Sex | : 3 Year(s) Male |
| IPNO | : 208812 |

Date of Admission : 22-Nov-2008

Room/Ward

: 4159 (Recommended-Govt Of Delhi)

Date of Discharge :01-Apr-2009

Consultants

1\. Dr.A.K. Banerji NEUROSURGEON 2. Dr. Varindera Paul Singh NEUROSURGEON

Unit :NEUROSURGERY

DIAGNOSIS : Posterior Third Ventricle Ependymoma Hydrocephalus

HISTORY :- Right side facial weakness - 1 month Headache & vomiting - 20 days Difficulty in walking & frequent falls - 15 days CECT head revealed posterior third ventricle tumor with hydrocephalus

ON EXAMINATION

Child - sick BP - 100/60 mm Pupil - B/L 3 mm, reacting Fundus - B/L mild papilloedema Moving all four limbs

COURSE IN HOSPITAL :-

Pre-operatively was evaluated by Dr Nameet Jerath [Senior Consultant - Paediatric], and was taken up for surgery after his clearance.

OPERATIVE PROCEDURE: Midline suboccipital craniectomy, infratentorial-supracerebellar approach and microsurgical

Contd .. 2 ...

<figure>

![](figures/1)

<!-- FigureContent="P Joint Commission INTERNATIONAL" -->

</figure>


India's First Internationally Accredited Hospital Indraprastha Apollo Hospitals Sarita Vihar, Delhi - Mathura Road, New Delhi - 110 076 (INDIA) Tel. : 91-11- 26925858. 26925801, Fax : 91-11-26823629, Emergency Telephone No. : 1066. Website : www.apollohospdelhi.com
<figure>

![](figures/2)

<!-- FigureContent="Indraprastha Apollo Hospitals touching lives DELHI -" -->

</figure>


tumor decompression done under GA on 14.11.08.

Right parieto-occipital burrhole made and external ventricular drain inserted.

FINDINDS: Soft mildly vascular greyish tumor in posterior third ventricle.

HISTOPATHOLOGY: Ependymoma ( WHO - Gr II, Tanycytic type)

POST-OPERATIVE HOSPITAL STAY:

Was electively ventilated post-operativley in Neuro ICU. Next day weaned off from ventilator & extubated. NCCT head done on 15.11.08 ruled out any operative site haematoma with surgical changes and decrease in mass effect. Developed seizure followed by cardiac arrest. Recovered after resucitation & modification of anticonvulsant drugs. Kept on Midazolam infusion & gradually tapered off & shifted to Paed. ICU. NCCT head on 19.11.08, after removal of external ventricular drain revealed decrease in size of ventricles. Had copious throat secretions, so put on ventilatory support for 48 hour & then weaned off. Improve gradually & shifted out to Govt.ICU ward on 22.11.08. Dr. D.K.Mahajan [ Senior Consultant - Dermatology ] opinion taken for allergic skin lesion. Dr. S. Haldhar [Consultant -Radiation Oncology] opinion was taken for post operative adjuvant radiotherapy. Developed loose motion & fever, was managed with help of Paediatrician.

Contrast MRI brain done on 16.12.08 revealed surgical changes with persistent ventriculomegaly.

OPERATIVE PROCEDURE:

Right VP shunt [Medium Pressure - Pudenz] done under done under GA on 01.01.2009 FINDINGS: Clear CSF came in moderate pressure.

POST-OPERATIVE HOSPITAL STAY

Had mild improvement in the tonic posturing after the shunt surgery. IMRT was started in consultation with Dr Shikha Haldhar (Consultant Radiation Oncology). Had recurrent seizures with respiratory distress which was managed in consultation with Dr. N.Jerath (Senior Consultant,Pediatrics) and Dr. V.B.Gupta( Senior Consultant, Pediatric Neurology). Follow up NCCT head and MRI brain done revealed residual tumour in the region of posterior third ventricle with decreased ventricular size with increase in the bilateral subdural collections with no significant compression. Subdural tap was done and around 150cc of fluid aspirated, no neurological improvement seen after the procedure. Had recurrent episodes of laryngeal stridor. Dr. G. Raheja (Senior Consultant,ENT) was consulted who did tracheostomy on 23.03.2009. Follow up NCCT head done on 23.03.2009 revealed no change in the subdural collection.

COMMENTS:

Long term prognosis guarded. Needs regular follow up and neurorehabilitation.

CONDITION OF THE PATIENT AT THE TIME OF DISCHARGE :- BP - 100/60 mm

<!-- PageNumber="Contd .. 3 ..." -->

<figure>

![](figures/3)

<!-- FigureContent="Joint Commission INTERNATIONAL" -->

</figure>


India's First Internationally Accredited Hospital Indraprastha Apollo Hospitals Sarita Vihar, Delhi - Mathura Road, New Delhi - 110 076 (INDIA) Tel. : 91-11- 26925858, 26925801, Fax : 91-11-26823629, Emergency Telephone No. : 1066 Website : www.apollohospdelhi.com
<figure>

![](figures/4)

<!-- FigureContent="Indraprastha Apollo Hospitals touching lives - DELHI -" -->

</figure>


Afebrile Improving gaze paresis Opening eyes spontaneously, Tonic posturing present. Ryle's tube feed with oral supplement Wound healthy, Sutures removed On Tracheostomy

ADVICE ON DISCHARGE :-

DIET: Ryle's tube feeding 100 ml 2 hourly with oral supplement as tolerated

PHYSICAL ACTIVITY: Wheel chair mobilization

MEDICATIONS: Syp Augmentin 5ml thrice daily X 1 week Gardenal 60mgm twice daily X 1 month Syp Tegrital (100mgm /5ml) 4ml thrice daily X 1 month Frisium 5 mgm in morning and noon, and, 10mgm at bedtime X 1 month Tizan 2mgm thrice daily X 2 weeks Baclofen 7 mgm thrice daily X 2 weeks Diamox 250mgm half tablet twice daily X 2 weeks Rantac 25 mgm twice daily X 2 weeks Syp Visyneral 5ml daily X 2 weeks Asthalin and saline nebulization thrice daily X 1 month Crocin 10 ml SOS for fever/ headache Refresh eye drop both eye four hourly X 1 week

OTHERS:

Physiotherapy and mobilisation as advised Care of Tracheostomy and Ryle's tube

FOLLOW-UP: To follow up with Dr A K Banerji in Neurosciences OPD after 1 month Kindly confirm your appointment before coming. For appointments contact - 9910000409

For any medical problem contact Dr H S Sohal (9818535985)

Dr. A.K. Banerji SR.CONSULTANT NEUROSURGEON

Contd .. 4 ...

<figure>

![](figures/5)

<!-- FigureContent="Joint Commission INTERNATIONAL" -->

</figure>


India's First Internationally Accredited Hospital Indraprastha Apollo Hospitals Sarita Vihar, Delhi - Mathura Road, New Delhi - 110 076 (INDIA) Tel. : 91-11-26925858, 26925801, Fax : 91-11-26823629, Emergency Telephone No .: 1066 Website : www.apollohospdelhi.com

<!-- PageNumber="S2" -->
:selected: :selected: :selected: :selected:<figure>

![](figures/6)

<!-- FigureContent="Indraprastha Apollo Hospitals DELHI - touching lives" -->

</figure>


Dr. Varindera Paul Singh SR.CONSULTANT NEUROSURGEON

<figure>

![](figures/7)

<!-- FigureContent="30/1/09" -->

</figure>


\*HSS

Registrar/Resident/C.M.O

Date

: 30-Mar-2009 10:27 AM.

<figure>

![](figures/8)

<!-- FigureContent="Joint Commission INTERNATIONAL" -->

</figure>


India's First Internationally Accredited Hospital Indraprastha Apollo Hospitals Sarita Vihar, Delhi - Mathura Road, New Delhi - 110 076 (INDIA) Tel. : 91-11- 26925858, 26925801, Fax : 91-11-26823629, Emergency Telephone No. : 1066 Website : www.apollohospdelhi.com

<!-- PageNumber="S .-" -->
'''

## Test

In [None]:
# Run the extraction prompt on the example discharge summary and show the raw reply
response = get_completion(f"{pream}\n\nDischarge summary:\n\n{dc5}")
print(response)

1. Midline suboccipital craniectomy with infratentorial-supracerebellar approach and microsurgical tumor decompression
"Midline suboccipital craniectomy, infratentorial-supracerebellar approach and microsurgical tumor decompression done under GA on 14.11.08."

2. Right parieto-occipital burrhole and external ventricular drain insertion
"Right parieto-occipital burrhole made and external ventricular drain inserted."

3. Non-contrast computed tomography (NCCT) scan of the head
"NCCT head done on 15.11.08 ruled out any operative site haematoma with surgical changes and decrease in mass effect."

4. Right ventriculoperitoneal (VP) shunt placement
"Right VP shunt [Medium Pressure - Pudenz] done under done under GA on 01.01.2009."

5. Intensity-modulated radiation therapy (IMRT)
"IMRT was started in consultation with Dr Shikha Haldhar (Consultant Radiation Oncology)."

6. Subdural tap for fluid aspiration
"Subdural tap was done and around 150cc of fluid aspirated."

7. Tracheostomy
"Dr. G. R

## GAVS

In [None]:
def _split_procedures_and_evidence(answer):
    """Parse the model's numbered answer into two parallel lists.

    The answer is expected to consist of blocks separated by a blank line,
    each block being a "<n>. <procedure>" (or "<label>: <procedure>") line
    followed by a line holding the supporting quote.

    Returns:
        (procedures, evidence): two lists of equal length. A block whose first
        line contains neither separator is reported and skipped as a whole,
        so the lists can never drift out of step; a block without a quote
        line gets an empty evidence string.
    """
    procedures, evidence = [], []
    for block in answer.split('\n\n'):
        lines = [line for line in block.split('\n') if line.strip()]
        if not lines:
            continue
        heading = lines[0]
        # maxsplit=1 keeps procedure names that themselves contain '. ' or
        # ': ' intact instead of truncating them at the second separator.
        if '. ' in heading:
            procedure = heading.split('. ', 1)[1]
        elif ': ' in heading:
            procedure = heading.split(': ', 1)[1]
        else:
            print('Issues with list?')
            continue
        procedures.append(procedure)
        evidence.append(lines[1] if len(lines) > 1 else '')
    return procedures, evidence


def _format_code(code):
    """Render a CPT code as text.

    Whole-number floats are shown without a trailing ".0"; anything else
    (for example an alphanumeric code or one with leading zeros) is passed
    through ``str`` unchanged instead of being forced through ``int()``.
    """
    if isinstance(code, float) and code.is_integer():
        return str(int(code))
    return str(code)


print('Step 1: Creating the list of procedures')
response = get_completion(pream + '\n\nDischarge summary:\n\n' + dc5)
list_of_diagnosis, list_of_evidence = _split_procedures_and_evidence(response)

print('Step 2: Creating a list of relevant codes using vector search')
relevant_codes = ''
for j, (diagnosis, evidence) in enumerate(zip(list_of_diagnosis, list_of_evidence)):
    diag_description = str(j + 1) + '. Condition: ' + diagnosis + '\n'
    diag_description += 'Evidence point: ' + evidence + '\n'

    # semantic search (vector store search); the query is the procedure plus
    # its evidence quote (diag_description) rather than the procedure alone
    result = vectorstore_from_docs.similarity_search_with_score(diag_description, k=10)
    # columns= keeps the frame well-formed even when the search returns nothing
    data = pd.DataFrame([r[0].metadata for r in result], columns=['Code', 'Title'])

    relevant_codes += diag_description

    for _, row in data.iterrows():
        relevant_codes += f" - {_format_code(row['Code'])}: {row['Title']}\n"

    relevant_codes += '\n\n'

print('Printing output:')
print()
print(relevant_codes)

Step 1: Creating the list of procedures
Step 2: Creating a list of relevant codes using vector search
Printing output:

1. Condition: Midline suboccipital craniectomy with infratentorial-supracerebellar approach and microsurgical tumor decompression
Evidence point: "Midline suboccipital craniectomy, infratentorial-supracerebellar approach and microsurgical tumor decompression done under GA on 14.11.08."
 - 61521: Craniectomy for excision of brain tumor, infratentorial or posterior fossa; midline tumor at base of skull
 - 61458: Craniectomy, suboccipital; for exploration or decompression of cranial nerves
 - 61345: Other cranial decompression, posterior fossa
 - 61305: Craniectomy or craniotomy, exploratory; infratentorial (posterior fossa)
 - 61520: Craniectomy for excision of brain tumor, infratentorial or posterior fossa; cerebellopontine angle tumor
 - 61343: Craniectomy, suboccipital with cervical laminectomy for decompression of medulla and spinal cord, with or without dural graft