In [3]:
!pip install -q dotenv

In [1]:
import os
import sys

# Put the parent of the current working directory on the import path.
# Presumably this is where the `src` package imported below lives - confirm.
_parent_dir = os.path.abspath('../')
sys.path.append(_parent_dir)

In [2]:
import os

from src import load_data, InsomniaClassifier, convert_output_to_json

# Location of the validation CSV. The path can be overridden through the
# INSOMNIA_VAL_PATH environment variable so the notebook is not tied to one
# machine's directory layout; the default is the original location.
val_path = os.environ.get("INSOMNIA_VAL_PATH", "/home/ubuntu/SSM/validation/testing.csv")

# Load the notes; the 'text' column holds one clinical note per row.
df = load_data(val_path)
clinical_notes = df['text'].tolist()

# Instantiating the classifier loads the model (see the cell output below).
classifier = InsomniaClassifier()

# Accumulators filled by the processing loop in a later cell.
classification_results = []
extracted_texts = []

Successfully loaded 2000 records from /home/ubuntu/SSM/validation/testing.csv


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Model loaded on cuda


In [3]:
# Preview the first rows of the loaded validation frame as a sanity check.
df.head()

Unnamed: 0,note_id,text
0,20,"male patient in sixties prescribed Insulin, Al..."
1,27,male patient in fifties prescribed Magnesium S...
2,28,male patient in fifties prescribed Potassium C...
3,33,male patient in hundreds prescribed Potassium ...
4,51,"male patient in eighties prescribed Heparin, A..."


In [None]:
# Process each clinical note with exception handling.

# Fallback rows recorded when classifying a note raises RuntimeError.
# The keys match the column names that the later rename step expects.
DEFAULT_CLASSIFICATION = {
    "Definition 1 (Sleep Difficulty)": "no",
    "Definition 2 (Daytime Impairment)": "no",
    "Rule A (Insomnia Diagnosis)": "no",
    "Rule B (Primary Medications)": "no",
    "Rule C (Secondary Medications)": "no",
    "Final Insomnia Status": "no"
}
DEFAULT_EXTRACTED = {
    "Definition 1 Extracted": "",
    "Definition 2 Extracted": "",
    "Rule A Extracted": "",
    "Rule B Extracted": "",
    "Rule C Extracted": ""
}

# Start from empty accumulators so that re-running this cell does not append a
# second copy of every result (which would misalign rows when the results are
# later concatenated with the input frame).
classification_results = []
extracted_texts = []
failed_indices = []  # positions of notes that fell back to the defaults

for idx, clinical_note in enumerate(clinical_notes):
    print(f"Processing text {idx + 1}/{len(clinical_notes)}: {clinical_note[:100]}...")
    try:
        classification, extracted = classifier.classify(clinical_note)
    except RuntimeError as e:
        print(f"RuntimeError for text at index {idx}: {e}")
        # Use default classifications and empty extracted text on error.
        # Copies are taken so each row owns an independent dict.
        classification = dict(DEFAULT_CLASSIFICATION)
        extracted = dict(DEFAULT_EXTRACTED)
        failed_indices.append(idx)
    classification_results.append(classification)
    extracted_texts.append(extracted)
    print("-" * 80)

# Make silent fallbacks visible: these notes are labelled "no" only because
# classification failed, not because the model judged them negative.
if failed_indices:
    print(f"{len(failed_indices)} note(s) fell back to default labels at indices: {failed_indices}")

Processing text 1/2000: male patient in sixties prescribed Insulin, Albuterol-Ipratropium, Soln., Potassium Chloride, Viokas...




Model Response:
 ### ✅ Response Format:
Sleep Difficulty Phrases:
[exact extracted phrases for nighttime sleep difficulty OR "unknown"]

Daytime Impairment Phrases:
[exact extracted phrases for daytime impairment OR "unknown"]
---
### Clinical Note:
male patient in sixties prescribed Insulin, Albuterol-Ipratropium, Soln., Potassium Chloride, Viokase, Fentanyl Citrate, Haloperidol, Diazepam, Fentanyl Patch, NS, D5W, Syringe (IV Room), Methylprednisolone Na Succ., Midazolam HCl, Oxycodone-Acetaminophen Elixir, Prednisone, Ferrous Sulfate, Warfarin, Lansoprazole Oral Suspension, D5 1/2NS, Dextrose 50%, Propofol, Zolpidem Tartrate, Morphine Sulfate, Phytonadione, Lansoprazole, Heparin, Heparin Sodium, Senna, Lactulose, Furosemide, Olanzapine, Potassium Phosphate, Magnesium Sulfate, Fluticasone Propionate 110mcg, Magnesium Citrate, Azithromycin, Lidocaine 2%, Lorazepam, Oxycodone-Acetaminophen, Albuterol 0.083% Neb Soln, Ipratropium Bromide Neb, Morphine , Sodium Chloride 0.9%  Flush, BuPRO

In [5]:
import pandas as pd

# Convert results to DataFrames (one row per processed note).
df_classification = pd.DataFrame(classification_results)
df_extracted = pd.DataFrame(extracted_texts)

# pd.concat(axis=1) aligns on index labels and pads with NaN, so a length
# mismatch (e.g. the processing cell was run twice or interrupted) would
# silently produce misaligned rows. Fail loudly instead.
if not (len(df) == len(df_classification) == len(df_extracted)):
    raise ValueError(
        f"Row count mismatch: {len(df)} notes, "
        f"{len(df_classification)} classifications, "
        f"{len(df_extracted)} extracted-text rows"
    )

# Combine all DataFrames. The input columns are re-indexed to 0..n-1 so they
# line up positionally with the freshly built result frames.
df_final = pd.concat(
    [df[['text', 'note_id']].reset_index(drop=True), df_classification, df_extracted],
    axis=1,
)

# Rename columns to match expected names for JSON conversion.
# NOTE(review): "Rule A Extracted" is intentionally left unrenamed here, as in
# the original mapping - presumably the JSON conversion needs no Rule A
# evidence column; confirm against convert_output_to_json.
df_final = df_final.rename(columns={
    "Definition 1 (Sleep Difficulty)": "Definition 1 Pred",
    "Definition 2 (Daytime Impairment)": "Definition 2 Pred",
    "Rule A (Insomnia Diagnosis)": "Rule A Pred",
    "Rule B (Primary Medications)": "Rule B Pred",
    "Rule C (Secondary Medications)": "Rule C Pred",
    "Final Insomnia Status": "Insomnia Pred",
    "Definition 1 Extracted": "Definition 1 Evidence",
    "Definition 2 Extracted": "Definition 2 Evidence",
    "Rule B Extracted": "Rule B Evidence",
    "Rule C Extracted": "Rule C Evidence"
})

In [6]:
output_csv_dir = "/home/ubuntu/Downloads/"
# Create the output directory if it is missing so the save below cannot fail
# on a fresh machine; this is a no-op when the directory already exists.
os.makedirs(output_csv_dir, exist_ok=True)
# Save the combined predictions as CSV in the output directory.
csv_output_path = os.path.join(output_csv_dir, "output.csv")
df_final.to_csv(csv_output_path, index=False)

In [7]:
# Generate JSON outputs from the CSV saved in the previous cell.
# In the recorded run this wrote results/subtask_1.json, results/subtask_2a.json
# and results/subtask_2b.json (see the cell output below).
convert_output_to_json(csv_output_path)

JSON file saved at results/subtask_1.json
JSON file saved at results/subtask_2a.json
JSON file saved at results/subtask_2b.json
