In [None]:
import pandas as pd

from tqdm import tqdm

from breastfeeding_nlp.extraction.ner import MedSpaCyLabeler
from breastfeeding_nlp.utils.utils import filter_dataset

%load_ext autoreload
%autoreload 2

## Load the dataset

In [2]:
# Load data here.
# The texts below are small example notes; the record list and the DataFrame
# built from it carry two fields per note: "row_ix" (numbered from 1) and
# "NOTE_TEXT" (the free-text note).
_NOTE_TEXTS = (
    "The infant latched and suckled vigorously for three minutes before pausing to breathe. He resumed feeding with a slower, rhythmic pattern and appeared satiated after eight minutes.",
    "The baby displayed weak sucking reflexes and frequently detached from the nipple.",
    "During bottle feeding, the infant took short bursts of milk and then released the nipple to swallow. He displayed mild fussiness between bursts but calmed with gentle burping. No signs of aspiration or cough were noted. Overall intake appeared adequate for age. Weight gain remains on track.",
    "The infant preferred the left breast and fed for approximately ten minutes before demonstrating signs of fatigue. He swallowed effectively with audible swallows and did not show any tongue thrust abnormalities. Maternal nipple comfort was maintained throughout the session.",
    "Scheduled feeding at 3 PM involved coordinated suck-swallow-breath cycles without observable distress. Occasional lip smacking was noted prior to latching. The infant paused briefly to burp after five minutes and then continued feeding eagerly. The feeding session concluded smoothly as the infant released the bottle spontaneously.",
    "Sent home on Neosure.",
    "Patient is breastfeeding.",
    # Included to check that a bare "nursing" mention is labelled absent, not positive.
    "Patient is nursing.",
)

# One record per note, in the order listed above.
data = [
    {"row_ix": position, "NOTE_TEXT": note_text}
    for position, note_text in enumerate(_NOTE_TEXTS, start=1)
]

df = pd.DataFrame(data)
df

Unnamed: 0,row_ix,NOTE_TEXT
0,1,The infant latched and suckled vigorously for ...
1,2,The baby displayed weak sucking reflexes and f...
2,3,"During bottle feeding, the infant took short b..."
3,4,The infant preferred the left breast and fed f...
4,5,Scheduled feeding at 3 PM involved coordinated...
5,6,Sent home on Neosure.
6,7,Patient is breastfeeding.
7,8,Patient is nursing.


## Reduce the dataset by
- Dropping rows with missing notes
- Filtering out irrelevant note types (determined through manual review):
    - ["Patient Instructions", "Discharge Instructions", "MR AVS Snapshot", "ED AVS Snapshot", "IP AVS Snapshot", "Training", "Operative Report", "D/C Planning", "Pharmacy"]
    - Notes that were WIC request forms, identified by the literal string: "Ohio WIC Prescribed Formula and Food Request Form"


In [None]:
# Filter data here
# NOTE(review): `filter_dataset` is imported at the top of the notebook but is not
# called in any cell shown here — presumably it applies the reductions listed in
# the markdown above; confirm its signature before wiring it in.

## Load the medspaCy pipeline

In [3]:
# Build the labeler used by every cell below; no arguments are passed, so it
# runs with whatever defaults MedSpaCyLabeler defines.
medspacy_labeler = MedSpaCyLabeler()

### (Optional) Test it on an example note or sentence

In [4]:
# For a single note: extract entities from one piece of text, then derive its
# document-level label.
# The intermediate frame has an example-specific name because the full-dataset
# cell below assigns `entities_df`; sharing that name would let this
# one-sentence result be shown in place of the real one when cells are run out
# of order. `label` keeps its original name.
example_entities_df = medspacy_labeler.process_text("This is a testing note that contains information about breastfeeding.")
label = medspacy_labeler.label_documents(example_entities_df)
label

entity_label,row_ix,medspacy_document_label
0,0,positive


### Run it on all your notes/documents
- Note that this takes a DataFrame as input and expects the format shown in the data-loading cell above (a `row_ix` column and a `NOTE_TEXT` column)

In [5]:
# Run the labeler over every row of `df`: process_dataframe returns the
# entity-level frame, and label_documents reduces it to one label per document.
# NOTE(review): in the output below `row_ix` runs 0-7, whereas the `row_ix`
# column of `df` runs 1-8 — it looks positional rather than taken from the
# input column; confirm before joining these labels back onto `df`.
entities_df = medspacy_labeler.process_dataframe(df)
medspacy_doc_labels = medspacy_labeler.label_documents(entities_df)
medspacy_doc_labels

entity_label,row_ix,medspacy_document_label
0,0,absent
1,1,absent
2,2,absent
3,3,positive
4,4,absent
5,5,negative
6,6,positive
7,7,absent


In [6]:
# Show the entity-level frame returned by process_dataframe (the input that
# label_documents was given in the previous cell).
entities_df

Unnamed: 0,doc_id,entity,entity_label,start,end,is_negated,is_uncertain,intent,section,sentence_idx,...,NOTE_TEXT,Document_ID,is_historical,is_exclusive,amount,FREQUENCY,BOTTLE,DISCONTINUED,NURSING,BREAST
0,0,,,,,,,,,,...,The infant latched and suckled vigorously for ...,0,,,,,,,,
1,1,,,,,,,,,,...,The baby displayed weak sucking reflexes and f...,1,,,,,,,,
2,2,bottle feeding,AMBIGUOUS,1.0,3.0,False,False,False,,0.0,...,"During bottle feeding, the infant took short b...",2,False,False,,,True,False,,
3,3,breast and fed,BREAST_FEEDING,5.0,8.0,False,False,False,,0.0,...,The infant preferred the left breast and fed f...,3,False,False,,,False,False,,
4,4,,,,,,,,,,...,Scheduled feeding at 3 PM involved coordinated...,4,,,,,,,,
5,5,neosure,FORMULA_FEEDING,3.0,4.0,False,False,False,,0.0,...,Sent home on Neosure.,5,True,False,,,False,False,,
6,6,breastfeeding,BREAST_FEEDING,2.0,3.0,False,False,False,,0.0,...,Patient is breastfeeding.,6,False,False,,,False,False,,
7,7,is nursing,BREAST_FEEDING,1.0,3.0,False,False,False,,0.0,...,Patient is nursing.,7,False,False,,,False,False,True,
