<a href="https://colab.research.google.com/github/rahiakela/natural-language-processing-research-and-practice/blob/main/text-similarity-works/12_icd_10_code_and_keyword_highliting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##Setup

Reference:

[Rule-based entity recognition](https://spacy.io/usage/rule-based-matching#entityruler)

[PDF-to-TEXT](https://pypi.org/project/pdftotext/)

In [None]:
!pip -q install spacy
!python -m spacy download en_core_web_sm

!pip install pillow

!sudo apt install build-essential libpoppler-cpp-dev pkg-config python3-dev
!pip install -U pdftotext
!pip install PyPDF2
!pip install fitz
!pip install PyMuPDF

Just restart the colab environment.

In [1]:
import pandas as pd
import numpy as np
import re
import time
import os

import pdb

import fitz
import cv2 
import pdftotext
from PyPDF2 import PdfFileReader, PdfFileWriter
from io import BytesIO
from PIL import Image
from matplotlib import pyplot as plt

import spacy
from spacy.matcher import PhraseMatcher
from spacy.lang.en import English

import nltk

In [2]:
!mkdir pdf-files
!mkdir txt-files

In [3]:
pdf_files_path = "pdf-files"
txt_files_path = "txt-files"

##Define some functions

In [4]:
def split_pdf(pdf_path):
  pdf_in_file = open(pdf_path, "rb")
  pdf = PdfFileReader(pdf_in_file)
  pdf_list = []
  for page in range(pdf.numPages):
      inputpdf = PdfFileReader(pdf_in_file)
      output = PdfFileWriter()
      output.addPage(inputpdf.getPage(page))
      with open(f"{pdf_files_path}/page-{page}.pdf", "wb") as outputStream:
          output.write(outputStream)
          pdf_list.append(f"page-{page}.pdf")
  return pdf_list

In [5]:
def extract_text_from_pdf(pdf_list):
  txt_file_list = []
  i = 0
  for pdf_file in pdf_list:
    with open(os.path.join(pdf_files_path, pdf_file), "rb") as f:
      pdf = pdftotext.PDF(f)
    
    # Read all the text into one string
    pdf_text = "\n\n".join(pdf)

    # write text into file
    with open(f"{txt_files_path}/page-{str(i)}.txt", "a") as f:
      f.write(pdf_text)
    txt_file_list.append(f"{txt_files_path}/page-{str(i)}.txt")
    i += 1
  return txt_file_list

In [6]:
def get_opt_pattern(icd_10_code):
  # create alternate pattern
  code_arr = icd_10_code.split(".")
  if len(code_arr) > 1:
    code1 = f"{code_arr[0]}. {code_arr[1]}"
    code2 = f"{code_arr[0]} .{code_arr[1]}"
    code3 = f"{code_arr[0]} . {code_arr[1]}"
    return [code1, code2, code3]
  else:
    return icd_10_code

In [7]:
def highlight_icd10_code_and_keyword(pdf_code_dict, page_keyword_dict, pdf_file_name):
  pdf_file = fitz.open(pdf_file_name)

  def highlight_pdf(highlight):
    for inst in highlight:
      highlight = page.add_highlight_annot(inst)
      highlight.update()
      highlight = page.search_for(text_to_be_highlighted)
      print(f"Page-{page_num}: ", code, highlight, end='\n')

  for page_num, page in enumerate(pdf_file):

    # highlight code
    if page_num in pdf_code_dict:
      for code in pdf_code_dict[page_num]:
        text_to_be_highlighted = code
        highlight = page.search_for(text_to_be_highlighted)
        if len(highlight) == 0:
          alternate_code_list = get_opt_pattern(code)
          for alt_code in alternate_code_list:
            text_to_be_highlighted = alt_code
            highlight = page.search_for(text_to_be_highlighted)
            # highlight pdf for option pattern
            highlight_pdf(highlight)
        # highlight pdf for main pattern   
        highlight_pdf(highlight)

    # highlight keyword
    if page_num in page_keyword_dict:
      for keyword in page_keyword_dict[page_num]:
        text_to_be_highlighted = keyword
        highlight = page.search_for(text_to_be_highlighted)
        for inst in highlight:
          highlight = page.add_highlight_annot(inst)
          highlight.set_colors(stroke=[1, 0.8, 0.8])
          highlight.update()
          highlight = page.search_for(text_to_be_highlighted)
          print(f"Page-{page_num}: ", keyword, highlight, end='\n')

  pdf_file.save(f"{pdf_file_name.split('.')[0]}_output.pdf", garbage=4, deflate=True, clean=True)

In [None]:
def make_icd_10_code_pattern(icd_10_code_df):
  patterns = []
  for _, row in icd_10_code_df.iterrows():
    patterns.append({"label": "ICD-10", "pattern": row["ICD-10"]})
  return patterns

In [8]:
def make_icd_10_keyword_pattern(icd_10_keyword_df):
  keywords = []
  for _, row in icd_10_keyword_df.iterrows():
    keywords.append(row["Keyword"])
  
  phrase_matcher = PhraseMatcher(nlp.vocab, attr="LOWER")
  patterns = list(nlp.tokenizer.pipe(keywords))
  phrase_matcher.add('keywords', patterns)
  return phrase_matcher

In [9]:
def search_icd_10_code(txt_list):
  pdf_page_vocab = {}
  for txt_file in txt_list:
    with open(txt_file, "r") as f:
      page_txt = f.read()
      # filter the page that have line number instead of code
      if not re.search("(P[ ][0-9]+)(,\s)(L[0-9]+)", page_txt):
        doc = nlp(page_txt)
        code_list = [ent.text for ent in doc.ents]
        if len(code_list) != 0:
          page_number = int(txt_file.split("/")[1].split(".")[0].split("-")[1])
          pdf_page_vocab[page_number] = code_list
          # print(f"Page[{txt_file.split('/')[1]}]: {code_list}")
  return pdf_page_vocab

In [10]:
def search_icd_10_keyword(txt_list, phrase_matcher):
  page_keyword_dict = {}
  # Step-4: Searching ICD-10 code
  for txt_file in txt_list:
    with open(txt_file, "r") as f:
      page_txt = f.read()
      doc = nlp(page_txt)
      matches = phrase_matcher(doc)

      keyword_list = []
      for match_id, start, end in matches:
        span = doc[start: end]
        keyword_list.append(f"{span}")

      if len(keyword_list) != 0:
        page_number = int(txt_file.split("/")[1].split(".")[0].split("-")[1])
        page_keyword_dict[page_number] = keyword_list
        print(f"Page[{txt_file.split('/')[1]}]: {keyword_list}")
  return page_keyword_dict

##Data preprocessing

In [None]:
columns = []
with open("icd_10_keywords.txt", "r") as f:
  txt_lines = f.readlines()
  for line in txt_lines:
    columns.append(line.strip("\n"))
print(columns[:10])

['Cholera due to Vibrio cholerae 01, biovar cholerae', 'Cholera due to Vibrio cholerae 01, biovar eltor', 'Cholera, unspecified', 'Typhoid fever, unspecified', 'Typhoid meningitis', 'Typhoid fever with heart involvement', 'Typhoid pneumonia', 'Typhoid arthritis', 'Typhoid osteomyelitis', 'Typhoid fever with other complications']


In [None]:
data_keyword_df = pd.DataFrame(columns, columns=["Keyword"])
data_keyword_df.head()

Unnamed: 0,Keyword
0,"Cholera due to Vibrio cholerae 01, biovar chol..."
1,"Cholera due to Vibrio cholerae 01, biovar eltor"
2,"Cholera, unspecified"
3,"Typhoid fever, unspecified"
4,Typhoid meningitis


In [None]:
data_keyword_df.to_csv("icd_10_keywords.csv", index=False)

In [None]:
data_code_df = pd.read_csv("icd_10_codes.csv")
data_code_df.head()

Unnamed: 0,ICD-10
0,A00.0
1,A00.1
2,A00.9
3,A01.00
4,A01.01


In [None]:
data_code_df["ICD-10"].head()

0     A00.0
1     A00.1
2     A00.9
3    A01.00
4    A01.01
Name: ICD-10, dtype: object

In [None]:
data_keyword_df["Keyword"].head()

0    Cholera due to Vibrio cholerae 01, biovar chol...
1      Cholera due to Vibrio cholerae 01, biovar eltor
2                                 Cholera, unspecified
3                           Typhoid fever, unspecified
4                                   Typhoid meningitis
Name: Keyword, dtype: object

In [None]:
icd_code_kerword_df = pd.DataFrame().assign(Code=data_code_df['ICD-10'], Keyword=data_keyword_df['Keyword'])
icd_code_kerword_df.head()

Unnamed: 0,Code,Keyword
0,A00.0,"Cholera due to Vibrio cholerae 01, biovar chol..."
1,A00.1,"Cholera due to Vibrio cholerae 01, biovar eltor"
2,A00.9,"Cholera, unspecified"
3,A01.00,"Typhoid fever, unspecified"
4,A01.01,Typhoid meningitis


In [None]:
icd_code_kerword_df.to_csv("icd_10_code_keywords.csv", index=False)

In [None]:
icd_code_kerword_df = pd.read_csv("icd_10_code_keywords.csv")
icd_code_kerword_df.head()

Unnamed: 0,Code,Keyword
0,A00.0,"Cholera due to Vibrio cholerae 01, biovar chol..."
1,A00.1,"Cholera due to Vibrio cholerae 01, biovar eltor"
2,A00.9,"Cholera, unspecified"
3,A01.00,"Typhoid fever, unspecified"
4,A01.01,Typhoid meningitis


##All Steps Together

In [11]:
# Step-1: spliting pdf file
pdf_file_name = "Redacted_Sample_2.pdf"
pdf_list = split_pdf(pdf_file_name)

# Step-2: Extracting text from pdf
txt_list = extract_text_from_pdf(pdf_list)

In [12]:
nlp = English()
# Step-3: loading and updating patterns to Spacy
nlp.add_pipe("entity_ruler").from_disk("./icd10_code_patterns.jsonl")

<spacy.pipeline.entityruler.EntityRuler at 0x7f40cf95b370>

In [13]:
# Step-4: Searching ICD-10 code
page_code_dict = search_icd_10_code(txt_list)

In [15]:
# Step-5: Creating ICD-10 keyword pattern
nlp = spacy.load('en_core_web_sm')
icd_code_kerword_df = pd.read_csv("icd_10_code_keywords.csv")
phrase_matcher = make_icd_10_keyword_pattern(icd_code_kerword_df)

In [16]:
# Step-6: Searching ICD-10 keyword
page_keyword_dict = search_icd_10_keyword(txt_list, phrase_matcher)

Page[page-0.txt]: ['Aphasia', 'Palpitations']
Page[page-3.txt]: ['Palpitations', 'Palpitations', 'palpitations', 'Palpitations', 'Palpitations', 'Palpitations', 'Palpitations']
Page[page-4.txt]: ['Aphasia', 'Overweight', 'Lower Abdominal Pain, Unspecified', 'Pain, Unspecified', 'Cervicogenic Headache', 'Tobacco Use']
Page[page-6.txt]: ['Aphasia', 'Aphasia', 'Aphasia', 'Aphasia', 'Occipital Neuralgia', 'Multiple Sclerosis']
Page[page-11.txt]: ['Aphasia', 'Aphasia', 'Aphasia', 'Aphasia']
Page[page-12.txt]: ['Actinic Keratosis']
Page[page-16.txt]: ['cyanosis']
Page[page-18.txt]: ['weakness']
Page[page-19.txt]: ['Migraine with aura, not intractable, without status migrainosus']
Page[page-24.txt]: ['Cervicogenic headache']
Page[page-26.txt]: ['cyanosis']
Page[page-28.txt]: ['right lower quadrant pain']
Page[page-34.txt]: ['palpitations', 'orthopnea', 'chronic cough', 'wheezing', 'anorexia', 'heartburn', 'hematemesis', 'melena', 'nausea', 'aphasia', 'weakness', 'polyphagia']
Page[page-35.txt

In [17]:
# Step-7: Highlighting ICD-10 code and keyword into pdf
output_file_name = highlight_icd10_code_and_keyword(page_code_dict, page_keyword_dict, pdf_file_name)
print(f"File[{output_file_name}] is saved after highlighting ICD-10 code and keyword")

Page-0:  Aphasia [Rect(31.346466064453125, 409.7332458496094, 60.250465393066406, 420.7252502441406)]
Page-0:  Palpitations [Rect(31.346466064453125, 653.5383911132812, 72.37847900390625, 664.5303955078125)]
Page-3:  Palpitations [Rect(142.4079132080078, 439.5684814453125, 183.43992614746094, 450.56048583984375), Rect(209.11190795898438, 439.5684814453125, 250.14390563964844, 450.56048583984375), Rect(255.0319061279297, 439.5684814453125, 295.49591064453125, 450.56048583984375), Rect(142.4079132080078, 464.7684631347656, 183.43992614746094, 475.7604675292969), Rect(142.4079132080078, 478.4684753417969, 183.43992614746094, 489.4604797363281), Rect(142.4079132080078, 492.1684875488281, 183.43992614746094, 503.1604919433594), Rect(142.4079132080078, 519.5684204101562, 183.43992614746094, 530.5604248046875)]
Page-3:  Palpitations [Rect(142.4079132080078, 439.5684814453125, 183.43992614746094, 450.56048583984375), Rect(209.11190795898438, 439.5684814453125, 250.14390563964844, 450.560485839

In [None]:
with open(f"{txt_files_path}/page-37.txt", "r") as f:
  one_txt = f.read()
  one_txt = one_txt.replace(".", ",")
  #print(one_txt)

  doc = nlp(one_txt)
  matches = phrase_matcher(doc)
  for match_id, start, end in matches:
    span = doc[start: end]
    print(span.text)
  #print([(ent.text, ent.label_) for ent in doc.ents])

Migraine with aura, not intractable, without status migrainosus
Decreased white blood cell count, unspecified
Palpitations
lower abdominal pain, unspecified
pain, unspecified
lipomatosis, not elsewhere classified
overweight
overweight


In [None]:
%%time

page_keyword_dict = {}
# Step-4: Searching ICD-10 code
for txt_file in txt_list:
  with open(txt_file, "r") as f:
    page_txt = f.read()
    # filter the page that have line number instead of code
    #if not re.search("(P[ ][0-9]+)(,\s)(L[0-9]+)", page_txt):
    doc = nlp(page_txt)
    matches = phrase_matcher(doc)

    keyword_list = []
    for match_id, start, end in matches:
      span = doc[start: end]
      keyword_list.append(f"{span}")

    if len(keyword_list) != 0:
      page_number = int(txt_file.split("/")[1].split(".")[0].split("-")[1])
      page_keyword_dict[page_number] = keyword_list
      print(f"Page[{txt_file.split('/')[1]}]: {keyword_list}")

Page[page-0.txt]: ['Aphasia', 'Palpitations']
Page[page-3.txt]: ['Palpitations', 'Palpitations', 'palpitations', 'Palpitations', 'Palpitations', 'Palpitations', 'Palpitations']
Page[page-4.txt]: ['Aphasia', 'Overweight', 'Lower Abdominal Pain, Unspecified', 'Pain, Unspecified', 'Cervicogenic Headache', 'Tobacco Use']
Page[page-6.txt]: ['Aphasia', 'Aphasia', 'Aphasia', 'Aphasia', 'Occipital Neuralgia', 'Multiple Sclerosis']
Page[page-11.txt]: ['Aphasia', 'Aphasia', 'Aphasia', 'Aphasia']
Page[page-12.txt]: ['Actinic Keratosis']
Page[page-16.txt]: ['cyanosis']
Page[page-18.txt]: ['weakness']
Page[page-19.txt]: ['Migraine with aura, not intractable, without status migrainosus']
Page[page-24.txt]: ['Cervicogenic headache']
Page[page-26.txt]: ['cyanosis']
Page[page-28.txt]: ['right lower quadrant pain']
Page[page-34.txt]: ['palpitations', 'orthopnea', 'chronic cough', 'wheezing', 'anorexia', 'heartburn', 'hematemesis', 'melena', 'nausea', 'aphasia', 'weakness', 'polyphagia']
Page[page-35.txt

In [None]:
%%time

pdf_page_dict = {}
# Step-4: Searching ICD-10 code
for txt_file in txt_list:
  with open(txt_file, "r") as f:
    page_txt = f.read()
    # filter the page that have line number instead of code
    if not re.search("(P[ ][0-9]+)(,\s)(L[0-9]+)", page_txt):
      doc = nlp(page_txt)
      code_list = [ent.text for ent in doc.ents]
      if len(code_list) != 0:
        page_number = int(txt_file.split("/")[1].split(".")[0].split("-")[1])
        pdf_page_dict[page_number] = code_list
        print(f"Page[{txt_file.split('/')[1]}]: {code_list}")

##ICD-10 code highlighting

In [None]:
0 in pdf_page_dict

False

In [None]:
pdf_page_dict[2]

['L14', 'L14']

In [None]:
%%time

# Step-4: Highlighting ICD-10 code into pdf
highlight_icd10_keyword(page_keyword_dict, pdf_file_name)

Page-37:  Migraine with aura, not intractable, without status migrainosus [Rect(107.75862121582031, 230.57130432128906, 272.81884765625, 241.70071411132812)]
Page-37:  Palpitations [Rect(107.75862121582031, 230.57130432128906, 272.81884765625, 241.70071411132812)]
Page-37:  lower abdominal pain, unspecified [Rect(107.75862121582031, 230.57130432128906, 272.81884765625, 241.70071411132812)]
Page-37:  pain, unspecified [Rect(107.75862121582031, 230.57130432128906, 272.81884765625, 241.70071411132812)]
Page-37:  lipomatosis, not elsewhere classified [Rect(107.75862121582031, 230.57130432128906, 272.81884765625, 241.70071411132812)]
Page-37:  overweight [Rect(107.75862121582031, 230.57130432128906, 272.81884765625, 241.70071411132812)]
Page-37:  overweight [Rect(107.75862121582031, 230.57130432128906, 272.81884765625, 241.70071411132812)]
CPU times: user 1.56 s, sys: 33 ms, total: 1.59 s
Wall time: 1.59 s


In [None]:
# Step-4: Highlighting ICD-10 code and keyword into pdf
highlight_icd10_code_and_keyword(page_keyword_dict, pdf_file_name)

In [None]:
True if re.search("[0-9],L", "3, L14") else False

False

In [None]:
True if re.search("(P[ ][0-9]+)(,\s)(L[0-9]+)", "P 13, L17") else False

True

In [None]:
pattern = re.compile("(P[ ][0-9]+)(,\s)(L[0-9]+)")

for i, line in enumerate(open("txt-files/pdf-page-3.txt")):
  for match in re.finditer(pattern, line):
    print('Found on line %s: %s' % (i+1, match.group()))

Found on line 2: P 60, L23
Found on line 3: P 61, L14
Found on line 4: P 56, L19
Found on line 7: P 54, L19
Found on line 8: P 49, L5
Found on line 9: P 39, L6
Found on line 10: P 37, L9
Found on line 11: P 35, L14
Found on line 12: P 27, L24
Found on line 13: P 21, L23
Found on line 15: P 11, L20
Found on line 16: P 5, L39
Found on line 18: P 3, L24
Found on line 22: P 77, L38
Found on line 27: P 86, L19
Found on line 29: P 66, L19
Found on line 30: P 29, L19
Found on line 31: P 21, L16
Found on line 32: P 12, L18
Found on line 33: P 3, L22
Found on line 34: P 3, L22
Found on line 38: P 3, L17
Found on line 41: P 79, L31


In [None]:
# Step-5: Searching ICD-10 code
for txt_file in txt_list:
  with open(txt_file, "r") as f:
    #one_txt = f.read()
    doc = nlp(f.read())
    code_list = [ent.text for ent in doc.ents]
    if len(code_list) != 0:
      print(f"Page[{txt_file.split('/')[1]}]: {code_list}")
    #print([f"Page[{txt_file.split('/')[1]}]: {(ent.text, ent.label_)}" for ent in doc.ents if ent is not None])

Page[pdf-page-2.txt]: ['L14']
Page[pdf-page-3.txt]: ['L14', 'L14', 'L22', 'L22']
Page[pdf-page-4.txt]: ['L26', 'L26', 'L26', 'L26', 'L26', 'L22', 'L14', 'L22', 'L14']
Page[pdf-page-6.txt]: ['L14', 'L14']
Page[pdf-page-7.txt]: ['L14', 'L14', 'L14', 'L14']
Page[pdf-page-8.txt]: ['L26', 'L26', 'L26']
Page[pdf-page-9.txt]: ['L26', 'L26', 'L22', 'L26', 'L22', 'L26', 'L22', 'L22', 'L22']
Page[pdf-page-10.txt]: ['L22', 'L26', 'L14']
Page[pdf-page-11.txt]: ['L22', 'L22', 'L22']
Page[pdf-page-12.txt]: ['L14']
Page[pdf-page-18.txt]: ['M54.40']
Page[pdf-page-19.txt]: ['G43.109']
Page[pdf-page-26.txt]: ['D17.1', 'F43.9']
Page[pdf-page-37.txt]: ['G43.109', 'D72.819', 'R00.2', 'R10.30', 'E88.2']
Page[pdf-page-39.txt]: ['G43.109', 'D72.819', 'R00.2', 'R10.30']
Page[pdf-page-42.txt]: ['G43.109', 'R00.2', 'G43.109', 'D72.819', 'R00.2']
Page[pdf-page-74.txt]: ['M54.2', 'R42']
Page[pdf-page-82.txt]: ['G43.109', 'R00.2', 'L25.9', 'D72.819']
Page[pdf-page-84.txt]: ['G43.109']
Page[pdf-page-85.txt]: ['L25.9

##Tessrect text extraction

In [None]:
%%time

# Step-1: spliting pdf file
pdf_file_name = "Redacted_Sample_2.pdf"
pdf_list = split_pdf(pdf_file_name)

# Step-2: converting pdf to image
img_list = pdf_to_image(pdf_list)

# Step-4: Extracting text from pdf
txt_list = extract_text_from_image(img_list)

# Step-3: loading and updating patterns to Spacy
nlp.add_pipe("entity_ruler").from_disk("./icd10_code_patterns.jsonl")

In [None]:
# Step-4: Extracting text from pdf
txt_list = extract_text_from_image(img_list)

# Step-3: loading and updating patterns to Spacy
nlp.add_pipe("entity_ruler").from_disk("./icd10_code_patterns.jsonl")

<spacy.pipeline.entityruler.EntityRuler at 0x7f4ac6e88af0>

In [None]:
%%time

pdf_page_dict = {}
# Step-4: Searching ICD-10 code
for txt_file in txt_list:
  with open(txt_file, "r") as f:
    page_txt = f.read()
    # filter the page that have line number instead of code
    if not re.search("(P[ ][0-9]+)(,\s)(L[0-9]+)", page_txt):
      doc = nlp(page_txt)
      code_list = [ent.text for ent in doc.ents]
      if len(code_list) != 0:
        page_number = int(txt_file.split("/")[1].split(".")[0].split("-")[1])
        pdf_page_dict[page_number] = code_list
        print(f"Page[{txt_file.split('/')[1]}]: {code_list}")

Page[page-17.txt]: ['R64']
Page[page-18.txt]: ['D17.1']
Page[page-19.txt]: ['G43.109']
Page[page-24.txt]: ['M50.30']
Page[page-26.txt]: ['F43.9']
Page[page-37.txt]: ['G43.109', 'D72.819', 'R10.30', 'E88.2']
Page[page-39.txt]: ['G43.109', 'D72.819', 'R10.30']
Page[page-42.txt]: ['G43.109', 'D72.819']
Page[page-74.txt]: ['M50.323', 'M46.92', 'R42']
Page[page-82.txt]: ['G43.109', 'L25.9', 'D72.819']
Page[page-85.txt]: ['R00.2', 'L25.9', 'D72.819']
Page[page-102.txt]: ['G43.109', 'L25.9']
Page[page-105.txt]: ['G43.109', 'L25.9']
CPU times: user 676 ms, sys: 15.5 ms, total: 691 ms
Wall time: 694 ms


In [None]:
%%time

# Step-4: Highlighting ICD-10 code into pdf
highlight_icd10_code(pdf_page_dict, pdf_file_name)

Page dict: ['R64']
Page dict: ['D17.1']
Page dict: ['G43.109']
Page-19:  G43.109 [Rect(227.2899932861328, 145.4748077392578, 256.4548034667969, 156.68040466308594)]
Page dict: ['M50.30']
Page dict: ['F43.9']
Page-26:  F43.9 [Rect(267.1199951171875, 294.4976806640625, 288.10455322265625, 306.6370849609375)]
Page dict: ['G43.109', 'D72.819', 'R10.30', 'E88.2']
Page-37:  D72.819 [Rect(70.31879425048828, 230.57130432128906, 100.93679809570312, 241.70071411132812)]
Page-37:  R10.30 [Rect(70.31879425048828, 248.33460998535156, 96.43319702148438, 259.4640197753906)]
Page-37:  E88.2 [Rect(70.31879425048828, 257.4552307128906, 91.48410034179688, 268.5846252441406)]
Page dict: ['G43.109', 'D72.819', 'R10.30']
Page-39:  G43.109 [Rect(68.39816284179688, 293.9032287597656, 98.70294952392578, 304.7579650878906)]
Page-39:  D72.819 [Rect(69.1170654296875, 311.899658203125, 98.97944641113281, 322.75439453125)]
Page-39:  R10.30 [Rect(69.1170654296875, 329.643310546875, 94.58699035644531, 340.498046875)]

In [None]:
with open(f"{txt_files_path}/page-37.txt", "r") as f:
  one_txt = f.read()
  print(one_txt)

, Male,

SF0063198200

Patient Name (005 - Phone Number:

7

4of6

Office/Outpatient Visit

Visit Date: Mon, Jun 24, 2019 9:00 am

Provider: Josephs, Barry, MD (Assistant: Coppage, Jasmine, )

Location: Barry Josephs, M.D. LLG

Electronically signed by Barry Josephs, MD on 06/24/2019 02:46:23 PM

Printed on 06/24/2019 at 3:21 pm.

Assessment:

¥70.0 Preventive Physical Exam (Mild)

434.91 Cerebrovascular accident (Severe)

G43.109 Migraine with aura, not intractable, without status migrainosus

722.91

Cervical disc disorder (Mild)

D72.819 Decreased white blood cell count, unspecified

ROO.2 Palpitations

R10.30 Lower abdominal pain, unspecified

E88.2 Lipomatosis, not elsewhere classified

Plan:

Preventive Physical Exam

PREVENTION & RECOMMENDATIONS:

Overall Health and Lifestyle:

Thank you for coming for the Wellness Program. It was a pleasure to update the history and physical as well as discuss

your laboratory results

Wellness anc Prevention begins with a healthy lifestyle and