<a href="https://colab.research.google.com/github/rahiakela/nlp-research-and-practice/blob/main/text-similarity-works/13_icd_10_code_highlight_with_keyword_match_final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##Setup

In [None]:
!pip -q install spacy
!python -m spacy download en_core_web_sm

!pip install pillow

!sudo apt install build-essential libpoppler-cpp-dev pkg-config python3-dev
!pip install -U pdftotext
!pip install PyPDF2
!pip install fitz
!pip install PyMuPDF

In [None]:
!pip install fuzzywuzzy

In [1]:
import numpy as np
import pandas as pd
import re
import os
import sys
import glob
import difflib
import pickle
from pathlib import Path
from difflib import SequenceMatcher

import fitz
import pdftotext
from PyPDF2 import PdfFileReader, PdfReader, PdfFileWriter, PdfWriter

import spacy
from spacy.matcher import PhraseMatcher
from spacy.lang.en import English

import nltk
from nltk.tokenize import sent_tokenize
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from string import punctuation
from fuzzywuzzy import fuzz
from fuzzywuzzy import process



In [None]:
nltk.download('punkt')
nltk.download('stopwords')

In [2]:
stop_words = set(stopwords.words('english'))

In [4]:
!mkdir -p input_files

In [None]:
(233/255,150/255,122/255)

(0.9137254901960784, 0.5882352941176471, 0.47843137254901963)

##Core Classes

In [3]:
def is_common_word_coords_nearby(page_obj, code_coord, keyword, match_sent):
  is_closest_coord = False
  # get common word list
  common_words = get_common_words(keyword, match_sent)
  if len(common_words) > 0:
    # get common word coordinate
    highlight_list = page_obj.search_for(common_words[0])
    y_coords_list = [highlight[1] for highlight in highlight_list]
    # find closet y coordinate value
    closet_y_coords = find_nearest(y_coords_list, value=code_coord)
    # check, common word coordinate within range of code coordinate
    # if (code_coord-10) <= highlight_list[0][1] <= (code_coord + 10):
    if (code_coord-10) <= closet_y_coords <= (code_coord + 10):
      is_closest_coord = True
  return is_closest_coord

In [None]:
pdf_file = fitz.open("page-36.pdf")  #Create pdf file object
pdf_page_count = pdf_file.page_count   #var to hold page count
for page in range(pdf_page_count):  #notice that page starts with index 0
  page_obj = pdf_file[page] #Create page object
  #content_of_page = pdf_file.get_page_text(page) #Get page content
  match_word = "Z00.00" 
  content_of_page = page_obj.get_text("blocks", sort=False)  #get rect for all words

  coords_array = []
  y_coords_array = []
  for content in content_of_page:
    print(content)
    if content[4].replace("(", "").replace(")", "") == match_word:
      rect_comp = fitz.Rect(content[0],content[1],content[2],content[3])
      print(f"Line #{content[6]}-{content[4]} > {rect_comp}")

In [3]:
class Highlighter:
  def __init__(self, code_df):
    # loading and updating patterns for ICD-10 code
    self.nlp_code10 = English()
    self.nlp_code10.add_pipe("entity_ruler").from_disk("icd10_code_patterns-v5.jsonl")

    # define icd-10 code dataset
    self.code_df = code_df
    self.text_list = None

    # define required directory path
    self.PDF_FILES_PATH = "pdf-files"
    self.TXT_FILES_PATH = "txt-files"
    self.OUTPUT_FILES_PATH = "output"
    create_directory(self.PDF_FILES_PATH)
    create_directory(self.TXT_FILES_PATH)
    create_directory(self.OUTPUT_FILES_PATH)

  def split_pdf(self, pdf_path):
      pdf_in_file = open(pdf_path, "rb")
      pdf = PdfReader(pdf_in_file)
      pdf_list = []
      for page in range(len(pdf.pages)):
          input_pdf = PdfReader(pdf_in_file)
          output = PdfWriter()
          #output.addPage(input_pdf.getPage(page))
          output.add_page(input_pdf.pages[page])
          with open(f"{self.PDF_FILES_PATH}/page-{page}.pdf", "wb") as outputStream:
              output.write(outputStream)
              pdf_list.append(f"page-{page}.pdf")
      return pdf_list

  def extract_text_from_pdf(self, pdf_list):
    txt_file_list = []
    i = 0
    for pdf_file in pdf_list:
      with open(os.path.join(self.PDF_FILES_PATH, pdf_file), "rb") as f:
        pdf = pdftotext.PDF(f)

      # Read all the text into one string
      pdf_text = "\n\n".join(pdf)

      # write text into file
      with open(f"{self.TXT_FILES_PATH}/page-{str(i)}.txt", "a") as f:
        f.write(pdf_text)
      txt_file_list.append(f"{self.TXT_FILES_PATH}/page-{str(i)}.txt")
      i += 1
    self.text_list = txt_file_list
    return txt_file_list

  def highlight_icd_code(self, icd10_code_dict, match_threshold=30, coordinate=False, pdf_file_name=None, cords_file_name=None):
      pdf_file = fitz.open(pdf_file_name)
      # create file to write coordinate
      txt_output_file_name = open(f"{self.OUTPUT_FILES_PATH}/{cords_file_name}", "a")
      # add file header 
      txt_output_file_name.write("| Page | Found Code | Actual ICD10-Code | Code Line # | ICD 10 description | Matched Line | Common Words | Matched Line # | confidence |\n ")

      def highlight_code(p_highlight, icd10_code, num_page, page_obj, code_coords):
        match_found = None
        sentence_list = []
        keyword = ""

        # do the color coding
        keyword = self.get_keyword(icd10_code)
        match_found_list, sentence_list = self.get_best_token_match(icd10_code, num_page, page_obj, code_coords, match_threshold)
        # highlight code if threshold is more than match threshold and common word coordinate is within range of code coordinate
        if match_found_list is not None:
          print(f"match_found_list: {match_found_list}")
          if match_found_list[0][1] >= match_threshold and len(keyword) > 0 and is_common_word_coords_nearby(page_obj, code_coords, keyword, match_found[0][0]):
            page_highlight = page.add_highlight_annot(p_highlight)
            page_highlight.set_colors(stroke=[0.66, 1, 0.07])  # light green
            page_highlight.update()
          else: # set color coding for keyword mismatch and lower match score
            page_highlight = page.add_highlight_annot(p_highlight)
            page_highlight.set_colors(stroke=[0.92, 0.59, 0.48]) # dark salmon
            page_highlight.update()
        return match_found, sentence_list, keyword

      def highlight_common_words(page_obj, y_coords_array, curr_y_coord, common_word_list):
        highlight_coords_dict = {}
        highlighted_word_list = []
        #get rect for all words
        page_content = page_obj.get_text("words", sort=False)  
        # find closet y coordinate value
        closet_y_coords = find_nearest(y_coords_array, value=curr_y_coord)
        for content in page_content:
          for common_word in common_word_list:
            curr_word = clean_text(content[4])[0] if len(clean_text(content[4])) > 0 else ""
            print(f"closet_y_coords-{closet_y_coords}, Y-{content[1]}")
            if curr_word.lower() == common_word.lower() and (closet_y_coords-20) <= content[1] <= (closet_y_coords + 20):
              rect_comp = fitz.Rect(content[0], content[1], content[2], content[3])
              print(f"closet_y_coords-{closet_y_coords}, Y-{content[1]}, Coords: {rect_comp}")
              #print(f"Line #{content[6]}-{content[4]} > {rect_comp}")
              highlight = page_obj.add_highlight_annot(rect_comp)
              highlight.update()
              highlight_coords_dict[common_word] = rect_comp
              highlighted_word_list.append(common_word)
        return highlight_coords_dict, highlighted_word_list
              
      for page_num, page in enumerate(pdf_file):
        # highlight ICD-10 code
        if page_num in icd10_code_dict:
          for code in icd10_code_dict[page_num]:
            highlight_list = page.search_for(code)
            # print(f"Page-{page_num}, Code: {code}, Coordinate: {highlight_list}")
            # prepare list for y0 coord value
            y_coords_list = [highlight[1] for highlight in highlight_list]
            for highlight in highlight_list:
              # highlight ICD-10 code  
              match_found_list, sentence_list, keyword = highlight_code(highlight, code, page_num, page, highlight[1])
              # write all info into text file
              num_page = page_num + 1
              #for idx, match_found in enumerate(match_list):
              # match_found: ('Diagnosis: Encntr for general adult medical exam w/o abnormal findings (Z00.00)', 100)
              max_match_sent = match_found_list[0][0] if match_found_list is not None else ""
              max_match_score = match_found_list[0][1] if match_found_list is not None else 0
              if max_match_score >= match_threshold and max_match_sent != 'NA':
                common_words = []
                for match_sent in match_found_list:
                  common_words.extend(get_common_words(keyword, match_sent))
                # print(f"common_words: {common_words}")
                highlight_coords_dict, highlighted_word_list = highlight_common_words(page, y_coords_list, highlight[1], common_words)
                #| Page | Found Code | Actual ICD10-Code | Code Line # | ICD 10 description | Matched Line| Common Words | Matched Line # | confidence | 
                code_cors_output = (f"|Page-{num_page} | {code} | {reverse_code_pattern(code)} | {sentence_list.index(max_match_sent) + 1}" 
                f" | {keyword if keyword else 'No keyword found'} | {max_match_sent if len(highlighted_word_list) > 0 else 'No matched line'}" 
                f" | {common_words} | {sentence_list.index(max_match_sent) + 1} | {max_match_score} |")
                txt_output_file_name.write("%s\n" % code_cors_output)
              else:
                code_cors_output = (f"|Page-{num_page} | {code} | {reverse_code_pattern(code)} | 0 | {keyword if keyword else 'No keyword found'}"
                f" | 'No matched line' | 'No common words' | 0 | {max_match_score} |")
                txt_output_file_name.write("%s\n" % code_cors_output)
              if coordinate and max_match_score >= match_threshold:
                txt_output_file_name.write("%s\n" % f"|{code}:{highlight}|")
                txt_output_file_name.write("%s\n" % f"|{highlight_coords_dict}|")
                
              txt_output_file_name.write("\n") # add extra line on every match code

      txt_output_file_name.close()
      pdf_output_file_name = f"{self.OUTPUT_FILES_PATH}/{pdf_file_name.split('/')[1].split('.')[0]}_output.pdf"
      pdf_file.save(pdf_output_file_name, garbage=4, deflate=True, clean=True)

      return pdf_output_file_name, cords_file_name

  def search_icd_code(self, txt_list):
    pdf_page_vocab = {}
    for txt_file in txt_list:
      with open(txt_file, "r") as f:
        page_txt = f.read()

        # check the page that have line number instead of code
        index_page = False
        if re.search("(P[ ][0-9]+)(,\s)(L[0-9]+)", page_txt):
          index_page = True

        doc = self.nlp_code10(page_txt)
        code_list = []
        for ent in doc.ents:
          if index_page:
            # check the code contain letter "L"
            if re.search("(L[0-9]+)", ent.text):
              continue
            else:
              code_list.append(ent.text)
          else:
            code_list.append(ent.text)

        #code_list = [ent.text for ent in doc.ents if not re.search("(P[ ][0-9]+)(,\s)(L[0-9]+)", ent.text)]
        if len(code_list) != 0:
          page_number = int(txt_file.split("/")[1].split(".")[0].split("-")[1])
          pdf_page_vocab[page_number] = list(set(code_list)) 
          # print(f"Page[{txt_file.split('/')[1]}]: {code_list}")
    return pdf_page_vocab

  def get_keyword(self, p_code):
    keyword = ""
    # reverse code if required
    code = reverse_code_pattern(p_code)
    # get keyword from dataset
    keyword_list = list(self.code_df.loc[self.code_df["Code"] == code]["Keyword"])
    if len(keyword_list) > 0:
      keyword = keyword_list[0]
    return keyword

  def get_best_token_match(self, p_code, num_page, page_obj, code_coords, match_threshold):
    # Step 1: reverse code pattern
    reversed_icd_code = reverse_code_pattern(p_code)
    # Step 2: fetch keyword based on code 
    keyword = self.get_keyword(reversed_icd_code)
    # Step 3: prepare sentence list 
    sentence_list = get_sentence_list(self.text_list, num_page)
    # Step 4: get best token match ratio 
    match_list = [
      (sentence, fuzz.token_set_ratio(keyword, sentence)) 
      for sentence in sentence_list if fuzz.token_set_ratio(keyword, sentence) > match_threshold
    ]
    match_found_list = []
    num_page = num_page + 1
    for match_f in match_list:
      highlight_list = page_obj.search_for(match_f[0])
      # prepare list for y0 coord value
      y_coords_list = [highlight[1] for highlight in highlight_list]
      # find closet y coordinate value
      if len(y_coords_list) > 0:
        closet_y_coords = find_nearest(y_coords_list, value=code_coords)
        # print(f"Code: {code_coords}, Y: {y_coords}")
        if (code_coords-20) <= closet_y_coords <= (code_coords + 20):
          match_found_list.append(match_f)
          print(f"Page-{num_page}, Code: ({p_code})-{code_coords}, closet_y_coords:{closet_y_coords} Keyword: {keyword}, Match: {match_f}")
    return match_found_list, sentence_list
    
  def get_best_token_match2(self, p_code, num_page, match_threshold):
    # Step 1: reverse code pattern
    reversed_icd_code = reverse_code_pattern(p_code)
    # Step 2: fetch keyword based on code 
    keyword = self.get_keyword(reversed_icd_code)
    # Step 3: prepare sentence list 
    sentence_list = get_sentence_list(self.text_list, num_page)
    # Step 4: get best match token ratio or wratio
    match_list = [
      (sentence, fuzz.token_set_ratio(keyword, sentence)) 
      for sentence in sentence_list if fuzz.token_set_ratio(keyword, sentence) > match_threshold
    ]
    match_found = sort_tuple(match_list)[0] if len(sort_tuple(match_list)) > 0 else ("NA", 0)
    return match_found, sentence_list,

def is_common_word_coords_nearby(page_obj, code_coord, keyword, match_sent):
  is_closest_coord = False
  # get common word list
  common_words = get_common_words(keyword, match_sent)
  if len(common_words) > 0:
    # get common word coordinate
    highlight_list = page_obj.search_for(common_words[0])
    y_coords_list = [highlight[1] for highlight in highlight_list]
    # find closet y coordinate value
    closet_y_coords = find_nearest(y_coords_list, value=code_coord)
    # check, common word coordinate within range of code coordinate
    # if (code_coord-10) <= highlight_list[0][1] <= (code_coord + 10):
    if (code_coord-10) <= closet_y_coords <= (code_coord + 10):
      is_closest_coord = True
  return is_closest_coord

def find_nearest(array, value):
  array = np.asarray(array)
  idx = (np.abs(array - value)).argmin()
  return array[idx]

def get_common_words(sent1, sent2):
  clean_token1 = clean_text(sent1)
  clean_token2 = clean_text(sent2)
  token_set1 = set(clean_token1)
  token_set2 = set(clean_token2)

  common_word_set = set()
  def get_common(token_set1, token_set2):
    for w1 in token_set1:
      for w2 in token_set2:
        if w1.lower() == w2.lower():
          common_word_set.add(w1)
  get_common(token_set1, token_set2)
  get_common(token_set2, token_set1)
  return list(common_word_set)

def clean_text(sent):
  # tokenize sentence
  sent1 = word_tokenize(sent)
  # filter stop words
  filtered_sent = [w for w in sent1 if not w.lower() in stop_words]
  filtered_sent = [w for w in filtered_sent if re.sub(re.compile('\W'), '', w)]
  clean_tokens = []
  for token in filtered_sent:
    if token.find("-"):
      tokens = token.split("-")
      clean_tokens.extend(tokens)
    else:
      clean_tokens.append(token)
  return clean_tokens

def get_sentence_line(p_code, sentence_list):
  line_list = [(line + 1) for line, sent in enumerate(sentence_list) if p_code in sent]
  return line_list

def get_sentence_list(text_list, num_page):
  with open(f"{text_list[num_page]}", "r") as f:
    lines = [line.rstrip('\n') for line in f]
    sentence_list = [" ".join(line.split()) for line in lines] # Remove extra spaces, tabs, and line breaks
  return sentence_list

def reverse_code_pattern(p_code):
  orig_code = p_code

  # check for code contains space(" ")
  tmp_code = orig_code.split(" ")
  if len(tmp_code) > 1:
    orig_code = f"{tmp_code[0].strip()}.{tmp_code[1].strip()}"

  # check for code contains dot(".")
  tmp_code = p_code.split(".")
  if len(tmp_code) > 1:
    orig_code = f"{tmp_code[0].strip()}.{tmp_code[1].strip()}"
  
  # check for code contains comma(",")
  tmp_code = p_code.split(",")
  if len(tmp_code) == 2:
    orig_code = f"{tmp_code[0].strip()}.{tmp_code[1].strip()}"
  elif len(tmp_code) == 2:
    orig_code = f"{tmp_code[0].strip()}.{tmp_code[2].strip()}"

  # handle if the first char of code is missing
  alphabats = {"Z": "2", "B": "8", "O": "0", "S": "5", "l": "1", "G": "6", "o": "9", "i": "1"}
  for key, val in alphabats.items():
    # replcae char on 0 index
    if orig_code.find(val) == 0:
      #orig_code = orig_code.replace(val, key)
      orig_code = replacer(orig_code, key, 0)
    # replcae char on 1 index
    if orig_code.find(key) == 1:
      orig_code = replacer(orig_code, val, 1)
      # replcae char on 2 index
      if orig_code.find(key) == 2:
        orig_code = replacer(orig_code, val, 2)
      break

  return orig_code

def replacer(s, newstring, index, nofail=False):
  # raise an error if index is outside of the string
  if not nofail and index not in range(len(s)):
      raise ValueError("index outside given string")

  # if not erroring, but the index is still not in the correct range..
  if index < 0:  # add it to the beginning
      return newstring + s
  if index > len(s):  # add it to the end
      return s + newstring

  # insert the new string between "slices" of the original
  return s[:index] + newstring + s[index + 1:]

def sort_tuple(p_tup):
  return (sorted(p_tup, key = lambda x: x[1], reverse=True)) 

def create_directory(dir_name):
  if not os.path.exists(dir_name):
    os.makedirs(dir_name)

In [10]:
(120.63995361328125) <= 360.1600036621094 <= (120.63995361328125 + 20)

False

##Keyword Matching & Highlighting 

- Step 1 - Z87.5
- Step 2 - Personal history of complications of pregnancy, childbirth and the puerperium
- Step 3 - Page keyword
- Step 4 - calculate cosine similirity
- Step 5 - "Green" > 60% otherwise "Yellow"

In [4]:
!rm -rf input_files
!mkdir -p input_files
!rm -rf output
!mkdir -p output

In [5]:
def purge(file_path):
  for f in glob.glob(file_path):
    os.remove(f)

In [6]:
# Step-0: create highlighter instance
INPUT_PDF_FILES_PATH = "input_files"
code_df = pd.read_csv("icd_10_code_and_keywords_v2.csv")

highlighter = Highlighter(code_df)

In [None]:
%%time

for pdf_file in os.listdir(INPUT_PDF_FILES_PATH):
  pdf_file_name = f"{INPUT_PDF_FILES_PATH}/{pdf_file}"
  cords_file_name = f"{pdf_file_name.split('/')[1].split('.')[0]}_cords.txt"

  # Step-1: splitting pdf file
  pdf_list = highlighter.split_pdf(pdf_file_name)

  # Step-2: Extracting text from pdf
  txt_list = highlighter.extract_text_from_pdf(pdf_list)

  # Step-3: Searching ICD-10 code
  icd10_code_dict = highlighter.search_icd_code(txt_list)

  # Step-4: Highlighting ICD-10 code into pdf
  pdf_output_file, txt_output_file = highlighter.highlight_icd_code(icd10_code_dict,
                                                                    match_threshold=40,
                                                                    coordinate=True,
                                                                    pdf_file_name=pdf_file_name,
                                                                    cords_file_name=cords_file_name)
  print(f"File[{pdf_output_file}] is saved after highlighting ICD-10 code")
  print(f"Highlighted coordinates are saved into [{txt_output_file}] file.")

  # remove all pdf and text files
  purge("pdf-files/*.pdf")
  purge("txt-files/*.txt")
  pdf_list = []
  txt_list = []

In [None]:
purge("pdf-files/*.pdf")
purge("txt-files/*.txt")

In [8]:
!zip output.zip output/*.*

  adding: output/36page_cords.txt (deflated 59%)
  adding: output/36page_output.pdf (deflated 1%)
  adding: output/38page_cords.txt (deflated 57%)
  adding: output/38page_output.pdf (deflated 3%)


In [None]:
!zip pdf_text_output.zip pdf-files/*.* txt-files/*.*

In [8]:
# Step-1: spliting pdf file
pdf_file_name = "36page.pdf"
pdf_list = highlighter.split_pdf(pdf_file_name)

# Step-2: Extracting text from pdf
txt_list = highlighter.extract_text_from_pdf(pdf_list)

In [9]:
# Step-3: Searching ICD-10 code
page_code10_dict = highlighter.search_icd_code(txt_list)

In [10]:
code_df = pd.read_csv("icd_10_code_and_keywords_v2.csv")

##Text clean up

In [None]:
with open(f"{txt_list[7]}", "r") as f:
  my_text = f.read()

In [None]:
nlp = spacy.load("en_core_web_sm")
doc = nlp(my_text)
sentences = [sentence.text for sentence in doc.sents]

In [None]:
sentences

In [None]:
def text_preprocess(text_file):
  sentence_list = []
  stopwords = ["is", "a"]
  doc = nlp(my_text)
  sentences = [sentence.text for sentence in doc.sents]
  for sent in sentences:
    clean_text = " ".join(sent.split())  # Remove extra spaces, tabs, and line breaks
    clean_text = re.sub(f"[{re.escape(punctuation)}]", "", clean_text) # Remove punctuation
    clean_text = re.sub(r"\b[0-9]+\b\s*", "", clean_text)     # Remove numbers
    clean_text = " ".join([w for w in clean_text.split() if not w.isdigit()]) # Remove digits= Side effect: removes extra spaces
    clean_text = " ".join([w for w in clean_text.split() if w.isalpha()]) # Remove non-alphabetic characters= Side effect: removes extra spaces
    clean_text = re.sub(r"[^A-Za-z0-9\s]+", "", clean_text) # Remove all special characters and punctuation
    # Remove stopwords from a list
    tokens = clean_text.split()
    clean_tokens = [t for t in tokens if not t in stopwords]
    clean_text = " ".join(clean_tokens)
    # Remove short tokens
    tokens = clean_text.split()
    clean_tokens = [t for t in tokens if len(t) > 3]
    clean_text = " ".join(clean_tokens)
    # Remove repeated characters
    clean_text = re.sub(r'(.)\1{3,}',r'\1', clean_text)
    if len(clean_text) > 0:
      sentence_list.append(clean_text)
  return sentence_list

In [None]:
sentence_list = text_preprocess(my_text)
sentence_list

In [None]:
with open(f"{txt_list[7]}", "r") as f:
  lines = [line.rstrip('\n') for line in f]
  sentence_list = [" ".join(line.split()) for line in lines] # Remove extra spaces, tabs, and line breaks
sentence_list

In [None]:
def get_best_match(sentence_list, keyword):
  print("#"*10)
  print(f"Matching for : {keyword}")
  print()
  ratios = [fuzz.ratio(keyword, sentence) for sentence in sentence_list]
  best_match = sentence_list[ratios.index(max(ratios))]
  print(f"Best match: {fuzz.ratio(keyword, best_match)} | {best_match}")
  print(f"Before Match: {fuzz.ratio(keyword, sentence_list[sentence_list.index(best_match) - 1])} | {sentence_list[sentence_list.index(best_match) - 1]}")
  print(f"After Match: {fuzz.ratio(keyword, sentence_list[sentence_list.index(best_match) + 1])} | {sentence_list[sentence_list.index(best_match) + 1]}")
  print()

  p_ratios = [fuzz.partial_ratio(keyword, sentence) for sentence in sentence_list]
  p_best_match = sentence_list[p_ratios.index(max(p_ratios))]
  print(f"Partial Best match: {fuzz.partial_ratio(keyword, p_best_match)} | {p_best_match}")
  print(f"Partial Before Match: {fuzz.partial_ratio(keyword, sentence_list[sentence_list.index(p_best_match) - 1])} | {sentence_list[sentence_list.index(p_best_match) - 1]}")
  print(f"Partial After Match: {fuzz.partial_ratio(keyword, sentence_list[sentence_list.index(p_best_match) + 1])} | {sentence_list[sentence_list.index(p_best_match) + 1]}")
  print()

In [None]:
keyword_list = [get_keyword(code) for code in clean_icd10_code]
keyword_list

In [None]:
[get_best_match(sentence_list, keyword) for keyword in keyword_list]

In [None]:
def get_best_match(sentence_list, keyword_list):
  for keyword in keyword_list:
    #for sentence in sentence_list:
    print("#"*10)
    print(f"Matching for : {keyword}")
    match_list = process.extract(keyword, sentence_list, scorer = fuzz.ratio, limit = 3)
    for match_found in match_list:
      print(f"{match_found[0]} | {match_found[1]}")
    print()

In [None]:
get_best_match(sentence_list, keyword_list)
#process.extract(query, choices, scorer = fuzz.partial_ratio, limit = 2)

In [None]:
sentence_list.index("Moderate Obstructive Sleep Apnea .")

25

In [None]:
for idx, sent in enumerate(sentence_list):
  print(f"{idx}>{sent}")

##All Together

In [11]:
def get_keyword(p_code):
  keyword = ""
   # reverse code if required
  code = reverse_code_pattern(p_code)
  # get keyword from dataset
  keyword_list = list(code_df.loc[code_df["Code"] == code]["Keyword"])
  if len(keyword_list) > 0:
    keyword = keyword_list[0]
  return keyword

In [None]:
# Diagnosis: Hypertriglyceridemia, sporadic (E78.3)
get_keyword("E78.3")

'Hyperchylomicronemia'

In [None]:
get_keyword("R03.0")
# Hypertriglyceridemia

'Elevated blood-pressure reading, w/o diagnosis of htn'

In [None]:
clean_icd10_code = [reverse_code_pattern(code) for code in page_code10_dict[7]]
clean_icd10_code

['Z20.822', 'E78.3', 'R05.9']

Finding for `Hypersomnia, unspecified`:
44 > Hypoxemia
65 > (central sleep apnea (G47.31) CoHypersomnia, unspecified (G47.10)

In [38]:
with open(f"{txt_list[39]}", "r") as f:
  lines = [line.rstrip('\n') for line in f]
  sentence_list = [" ".join(line.split()) for line in lines] # Remove extra spaces, tabs, and line breaks

In [None]:
def sort_tuple(tup):
  return(sorted(tup, key = lambda x: x[1], reverse=True)) 

In [None]:
[code for code in page_code10_dict[21]]

In [None]:
[(code, get_keyword(code)) for code in page_code10_dict[15]]

[('Z00.00', 'Encntr for general adult medical exam w/o abnormal findings')]

In [83]:
def get_best_token_match(p_code, num_page):
  # Step 1: reverse code pattern
  reversed_icd_code = reverse_code_pattern(p_code)
  # Step 2: fetch keyword based on code 
  keyword = get_keyword(reversed_icd_code)
  # Step 3: prepare sentence list 
  with open(f"{txt_list[num_page]}", "r") as f:
    lines = [line.rstrip('\n') for line in f]
    sentence_list = [" ".join(line.split()) for line in lines] # Remove extra spaces, tabs, and line breaks
  # Step 4: get best token match ratio 
  match_list = [(sentence, fuzz.token_set_ratio(keyword, sentence)) for sentence in sentence_list if fuzz.token_set_ratio(keyword, sentence) > 40]
  return sort_tuple(match_list)

In [84]:
match_list = [get_best_token_match(code, 38) for code in page_code10_dict[38]]
match_list

[[],
 [('General', 100),
  ('General Lymphatics', 56),
  ('Routine general medical examination at a health care facility (Z00.00)',
   49),
  ('Diagnosis: Routine general medical examination at a health care facility (Z00.00)',
   42)]]

In [51]:
for code in page_code10_dict[38]:
  print(f"{code}-{get_best_token_match(code, 38)}")

E78.3-[]
Z00.00-[('General', 100), ('General Lymphatics', 56), ('Routine general medical examination at a health care facility (Z00.00)', 49), ('Diagnosis: Routine general medical examination at a health care facility (Z00.00)', 42)]


In [78]:
def get_best_token_match(p_code, num_page, page_obj, code_coords):
  # Step 1: reverse code pattern
  reversed_icd_code = reverse_code_pattern(p_code)
  # Step 2: fetch keyword based on code 
  keyword = get_keyword(reversed_icd_code)
  # Step 3: prepare sentence list 
  with open(f"{txt_list[num_page]}", "r") as f:
    lines = [line.rstrip('\n') for line in f]
    sentence_list = [" ".join(line.split()) for line in lines] # Remove extra spaces, tabs, and line breaks
  # Step 4: get best token match ratio 
  match_list = [(sentence, fuzz.token_set_ratio(keyword, sentence)) for sentence in sentence_list if fuzz.token_set_ratio(keyword, sentence) > 40]
  match_found = ("NA", 0)
  if len(match_list) > 0:
    for match_found in match_list:
      highlight_list = page.search_for(match_found[0])
      # prepare list for y0 coord value
      y_coords_list = [highlight[1] for highlight in highlight_list]
      if len(y_coords_list) > 0:
        if (code_coords-10) <= y_coords_list[1] <= (code_coords + 10):
          match_found = match_found
  return match_found

In [81]:
code_coords = 54.60003662109375
pdf_file = fitz.open("page-36.pdf")  #Create pdf file object
for page_num, page in enumerate(pdf_file):
  for code in page_code10_dict[38]:
    match_found = get_best_token_match(code, 38, page, code_coords)
    print(f"Code: {code}, {match_found}")

Code: E78.3, ('NA', 0)
Code: Z00.00, ('Diagnosis: Routine general medical examination at a health care facility (Z00.00)', 42)


In [71]:
code_coords = 54.60003662109375
pdf_file = fitz.open("page-36.pdf")  #Create pdf file object
for page_num, page in enumerate(pdf_file):
  for code in page_code10_dict[38]:
    match_found = get_best_token_match(code, 38)
    if len(match_found) > 0:
      for match_f in match_found:
        print(f"Text: {match_f[0]}")
        highlight_list = page.search_for(match_f[0])
        # prepare list for y0 coord value
        y_coords_list = [highlight[1] for highlight in highlight_list]
        if len(y_coords_list) > 0:
          #print(y_coords_list[1])
          if (code_coords-10) <= y_coords_list[1] <= (code_coords + 10):
            print(y_coords_list[1])
            print(match_f)
        print()

Text: General

Text: General Lymphatics

Text: Routine general medical examination at a health care facility (Z00.00)
54.60003662109375
('Routine general medical examination at a health care facility (Z00.00)', 49)

Text: Diagnosis: Routine general medical examination at a health care facility (Z00.00)



In [54]:
fuzz.token_set_ratio("""History & Physical Report 
5/8/2020: Office Visit 
- Routine general medical examination at a health care facility (Z00.00), Hypertriglyceridemia, sporadic 
— _ 
Guevara, DO) """, "Encntr for general adult medical exam w/o abnormal findings")

41

In [39]:
def get_sentence_list(text_list, num_page):
  with open(f"{text_list[num_page]}", "r") as f:
    lines = [line.rstrip('\n') for line in f]
    sentence_list = [" ".join(line.split()) for line in lines] # Remove extra spaces, tabs, and line breaks
  return sentence_list

In [41]:
get_sentence_list(txt_list, 38)

['Musculoskeletal',
 'General',
 'Movements - Full range of motion in all joints.',
 'Lymphatic',
 'General Lymphatics',
 'Description - No Generalized lymphadenopathy.',
 'Assessment & Plan (Lyndsey Pussehl CMA; 5/8/2020 9:45 AM)',
 'Routine general medical examination at a health care facility (Z00.00)',
 'CBC, Comp, TSH, FLP, PSA',
 'Exercise 30-60 min most days of the week. Low fat diet encouraged to maintain good health. Discussed the options for colon and',
 'prostate cancer screening. Calcium 1000-1500 mg and Vit D 1000 |U daily recommended.',
 'Pt instructed to do aerobic cardiovascular exercise 4-5 times weekly for 30 minutes. Also, educated on healthy low fat, low sugar diet.',
 'Pt Education - Patient Education - Exercise to Stay Healthy: Brief Version *: health maintenance',
 'FOLLOW-UPIN 1 YEAR',
 'Urinalysis (81002)',
 'Electrocardiogram (ECG) (93000)',
 'Hypertriglyceridemia, sporadic (E78.3)',
 'Impression: stopped will restart and check labs',
 'Signed electronically b

In [None]:
match_list = get_best_token_match("E78.3", 7)
match_list

[]

In [None]:
match_list[0]

'Diagnosis Fever (R50.9)'

In [None]:
match_list[1]

47

In [None]:
def clean_text(sent):
  # tokenize sentence
  sent1 = word_tokenize(sent)
  # filter stop words
  filtered_sent = [w for w in sent1 if not w.lower() in stop_words]
  filtered_sent = [w for w in filtered_sent if re.sub(re.compile('\W'), '', w)]
  return filtered_sent

In [None]:
def clean_text(sent):
  # tokenize sentence
  sent1 = word_tokenize(sent)
  # filter stop words
  filtered_sent = [w for w in sent1 if not w.lower() in stop_words]
  filtered_sent = [w for w in filtered_sent if re.sub(re.compile('\W'), '', w)]
  clean_tokens = []
  for token in filtered_sent:
    if token.find("-"):
      tokens = token.split("-")
      clean_tokens.extend(tokens)
    else:
      clean_tokens.append(token)
  return clean_tokens

In [None]:
# Diagnosis Sprain of calcaneofibular (ligament) of right ankle, initial encounter (S93.411A)
clean_token = clean_text("Overweight (BMI 25.0 - 29.9) (E66.3)")
clean_token

['Overweight', 'BMI', '25.0', '29.9', 'E66.3']

In [29]:
def get_common_words(sent1, sent2):
  clean_token1 = clean_text(sent1)
  clean_token2 = clean_text(sent2)
  token_set1 = set(clean_token1)
  token_set2 = set(clean_token2)

  common_word_set = set()
  def get_common(token_set1, token_set2):
    for w1 in token_set1:
      for w2 in token_set2:
        if w1.lower() == w2.lower():
          common_word_set.add(w1)
  get_common(token_set1, token_set2)
  get_common(token_set2, token_set1)
  
  return list(common_word_set)

In [30]:
get_common_words(
  "Unspecified abdominal hernia without obstruction or gangrene",
  "Diagnosis Left lower quadrant pain (R10.32), Non-smoker (Z78.9), Hernia (K46.9), Overweight (BMI 25.0 - 29.9), Body mass index (BMI) of"
)

['Hernia', 'hernia']

In [None]:
def get_common_words(sent1, sent2):
  clean_token1 = clean_text(sent1)
  clean_token2 = clean_text(sent2)
  token_set1 = set(clean_token1)
  token_set2 = set(clean_token2)
  for token in token_set1
  return list(token_set1 & token_set2)

In [None]:
clean_tokens = []
for token in clean_token:
  if token.find("-"):
    tokens = token.split("-")
    clean_tokens.extend(tokens)
  else:
    clean_tokens.append(token)
clean_tokens

['Elevated', 'blood', 'pressure', 'reading', 'w/o', 'diagnosis', 'htn']

In [None]:
def find_nearest(array, value):
  array = np.asarray(array)
  idx = (np.abs(array - value)).argmin()
  return array[idx]

In [None]:
array = np.random.random(10)
print(array)

[0.93361323 0.10674056 0.81360362 0.26873547 0.7892294  0.36625687
 0.48473053 0.43495106 0.68754387 0.28034082]


In [None]:
print(find_nearest(array, value=0.35))

0.3662568679192558


##Fitz

In [15]:
def find_nearest(array, value):
  array = np.asarray(array)
  idx = (np.abs(array - value)).argmin()
  return array[idx]

In [None]:
pdf_file = fitz.open("page-36.pdf")  #Create pdf file object
pdf_page_count = pdf_file.page_count   #var to hold page count
for page in range(pdf_page_count):  #notice that page starts with index 0
  page_obj = pdf_file[page] #Create page object
  #content_of_page = pdf_file.get_page_text(page) #Get page content
  match_word = "Z00.00" 
  content_of_page = page_obj.get_text("blocks", sort=False)  #get rect for all words

  coords_array = []
  y_coords_array = []
  for content in content_of_page:
    print(content)
    if content[4].replace("(", "").replace(")", "") == match_word:
      rect_comp = fitz.Rect(content[0],content[1],content[2],content[3])
      print(f"Line #{content[6]}-{content[4]} > {rect_comp}")
      coords_array.append(rect_comp)
      y_coords_array.append(content[1])
      
      #coord = fitz.Rect(content[1]).rect
      print(rect_comp[1])

      highlight = page_obj.add_highlight_annot(rect_comp)
      highlight.set_colors(stroke=[0.2, 1, 0.8])
      highlight.update()

  # find closet y coordinate value
  closet_y_coords = find_nearest(y_coords_array, value=74.320068359375)
  match_word = "Comprehensive" 
  for content in content_of_page:
    # print(word)
    if content[4].replace("(", "").replace(")", "") == match_word and (closet_y_coords-20) <= content[1] <= (closet_y_coords + 20):
      rect_comp = fitz.Rect(content[0],content[1],content[2],content[3])
      print(f"Line #{content[6]}-{content[4]} > {rect_comp}")
      highlight = page_obj.add_highlight_annot(rect_comp)
      highlight.set_colors(stroke=[1, 1, 0])
      highlight.update()

pdf_output_file_name = f"page_11_output.pdf"
pdf_file.save(pdf_output_file_name, garbage=4, deflate=True, clean=True)

In [None]:
list1 = [74.320068359375, 237.0400390625, 409.8399658203125, 572.5599975585938]
curr_value = 74.320068359375 + 20
find_nearest(list1, value=curr_value)

74.320068359375

In [None]:
curr_value = 74.320068359375 - 20
find_nearest(list1, value=curr_value)

74.320068359375

In [None]:
def get_word_coordinate(page_obj, common_words):
  word_coords = {}
  content_of_page = page_obj.get_text("words", sort=False)  #get rect for all words
  for content in content_of_page:
    # print(word)
    for idx, common_word in enumerate(common_words):
      word_coord = {}
      curr_word = content[4].replace("(", "").replace(")", "")
      if curr_word == match_word:
        rect_comp = fitz.Rect(content[0],content[1],content[2],content[3])
        word_coord[common_word] = rect_comp
        word_coord["Line#"] = content[6]
        word_coords[idx] = word_coord
  return word_coords

In [None]:
pdf_file = fitz.open("pdf-files/page-11.pdf")  #Create pdf file object
pdf_page_count = pdf_file.page_count   #var to hold page count
for page in range(pdf_page_count):  #notice that page starts with index 0
  page_obj = pdf_file[page] #Create page object
  print(get_word_coordinate(page_obj, ["general", "adult", "medical", "exam"]))

{0: {'general': Rect(242.856689453125, 65.00006103515625, 274.8216857910156, 72.00347900390625), 'Line#': 2}, 1: {'adult': Rect(242.856689453125, 65.00006103515625, 274.8216857910156, 72.00347900390625), 'Line#': 2}, 2: {'medical': Rect(242.856689453125, 65.00006103515625, 274.8216857910156, 72.00347900390625), 'Line#': 2}, 3: {'exam': Rect(242.856689453125, 65.00006103515625, 274.8216857910156, 72.00347900390625), 'Line#': 2}}


In [None]:
def highlight_common_words(page_obj, y_coords_array, curr_y_coord, common_word_list):
  page_content = page_obj.get_text("words", sort=False)  #get rect for all words

  # find closet y coordinate value
  closet_y_coords = find_nearest(y_coords_array, value=curr_y_coord)
  match_word = "Comprehensive" 
  for content in page_content:
    for common_word in common_word_list:
      if content[4].replace("(", "").replace(")", "") == common_word and (closet_y_coords-20) <= content[1] <= (closet_y_coords + 20):
        rect_comp = fitz.Rect(content[0],content[1],content[2],content[3])
        print(f"Line #{content[6]}-{content[4]} > {rect_comp}")
        highlight = page_obj.add_highlight_annot(rect_comp)
        highlight.set_colors(stroke=[1, 1, 0])
        highlight.update()

In [None]:
def is_common_word_coords_nearby(page_obj, code_coord, keyword, match_sent):
  is_closest_coord = False
  # get common word list
  common_words = get_common_words(keyword, match_sent)
  if len(common_words) > 0:
    # get common word coordinate
    highlight_list = page_obj.search_for(common_words[0])
    y_coords_list = [highlight[1] for highlight in highlight_list]
    # find closet y coordinate value
    closet_y_coords = find_nearest(y_coords_list, value=code_coord)
    # check, common word coordinate within range of code coordinate
    # if (code_coord-10) <= highlight_list[0][1] <= (code_coord + 10):
    if (code_coord-10) <= closet_y_coords <= (code_coord + 10):
      is_closest_coord = True
  return is_closest_coord

In [23]:
def clean_text(sent):
  # tokenize sentence
  sent1 = word_tokenize(sent)
  # filter stop words
  filtered_sent = [w for w in sent1 if not w.lower() in stop_words]
  filtered_sent = [w for w in filtered_sent if re.sub(re.compile('\W'), '', w)]
  clean_tokens = []
  for token in filtered_sent:
    if token.find("-"):
      tokens = token.split("-")
      clean_tokens.extend(tokens)
    else:
      clean_tokens.append(token)
  return clean_tokens

In [25]:
def get_common_words(sent1, sent2):
  clean_token1 = clean_text(sent1)
  clean_token2 = clean_text(sent2)
  token_set1 = set(clean_token1)
  token_set2 = set(clean_token2)

  common_word_set = set()
  def get_common(token_set1, token_set2):
    for w1 in token_set1:
      for w2 in token_set2:
        if w1.lower() == w2.lower():
          common_word_set.add(w1)
  get_common(token_set1, token_set2)
  get_common(token_set2, token_set1)
  
  return list(common_word_set)

In [None]:
def get_common_words(page_obj, code_coords, keyword):
  common_word_list = []
  page_content = page_obj.get_text("blocks", sort=False)
  for content in page_content:
    if (code_coords-10) <= content[1] <= (code_coords + 10):
      if len(clean_text(content[4])) > 0:
        common_word_list = get_common_words(keyword, content[4])
        break
  return common_word_list

In [24]:
def get_paragraph(page_obj, code_coords):
  code_paragraph = None
  page_content = page_obj.get_text("blocks", sort=False)
  for content in page_content:
    if (code_coords-10) <= content[1] <= (code_coords + 10):
      if len(clean_text(content[4])) > 0:
        code_paragraph = content[4]
        break
  return code_paragraph

In [26]:
code_coords = 54.60003662109375
pdf_file = fitz.open("36page.pdf")  #Create pdf file object
pdf_page_count = pdf_file.page_count   #var to hold page count
for page in range(pdf_page_count):  #notice that page starts with index 0
  page_obj = pdf_file[page] #Create page object
  paragraph = get_paragraph(page_obj, code_coords)
  print(paragraph)

History & Physical Report 
5/8/2020: Office Visit 
- Routine general medical examination at a health care facility (Z00.00), Hypertriglyceridemia, sporadic 
— _ 
Guevara, DO) 



In [22]:
pdf_file = fitz.open("36page.pdf")  #Create pdf file object
pdf_page_count = pdf_file.page_count   #var to hold page count
for page in range(pdf_page_count):  #notice that page starts with index 0
  page_obj = pdf_file[page] #Create page object
  #content_of_page = pdf_file.get_page_text(page) #Get page content
  code_coords = 54.60003662109375
  sent1 = "Encntr for general adult medical exam w/o abnormal findings"

  content_of_page = page_obj.get_text("blocks", sort=False)  #get rect for all words

  coords_array = []
  y_coords_array = []
  for content in content_of_page:
    #print(content)
    if (code_coords-10) <= content[1] <= (code_coords + 10):
      if len(clean_text(content[4])) > 0:
        rect_comp = fitz.Rect(content[0],content[1],content[2],content[3])
        print(f"Paragraph: {content[4]}")
        print(f"Commons: {get_common_words(sent1, content[4])}")
        print(f"Line #{content[6]}")
        print(f"Coords: {rect_comp}")

Paragraph: History & Physical Report 
5/8/2020: Office Visit 
- Routine general medical examination at a health care facility (Z00.00), Hypertriglyceridemia, sporadic 
— _ 
Guevara, DO) 

Commons: ['general', 'medical']
Line #0
Coords: Rect(28.317279815673828, 46.96002197265625, 575.4642333984375, 72.00390625)


In [21]:
pdf_file = fitz.open("page-36.pdf")  #Create pdf file object
pdf_page_count = pdf_file.page_count   #var to hold page count
for page in range(pdf_page_count):  #notice that page starts with index 0
  page_obj = pdf_file[page] #Create page object
  #content_of_page = pdf_file.get_page_text(page) #Get page content
  match_word = "Z00.00" 
  content_of_page = page_obj.get_text("blocks", sort=False)  #get rect for all words

  coords_array = []
  y_coords_array = []
  for content in content_of_page:
    print(content)
    if content[4].replace("(", "").replace(")", "") == match_word:
      rect_comp = fitz.Rect(content[0],content[1],content[2],content[3])
      print(f"Line #{content[6]}-{content[4]} > {rect_comp}")

(28.5572566986084, 23.44000244140625, 98.15217590332031, 31.44390869140625, 'Encounter #12 \n', 0, 0)
(28.317279815673828, 46.96002197265625, 575.4642333984375, 72.00390625, 'History & Physical Report \n5/8/2020: Office Visit \n- Routine general medical examination at a health care facility (Z00.00), Hypertriglyceridemia, sporadic \n— _ \nGuevara, DO) \n', 1, 0)
(34.31671142578125, 81.280029296875, 151.66543579101562, 89.283935546875, 'ocumented. 5/8/2020 9:43 AM \n', 2, 0)
(28.5572566986084, 115.3599853515625, 189.58180236816406, 123.3638916015625, 'Marrie \nanguage: English / Race: White \n', 3, 0)
(25.917510986328125, 61.96002197265625, 78.73212432861328, 78.96832275390625, '   \n', 4, 0)
(28.5572566986084, 124.0, 45.835601806640625, 132.00390625, 'Male \n', 6, 0)
(28.557254791259766, 144.15997314453125, 557.4664916992188, 168.96392822265625, 'History of Present \n| Illness as Pussehl CMA; 5/8/2020 2:23 se3 \nThe patient is a 40 year old male who presents today for a physical. \nThe

In [33]:
def get_best_token_match(p_code, num_page, page_obj, code_coords, match_threshold):
    # Step 1: reverse code pattern
    reversed_icd_code = reverse_code_pattern(p_code)
    # Step 2: fetch keyword based on code 
    keyword = get_keyword(reversed_icd_code)
    # Step 3: prepare sentence list 
    sentence_list = get_sentence_list(txt_list, num_page)
    # Step 4: get best token match ratio 
    match_list = [
      (sentence, fuzz.token_set_ratio(keyword, sentence)) 
      for sentence in sentence_list if fuzz.token_set_ratio(keyword, sentence) > match_threshold
    ]

    match_found_list = []
    num_page = num_page + 1
    for match_f in match_list:
      highlight_list = page_obj.search_for(match_f[0])
      # prepare list for y0 coord value
      y_coords_list = [highlight[1] for highlight in highlight_list]
      #print(y_coords_list)
      # find closet y coordinate value
      if len(y_coords_list) > 0:
        closet_y_coords = find_nearest(y_coords_list, value=code_coords)
        # print(f"Code: {code_coords}, Y: {y_coords}")
        if (closet_y_coords-20) <= code_coords <= (closet_y_coords + 20):
          paragraph = get_paragraph(page_obj, code_coords)
          print(paragraph)
          print("#" * 10)
          match_found_list.append(match_f)
          #print(f"Page-{num_page}, Code: ({p_code})-{code_coords}, closet_y_coords:{closet_y_coords} Keyword: {keyword}, Match: {match_f}")
    return match_found_list, sentence_list

In [34]:
pdf_file = fitz.open("36page.pdf")  #Create pdf file object
pdf_page_count = pdf_file.page_count   #var to hold page count
for page in range(pdf_page_count):  #notice that page starts with index 0
  page_obj = pdf_file[page] #Create page object
  match_found_list, sentence_list = get_best_token_match("Z00.00", 0, page_obj, 54.60003662109375, 40)
  #print(match_found_list)

History & Physical Report 
5/8/2020: Office Visit 
- Routine general medical examination at a health care facility (Z00.00), Hypertriglyceridemia, sporadic 
— _ 
Guevara, DO) 

##########
History & Physical Report 
5/8/2020: Office Visit 
- Routine general medical examination at a health care facility (Z00.00), Hypertriglyceridemia, sporadic 
— _ 
Guevara, DO) 

##########
History & Physical Report 
5/8/2020: Office Visit 
- Routine general medical examination at a health care facility (Z00.00), Hypertriglyceridemia, sporadic 
— _ 
Guevara, DO) 

##########
History & Physical Report 
5/8/2020: Office Visit 
- Routine general medical examination at a health care facility (Z00.00), Hypertriglyceridemia, sporadic 
— _ 
Guevara, DO) 

##########
History & Physical Report 
5/8/2020: Office Visit 
- Routine general medical examination at a health care facility (Z00.00), Hypertriglyceridemia, sporadic 
— _ 
Guevara, DO) 

##########
History & Physical Report 
5/8/2020: Office Visit 
- Routine