In [2]:
from google.cloud import vision
from google.oauth2 import service_account
from google.cloud import vision_v1
from google.protobuf import json_format
import os
import re

key_path = 'KEY_FILE.json'
credentials = service_account.Credentials.from_service_account_file(key_path)
client = vision.ImageAnnotatorClient(credentials=credentials)

for i in range(1, 8):
    filename = f'bgReading_{i}.jpeg'
    
    with open(filename, 'rb') as image_file:
        content = image_file.read()
    image = vision_v1.Image(content=content)
    image_context = vision_v1.ImageContext(language_hints=["en"])

    response = client.text_detection(image=image, image_context=image_context)
    extracted_text = response.text_annotations

    cleaned_text = ""
    for text in extracted_text:
        text_description = text.description
        if isinstance(text_description, str):
            cleaned_text += re.sub(r'[^\w\s./:]', '', text_description) + " "

    text_in_one_line = " ".join(cleaned_text.split())

    with open('TextFromAccuChekImages.txt', 'a') as f:
        f.write(text_in_one_line + "\n")

In [3]:
#Create Json file from the text file

In [5]:
import spacy
import json
json_file_path = 'TextFromAccuChekImages.json'
with open(json_file_path, "r") as f:
    data = json.load(f)

In [8]:
# data

In [9]:
training_data = []
for example in data['annotations']:
  temp_dict = {}
  temp_dict['text'] = example[0]
  temp_dict['entities'] = []
  for annotation in example[1]['entities']:
    start = annotation[0]
    end = annotation[1]
    label = annotation[2].upper()
    temp_dict['entities'].append((start, end, label))
  training_data.append(temp_dict)

In [10]:
print(training_data[20])

{'text': 'ACCUCHEK Guide Logbook 51 8:18am Fasting 10/29/20 mg/dL OK ACCU CHEK Guide Logbook 51 8:18 am Fasting 10/29/20 mg / dL OK', 'entities': [(0, 14, 'DEVICE NAME'), (23, 25, 'BG READING'), (26, 32, 'TIME'), (41, 49, 'DATE'), (50, 55, 'UNITS')]}


In [11]:
from spacy.tokens import DocBin
from tqdm import tqdm

nlp = spacy.blank("en")
doc_bin = DocBin()

In [12]:
from spacy.util import filter_spans

for training_example  in tqdm(training_data): 
    text = training_example['text']
    labels = training_example['entities']
    doc = nlp.make_doc(text) 
    ents = []
    for start, end, label in labels:
        span = doc.char_span(start, end, label=label, alignment_mode="contract")
        if span is None:
            print("Skipping entity")
        else:
            ents.append(span)
    filtered_ents = filter_spans(ents)
    doc.ents = filtered_ents 
    doc_bin.add(doc)

doc_bin.to_disk("train.spacy") 

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 43/43 [00:00<00:00, 7421.41it/s]


In [14]:
!python3 -m spacy init fill-config base_config.cfg config.cfg

[38;5;2m✔ Auto-filled config with all values[0m
[38;5;2m✔ Saved config[0m
config.cfg
You can now add your data and train your pipeline:
python -m spacy train config.cfg --paths.train ./train.spacy --paths.dev ./dev.spacy


In [15]:
!python3 -m spacy train config.cfg --output ./ --paths.train ./train.spacy --paths.dev ./train.spacy 

[38;5;4mℹ Saving to output directory: .[0m
[38;5;4mℹ Using CPU[0m
[1m
[38;5;2m✔ Initialized pipeline[0m
[1m
[38;5;4mℹ Pipeline: ['tok2vec', 'ner'][0m
[38;5;4mℹ Initial learn rate: 0.001[0m
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00     43.55    0.00    0.00    0.00    0.00
 27     200         49.40   2000.64   96.68   96.32   97.04    0.97
 60     400         42.94    401.00   98.18   96.43  100.00    0.98
100     600        132.64    488.55   98.18   96.43  100.00    0.98
150     800        149.28    551.10   98.18   96.43  100.00    0.98
215    1000        782.41    975.44   98.18   96.43  100.00    0.98
283    1200        140.66    683.19   97.42   97.06   97.78    0.97
383    1400        115.99    917.43   98.18   96.43  100.00    0.98
483    1600         78.04    898.77   98.18   96.43  100.00    0.98
592    1800         72.27    943.29   98.18   96.43

In [22]:
import re
from google.cloud import vision_v1
import spacy

key_path = 'KEY_FILE.json'
credentials = service_account.Credentials.from_service_account_file(key_path)
client = vision.ImageAnnotatorClient(credentials=credentials)

def get_values_from_image(image_file_name):

    # Read the image file
    with open(image_file_name, 'rb') as image_file:
        content = image_file.read()

    # Create an image object
    image = vision_v1.Image(content=content)
    image_context = vision_v1.ImageContext(language_hints=["en"])

    # Perform text detection on the image
    response = client.text_detection(image=image, image_context=image_context)
    extracted_text = response.text_annotations

    cleaned_text = ""
    for text in extracted_text:
        text_description = text.description
        if isinstance(text_description, str):
            cleaned_text += re.sub(r'[^\w\s./:]', '', text_description) + " "

    one_line_text = " ".join(cleaned_text.split())

    # Initialize the spaCy NER model (assuming 'nlp_ner' is a valid NER model)
    nlp_ner = spacy.load("model-best")

    # Process the one-line text with spaCy NER
    doc = nlp_ner(one_line_text)

    # Extract entities
    readings = [ent.text for ent in doc.ents if ent.label_ == "BG READING"]
    units = [ent.text for ent in doc.ents if ent.label_ == "UNITS"]
    date = [ent.text for ent in doc.ents if ent.label_ == "DATE"]
    time = [ent.text for ent in doc.ents if ent.label_ == "TIME"]
    deviceName = [ent.text for ent in doc.ents if ent.label_ == "DEVICE NAME"]

    # Create a dictionary to store the results
    result = {
        "Reading": readings,
        "units": units,
        "date": date,
        "time": time,
        "deviceName": deviceName
    }

    return result

image_file_name = 'IMG_8431.HEIC'
result = get_values_from_image(image_file_name)
print(result)

{'Reading': [], 'units': [], 'date': [], 'time': [], 'deviceName': []}


In [None]:
#Login with user

In [None]:
#upload data into it, add timestamp as date and time to local, add systimestamp as current timestamp

In [None]:
#if no reading return 'upload image again'