In [None]:
!pip install pytesseract
!pip install fastapi nest-asyncio pyngrok uvicorn
!pip install python-multipart


Collecting python-multipart
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Downloading python_multipart-0.0.20-py3-none-any.whl (24 kB)
Installing collected packages: python-multipart
Successfully installed python-multipart-0.0.20


In [None]:
import cv2
import pytesseract
from pytesseract import Output
import re

def preprocess_image(image):
    """Prepare a BGR image for OCR.

    The image is converted to grayscale, denoised with non-local means
    (strength ``h=10``) and binarised with an Otsu-selected threshold.

    Returns the thresholded image produced by ``cv2.threshold``.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.fastNlMeansDenoising(grayscale, h=10)
    # With THRESH_OTSU the explicit threshold (0) is ignored and chosen automatically.
    otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    _, binary = cv2.threshold(smoothed, 0, 255, otsu_flags)
    return binary

def extract_text_from_image(image):
    """Run Tesseract on ``image`` and return its word-level data as a dict.

    The call uses English language data with the options ``--oem 3 --psm 6``
    and asks pytesseract for ``Output.DICT`` output.
    """
    return pytesseract.image_to_data(
        image,
        lang='eng',
        config=r'--oem 3 --psm 6',
        output_type=Output.DICT,
    )

In [None]:
def _confident_words(ocr_data, min_conf=60):
    """Return positioned word dicts for non-blank OCR tokens above ``min_conf``."""
    words = []
    for i, raw_text in enumerate(ocr_data['text']):
        try:
            # Confidence may arrive as int, float or a string such as '96.5';
            # int('96.5') raises, so parse through float().
            conf = float(ocr_data['conf'][i])
        except (TypeError, ValueError):
            continue
        text = str(raw_text).strip()
        if conf <= min_conf or not text:
            continue
        words.append({
            'text': text,
            'left': ocr_data['left'][i],
            'top': ocr_data['top'][i],
            'width': ocr_data['width'][i],
            'height': ocr_data['height'][i]
        })
    return words


def _group_into_lines(words):
    """Group words into visual lines by vertical position, each ordered left to right."""
    ordered = sorted(words, key=lambda w: (w['top'], w['left']))

    lines = []
    last_top = None
    for word in ordered:
        same_line = last_top is None or abs(word['top'] - last_top) < word['height'] / 2
        if same_line and lines:
            lines[-1].append(word)
        else:
            lines.append([word])
        last_top = word['top']

    # Words on one visual line can differ by a pixel or two in `top`, so the
    # global (top, left) sort does not guarantee reading order within a line.
    for line in lines:
        line.sort(key=lambda w: w['left'])
    return lines


def parse_lab_results(ocr_data):
    """Parse Tesseract word data into a list of lab-test dicts.

    Args:
        ocr_data: Dict of parallel lists with the keys 'text', 'conf', 'left',
            'top', 'width' and 'height' (the shape read by this function).

    Returns:
        A list of dicts. Each has 'test_name' and, when found on the following
        lines, 'value', 'unit' and 'bio_reference_range' (formatted "low-high").
        Decimal commas are normalised to dots in both value and range.
    """
    tests = []
    current_test = {}

    for line in _group_into_lines(_confident_words(ocr_data)):
        line_text = ' '.join(word['text'] for word in line).strip()

        # A short, letters-only line starting with a capital begins a new test.
        if re.match(r'^[A-Z][A-Za-z\s]+$', line_text) and len(line_text.split()) < 5:
            if current_test:
                tests.append(current_test)
            current_test = {'test_name': line_text}

        elif current_test:
            value_match = re.match(r'([\d.,]+)\s*([a-zA-Z/%]*)', line_text)
            range_match = re.search(r'([\d.,]+)\s*[-–]\s*([\d.,]+)', line_text)

            # A line that *starts* with a range (e.g. "12.0-15.0") carries no
            # measured value; do not mistake its lower bound for one.
            if range_match and range_match.start() == 0:
                value_match = None

            if value_match and 'value' not in current_test:
                current_test['value'] = value_match.group(1).replace(',', '.')
                current_test['unit'] = value_match.group(2)

            if range_match and 'bio_reference_range' not in current_test:
                low = range_match.group(1).replace(',', '.')
                high = range_match.group(2).replace(',', '.')
                current_test['bio_reference_range'] = f"{low}-{high}"

    if current_test:
        tests.append(current_test)

    return tests

In [None]:
def check_out_of_range(tests):
    """Flag each test whose value lies outside its reference range.

    Every dict in ``tests`` is updated in place with a boolean
    'lab_test_out_of_range'. The flag is True only when both 'value' and
    'bio_reference_range' ("low-high") are present, parse as numbers, and the
    value is outside ``[low, high]``; in every other case it is False.

    Decimal commas and an en-dash range separator are accepted.

    Returns:
        The same ``tests`` list that was passed in.
    """
    for test in tests:
        # Default first so the key is always present, including when the range
        # does not split into exactly two parts.
        out_of_range = False
        if 'value' in test and 'bio_reference_range' in test:
            try:
                value = float(str(test['value']).replace(',', '.'))
                range_parts = str(test['bio_reference_range']).replace('–', '-').split('-')
                if len(range_parts) == 2:
                    low = float(range_parts[0].replace(',', '.'))
                    high = float(range_parts[1].replace(',', '.'))
                    out_of_range = not (low <= value <= high)
            except (ValueError, IndexError):
                out_of_range = False
        test['lab_test_out_of_range'] = out_of_range
    return tests

In [38]:
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import JSONResponse
import logging

import numpy as np
from PIL import Image
import io

# ASGI application object; the route decorators in this cell register against it.
app = FastAPI()
# CORS middleware is currently disabled (kept here for reference only).
# NOTE(review): re-enabling it also requires CORSMiddleware to be imported;
# it is not imported in the visible cells.
# app.add_middleware(
#     CORSMiddleware,
#     allow_origins=['*'],
#     allow_credentials=True,
#     allow_methods=['*'],
#     allow_headers=['*'],
# )


# Reuse the 'uvicorn.error' logger and lower its threshold to DEBUG.
logger = logging.getLogger('uvicorn.error')
logger.setLevel(logging.DEBUG)

@app.post("/hello")
async def hello(file: UploadFile = File(...)):
    """Smoke-test endpoint: print a marker when a multipart upload arrives.

    Named ``hello`` so it no longer shares the name ``get_lab_tests`` with the
    ``/get-lab-tests`` handler, whose later definition shadowed this one.
    The route path and behaviour are unchanged.

    Args:
        file: Upload sent as the multipart form field ``file`` (not read here).
    """
    print("I am here!")

@app.post("/get-lab-tests")
async def get_lab_tests(file: UploadFile = File(...)):
    """Run OCR on an uploaded lab-report image and return the parsed tests.

    Args:
        file: Image upload sent as the multipart form field ``file``.

    Returns:
        JSONResponse with ``{"success": True, "data": [...]}`` on success;
        ``{"success": False, "error": ...}`` with status 400 when the upload is
        not a readable image, or with status 500 for any other failure.
    """
    # Local import: PIL is already a dependency of this cell.
    from PIL import UnidentifiedImageError

    logger.debug("Received upload: %s", file.filename)
    try:
        # Read and decode the image
        contents = await file.read()
        try:
            pil_image = Image.open(io.BytesIO(contents))
            # Grayscale and palette images decode to 2-D arrays, which the
            # RGB->BGR conversion below rejects; normalise to 3-channel RGB.
            rgb_image = pil_image.convert('RGB')
        except UnidentifiedImageError:
            return JSONResponse({
                "success": False,
                "error": "Uploaded file is not a readable image"
            }, status_code=400)

        image = cv2.cvtColor(np.array(rgb_image), cv2.COLOR_RGB2BGR)
        processed_image = preprocess_image(image)

        # Extract text and parse results
        ocr_data = extract_text_from_image(processed_image)
        tests = parse_lab_results(ocr_data)
        tests = check_out_of_range(tests)

        return JSONResponse({
            "success": True,
            "data": tests
        })

    except Exception as e:
        # Keep the best-effort JSON error response, but record the traceback.
        logger.exception("Failed to process upload %s", file.filename)
        return JSONResponse({
            "success": False,
            "error": str(e)
        }, status_code=500)

ERROR:asyncio:Task exception was never retrieved
future: <Task finished name='Task-127' coro=<Server.serve() done, defined at /usr/local/lib/python3.11/dist-packages/uvicorn/server.py:68> exception=KeyboardInterrupt()>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/uvicorn/main.py", line 580, in run
    server.run()
  File "/usr/local/lib/python3.11/dist-packages/uvicorn/server.py", line 66, in run
    return asyncio.run(self.serve(sockets=sockets))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/nest_asyncio.py", line 30, in run
    return loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/nest_asyncio.py", line 92, in run_until_complete
    self._run_once()
  File "/usr/local/lib/python3.11/dist-packages/nest_asyncio.py", line 133, in _run_once
    handle._run()
  File "/usr/lib/python3.11/asyncio/events.py", line 84, in _run
    

In [None]:
import os
from getpass import getpass

import nest_asyncio
import uvicorn
from pyngrok import ngrok

# Port shared by the ngrok tunnel and the uvicorn server.
PORT = 8000

# Never hard-code the ngrok authtoken in the notebook. Read it from the
# NGROK_AUTH_TOKEN environment variable, or prompt for it without echoing.
# Get your authtoken from https://dashboard.ngrok.com/get-started/your-authtoken
# NOTE(review): the token previously embedded in this cell has been exposed
# and should be revoked in the ngrok dashboard.
auth_token = os.environ.get("NGROK_AUTH_TOKEN") or getpass("ngrok authtoken: ")
if not auth_token:
    raise RuntimeError("An ngrok authtoken is required (set NGROK_AUTH_TOKEN).")

# Set the authtoken
ngrok.set_auth_token(auth_token)

# Connect to ngrok
ngrok_tunnel = ngrok.connect(PORT)

# Print the public URL
print('Public URL:', ngrok_tunnel.public_url)

# Apply nest_asyncio so uvicorn can run inside the notebook's existing event loop
nest_asyncio.apply()

# Run the uvicorn server (blocks the cell until interrupted)
uvicorn.run(app, port=PORT)

Public URL: https://7069-35-238-195-223.ngrok-free.app


INFO:     Started server process [329]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


INFO:     98.45.171.8:0 - "POST /get-lab-tests HTTP/1.1" 400 Bad Request
INFO:     98.45.171.8:0 - "POST /get-lab-tests HTTP/1.1" 400 Bad Request
