In [8]:
input_dir = "c:\\dev\\play\\bloodpressure-data"

In [9]:
import os
import hashlib
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive

def compute_file_hash(file_path):
    """Compute the SHA256 hash of the file."""
    sha256_hash = hashlib.sha256()
    with open(file_path, "rb") as f:
        for byte_block in iter(lambda: f.read(4096), b""):
            sha256_hash.update(byte_block)
    return sha256_hash.hexdigest()

def download_file(file, destination):
    """Download the file from Google Drive to the destination."""
    file.GetContentFile(destination)
    print(f"Downloaded: {file['title']}")

# Authenticate and create the PyDrive client
gauth = GoogleAuth()
gauth.LoadClientConfigFile('C:\\Users\\graha\\client_secret_468958631927-kd7vtqnup9ma5l1iq22469aht4b88oqk.apps.googleusercontent.com.json')

# Try to load saved client credentials
gauth.LoadCredentialsFile('credentials.json')

if gauth.credentials is None:
    # Authenticate if they're not there
    gauth.LocalWebserverAuth()
    # Save the current credentials to a file
    gauth.SaveCredentialsFile('credentials.json')
elif gauth.access_token_expired:
    # Refresh them if expired
    gauth.Refresh()
else:
    # Initialize the saved creds
    gauth.Authorize()

drive = GoogleDrive(gauth)


In [10]:
from tqdm import tqdm as tdqm

# Find the Aktiia folder
folder_name = 'Aktiia'
file_list = drive.ListFile({'q': f"title='{folder_name}' and mimeType='application/vnd.google-apps.folder' and trashed=false"}).GetList()

if not file_list:
    print(f"Folder '{folder_name}' not found.")
else:
    folder_id = file_list[0]['id']
    # List all files in the Aktiia folder
    file_list = drive.ListFile({'q': f"'{folder_id}' in parents and trashed=false"}).GetList()

    for file in tdqm(file_list, desc='Checking files', total=len(file_list)):
        file_path = os.path.join(input_dir, file['title'])
        if os.path.exists(file_path):
            local_file_hash = compute_file_hash(file_path)
            file.GetContentFile('temp_file')
            remote_file_hash = compute_file_hash('temp_file')
            os.remove('temp_file')
            if local_file_hash != remote_file_hash:
                print(f"File '{file['title']}' has changed. Downloading the new version.")
                download_file(file, file_path)
            else:
                print(f"File '{file['title']}' has not changed.")
        else:
            print(f"File '{file['title']}' does not exist. Downloading.")
            download_file(file, file_path)

Checking files:   0%|          | 0/9 [00:00<?, ?it/s]

File 'AktiiaReport_GP_Jan2025.pdf' does not exist. Downloading.


Checking files:  11%|█         | 1/9 [00:03<00:31,  3.90s/it]

Downloaded: AktiiaReport_GP_Jan2025.pdf


Checking files:  22%|██▏       | 2/9 [00:07<00:27,  3.91s/it]

File 'AktiiaReport_GP_Dec2024.pdf' has changed. Downloading the new version.
Downloaded: AktiiaReport_GP_Dec2024.pdf


Checking files:  33%|███▎      | 3/9 [00:14<00:30,  5.10s/it]

File 'AktiiaReport_GP_Nov2024.pdf' has changed. Downloading the new version.
Downloaded: AktiiaReport_GP_Nov2024.pdf


Checking files:  44%|████▍     | 4/9 [00:21<00:28,  5.74s/it]

File 'AktiiaReport_GP_Dec2024.pdf' has changed. Downloading the new version.
Downloaded: AktiiaReport_GP_Dec2024.pdf


Checking files:  56%|█████▌    | 5/9 [00:27<00:24,  6.08s/it]

File 'AktiiaReport_GP_Dec2024.pdf' has changed. Downloading the new version.
Downloaded: AktiiaReport_GP_Dec2024.pdf


Checking files:  67%|██████▋   | 6/9 [00:31<00:15,  5.14s/it]

File 'AktiiaReport_GP_Nov2024.pdf' has changed. Downloading the new version.
Downloaded: AktiiaReport_GP_Nov2024.pdf


Checking files:  78%|███████▊  | 7/9 [00:35<00:09,  4.96s/it]

File 'AktiiaReport_GP_Nov2024.pdf' has changed. Downloading the new version.
Downloaded: AktiiaReport_GP_Nov2024.pdf


Checking files:  89%|████████▉ | 8/9 [00:37<00:04,  4.11s/it]

File 'AktiiaReport_GP_Oct2024.pdf' has not changed.


Checking files: 100%|██████████| 9/9 [00:39<00:00,  4.44s/it]

File 'AktiiaReport_GP_Nov2024.pdf' has changed. Downloading the new version.
Downloaded: AktiiaReport_GP_Nov2024.pdf





In [11]:
import os
import fitz  # PyMuPDF
import re
from datetime import datetime
import pytz

class BloodPressureReport:
    def __init__(self, input_file):
        self.input_file = input_file
        self.pattern = re.compile(r'(\d{1,2} \w+, \d{2})\n(\d{2}:\d{2})\n(\d+)\n(\d+)\n(\d+)')
        self.uk_timezone = pytz.timezone('Europe/London')

    def parse_pdf(self):
        pdf_document = fitz.open(self.input_file)
        records = []

        for page_num in range(1, len(pdf_document) - 1):
            page = pdf_document.load_page(page_num)
            text = page.get_text()
            matches = self.pattern.findall(text)

            for match in matches:
                date_str, time_str, systolic, diastolic, heart_rate = match
                date_time_str = f"{date_str} {time_str}"
                date_time_obj = datetime.strptime(date_time_str, "%d %B, %y %H:%M")
                date_time_uk = self.uk_timezone.localize(date_time_obj)
                record = {
                    'datetime': date_time_uk,
                    'systolic': int(systolic),
                    'diastolic': int(diastolic),
                    'heart_rate': int(heart_rate)
                }
                records.append(record)

        return records

all = []

# Loop over each file in the directory
for filename in os.listdir(input_dir):
    if filename.endswith('.pdf'):
        input_file = os.path.join(input_dir, filename)
        report = BloodPressureReport(input_file)
        records = report.parse_pdf()
        all.extend(records)

        # for record in records:
        #     print(record)

In [12]:
from sleep_events import connect_to_firebase

db = connect_to_firebase()

In [13]:
collection_ref = db.collection('bloodPressure')

In [14]:

all_sorted = sorted(all, key=lambda record: record['datetime'])
collection_ref.document('aktiia').set({'records': all_sorted})

update_time {
  seconds: 1736588840
  nanos: 344661000
}