# **BGM CGM Alignment**

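Matches each fingerstick (BGM) reading with the continuous glucose monitor (CGM) readings taken within 15 minutes before or after it, keeps the CGM reading whose value is closest to the BGM value, and calculates the relative difference, (CGM − BGM) / BGM, expressed as a fraction of the BGM value, to measure alignment. Uses pre-processed BGM and CGM datasets plus a CGM sensor-location workbook, and exports the results to CSV for Tableau visualization.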

Run on Python 3.14 | No errors | No warnings

In [1]:
# Import packages

# For data manipulation
import numpy as np
import pandas as pd

# for displaying and modifying the working directory
import os as os

# For working with datetime objects
from datetime import datetime

In [2]:
# Load the pre-processed fingerstick (BGM) readings into df0.
# The folder is a hard-coded absolute Windows path; the r'' prefix keeps the backslashes literal.
bgm_dir = r'D:\OneDrive\Documents\Python\Current\Blood Glucose Readings\BGM'
bgm_columns = ['Date', 'Time', 'DateTime', 'Value']
os.chdir(bgm_dir)
df0 = pd.read_csv("Blood Glucose Readings for Analysis.csv", usecols=bgm_columns)

In [3]:
# Load the pre-processed CGM (Clarity export) readings into df1.
# The folder is a hard-coded absolute Windows path; the r'' prefix keeps the backslashes literal.
cgm_dir = r'D:\OneDrive\Documents\Python\Current\Blood Glucose Readings\CGM'
cgm_columns = ['Date', 'Time', 'DateTime', 'Value']
os.chdir(cgm_dir)
df1 = pd.read_csv("Clarity Readings for Analysis.csv", usecols=cgm_columns)

In [4]:
# Set the working directory for the rest of the notebook.
# Later cells use bare file names ('CGM Locations.xlsx' is read and 'BGM CGM Range.csv' is written),
# so both resolve relative to this folder.
os.chdir(r'D:\OneDrive\Documents\Python\Current\Blood Glucose Readings\BGM CGM Range') # absolute path, using \ and r prefix

In [5]:
# Parse the 'DateTime' column of both the BGM (df0) and CGM (df1) frames into pandas datetimes
for readings in (df0, df1):
    readings['DateTime'] = pd.to_datetime(readings['DateTime'])

# Helper: select the CGM readings that fall inside a symmetric time window around one BGM reading
def find_cgm_within_time_window(bgm_time, df1, time_window_minutes=15):
    """Return the rows of ``df1`` whose 'DateTime' lies within the window around ``bgm_time``.

    Parameters
    ----------
    bgm_time : timestamp of the BGM reading (center of the window).
    df1 : DataFrame of CGM readings; must contain a 'DateTime' column.
    time_window_minutes : half-width of the window in minutes (default 15).

    Returns
    -------
    DataFrame with the matching rows of ``df1`` (original index and columns kept).
    Both window edges are inclusive; the result is empty when nothing matches.
    """
    half_width = pd.Timedelta(minutes=time_window_minutes)
    # Series.between is inclusive on both ends by default, i.e. lower <= value <= upper
    in_window = df1['DateTime'].between(bgm_time - half_width, bgm_time + half_width)
    return df1[in_window]

# Pair every BGM reading with each CGM reading inside its time window.
# One record is produced per (BGM reading, candidate CGM reading) pair, so a single
# BGM reading usually appears on several rows of the result.
bgm_cgm_matches = []

for bgm_time, bgm_value in zip(df0['DateTime'], df0['Value']):
    candidates = find_cgm_within_time_window(bgm_time, df1)
    # An empty candidate frame simply contributes no records
    bgm_cgm_matches.extend(
        {
            'BGM DateTime': bgm_time,
            'BGM Value': bgm_value,
            'CGM DateTime': cgm_time,
            'CGM Value': cgm_value,
        }
        for cgm_time, cgm_value in zip(candidates['DateTime'], candidates['Value'])
    )

# Collect the paired records into a single DataFrame
df2 = pd.DataFrame(bgm_cgm_matches)

In [6]:
# Preview the first 5 BGM/CGM pairs
df2.head(n=5)

Unnamed: 0,BGM DateTime,BGM Value,CGM DateTime,CGM Value
0,2025-09-18 06:18:00,89,2025-09-18 06:07:41,111
1,2025-09-18 06:18:00,89,2025-09-18 06:12:42,115
2,2025-09-18 06:18:00,89,2025-09-18 06:17:42,116
3,2025-09-18 06:18:00,89,2025-09-18 06:22:41,118
4,2025-09-18 06:18:00,89,2025-09-18 06:27:41,115


In [7]:
# Summarize df2: row count, column names, non-null counts, dtypes and memory usage
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1092 entries, 0 to 1091
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   BGM DateTime  1092 non-null   datetime64[ns]
 1   BGM Value     1092 non-null   int64         
 2   CGM DateTime  1092 non-null   datetime64[ns]
 3   CGM Value     1092 non-null   int64         
dtypes: datetime64[ns](2), int64(2)
memory usage: 34.3 KB


In [8]:
# Truncate both timestamp columns to the whole minute (drops the seconds component)
for column in ('BGM DateTime', 'CGM DateTime'):
    df2[column] = df2[column].dt.floor('min')

In [9]:
# Delete rows prior to 9/1/2024 (CGM data start date).
# The cutoff is inclusive: readings on 9/1/2024 itself are kept, anything on 8/31/2024 or earlier is dropped.
# .copy() makes the filtered result an independent frame, so the column assignments in later
# cells modify this frame only and never a slice of the unfiltered data.
cgm_start = pd.Timestamp('2024-09-01')
df2 = df2[df2['BGM DateTime'] >= cgm_start].copy()

In [10]:
# Derive a calendar-date column ('Date') from the BGM timestamp
bgm_dates = df2['BGM DateTime'].dt.date
df2['Date'] = bgm_dates

In [11]:
# Signed difference CGM - BGM: positive when the CGM value is higher than the BGM value
df2['Difference'] = df2['CGM Value'].sub(df2['BGM Value'])

In [12]:
# Magnitude of the difference, ignoring its direction
df2['Abs Difference'] = abs(df2['Difference'])

In [13]:
# Verify the new columns by previewing the first 5 rows
df2.head(n=5)

Unnamed: 0,BGM DateTime,BGM Value,CGM DateTime,CGM Value,Date,Difference,Abs Difference
0,2025-09-18 06:18:00,89,2025-09-18 06:07:00,111,2025-09-18,22,22
1,2025-09-18 06:18:00,89,2025-09-18 06:12:00,115,2025-09-18,26,26
2,2025-09-18 06:18:00,89,2025-09-18 06:17:00,116,2025-09-18,27,27
3,2025-09-18 06:18:00,89,2025-09-18 06:22:00,118,2025-09-18,29,29
4,2025-09-18 06:18:00,89,2025-09-18 06:27:00,115,2025-09-18,26,26


In [14]:
# Keep one row per BGM reading: the candidate CGM reading with the smallest Abs Difference.
# Note this selects the closest CGM *value* inside the window, not the closest CGM *time*.
closest_rows = df2.groupby('BGM DateTime')['Abs Difference'].idxmin()
df3 = df2.loc[closest_rows]

In [15]:
# Range = Difference / BGM Value (signed, as a fraction of the BGM reading), rounded to 2 decimal places
df3['Range'] = df3['Difference'].div(df3['BGM Value']).round(2)

In [16]:
# Range Within: the Range rounded away from zero to the next 0.10 step, with a minimum magnitude of 0.10.
# The sign of Range is kept; a Range of exactly 0 is placed in the +0.10 bucket.
relative_diff = df3['Range']
tenth_steps = np.ceil(np.abs(relative_diff) * 10) / 10
bucket_size = np.maximum(tenth_steps, 0.10)
df3['Range Within'] = np.where(relative_diff != 0, np.sign(relative_diff) * bucket_size, 0.10)

In [17]:
# Load the CGM sensor location log (sheet 'Locations') from the current working directory.
# Later cells read its 'Date', 'Time' and 'Location' columns.
locations_workbook = 'CGM Locations.xlsx'
df4 = pd.read_excel(locations_workbook, sheet_name='Locations')

In [18]:
# Parse the location log's 'Date' column into pandas datetimes
df4['Date'] = df4['Date'].pipe(pd.to_datetime)

In [19]:
# Build a single 'DateTime' column in df4 by joining the text forms of 'Date' and 'Time' and re-parsing
date_text = df4['Date'].astype(str)
time_text = df4['Time'].astype(str)
df4['DateTime'] = pd.to_datetime(date_text + " " + time_text)

In [20]:
# Order both frames chronologically on their merge keys (merge_asof needs sorted keys)
df3 = df3.sort_values(by='BGM DateTime')
df4 = df4.sort_values(by='DateTime')

In [21]:
# Tag each BGM reading with the CGM location in effect at that time.
# direction='backward' takes the latest location entry whose DateTime is at or before the
# BGM DateTime; readings earlier than the first entry get a missing Location.
location_log = df4[['DateTime', 'Location']]
df5 = pd.merge_asof(
    left=df3,
    right=location_log,
    left_on='BGM DateTime',
    right_on='DateTime',
    direction='backward',
)

In [22]:
# Inspect the 10 most recent rows of df5 (the frame is sorted by BGM DateTime)
df5.tail(n=10)

Unnamed: 0,BGM DateTime,BGM Value,CGM DateTime,CGM Value,Date,Difference,Abs Difference,Range,Range Within,DateTime,Location
172,2025-12-12 06:16:00,95,2025-12-12 06:08:00,100,2025-12-12,5,5,0.05,0.1,2025-12-03 13:30:00,Right - Bottom Third
173,2025-12-12 19:08:00,101,2025-12-12 18:53:00,114,2025-12-12,13,13,0.13,0.2,2025-12-03 13:30:00,Right - Bottom Third
174,2025-12-13 07:11:00,93,2025-12-13 07:03:00,98,2025-12-13,5,5,0.05,0.1,2025-12-03 13:30:00,Right - Bottom Third
175,2025-12-13 19:08:00,121,2025-12-13 19:17:00,128,2025-12-13,7,7,0.06,0.1,2025-12-03 13:30:00,Right - Bottom Third
176,2025-12-14 07:45:00,109,2025-12-14 07:47:00,123,2025-12-14,14,14,0.13,0.2,2025-12-03 13:30:00,Right - Bottom Third
177,2025-12-14 19:07:00,105,2025-12-14 19:17:00,118,2025-12-14,13,13,0.12,0.2,2025-12-03 13:30:00,Right - Bottom Third
178,2025-12-15 06:14:00,92,2025-12-15 06:07:00,101,2025-12-15,9,9,0.1,0.1,2025-12-03 13:30:00,Right - Bottom Third
179,2025-12-15 19:03:00,104,2025-12-15 19:12:00,111,2025-12-15,7,7,0.07,0.1,2025-12-03 13:30:00,Right - Bottom Third
180,2025-12-16 06:14:00,92,2025-12-16 06:07:00,99,2025-12-16,7,7,0.08,0.1,2025-12-03 13:30:00,Right - Bottom Third
181,2025-12-16 19:01:00,98,2025-12-16 19:12:00,102,2025-12-16,4,4,0.04,0.1,2025-12-03 13:30:00,Right - Bottom Third


In [23]:
# Label each BGM reading as AM or PM using the '%p' strftime directive
meridiem = df5['BGM DateTime'].dt.strftime('%p')
df5['AM/PM'] = meridiem

In [24]:
# Put the columns in their export order; the merge helper column 'DateTime' is not listed and is dropped
export_columns = [
    'Date',
    'AM/PM',
    'BGM DateTime',
    'BGM Value',
    'CGM DateTime',
    'CGM Value',
    'Difference',
    'Abs Difference',
    'Range',
    'Range Within',
    'Location',
]
df5 = df5[export_columns]

In [25]:
# Readings with no location entry at or before them have a missing Location; label those 'Unknown'
df5 = df5.fillna({'Location': 'Unknown'})

In [26]:
# Preview the first 5 rows of the final frame
df5.head(n=5)

Unnamed: 0,Date,AM/PM,BGM DateTime,BGM Value,CGM DateTime,CGM Value,Difference,Abs Difference,Range,Range Within,Location
0,2025-09-18,AM,2025-09-18 06:18:00,89,2025-09-18 06:07:00,111,22,22,0.25,0.3,Right - Bottom Third
1,2025-09-18,PM,2025-09-18 19:02:00,109,2025-09-18 19:12:00,117,8,8,0.07,0.1,Right - Bottom Third
2,2025-09-19,AM,2025-09-19 06:17:00,92,2025-09-19 06:07:00,106,14,14,0.15,0.2,Right - Bottom Third
3,2025-09-19,PM,2025-09-19 19:02:00,123,2025-09-19 18:50:00,140,17,17,0.14,0.2,Right - Middle Third
4,2025-09-20,AM,2025-09-20 07:16:00,99,2025-09-20 07:10:00,99,0,0,0.0,0.1,Right - Middle Third


In [27]:
# Summarize df5 before export: row count, column names, non-null counts, dtypes and memory usage
df5.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 182 entries, 0 to 181
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Date            182 non-null    object        
 1   AM/PM           182 non-null    object        
 2   BGM DateTime    182 non-null    datetime64[ns]
 3   BGM Value       182 non-null    int64         
 4   CGM DateTime    182 non-null    datetime64[ns]
 5   CGM Value       182 non-null    int64         
 6   Difference      182 non-null    int64         
 7   Abs Difference  182 non-null    int64         
 8   Range           182 non-null    float64       
 9   Range Within    182 non-null    float64       
 10  Location        182 non-null    object        
dtypes: datetime64[ns](2), float64(2), int64(4), object(3)
memory usage: 15.8+ KB


In [28]:
# Export the final frame to CSV in the current working directory, without the row index
output_file = 'BGM CGM Range.csv'
df5.to_csv(output_file, index=False)