# **Blood Glucose Readings from Dexcom Clarity**

Run on Python 3.13 | No errors | No warnings

In [51]:
# Import packages

# For data manipulation
import numpy as np
import pandas as pd

# for displaying and modifying the working directory
import os as os

# For working with datetime objects
from datetime import datetime

In [52]:
# Make the data folder the working directory so the CSV files below can be
# opened by bare file name (absolute Windows path; the r prefix keeps the
# backslashes literal)
data_dir = r'D:\OneDrive\Documents\Python\Current\Blood Glucose Readings\Old'
os.chdir(data_dir)

In [None]:
# Read the Clarity export, keeping only the four columns this notebook uses
clarity_columns = [
    'Index',
    'Timestamp (YYYY-MM-DDThh:mm:ss)',
    'Event Type',
    'Glucose Value (mg/dL)',
]
df0 = pd.read_csv("Clarity_Export_Smith_Jeffrey.csv", usecols=clarity_columns)

In [54]:
# Preview the top 10 rows of the freshly loaded export (df0)
df0.head(n=10)

Unnamed: 0,Index,Timestamp (YYYY-MM-DDThh:mm:ss),Event Type,Glucose Value (mg/dL)
0,1,,FirstName,
1,2,,LastName,
2,3,,Device,
3,4,2024-11-02T00:02:01,EGV,78.0
4,5,2024-11-02T00:07:01,EGV,77.0
5,6,2024-11-02T00:12:01,EGV,78.0
6,7,2024-11-02T00:17:01,EGV,78.0
7,8,2024-11-02T00:22:01,EGV,78.0
8,9,2024-11-02T00:27:01,EGV,79.0
9,10,2024-11-02T00:32:01,EGV,79.0


In [55]:
# Summarize the raw export: column names, non-null counts, and dtypes
df0.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 290 entries, 0 to 289
Data columns (total 4 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Index                            290 non-null    int64  
 1   Timestamp (YYYY-MM-DDThh:mm:ss)  287 non-null    object 
 2   Event Type                       290 non-null    object 
 3   Glucose Value (mg/dL)            287 non-null    float64
dtypes: float64(1), int64(1), object(2)
memory usage: 9.2+ KB


In [None]:
# Keep only the rows where Event Type is EGV (estimated glucose value).
# .copy() makes the filtered frame independent of the raw export, so the
# column assignments in the cells that follow write to their own data rather
# than to a slice of another frame (the pattern behind pandas'
# SettingWithCopyWarning).
df0 = df0.loc[df0['Event Type'] == 'EGV'].copy()

In [57]:
# Map Clarity's out-of-range text markers to numbers so the integer
# conversion in the next cell cannot fail on them: 'Low' becomes 40 (as
# before) and 'High' becomes 400. Numeric readings are left untouched.
# NOTE(review): 400 assumes the sensor's upper reporting limit is 400 mg/dL — confirm for your device.
out_of_range_values = {'Low': 40, 'High': 400}
df0['Glucose Value (mg/dL)'] = df0['Glucose Value (mg/dL)'].replace(out_of_range_values)

In [58]:
# Cast the glucose readings to whole numbers
glucose_col = 'Glucose Value (mg/dL)'
df0[glucose_col] = df0[glucose_col].astype(int)

In [59]:
# Parse the export's timestamp strings into a new datetime column, Timestamp
raw_timestamps = df0['Timestamp (YYYY-MM-DDThh:mm:ss)']
df0['Timestamp'] = pd.to_datetime(raw_timestamps, format='%Y-%m-%dT%H:%M:%S')

In [60]:
# Remove the raw timestamp text and Event Type; neither is used after this point
df0 = df0.drop(columns=['Timestamp (YYYY-MM-DDThh:mm:ss)', 'Event Type'])

In [61]:
# Remove the export's own Index column
df0 = df0.drop(columns=['Index'])

In [62]:
# Shorten the glucose column name to Value
df0 = df0.rename({'Glucose Value (mg/dL)': 'Value'}, axis='columns')

In [63]:
# Add a Date column holding the calendar-date part of each Timestamp
df0 = df0.assign(Date=df0['Timestamp'].dt.date)

In [64]:
# Label each reading with the treatment in effect on its date.
# Date is converted to datetime first so it can be compared with the cutoff strings.
df0['Date'] = pd.to_datetime(df0['Date'])

# (cutoff, label) pairs, newest first. np.select uses the first condition that
# matches, so a reading gets the label of the latest cutoff it falls after.
dose_schedule = [
    ('2024-06-18', 'Mounjaro 12.5'),
    ('2024-01-30', 'Mounjaro 10'),
    ('2023-07-18', 'Mounjaro 7.5'),
    ('2023-02-28', 'Mounjaro 5'),
    ('2023-01-31', 'Mounjaro 2.5'),
]

conditions = [df0['Date'] > cutoff for cutoff, _ in dose_schedule]
choices = [label for _, label in dose_schedule]

# Readings on or before the earliest cutoff fall through to 'Untreated'
df0['Treatment'] = np.select(conditions, choices, default='Untreated')

In [65]:
# Create a Time column holding the time-of-day part of each Timestamp
df0 = df0.assign(Time=df0['Timestamp'].dt.time)

In [66]:
# Timestamp becomes DateTime, the name used from here on
df0 = df0.rename({'Timestamp': 'DateTime'}, axis='columns')

In [67]:
# Format Time as a 12-hour AM/PM string (e.g. 12:02 AM).
# Formatting straight from the DateTime column uses pandas' vectorized
# .dt.strftime instead of a per-row lambda, and the cell can be re-run safely:
# re-reading Time would fail on a second run because it then holds strings,
# which have no .strftime method.
df0['Time'] = df0['DateTime'].dt.strftime('%I:%M %p')

In [68]:
# Store Date as plain calendar dates again (it was converted to datetime for
# the treatment comparison)
date_only = df0['Date'].dt.date
df0['Date'] = date_only

In [69]:
# Keep only readings dated strictly before today, which drops the current day
today = datetime.now().date()
is_before_today = df0['Date'] < today
df0 = df0[is_before_today]

In [70]:
# Arrange the columns in the order used by the saved history file
column_order = ['Date', 'Time', 'DateTime', 'Value', 'Treatment']
df0 = df0[column_order]

In [71]:
# Tag every row with its Source; all rows in this export are 'CGM'
df0 = df0.assign(Source='CGM')

In [72]:
# Report the first and last dates covered by the new export
min_date0, max_date0 = df0['Date'].min(), df0['Date'].max()

print("New Data")
print(f"Min Date: {min_date0}")
print(f"Max Date: {max_date0}")

New Data
Min Date: 2024-11-02
Max Date: 2024-11-02


In [73]:
# Preview the top 5 rows of the cleaned export (df0)
df0.head(n=5)

Unnamed: 0,Date,Time,DateTime,Value,Treatment,Source
3,2024-11-02,12:02 AM,2024-11-02 00:02:01,78,Mounjaro 12.5,CGM
4,2024-11-02,12:07 AM,2024-11-02 00:07:01,77,Mounjaro 12.5,CGM
5,2024-11-02,12:12 AM,2024-11-02 00:12:01,78,Mounjaro 12.5,CGM
6,2024-11-02,12:17 AM,2024-11-02 00:17:01,78,Mounjaro 12.5,CGM
7,2024-11-02,12:22 AM,2024-11-02 00:22:01,78,Mounjaro 12.5,CGM


In [74]:
# Summarize the cleaned export: column names, non-null counts, and dtypes
df0.info()

<class 'pandas.core.frame.DataFrame'>
Index: 287 entries, 3 to 289
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       287 non-null    object        
 1   Time       287 non-null    object        
 2   DateTime   287 non-null    datetime64[ns]
 3   Value      287 non-null    int64         
 4   Treatment  287 non-null    object        
 5   Source     287 non-null    object        
dtypes: datetime64[ns](1), int64(1), object(4)
memory usage: 15.7+ KB


In [75]:
# Summary statistics (count, mean, min, quartiles, max, std) for the
# DateTime and Value columns
df0.describe()

Unnamed: 0,DateTime,Value
count,287,287.0
mean,2024-11-02 11:57:32.306620160,105.087108
min,2024-11-02 00:02:01,77.0
25%,2024-11-02 05:59:32,92.0
50%,2024-11-02 11:57:03,99.0
75%,2024-11-02 17:54:34,116.5
max,2024-11-02 23:57:04,158.0
std,,18.41317


In [76]:
# Renumber the rows from 0: filtering left the original row labels in place
# (the info() output above shows the index running from 3), and
# drop=True discards those old labels instead of keeping them as a column
df0 = df0.reset_index(drop=True)

In [77]:
# Read the previously saved readings into df1
history_file = "Clarity History.csv"
df1 = pd.read_csv(history_file)

In [78]:
# Preview the top 5 rows of the saved history (df1)
df1.head(n=5)

Unnamed: 0,Date,Time,DateTime,Value,Treatment,Source
0,2024-08-29,04:53 PM,2024-08-29 16:53:35,100,Mounjaro 12.5,CGM
1,2024-08-29,04:58 PM,2024-08-29 16:58:35,112,Mounjaro 12.5,CGM
2,2024-08-29,05:03 PM,2024-08-29 17:03:36,93,Mounjaro 12.5,CGM
3,2024-08-29,05:08 PM,2024-08-29 17:08:35,83,Mounjaro 12.5,CGM
4,2024-08-29,05:13 PM,2024-08-29 17:13:36,78,Mounjaro 12.5,CGM


In [79]:
# Summarize the saved history: column names, non-null counts, and dtypes
# (the date columns are loaded from CSV without date parsing)
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17926 entries, 0 to 17925
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Date       17926 non-null  object
 1   Time       17926 non-null  object
 2   DateTime   17926 non-null  object
 3   Value      17926 non-null  int64 
 4   Treatment  17926 non-null  object
 5   Source     17926 non-null  object
dtypes: int64(1), object(5)
memory usage: 840.4+ KB


In [80]:
# Report the first and last dates covered by the saved history
min_date1, max_date1 = df1['Date'].min(), df1['Date'].max()

print("Saved Data")
print(f"Min Date: {min_date1}")
print(f"Max Date: {max_date1}")

Saved Data
Min Date: 2024-08-29
Max Date: 2024-11-01


In [81]:
# Stack the new export (df0) on top of the saved history (df1)
frames = [df0, df1]
df2 = pd.concat(frames)

In [82]:
# Renumber the rows from 0: the concat above kept each frame's own row labels,
# so labels from df0 and df1 overlap; drop=True discards the old labels
df2 = df2.reset_index(drop=True)

In [83]:
# Preview the top 5 rows of the combined data (df2)
df2.head(n=5)

Unnamed: 0,Date,Time,DateTime,Value,Treatment,Source
0,2024-11-02,12:02 AM,2024-11-02 00:02:01,78,Mounjaro 12.5,CGM
1,2024-11-02,12:07 AM,2024-11-02 00:07:01,77,Mounjaro 12.5,CGM
2,2024-11-02,12:12 AM,2024-11-02 00:12:01,78,Mounjaro 12.5,CGM
3,2024-11-02,12:17 AM,2024-11-02 00:17:01,78,Mounjaro 12.5,CGM
4,2024-11-02,12:22 AM,2024-11-02 00:22:01,78,Mounjaro 12.5,CGM


In [84]:
# Summarize the combined data: column names, non-null counts, and dtypes
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18213 entries, 0 to 18212
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Date       18213 non-null  object
 1   Time       18213 non-null  object
 2   DateTime   18213 non-null  object
 3   Value      18213 non-null  int64 
 4   Treatment  18213 non-null  object
 5   Source     18213 non-null  object
dtypes: int64(1), object(5)
memory usage: 853.9+ KB


In [85]:
# Give DateTime a single datetime dtype across the combined rows
parsed_datetimes = pd.to_datetime(df2['DateTime'])
df2['DateTime'] = parsed_datetimes

In [86]:
# Put the combined readings in chronological order
df2 = df2.sort_values('DateTime')

In [87]:
# Convert Date to datetime so the date arithmetic in the next cell works
parsed_dates = pd.to_datetime(df2['Date'])
df2['Date'] = parsed_dates

In [88]:
# Compute the cutoff for the 90-day window: 90 days before the latest Date in df2
lookback = pd.DateOffset(days=90)
most_recent_date = df2['Date'].max()
ninety_days_ago = most_recent_date - lookback

In [89]:
# Store Date as plain calendar dates again now that the cutoff is computed
date_only = df2['Date'].dt.date
df2['Date'] = date_only

In [90]:
# Verify that the combined data has at least one reading on every calendar day

# Recap the date span of the saved and new data, each followed by a blank line
for title, first_day, last_day in (
    ("Saved Data", min_date1, max_date1),
    ("New Data", min_date0, max_date0),
):
    print(title)
    print(f"Min Date: {first_day}")
    print(f"Max Date: {last_day}")
    print()

# Date span of the combined data
min_date2, max_date2 = df2['Date'].min(), df2['Date'].max()

print("Combined Data")
print(f"Min Date: {min_date2}")
print(f"Max Date: {max_date2}")

# Every calendar day between the first and last date, inclusive
complete_date_range = pd.date_range(min_date2, max_date2, freq='D')

# The distinct days that actually appear in the data
unique_dates = pd.to_datetime(df2['Date']).sort_values().unique()

# Days in the full range that have no readings
missing_dates = np.setdiff1d(complete_date_range, unique_dates)

# Report the result
if missing_dates.size == 0:
    print("No missing dates, all dates are consecutive.")
else:
    print("Missing dates:")
    print(missing_dates)

Saved Data
Min Date: 2024-08-29
Max Date: 2024-11-01

New Data
Min Date: 2024-11-02
Max Date: 2024-11-02

Combined Data
Min Date: 2024-08-29
Max Date: 2024-11-02
No missing dates, all dates are consecutive.


In [91]:
# Re-check row count and dtypes after the sort and date conversions
# (DateTime should now be a datetime column)
df2.info()

<class 'pandas.core.frame.DataFrame'>
Index: 18213 entries, 287 to 286
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       18213 non-null  object        
 1   Time       18213 non-null  object        
 2   DateTime   18213 non-null  datetime64[ns]
 3   Value      18213 non-null  int64         
 4   Treatment  18213 non-null  object        
 5   Source     18213 non-null  object        
dtypes: datetime64[ns](1), int64(1), object(4)
memory usage: 996.0+ KB


In [92]:
# Count the rows that exactly repeat an earlier row across all columns
duplicate_mask = df2.duplicated()
duplicate_count = duplicate_mask.sum()

print(f"Number of duplicate rows: {duplicate_count}")

Number of duplicate rows: 0


In [93]:
# Remove exact repeats, keeping the first occurrence of each row
df2 = df2.drop_duplicates(keep='first')

In [94]:
# Re-count exact duplicate rows to confirm none remain after the removal
remaining_duplicates = df2.duplicated()
duplicate_count = remaining_duplicates.sum()

print(f"Number of duplicate rows: {duplicate_count}")

Number of duplicate rows: 0


In [95]:
# Renumber the rows from 0 so the labels follow the chronological order;
# sorting kept the pre-sort labels, and drop=True discards them
df2 = df2.reset_index(drop=True)

In [96]:
# Final check before saving: row count, dtypes, and a clean 0-based index
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18213 entries, 0 to 18212
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       18213 non-null  object        
 1   Time       18213 non-null  object        
 2   DateTime   18213 non-null  datetime64[ns]
 3   Value      18213 non-null  int64         
 4   Treatment  18213 non-null  object        
 5   Source     18213 non-null  object        
dtypes: datetime64[ns](1), int64(1), object(4)
memory usage: 853.9+ KB


In [97]:
# Save the updated history to a CSV file, overwriting the file loaded earlier
# (index=False leaves the row labels out of the file)
history_path = 'Clarity History.csv'
df2.to_csv(history_path, index=False)

In [98]:
# Keep only the readings dated after ninety_days_ago.
# The mask is computed on a datetime view of Date, so the stored column keeps
# its plain date values and needs no convert-and-convert-back round trip.
# .copy() returns a frame independent of the full history, so nothing is
# assigned onto a filtered slice (the pattern behind SettingWithCopyWarning).
recent_mask = pd.to_datetime(df2['Date']) > ninety_days_ago
df2 = df2.loc[recent_mask].copy()

In [99]:
# Check the row count and dtypes after applying the 90-day filter
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18213 entries, 0 to 18212
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       18213 non-null  object        
 1   Time       18213 non-null  object        
 2   DateTime   18213 non-null  datetime64[ns]
 3   Value      18213 non-null  int64         
 4   Treatment  18213 non-null  object        
 5   Source     18213 non-null  object        
dtypes: datetime64[ns](1), int64(1), object(4)
memory usage: 853.9+ KB


In [100]:
# Write the 90-day window to its own CSV for analysis
# (index=False leaves the row labels out of the file)
analysis_path = 'Clarity Readings for Analysis.csv'
df2.to_csv(analysis_path, index=False)