# **Blood Glucose Readings from Dexcom Clarity**

Run on Python 3.13 | No errors | No warnings

In [None]:
# Import packages

# For data manipulation
import numpy as np
import pandas as pd

# For displaying and modifying the working directory
import os as os

# For working with datetime objects
from datetime import datetime

In [None]:
# Set the working directory so the relative CSV file names used in the cells
# below resolve against this folder. The location is specific to one machine.
os.chdir(r'D:\OneDrive\Documents\Python\Current\Blood Glucose Readings\Old') # absolute path, using \ and r prefix

In [None]:
# Load the new Clarity export into df0, reading only the four columns that the
# rest of the notebook uses
df0 = pd.read_csv(
    "Clarity_Export_Smith_Jeffrey.csv",
    usecols=[
        'Index',
        'Timestamp (YYYY-MM-DDThh:mm:ss)',
        'Event Type',
        'Glucose Value (mg/dL)',
    ],
)

In [None]:
# Preview the raw export: display the first 10 rows of df0
df0.head(10)

In [None]:
# Print a summary of df0 as loaded (columns, dtypes, non-null counts)
df0.info()

In [None]:
# Restrict df0 to rows whose Event Type is EGV (estimated glucose value);
# rows of every other event type are discarded
df0 = df0.loc[df0['Event Type'].eq('EGV')]

In [None]:
# Replace the text placeholders in Glucose Value (mg/dL) with numbers so the
# column can be converted to integers in the next step: 'Low' becomes 40 (as
# before) and 'High' becomes 400. Without the 'High' mapping, a single 'High'
# entry would make the integer conversion fail.
# NOTE(review): 'High' is assumed to be the marker Clarity writes for readings
# above the sensor's range, and 400 the matching ceiling (mirroring the 40 used
# for 'Low') — confirm against an export that contains one.
df0['Glucose Value (mg/dL)'] = df0['Glucose Value (mg/dL)'].replace({'Low': 40, 'High': 400})

In [None]:
# Cast the Glucose Value (mg/dL) column to integers; all other columns keep
# their current dtype
df0 = df0.astype({'Glucose Value (mg/dL)': int})

In [None]:
# Parse the timestamp text (YYYY-MM-DDThh:mm:ss) into a new DateTime column,
# then remove the original text column so only DateTime remains
timestamp_column = 'Timestamp (YYYY-MM-DDThh:mm:ss)'
df0['DateTime'] = pd.to_datetime(df0[timestamp_column], format='%Y-%m-%dT%H:%M:%S')
del df0[timestamp_column]

In [None]:
# Remove the Event Type and Index columns, which are not used after this point
df0 = df0.drop(columns=['Event Type', 'Index'])

In [None]:
# Give the glucose column the short name Value
df0 = df0.rename({'Glucose Value (mg/dL)': 'Value'}, axis='columns')

In [None]:
# Add a Date column holding the calendar-date part of each DateTime
df0 = df0.assign(Date=df0['DateTime'].dt.date)

In [None]:
# Add a Time column holding the time-of-day part of each DateTime
time_of_day = df0['DateTime'].dt.time
df0['Time'] = time_of_day

In [None]:
# Create a Treatment column by labelling each row according to its Date.
# Date is converted to datetime first so it can be compared with the cutoff
# strings below; it stays in datetime form when this cell finishes.
df0['Date'] = pd.to_datetime(df0['Date'])

# (cutoff, label) pairs: a row gets a label when its Date is strictly after the
# cutoff. The pairs are listed newest first because np.select uses the first
# condition that is true for a row.
treatment_cutoffs = [
    ('2024-06-18', 'Mounjaro 12.5'),
    ('2024-01-30', 'Mounjaro 10'),
    ('2023-07-18', 'Mounjaro 7.5'),
    ('2023-02-28', 'Mounjaro 5'),
    ('2023-01-31', 'Mounjaro 2.5'),
]

conditions = [df0['Date'] > cutoff for cutoff, _ in treatment_cutoffs]
choices = [label for _, label in treatment_cutoffs]

# Rows on or before the oldest cutoff match no condition and get 'Untreated'
df0['Treatment'] = np.select(conditions, choices, default='Untreated')

In [None]:
# Format Time as AM/PM text
def _to_am_pm(clock_time):
    """Return clock_time formatted with the '%I:%M %p' pattern."""
    return clock_time.strftime('%I:%M %p')


df0['Time'] = df0['Time'].apply(_to_am_pm)

In [None]:
# Reduce Date from datetime values back to plain dates (no time component)
df0 = df0.assign(Date=df0['Date'].dt.date)

In [None]:
# Keep only rows dated strictly before today's local date, which drops the
# current day's rows (and any later-dated ones)
today = datetime.now().date()
df0 = df0.loc[df0['Date'] < today]

In [None]:
# Put the columns in their final order: Date, Time, DateTime, Value, Treatment
df0 = df0.loc[:, ['Date', 'Time', 'DateTime', 'Value', 'Treatment']]

In [None]:
# Add a Source column, set to 'CGM' for every row of the new data
df0 = df0.assign(Source='CGM')

In [None]:
# Record the earliest and latest Date in the new data and print them;
# min_date0 / max_date0 are printed again by the consecutive-date check
min_date0, max_date0 = df0['Date'].min(), df0['Date'].max()

print("New Data")
print(f"Min Date: {min_date0}")
print(f"Max Date: {max_date0}")

In [None]:
# Preview the cleaned new data: display the first 5 rows of df0
df0.head()

In [None]:
# Print a summary of the cleaned df0 (columns, dtypes, non-null counts)
df0.info()

In [None]:
# Display the summary statistics that describe() produces for df0
df0.describe()

In [None]:
# Renumber df0's rows 0..n-1, discarding the old index left over from filtering
df0.reset_index(drop=True, inplace=True)

In [None]:
# Load the previously saved Clarity history into df1. The file is read with
# default settings, so no columns are parsed as dates here.
df1 = pd.read_csv("Clarity History.csv")

In [None]:
# Preview the saved history: display the first 5 rows of df1
df1.head()

In [None]:
# Print a summary of df1 (columns, dtypes, non-null counts)
df1.info()

In [None]:
# Record the smallest and largest Date value in the saved history and print
# them. Date is used exactly as read from the CSV (no date parsing was applied).
min_date1, max_date1 = df1['Date'].min(), df1['Date'].max()

print("Saved Data")
print(f"Min Date: {min_date1}")
print(f"Max Date: {max_date1}")

In [None]:
# Stack the new data (df0) on top of the saved history (df1) to form df2
df2 = pd.concat(objs=[df0, df1], axis='index')

In [None]:
# Renumber df2's rows 0..n-1 so the index labels carried over from df0 and df1
# no longer repeat
df2.reset_index(drop=True, inplace=True)

In [None]:
# Preview the combined data: display the first 5 rows of df2
df2.head()

In [None]:
# Print a summary of the combined df2 (columns, dtypes, non-null counts)
df2.info()

In [None]:
# Convert DateTime to datetime values across the whole combined frame (the rows
# that came from the history CSV were read without date parsing)
df2 = df2.assign(DateTime=pd.to_datetime(df2['DateTime']))

In [None]:
# Order df2 chronologically, earliest DateTime first
df2 = df2.sort_values('DateTime', ascending=True)

In [None]:
# Convert Date to datetime values so date arithmetic can be done on it
date_as_datetime = pd.to_datetime(df2['Date'])
df2['Date'] = date_as_datetime

In [None]:
# Work out the cutoff for the most recent 90 days: take the latest Date in df2
# and step back 90 days. ninety_days_ago is used further down to trim df2.
most_recent_date = df2['Date'].max()
ninety_days_ago = most_recent_date - pd.Timedelta(days=90)

In [None]:
# Reduce Date from datetime values back to plain dates (no time component)
df2 = df2.assign(Date=df2['Date'].dt.date)

In [None]:
# Verify that all dates are consecutive

# Recap the date span of each input before reporting on the combined data
for heading, first_date, last_date in (
    ("Saved Data", min_date1, max_date1),
    ("New Data", min_date0, max_date0),
):
    print(heading)
    print(f"Min Date: {first_date}")
    print(f"Max Date: {last_date}")
    print()

# Earliest and latest Date in the combined data
min_date2, max_date2 = df2['Date'].min(), df2['Date'].max()

print("Combined Data")
print(f"Min Date: {min_date2}")
print(f"Max Date: {max_date2}")

# Every calendar day from the earliest to the latest date, inclusive
complete_date_range = pd.date_range(start=min_date2, end=max_date2, freq='D')

# The distinct dates actually present in df2, as sorted datetime values
unique_dates = pd.to_datetime(df2['Date']).sort_values().unique()

# Days in the full range that have no rows at all
missing_dates = np.setdiff1d(complete_date_range, unique_dates)

# Report the gaps, or confirm that there are none
if len(missing_dates) == 0:
    print("No missing dates, all dates are consecutive.")
else:
    print("Missing dates:")
    print(missing_dates)

In [None]:
# Verify the dataset: print df2's summary (columns, dtypes, non-null counts)
# after the type conversions above
df2.info()

In [None]:
# Count rows that exactly repeat an earlier row (all columns equal)
is_duplicate = df2.duplicated()
duplicate_count = is_duplicate.sum()

print(f"Number of duplicate rows: {duplicate_count}")

In [None]:
# Remove exact duplicate rows, keeping the first occurrence of each
df2 = df2.loc[~df2.duplicated(keep='first')]

In [None]:
# Count exact duplicate rows again to report how many remain
duplicate_count = df2.duplicated(keep='first').sum()

print(f"Number of duplicate rows: {duplicate_count}")

In [None]:
# Renumber df2's rows 0..n-1 now that it has been sorted and de-duplicated
df2.reset_index(drop=True, inplace=True)

In [None]:
# Validate the dataset: print df2's summary (columns, dtypes, non-null counts)
# before it is written to disk
df2.info()

In [None]:
# Save updated history to a csv file.
# 'Clarity History.csv' is also the file the saved history was loaded from, so
# overwriting it directly would leave a truncated file if the write failed part
# way. Write to a temporary file first and swap it into place only after the
# write has completed.
history_path = 'Clarity History.csv'
history_temp_path = history_path + '.tmp'
df2.to_csv(history_temp_path, index=False)
os.replace(history_temp_path, history_path)

In [None]:
# Keep only rows dated strictly after ninety_days_ago; rows on or before that
# date are removed. The comparison is done on a datetime form of Date, and the
# Date column ends up holding plain dates.
is_recent = pd.to_datetime(df2['Date']) > ninety_days_ago
df2 = df2.loc[is_recent]
df2 = df2.assign(Date=pd.to_datetime(df2['Date']).dt.date)

In [None]:
# Verify the changes: print df2's summary after trimming to the recent window
df2.info()

In [None]:
# Write the trimmed df2 to its own csv file, without the index column
df2.to_csv(path_or_buf='Clarity Readings for Analysis.csv', index=False)