In [None]:
!pip install tslearn

Collecting tslearn
  Downloading tslearn-0.6.3-py3-none-any.whl.metadata (14 kB)
Downloading tslearn-0.6.3-py3-none-any.whl (374 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 374.4/374.4 kB 4.7 MB/s eta 0:00:00
Installing collected packages: tslearn
Successfully installed tslearn-0.6.3


In [None]:
import pandas as pd
import glob
from google.colab import drive
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tslearn.metrics import cdist_dtw
from tslearn.clustering import TimeSeriesKMeans
from sklearn.metrics import silhouette_score

In [None]:
# Mount Google Drive at /content/drive so the CSV folder used below is readable.
# `drive` is already imported in the imports cell above, so it is not re-imported here.
# force_remount=True asks Colab to remount even if the mount point is already in use.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# Folder on the mounted Drive that holds the Dexcom CSV exports.
rhea_path = '/content/drive/MyDrive/FinalProject/Dexcom Data/'
# lauren_path = '/content/gdrive/MyDrive/STAT5243/FinalProject/Dexcom Data/'

# glob returns matches in arbitrary, filesystem-dependent order and the row order of
# the concatenated frame follows the file order, so sort to make reruns reproducible.
csv_files = sorted(glob.glob(rhea_path + '*.csv'))
if not csv_files:
    # Fail here, naming the folder, rather than inside pd.concat with
    # "No objects to concatenate".
    raise FileNotFoundError(f'No CSV files found in {rhea_path!r}')

# skiprows=range(1, 11) keeps the header line (line 0) and skips the ten lines after it
# in every file.
# NOTE(review): presumably those ten lines are export metadata rather than readings —
# confirm that every export has exactly ten of them.
df_list = [pd.read_csv(file, skiprows=range(1, 11)) for file in csv_files]

# Stack all exports into one frame with a fresh 0..n-1 index.
dexcom_df = pd.concat(df_list, ignore_index=True)

dexcom_df.shape

(339429, 14)

In [None]:
# Show the column labels of the combined frame.
dexcom_df.columns

Index(['Index', 'Timestamp (YYYY-MM-DDThh:mm:ss)', 'Event Type',
       'Event Subtype', 'Patient Info', 'Device Info', 'Source Device ID',
       'Glucose Value (mg/dL)', 'Insulin Value (u)', 'Carb Value (grams)',
       'Duration (hh:mm:ss)', 'Glucose Rate of Change (mg/dL/min)',
       'Transmitter Time (Long Integer)', 'Transmitter ID'],
      dtype='object')

In [None]:
# Single-column DataFrame containing only the glucose column.
# NOTE(review): no later visible cell reads `timeseries_data`, and it is taken before the
# 'Low'/'High' values are replaced in the following cell — confirm whether it is still needed.
timeseries_data = dexcom_df[['Glucose Value (mg/dL)']]

In [None]:
# The glucose column carries the text markers 'Low' and 'High' alongside the readings;
# replace them with the numeric stand-ins 39 and 401.
# NOTE(review): presumably these sit just outside the sensor's reportable range — confirm.
glucose_col = 'Glucose Value (mg/dL)'
dexcom_df.loc[dexcom_df[glucose_col] == 'Low', glucose_col] = 39
dexcom_df.loc[dexcom_df[glucose_col] == 'High', glucose_col] = 401

# A column that contained text markers cannot have been parsed as a numeric dtype, so
# convert it explicitly now that the markers are gone. The default errors='raise' makes
# any other unexpected text fail loudly here instead of surfacing later in the pipeline.
# Re-running this cell is safe: converting an already-numeric column is a no-op.
dexcom_df[glucose_col] = pd.to_numeric(dexcom_df[glucose_col])

In [None]:
# Parse the raw timestamp strings into a datetime column named 'Timestamp',
# then preview the first rows of the frame.
raw_timestamps = dexcom_df['Timestamp (YYYY-MM-DDThh:mm:ss)']
dexcom_df['Timestamp'] = pd.to_datetime(raw_timestamps)
dexcom_df.head()

Unnamed: 0,Index,Timestamp (YYYY-MM-DDThh:mm:ss),Event Type,Event Subtype,Patient Info,Device Info,Source Device ID,Glucose Value (mg/dL),Insulin Value (u),Carb Value (grams),Duration (hh:mm:ss),Glucose Rate of Change (mg/dL/min),Transmitter Time (Long Integer),Transmitter ID,Timestamp
0,11,2024-12-03T00:02:40,EGV,,,,iOS G6,233,,,,,7441274.0,897B4L,2024-12-03 00:02:40
1,12,2024-12-03T00:07:41,EGV,,,,iOS G6,226,,,,,7441574.0,897B4L,2024-12-03 00:07:41
2,13,2024-12-03T00:12:41,EGV,,,,iOS G6,224,,,,,7441874.0,897B4L,2024-12-03 00:12:41
3,14,2024-12-03T00:17:41,EGV,,,,iOS G6,210,,,,,7442174.0,897B4L,2024-12-03 00:17:41
4,15,2024-12-03T00:22:40,EGV,,,,iOS G6,185,,,,,7442474.0,897B4L,2024-12-03 00:22:40


In [None]:
glucose_col = 'Glucose Value (mg/dL)'

# Keep only rows that have both a timestamp and a glucose value, in chronological order.
# groupby preserves the incoming row order inside each group, and the frame was built by
# concatenating several files, so without the sort a week's samples could be out of time
# order. Rows with no glucose value would otherwise put NaN in the middle of a series.
glucose_readings = (
    dexcom_df
    .dropna(subset=['Timestamp', glucose_col])
    .sort_values('Timestamp', kind='stable')
)

# Group by calendar week using pd.Grouper; freq='W' makes weekly bins that end on Sunday.
weekly_groups = glucose_readings.groupby(pd.Grouper(key='Timestamp', freq='W'))

# Create a list of weekly time series for the glucose values.
# Each element in weekly_series is a numpy array representing one week's data;
# weeks with no readings are skipped.
weekly_series = [group[glucose_col].values
                 for _, group in weekly_groups if not group.empty]

In [None]:
# Length of the longest weekly series; every other week is padded up to it.
# NOTE(review): the preview rows are about 5 minutes apart, which would give roughly 2016
# samples per week, yet the recorded output shows 4008 — check for overlapping exports
# or duplicated rows.
max_length = max(map(len, weekly_series))

# Right-pad each week by repeating its final value (mode='edge') until it reaches max_length.
padded_series = [
    np.pad(week, (0, max_length - len(week)), mode='edge')
    for week in weekly_series
]

# Stack into a 2-D array (one row per week) and append a trailing feature axis of size 1.
data_array = np.expand_dims(np.array(padded_series), axis=-1)

print("Shape of data_array:", data_array.shape)

Shape of data_array: (172, 4008, 1)


In [None]:
# Min-max scale every weekly series on its own (the scaler is re-fitted per series), so
# each week is rescaled by its own minimum and maximum rather than by global ones.
scaler = MinMaxScaler()
normalized_data = np.array([scaler.fit_transform(series) for series in data_array])

# The DTW distance matrix, the clustering and the silhouette score are computed in the
# cells that follow. They were previously repeated in this cell as well, and the pairwise
# DTW pass was slow enough that the cell was interrupted before it produced any output,
# so this cell now stops at the cheap steps: normalisation and a sanity-check plot.

# Plot an example time series (first weekly series) for visualization
fig, ax = plt.subplots()
ax.plot(normalized_data[0].ravel())
ax.set_title('Example Weekly Time Series Data')
ax.set_xlabel('Time Step')
ax.set_ylabel('Normalized Glucose Value')
plt.show()

KeyboardInterrupt: 

In [None]:
# Compute the pairwise DTW distance matrix between all weekly series.
# This is the expensive step of the notebook; n_jobs=-1 spreads the pairwise
# computations over all available cores without changing the resulting matrix.
distance_matrix = cdist_dtw(normalized_data, n_jobs=-1)

In [None]:
# K-Means clustering with DTW as the metric.
# random_state is fixed so that centroid initialisation, and therefore the cluster
# labels and the score below, are the same on every run.
kmeans = TimeSeriesKMeans(n_clusters=2, metric="dtw", random_state=0)
clusters = kmeans.fit_predict(normalized_data)

# Evaluate clusters using silhouette score with precomputed distance matrix
# (reuses the DTW distances instead of recomputing them inside the scorer).
score = silhouette_score(distance_matrix, clusters, metric="precomputed")
print(f'Silhouette Score: {score}')