In [1]:
import datetime
import numpy as np
from functools import cmp_to_key
from pprint import pprint
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
def readFile(fname):
    data = None
    with open(fname, 'r') as f:
        data = f.readlines()[1:]
        data = [tup.split(',') for tup in data]
    return data

def convInt(str):
    if str == '' or str == 'nan':
        return 0
    else:
        return int(str)
    
def compare(x, y):
    return x[1] > y[1]

def writeFile(fname, data):
    with open(fname, 'w') as f:
        for line in data:
            f.write(line)

In [4]:
ins_data = readFile('InsulinData.csv')
insulin_data = []
for r in ins_data:
    carb = r[24]
    bolus = r[19]
    date = datetime.datetime.strptime(r[1], "%m/%d/%Y")
    time = datetime.datetime.strptime(r[2], "%H:%M:%S").time()
    if carb != '' and carb != '0':
        insulin_data.append([int(float(bolus)), datetime.datetime.combine(date, time)])
insulin_data.reverse()

In [5]:
valid_insulin_data = []
i = 0
while i < len(insulin_data)-1:
    diff = insulin_data[i+1][1] - insulin_data[i][1]
    if diff < datetime.timedelta(minutes=30):
        i += 2
        continue
    elif diff >= datetime.timedelta(hours=2):
        valid_insulin_data.append(insulin_data[i])
    i += 1

In [7]:
meal_windows = []
for insulin_tuple in valid_insulin_data:
    meal_windows.append([insulin_tuple[1]-datetime.timedelta(minutes=30), insulin_tuple[1]+datetime.timedelta(hours=2)])
    
cgm_data = readFile('CGMData.csv')
meal_dataset = []
mpt = len(meal_windows)-1
i = 0
while i < len(cgm_data) and mpt >= 0:
    date = datetime.datetime.strptime(cgm_data[i][1], "%m/%d/%Y")
    time = datetime.datetime.strptime(cgm_data[i][2], "%H:%M:%S").time()
    ts = datetime.datetime.combine(date, time)
    if ts < meal_windows[mpt][1]:
        meal = [valid_insulin_data[mpt][0]]
        for _ in range(30):
            if i >= len(cgm_data):
                break
            meal.append(cgm_data[i][30])
            i += 1
        meal_dataset.append(meal)
        mpt -= 1
    else:
        i += 1

In [9]:
meal_dataset[1]

[8,
 '177',
 '194',
 '200',
 '201',
 '189',
 '162',
 '166',
 '173',
 '176',
 '169',
 '167',
 '166',
 '168',
 '180',
 '182',
 '178',
 '171',
 '160',
 '145',
 '138',
 '132',
 '120',
 '127',
 '129',
 '132',
 '123',
 '104',
 '100',
 '107',
 '106']

In [10]:
valid_meal_dataset = []
for m in meal_dataset:
    if '' not in m:
        valid_meal_dataset.append(m)

In [12]:
valid_meal_dataset[0]

[8,
 '177',
 '194',
 '200',
 '201',
 '189',
 '162',
 '166',
 '173',
 '176',
 '169',
 '167',
 '166',
 '168',
 '180',
 '182',
 '178',
 '171',
 '160',
 '145',
 '138',
 '132',
 '120',
 '127',
 '129',
 '132',
 '123',
 '104',
 '100',
 '107',
 '106']

In [21]:
bolus_data = [m[0] for m in valid_meal_dataset]
meal_dataset = [m[1:] for m in valid_meal_dataset]
bolus_data[:10]

[8, 3, 0, 5, 9, 1, 5, 3, 1, 2]

In [22]:
meal_dataset = [[convInt(cell) for cell in row] for row in meal_dataset]
meal_dataset = np.array(meal_dataset)

cgm_min = np.min(meal_dataset)
cgm_max = np.max(meal_dataset)

n_bins = int((cgm_max - cgm_min)/20)+1
bin_list = [[] for _ in range(n_bins)]
i = 0
for row in meal_dataset:
    for cell in row:
        i = int((cell - cgm_min)/20)
        bin_list[i].append(cell)

In [23]:
b_max_arr = [max(a) for a in meal_dataset]
b_max_arr = [int((tup - cgm_min)/20) for tup in b_max_arr]
print(len(b_max_arr))

b_meal_arr = [a[6] for a in meal_dataset]
b_meal_arr = [int((tup - cgm_min)/20) for tup in b_meal_arr]
print(len(b_meal_arr))

433
433


In [12]:
# bolus_data = []
# i = len(valid_insulin_data) - 1
# for r in ins_data:
#     date = datetime.datetime.strptime(r[1], "%m/%d/%Y")
#     time = datetime.datetime.strptime(r[2], "%H:%M:%S").time()
#     stamp = datetime.datetime.combine(date, time)
#     if valid_insulin_data[i] == stamp and r[24] != '' and r[24] != '0':
#         bolus_data.append(int(float(r[19])))
#         i -= 1
# print(len(bolus_data))

560


In [27]:
COUNT = len(bolus_data)
dataset = []
for i in range(COUNT):
    row = ["max_"+str(b_max_arr[i]), "meal_"+str(b_meal_arr[i]), "ins_"+str(bolus_data[i])]
    dataset.append(row)
dataset[0]

['max_8', 'meal_6', 'ins_8']

In [28]:
te = TransactionEncoder()
te_ary = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)
print(df)

     ins_0  ins_1  ins_10  ins_11  ins_12  ins_13  ins_2  ins_3  ins_4  ins_5  \
0    False  False   False   False   False   False  False  False  False  False   
1    False  False   False   False   False   False  False   True  False  False   
2     True  False   False   False   False   False  False  False  False  False   
3    False  False   False   False   False   False  False  False  False   True   
4    False  False   False   False   False   False  False  False  False  False   
..     ...    ...     ...     ...     ...     ...    ...    ...    ...    ...   
428  False  False   False   False   False   False  False  False  False   True   
429  False  False   False   False   False   False  False  False  False   True   
430  False  False   False   False   False   False   True  False  False  False   
431  False  False   False   False   False   False  False  False  False  False   
432  False  False   False   False   False   False  False  False  False  False   

     ...  meal_15  meal_16 

In [42]:
frequent_itemsets = apriori(df, min_support=0.004, use_colnames=True)
f_items = frequent_itemsets
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(lambda x: len(x))
frequent_itemsets = frequent_itemsets[(frequent_itemsets['length'] == 3)]
frequent_itemsets = frequent_itemsets[frequent_itemsets['support']==frequent_itemsets['support'].max()]
frequent_itemsets[['itemsets','support']]

Unnamed: 0,itemsets,support
290,"(meal_5, ins_1, max_7)",0.013857
299,"(max_6, meal_5, ins_2)",0.013857


In [43]:
freq_data = []
for i, r in frequent_itemsets.iterrows():
    r_str = ''
    for s in r['itemsets']:
        r_str += s[-1]
        r_str += ','
    r_str += str(r['support'])
    r_str += '\n'
    freq_data.append(r_str)
writeFile('Result_1.csv', freq_data)

In [44]:
rules = association_rules(f_items, metric="confidence", min_threshold=0.1)
rules['ante_len'] = rules['antecedents'].apply(lambda x: len(x))
rules['cons_len'] = rules['consequents'].apply(lambda x: len(x))
rules = rules[(rules['ante_len'] == 2) & (rules['cons_len'] == 1)]
rules[['antecedents', 'consequents', 'confidence']]

Unnamed: 0,antecedents,consequents,confidence
275,"(max_5, meal_4)",(ins_0),0.272727
276,"(max_5, ins_0)",(meal_4),0.750000
277,"(ins_0, meal_4)",(max_5),0.600000
279,"(max_6, meal_3)",(ins_0),0.666667
280,"(max_6, ins_0)",(meal_3),0.500000
...,...,...,...
518,"(max_9, meal_6)",(ins_8),0.200000
519,"(ins_8, meal_6)",(max_9),0.500000
520,"(meal_9, ins_9)",(max_10),0.500000
521,"(meal_9, max_10)",(ins_9),0.181818


In [45]:
def func(row):
    for e in row['antecedents']:
        if e[:-1] != 'max_' and e[:-1] != 'meal_':
            return False
    return True
rules = rules[rules.apply(func, axis=1)]
rules_1 = rules[rules['confidence']==rules['confidence'].max()]
rules_1[['antecedents','consequents','confidence']]

Unnamed: 0,antecedents,consequents,confidence
279,"(max_6, meal_3)",(ins_0),0.666667
507,"(meal_2, max_3)",(ins_7),0.666667


In [46]:
rules_1_data = []
for i, r in rules_1.iterrows():
    r_str = ''
    for s in r['antecedents']:
        r_str += s[-1]
        r_str += ','
    for s in r['consequents']:
        r_str += s[-1]
        r_str += ','
    r_str += str(r['confidence'])
    r_str += '\n'
    rules_1_data.append(r_str)
writeFile('Result_2.csv', rules_1_data)

In [47]:
rules_2 = rules[rules['confidence']<0.15]
rules_2 = rules_2.sort_values(by='confidence')
rules_2[['antecedents','consequents','confidence']]

Unnamed: 0,antecedents,consequents,confidence
283,"(max_6, meal_4)",(ins_0),0.111111
381,"(max_6, meal_4)",(ins_3),0.111111
429,"(meal_6, max_7)",(ins_4),0.111111
465,"(meal_6, max_7)",(ins_5),0.111111
510,"(max_6, meal_4)",(ins_7),0.111111
514,"(meal_6, max_7)",(ins_7),0.111111
309,"(max_8, meal_7)",(ins_1),0.117647
390,"(max_8, meal_7)",(ins_3),0.117647
438,"(max_8, meal_7)",(ins_4),0.117647
357,"(meal_8, max_9)",(ins_2),0.125


In [48]:
rules_2_data = []
for i, r in rules_2.iterrows():
    r_str = ''
    for s in r['antecedents']:
        r_str += s[-1]
        r_str += ','
    for s in r['consequents']:
        r_str += s[-1]
        r_str += ','
    r_str += str(r['confidence'])
    r_str += '\n'
    rules_2_data.append(r_str)
writeFile('Result_3.csv', rules_2_data)