In [1]:
# Declaration

import pandas as pd
import numpy as np
from enum import Enum
from enum import IntEnum

## [Pattern]

### Goal 1 : Lifespan after first medication, per drug

In [67]:
# Goal 1 : livespan after first medication
#
# Build the working table in one chain: read the patient/medication file,
# keep rows flagged DEAD == 1, then keep only the rows whose medication age
# equals the patient's first-medication age, and renumber the index.
data = (
    pd.read_csv('diabetes/p_m.csv')
    .loc[lambda frame: frame['DEAD'] == 1]
    .loc[lambda frame: frame['AGEMEDICATION'] == frame['AGEFIRSTMEDICATION']]
    .reset_index(drop=True)
)

In [68]:
# Per-drug summary of DAYSLIVEFIRSTMED.
#
# For each drug flag column, select the rows where the flag equals 1 and keep
# the describe() summary (count, mean, std, quartiles, ...) of
# DAYSLIVEFIRSTMED. `data_cnt` is the total row count used as the denominator
# for the usage percentages printed later.
data_cnt = len(data)

metformin = data[data['METFORMIN'] == 1].describe()['DAYSLIVEFIRSTMED']
avg_metformin = metformin['mean']

insulin = data[data['INSULIN'] == 1].describe()['DAYSLIVEFIRSTMED']
avg_insulin = insulin['mean']

# `glp1` / `sglt2` are already describe() summaries, so the mean is read
# directly. Calling .describe() a second time would summarise the eight
# summary statistics themselves and return the wrong "mean".
glp1 = data[data['GLP1'] == 1].describe()['DAYSLIVEFIRSTMED']
avg_glp1 = glp1['mean']

sglt2 = data[data['SGLT2'] == 1].describe()['DAYSLIVEFIRSTMED']
avg_sglt2 = sglt2['mean']


In [69]:
# Report, for every drug, how often it was used (absolute and as a share of
# all samples) and the mean DAYSLIVEFIRSTMED expressed in days, 30-day months
# and 365-day years.
print('Live span after first medication')
print('Num of samples :', data_cnt)

for drug_label, drug_stats in (
    ('Metformin', metformin),
    ('Insulin', insulin),
    ('GLP1', glp1),
    ('SGLT2', sglt2),
):
    times_used = drug_stats['count']
    mean_days = drug_stats['mean']

    print(drug_label)
    print(' - used :', int(times_used), 'times (', '%0.2f' % ((times_used / data_cnt) * 100), '%)')
    print(
        ' - average livespan : ',
        '%0.2f' % mean_days, 'days / ',
        '%0.2f' % (mean_days / 30), ' months / ',
        '%0.2f' % (mean_days / 365), ' years',
    )

Live span after first medication
Num of samples : 4023
Metformin
 - used : 3701 times ( 92.00 %)
 - average livespan :  8230.72 days /  274.36  months /  22.55  years
Insulin
 - used : 352 times ( 8.75 %)
 - average livespan :  7693.61 days /  256.45  months /  21.08  years
GLP1
 - used : 249 times ( 6.19 %)
 - average livespan :  4755.20 days /  158.51  months /  13.03  years
SGLT2
 - used : 2 times ( 0.05 %)
 - average livespan :  2655.00 days /  88.50  months /  7.27  years


## [Pattern]

### Goal 2 : Drugs based on HBA1C

In [71]:
# Load the medication/observation table and keep only the rows that carry an
# HBA1C value, renumbering the index afterwards.
data = (
    pd.read_csv("diabetes/m_o.csv")
    .loc[lambda frame: frame["HBA1C"].notna()]
    .reset_index(drop=True)
)
# Show a single row as a quick look at the columns.
data.head(1)

Unnamed: 0,PATIENT,ENCOUNTER,MEDSTART,INSULIN,METFORMIN,GLP1,SGLT2,OBSDATE,CODE,HBA1C
0,002abc25-f52d-48a3-91b1-9d5f9480184e,3307b95e-c15b-43a3-9d9a-20b49b0fa5ad,2016-04-29,1,0,0,0,2016-04-29,4548-4,7.0


In [72]:
# Per-drug summary of HBA1C.
#
# For each drug flag column, select the rows where the flag equals 1 and keep
# the describe() summary of the HBA1C column. `data_cnt` is the total number
# of rows in the filtered table.
data_cnt = len(data)

metformin = data[data['METFORMIN'] == 1].describe()["HBA1C"]
avg_metformin = metformin['mean']

insulin = data[data['INSULIN'] == 1].describe()["HBA1C"]
avg_insulin = insulin['mean']

# `glp1` / `sglt2` already hold describe() output; take the mean from it
# directly. A second .describe() would average the summary statistics
# (count, mean, std, min, quartiles, max) instead of the HBA1C values.
glp1 = data[data['GLP1'] == 1].describe()["HBA1C"]
avg_glp1 = glp1['mean']

sglt2 = data[data['SGLT2'] == 1].describe()["HBA1C"]
avg_sglt2 = sglt2['mean']

In [73]:
# Print the mean HBA1C per drug, one aligned line per drug.
print('HBA1C average levels at the time of medication')
print('Num of samples :', data_cnt)

for hba1c_prefix, hba1c_stats in (
    ('Metformin : ', metformin),
    ('Insulin   : ', insulin),
    ('GLP1      : ', glp1),
    ('SGLT2     : ', sglt2),
):
    print(hba1c_prefix, '%0.2f' % hba1c_stats['mean'])

HBA1C average levels at the time of medication
Num of samples : 4792
Metformin :  8.63
Insulin   :  7.33
GLP1      :  9.60
SGLT2     :  9.69


## [Pattern]

### Goal 3 : Duration until next medication

In [106]:
# Re-read the patient/medication file so this section starts from the full
# table rather than from whatever an earlier section left in `data`.
data = pd.read_csv('diabetes/p_m.csv')
# Optional column pruning, currently disabled (errors='ignore' would make it
# a no-op for any listed column that is absent):
#data = data.drop(['RACE', 'GENDER', 'AGEDEAD', 'AGEMEDICATION', 'MEDSTOP'], axis=1, errors='ignore')

In [107]:
# Compute MEDDUR: for every row, the number of days between its MEDSTART and
# the MEDSTART of the same patient's next row.
#
# NOTE: this cell re-indexes `data` in place, so it must be run exactly once
# after the load cell; re-running it alone fails because PATIENT/ENCOUNTER
# are no longer columns.
data = data.set_index(['PATIENT', 'ENCOUNTER'])

# Parse the dates first so the sort below is chronological instead of relying
# on the lexical order of the raw strings.
data['MEDSTART'] = pd.to_datetime(data['MEDSTART'], format='%Y-%m-%d')

# A stable sort keeps rows that share a MEDSTART in their original order, so
# the "next medication" pairing is reproducible from run to run.
data = data.sort_values(['MEDSTART'], kind='mergesort')

# Within each patient (index level 0), the next row's start date; the
# patient's last row has no successor and gets NaT.
data['MEDNEXT'] = data.groupby(level=[0])['MEDSTART'].shift(-1)
data['MEDDUR'] = (data['MEDNEXT'] - data['MEDSTART']).dt.days

# Rows without a following medication have no duration; drop them.
data = data[data['MEDDUR'].notna()]

In [111]:
# Total number of rows that have a duration, plus the describe() summary of
# MEDDUR for the rows of each drug (flag column equal to 1).
data_cnt = len(data)

metformin, insulin, glp1, sglt2 = (
    data[data[flag_column] == 1].describe()['MEDDUR']
    for flag_column in ('METFORMIN', 'INSULIN', 'GLP1', 'SGLT2')
)

In [119]:
# Print the mean MEDDUR per drug in days, 30-day months and 365-day years.
print('Average duration until next medication')
print('Num of samples :', data_cnt)

for duration_prefix, duration_stats in (
    ('Metformin : ', metformin),
    ('Insulin   : ', insulin),
    ('GLP1      : ', glp1),
    ('SGLT2     : ', sglt2),
):
    mean_gap_days = duration_stats['mean']
    print(
        duration_prefix,
        '%0.2f' % mean_gap_days, 'days /',
        '%0.2f' % (mean_gap_days / 30), ' months /',
        '%0.2f' % (mean_gap_days / 365), ' years',
    )

Average duration until next medication
Num of samples : 7070
Metformin :  4273.23 days / 142.44  months / 11.71  years
Insulin   :  1904.59 days / 63.49  months / 5.22  years
GLP1      :  1873.57 days / 62.45  months / 5.13  years
SGLT2     :  780.36 days / 26.01  months / 2.14  years


## [Learning]

### Goal 1 : Predicting medication for new patient