In [None]:
import pandas as pd
# Setting this option to None turns off pandas' chained-assignment
# (SettingWithCopy) warning for the whole notebook, so assignments into
# slices further down will not warn.
pd.options.mode.chained_assignment = None

import numpy as np
import matplotlib.pyplot as plt

# Directory holding the extracted data files read by the cells below
# (cohort.tsv, urine.csv, map.csv). The path is relative to the notebook's
# working directory.
data_path='../data_extraction/data/'

In [None]:
# Cohort table, tab-separated. The cells below read its `icustay_id` and
# `weight` columns.
cohort = pd.read_csv(data_path + 'cohort.tsv', sep='\t')

In [None]:
# numerical variable summaries
#print(cohort[['age','los','max_lactate','vaso_frac']].describe())

In [None]:
# Scratch check: confirm that a missing weight in the cohort table is read
# in as NaN for one known stay. Prints 'yes' when it is.
icustay = 202634 # empty weight
patientweight = cohort.loc[cohort['icustay_id'] == icustay, 'weight']

# First (and presumably only) weight recorded for that stay.
first_weight = patientweight.values[0]
if np.isnan(first_weight):
    print('yes')


In [None]:
# Process urine values into 4-hour output blocks and flag AKI by urine output.


def urine_volume_by_block(t, u, block_minutes=240):
    """Spread urine measurements over consecutive fixed-width time blocks.

    Each measurement ``u[i]`` (for ``i >= 1``) is treated as having been
    produced at a constant rate over the interval ``t[i-1]`` to ``t[i]``, and
    its volume is split between every block that interval overlaps, in
    proportion to the overlap. ``u[0]`` is not used because the interval that
    produced it is unknown.

    Parameters
    ----------
    t : array-like
        Measurement times relative to the first measurement (``t[0] == 0``),
        in ascending order, in the same unit as ``block_minutes``.
    u : array-like
        Measured volumes, one per entry of ``t``.
    block_minutes : number, optional
        Width of one block. Defaults to 240 (4 hours when ``t`` is in minutes).

    Returns
    -------
    numpy.ndarray
        ``ceil(t[-1] / block_minutes)`` block totals. Empty when there are no
        measurements or no time elapses between the first and last one.
    """
    # Work in floats so the divisions below are true divisions even when the
    # inputs are integers (on Python 2 an int / int would floor).
    t = np.asarray(t, dtype=float)
    u = np.asarray(u, dtype=float)
    block_minutes = float(block_minutes)

    if len(t) == 0:
        return np.zeros(0)

    nblocks = int(np.ceil(t[-1] / block_minutes))
    blocks = np.zeros(nblocks)

    for ind in range(1, len(u)):
        start, end = t[ind - 1], t[ind]
        duration = end - start

        if duration <= 0:
            # Two readings with the same timestamp: there is no interval to
            # spread over, so credit the whole volume to the block at `end`.
            if nblocks > 0:
                blocks[min(int(end // block_minutes), nblocks - 1)] += u[ind]
            continue

        # A reading that ends exactly on a block boundary belongs to the block
        # finishing there, hence ceil(...) - 1 rather than floor(...).
        first = int(start // block_minutes)
        last = min(int(np.ceil(end / block_minutes)) - 1, nblocks - 1)
        for b in range(first, last + 1):
            overlap = min(end, (b + 1) * block_minutes) - max(start, b * block_minutes)
            blocks[b] += u[ind] * overlap / duration

    return blocks


urine = pd.read_csv(data_path+'urine.csv')

#urine['four_hour_block'] = pd.Series((urine.min_from_intime/interval).astype(int), index=maps.index)

icustays = np.unique(urine['icustay_id'])

# Weight substituted when a stay has no usable weight in `cohort` (temporary).
default_weight = 50

# Set to True to print the per-stay intermediate values while debugging.
verbose = False

# One weight per stay (the first row wins, as with the previous `[0]` lookup),
# built once instead of filtering `cohort` inside the loop.
weight_by_stay = cohort.drop_duplicates('icustay_id').set_index('icustay_id')['weight']

# For each icustay key, a list of: [array of urine output volumes in 4h windows, starting time of windows, AKI window indices]
urine_4h = {}
# Binary outcome of aki or not for each icustay.
aki_urine = {}

# groupby yields the stays in the same sorted order as `icustays`, without
# re-scanning the whole frame for every stay.
for icustay, stay_urine in urine.groupby('icustay_id'):
    # Order the stay's readings by time; mergesort is stable, so readings that
    # share a timestamp keep their file order.
    order = np.argsort(stay_urine['min_from_intime'].values, kind='mergesort')
    u = stay_urine['value'].values[order]
    t = stay_urine['min_from_intime'].values[order]

    # Keep the time of first urine measurement, and get relative times.
    t0 = t[0]
    t = t - t0

    # Urine volumes for four hour blocks, starting from the first measurement
    # and extending to the last one. Output before the first measurement is
    # not calculated, so the first measured volume itself is not used.
    # `urine_blocks` is left at notebook level on purpose: a later cell reads it.
    urine_blocks = urine_volume_by_block(t, u)
    nblocks = len(urine_blocks)

    # Get the patient weight to calculate RIFLE criteria, falling back to the
    # default when the stay is absent from `cohort` or its weight is NaN.
    patientweight = weight_by_stay.get(icustay, np.nan)
    if pd.isnull(patientweight):
        patientweight = default_weight

    # Whether the urine block meets the I criteria: less than 2 per unit of
    # weight in a 4h block, i.e. 0.5 per unit of weight per hour
    # (presumably mL and kg; confirm the units of `value` and `weight`).
    # NOTE(review): the final block can cover less than 4h of data but is
    # still compared against the full 4h threshold.
    urine_blocks_I = urine_blocks < (2 * patientweight)

    if verbose:
        print(urine_blocks)
        print('patient weight: ', patientweight)
        print('patient weight*2: ', 2*patientweight)
        print(urine_blocks_I)

    # Find 3 consecutive 4h blocks that satisfy the I kidney injury criteria.
    # `first_aki_block` stays an empty list when no such run exists.
    aki_urine[icustay] = 0
    first_aki_block = []
    for b in range(nblocks - 2):
        if urine_blocks_I[b:b + 3].all():
            aki_urine[icustay] = 1
            first_aki_block = b
            if verbose:
                print('got one')
            break

    urine_4h[icustay] = [urine_blocks, t0, first_aki_block]
                

In [None]:
# Scratch peek at index 8 of `urine_blocks`. That name is overwritten on every
# iteration of the urine loop, so this shows the last stay processed and raises
# IndexError when that stay has fewer than nine blocks.
urine_blocks[8]

In [None]:
# Display the whole per-stay dictionary:
# icustay_id -> [4h block volumes, time of first measurement, first AKI block index or []].
urine_4h

In [None]:
#urine_4h
# Display the per-stay urine-output AKI flag: icustay_id -> 0 or 1.
aki_urine

In [None]:
# Scratch check of the block-edge arithmetic used in the urine loop: rounding a
# time down to a multiple of 240 gives the left edge of the 4h block it is in.
#   times      [250, 500, 719]
#   left edges [240, 480, 480]
# A time that sits exactly on a boundary (720 here) is its own left edge.
x = 720
(x // 240) * 240


In [None]:
# convert raw MAP readings to MAP features
maps = pd.read_csv(data_path+'map.csv')
interval = 60  # width of one averaging bin, in the unit of min_from_intime

# Hour bin of every reading. floor() is used instead of a bare astype(int):
# truncation rounds toward zero, so any reading with a negative
# min_from_intime would otherwise be merged into hour 0.
maps['hour'] = np.floor(maps.min_from_intime/interval).astype(int)

# get means for every hour with data present.
mean_maps = maps.groupby(['icustay_id', 'hour'])['value'].mean()
mean_maps = mean_maps.to_frame().reset_index().set_index(['icustay_id'])


# interpolate MAPs for missing hours

# For each icustay, the first and last hour that has a reading.
min_hours = mean_maps.groupby(level=0)['hour'].min()
max_hours = mean_maps.groupby(level=0)['hour'].max()

# Every (icustay_id, hour) pair between a stay's first and last observed hour,
# inclusive. Hours outside that range are not created, so the interpolation
# below only fills gaps that have real values on both sides within one stay.
interp_index = []
for this_icustay in min_hours.index:
    min_hour = min_hours.loc[this_icustay]
    max_hour = max_hours.loc[this_icustay]
    interp_index += [(this_icustay, hour) for hour in np.arange(min_hour,max_hour+1)]

mean_maps = mean_maps.set_index(['hour'],append=True)
interp_mean_maps = mean_maps.reindex(pd.MultiIndex.from_tuples(interp_index,names=['icustay_id','hour']))
interp_mean_maps = interp_mean_maps['value'].interpolate(method='linear')
interp_mean_maps = interp_mean_maps.to_frame().reset_index()

# get percent of hours missing a MAP value
# (rows added by the reindex are exactly the hours that had no reading)
missing_map = len(interp_mean_maps.index) - len(mean_maps.index)

frac_missing = missing_map/float(len(interp_mean_maps.index))
print("Fraction of hours missing MAP values:", frac_missing)

# get minimum MAP value per patient
min_ind = interp_mean_maps.groupby('icustay_id')['value'].idxmin(skipna=True)
min_maps = interp_mean_maps.loc[min_ind]

# bin MAP values for first 72 hours
# .copy() gives an independent frame, so adding the 'bins' column below is not
# an assignment into a slice of interp_mean_maps.
map_72 = interp_mean_maps.loc[interp_mean_maps['hour']<72].copy()

# Bin edges 30, 40, ..., 90, 200. pd.cut leaves values outside (30, 200] as NaN.
map_cutoffs = np.append(np.arange(30,100,10),200)
map_72['bins'] = pd.cut(map_72['value'], map_cutoffs)
binned_min_maps = pd.cut(min_maps['value'], map_cutoffs)

# Fraction of each stay's hours (within the first 72) spent in each MAP bin.
map_fracs = map_72.groupby('icustay_id')['bins'].value_counts(normalize=True)

In [None]:
# Display the raw urine table as loaded from urine.csv.
urine

In [None]:
# Show the first rows of the MAP table. Without the call parentheses the cell
# only displayed the bound-method repr instead of any data.
maps.head()

In [None]:
# Scratch check of trapezoidal integration on unevenly spaced samples:
# y = [0, 1, 1] at x = [0, 1.2, 5] gives 0.6 + 3.8 = 4.4.
# np.trapz is deprecated in NumPy 2.0 in favour of np.trapezoid, so use
# whichever one this NumPy provides.
_trapezoid = np.trapezoid if hasattr(np, 'trapezoid') else np.trapz
area = _trapezoid([0, 1, 1], x=[0, 1.2, 5])
area

In [None]:
# Toy series for checking the rectangle-sum area calculation:
# sample times, the value recorded at each time, and the gaps between times.
# Note that this reuses the names `t` and `u` from the urine loop.
t = np.arange(4)
u = np.array([0, 1, 2, 1])
dt = t[1:] - t[:-1]
print(dt)



In [None]:
# Rectangle-rule area: each value u[i] (i >= 1) multiplied by the time gap
# that precedes it, then summed.
area = (dt * u[1:]).sum()
area

In [None]:
# Scratch example of a dict keyed by id whose values are [name, list] pairs,
# the same shape of structure as `urine_4h`.
a = {
    20003: ['jack', [1, 2, 3]],
    20002: ['jill', [9, 9]],
}