In [None]:
import pandas as pd
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole kernel session.
pd.options.mode.chained_assignment = None

import numpy as np
import matplotlib.pyplot as plt

# Directory holding the extracted data files (relative to this notebook)
data_path = '../data_extraction/data/'

# Cohort table, stored as a tab-separated file
cohort = pd.read_csv(data_path + 'cohort.tsv', sep='\t')

In [None]:
# Process urine values
#
# For every ICU stay the urine output is aggregated into consecutive 4-hour
# blocks that start at the stay's first urine measurement.  A stay is flagged
# as AKI when 3 consecutive blocks are each below 2 * patient weight.

BLOCK_MINUTES = 240        # length of one aggregation block (4 h)
BLOCKS_FOR_AKI = 3         # consecutive low-output blocks needed to flag AKI
LOW_OUTPUT_PER_WEIGHT = 2  # block threshold is 2 * weight
                           # (presumably 0.5 mL/kg/h over 4 h -- TODO confirm units)
DEFAULT_WEIGHT = 50        # temporary stand-in when no weight is available


def urine_volume_by_block(rel_minutes, volumes, block_minutes=BLOCK_MINUTES):
    """Spread urine measurements over consecutive fixed-length time blocks.

    Each measurement ``volumes[i]`` (i >= 1) is treated as having accumulated
    uniformly over the interval ``(rel_minutes[i-1], rel_minutes[i]]`` and is
    split between all blocks that interval overlaps, in proportion to the
    overlap.  The first measurement has no preceding interval and is not used.

    Parameters
    ----------
    rel_minutes : array-like
        Measurement times in ascending order, relative to the first one
        (so ``rel_minutes[0] == 0``).
    volumes : array-like
        Urine volume recorded at each time.
    block_minutes : number, optional
        Block length in the same unit as ``rel_minutes``.

    Returns
    -------
    numpy.ndarray
        One total per block; ``ceil(rel_minutes[-1] / block_minutes)`` blocks.
        Empty when there is no elapsed time (e.g. a single measurement).
    """
    rel_minutes = np.asarray(rel_minutes, dtype=float)
    volumes = np.asarray(volumes, dtype=float)
    if len(rel_minutes) == 0:
        return np.zeros(0)

    nblocks = int(np.ceil(rel_minutes[-1] / block_minutes))
    blocks = np.zeros(nblocks)
    if nblocks == 0:
        return blocks

    for ind in range(1, len(volumes)):
        start, end = rel_minutes[ind - 1], rel_minutes[ind]
        # Block containing the end of the interval.  An end exactly on a block
        # boundary belongs to the block it closes, not to the one it opens.
        last_block = min(max(int(np.ceil(end / block_minutes)) - 1, 0), nblocks - 1)
        elapsed = end - start
        if elapsed <= 0:
            # Repeated timestamp: no interval to spread over, so credit the
            # whole volume to the block containing that time (avoids 0/0).
            blocks[last_block] += volumes[ind]
            continue
        first_block = int(start // block_minutes)
        for b in range(first_block, last_block + 1):
            overlap = min(end, (b + 1) * block_minutes) - max(start, b * block_minutes)
            blocks[b] += volumes[ind] * overlap / elapsed
    return blocks


def first_low_output_run(is_low, run_length=BLOCKS_FOR_AKI):
    """Return the index of the first block that starts ``run_length``
    consecutive True values in ``is_low``, or None if there is no such run."""
    for b in range(len(is_low) - run_length + 1):
        if np.all(is_low[b:b + run_length]):
            return b
    return None


urine = pd.read_csv(data_path+'urine.csv')

icustays = np.unique(urine['icustay_id'])

# One weight per stay, looked up once instead of filtering `cohort` per stay.
weight_by_stay = cohort.drop_duplicates('icustay_id').set_index('icustay_id')['weight']

# The block calculation needs each stay's measurements in time order; a stable
# sort keeps the file order for rows sharing a timestamp.
urine_by_time = urine.sort_values('min_from_intime', kind='mergesort')

# For each icustay key, the array of urine output volumes in 4h windows
urine_4h = {}
# Per stay: (icustay_id, aki or not, starting time of the first 4h window of the aki run)
aki_records = []

# groupby yields the stays in ascending icustay_id order, matching `icustays`.
for icustay, stay in urine_by_time.groupby('icustay_id'):
    t = np.array(stay['min_from_intime'])
    u = np.array(stay['value'])

    # Keep the time of first urine measurement, and work with relative times.
    t0 = t[0]
    urine_blocks = urine_volume_by_block(t - t0, u)

    # Get the patient weight for the criterion.  Stays with a missing weight,
    # or not present in `cohort` at all, fall back to DEFAULT_WEIGHT.
    patientweight = weight_by_stay.get(icustay, np.nan)
    if pd.isna(patientweight):
        patientweight = DEFAULT_WEIGHT  # temporary

    # Whether each block meets the I criteria.
    # NOTE(review): the last block can cover less than 4h but is compared
    # against the full-block threshold -- confirm this is intended.
    urine_blocks_I = urine_blocks < (LOW_OUTPUT_PER_WEIGHT * patientweight)

    # Find the first 3 consecutive 4h blocks that satisfy the criteria.
    onset_block = first_low_output_run(urine_blocks_I)
    if onset_block is None:
        aki_records.append((icustay, 0, 0))
    else:
        # The starting time of the aki onset window, on the original time axis
        aki_records.append((icustay, 1, int(onset_block * BLOCK_MINUTES + t0)))

    urine_4h[icustay] = urine_blocks


aki_urine = pd.DataFrame(
    aki_records, columns=['icustay_id', 'aki_result', 'aki_onset_t']
).astype('int')

assert len(aki_urine) == len(icustays), "every icustay_id should yield exactly one row"
                

In [None]:
# The aki results of each icustay (preview of the first rows only)
aki_urine.head(10)

In [None]:
# The 4 hour urine volumes (in a dictionary).
# Only the first few stays are shown: displaying the whole dictionary prints
# one array per ICU stay and bloats the saved notebook.
dict(list(urine_4h.items())[:5])