In [2]:
import h5py
import torch
import sparse 
import numpy as np
import pandas as pd 
from collections import Counter

  from .autonotebook import tqdm as notebook_tqdm


### Exploring the HDF5 files

In [2]:
# Open splits.hdf5 read-only ('r'). The handle stays open across the following
# inspection cells and is closed explicitly by a later splits.close() call.
# NOTE(review): the path is absolute and user-specific; consider building it
# from a configurable data directory so the notebook runs on other machines.
splits = h5py.File("/home/shahad.hardan/Documents/research_tf/preprocessed_fiddle_3/features/outcome=ARF,T=12.0,dt=1.0/splits.hdf5", 'r')

In [3]:
# List the top-level members of the splits file.
splits.keys()

<KeysViewHDF5 ['with_notes']>

In [4]:
# List the members of the "with_notes" group.
splits["with_notes"].keys()

<KeysViewHDF5 ['test', 'train', 'val']>

In [5]:
# List what is stored under "train" inside the "with_notes" group.
splits['with_notes']["train"].keys()

<KeysViewHDF5 ['S', 'X', 'attention_mask', 'input_ids', 'label', 'token_type_ids']>

In [6]:
# Shape of the "X" dataset in the train split (no data is read, only metadata).
splits['with_notes']["train"]["X"].shape

(6626, 12, 5132)

In [7]:
# Shape of the "S" dataset in the train split.
splits['with_notes']["train"]["S"].shape

(6626, 96)

In [10]:
# Shape of the "X" dataset in the validation split.
splits['with_notes']["val"]["X"].shape

(1389, 12, 5132)

In [11]:
# Shape of the "S" dataset in the validation split.
splits['with_notes']["val"]["S"].shape

(1389, 96)

In [7]:
# The full read below is left commented out: indexing with [()] would load the
# entire train "X" dataset into memory — presumably too large to display here.
# splits['with_notes']["train"]["X"][()]
# Release the file handle that was opened for splits.hdf5.
splits.close()

In [8]:
# Open Xs.hdf5 read-only ('r'); closed explicitly by a later xs.close() call.
# NOTE(review): absolute, user-specific path — consider a configurable data
# directory instead.
xs = h5py.File("/home/shahad.hardan/Documents/research_tf/preprocessed_fiddle_3/features/outcome=ARF,T=12.0,dt=1.0/Xs.hdf5", 'r')

In [9]:
# List the top-level members of Xs.hdf5.
xs.keys()

<KeysViewHDF5 ['S', 'X']>

In [11]:
# Shape of the "S" dataset in Xs.hdf5.
xs["S"].shape

(14174, 96)

In [14]:
# Shape of the "X" dataset in Xs.hdf5.
xs["X"].shape

(14174, 12, 5132)

In [15]:
# Release the file handle that was opened for Xs.hdf5.
xs.close()

In [16]:
# Open notes.hdf5 read-only ('r'); closed explicitly by a later notes.close() call.
# NOTE(review): absolute, user-specific path — consider a configurable data
# directory instead.
notes = h5py.File("/home/shahad.hardan/Documents/research_tf/preprocessed_fiddle_3/features/outcome=ARF,T=12.0,dt=1.0/notes.hdf5", 'r')

In [17]:
# List the top-level members of notes.hdf5.
notes.keys()

<KeysViewHDF5 ['notes']>

In [19]:
# List the members of the "notes" group.
notes["notes"].keys()

<KeysViewHDF5 ['axis0', 'axis1', 'block0_items', 'block0_values', 'block1_items', 'block1_values']>

In [21]:
# Report the shape of each dataset stored under the "notes" group, one line per
# dataset, in the same order the group lists them.
for key in ("axis0", "axis1", "block0_items", "block0_values", "block1_items", "block1_values"):
    print(f"shape of {key}", notes["notes"][key].shape)

shape of axis0 (4,)
shape of axis1 (9393,)
shape of block0_items (1,)
shape of block0_values (9393, 1)
shape of block1_items (3,)
shape of block1_values (1,)


In [22]:
# Read the full contents of the "axis0" dataset into memory ([()] reads everything).
notes["notes"]["axis0"][()]

array([b'ICUSTAY_ID', b'input_ids', b'token_type_ids', b'attention_mask'],
      dtype='|S14')

In [23]:
# Read the full contents of the "axis1" dataset into memory.
notes["notes"]["axis1"][()]

array([   0,    1,    2, ..., 9390, 9391, 9392])

In [24]:
# Read the full contents of the "block0_items" dataset into memory.
notes["notes"]["block0_items"][()]

array([b'ICUSTAY_ID'], dtype='|S10')

In [26]:
# Release the file handle that was opened for notes.hdf5.
notes.close()

### Trying Torch Functions

In [1]:
# Draw a 2x3 tensor of standard-normal samples to use as the operand in the
# torch.cat examples below. torch is already imported in the notebook's first
# cell, so the redundant mid-notebook import is dropped to keep every import
# in one place.
# NOTE(review): no random seed is set, so the displayed values change on each
# run; call torch.manual_seed(...) first if reproducible output is wanted.
x = torch.randn(2, 3)
x

  from .autonotebook import tqdm as notebook_tqdm


tensor([[ 1.9538,  1.3804,  0.8969],
        [-1.0373,  1.1481, -0.2625]])

In [2]:
# Concatenate three copies of x along dimension 0 (the result has three times as many rows).
torch.cat((x, x, x), 0)

tensor([[ 1.9538,  1.3804,  0.8969],
        [-1.0373,  1.1481, -0.2625],
        [ 1.9538,  1.3804,  0.8969],
        [-1.0373,  1.1481, -0.2625],
        [ 1.9538,  1.3804,  0.8969],
        [-1.0373,  1.1481, -0.2625]])

In [3]:
# Concatenate three copies of x along dimension 1 (the result has three times as many columns).
torch.cat((x, x, x), 1)

tensor([[ 1.9538,  1.3804,  0.8969,  1.9538,  1.3804,  0.8969,  1.9538,  1.3804,
          0.8969],
        [-1.0373,  1.1481, -0.2625, -1.0373,  1.1481, -0.2625, -1.0373,  1.1481,
         -0.2625]])

In [12]:
# Rebind x to a 1-D float tensor holding 1.0 through 5.0 (the end value 6.0 is
# excluded); it is the input for the top-k example that follows.
x = torch.arange(start=1.0, end=6.0)
x

tensor([1., 2., 3., 4., 5.])

In [13]:
# Return the 3 largest elements of x together with their indices.
torch.topk(x, 3)

torch.return_types.topk(
values=tensor([5., 4., 3.]),
indices=tensor([4, 3, 2]))

In [3]:
# Build a 2x2 integer tensor, then pick elements along dim 1:
# result[i][j] = t[i][index[i][j]].
t = torch.tensor([[1, 2], [3, 4]])
torch.gather(t, dim=1, index=torch.tensor([[0, 0], [1, 0]]))

tensor([[1, 1],
        [4, 3]])

In [4]:
# Same gather with dim=-1; for this 2-D tensor that is the last dimension, so
# the result matches the dim=1 call.
torch.gather(t, -1, torch.tensor([[0, 0], [1, 0]]))

tensor([[1, 1],
        [4, 3]])

### Checking the `.npz` files

In [17]:
# Load X.npz with sparse.load_npz and display the array's summary.
# NOTE(review): absolute, user-specific path — consider a configurable data
# directory instead.
x_sparse = sparse.load_npz("/home/shahad.hardan/Documents/research_tf/preprocessed_fiddle_3/features/outcome=ARF,T=12.0,dt=1.0/X.npz")
x_sparse

0,1
Format,coo
Data Type,int64
Shape,"(14174, 12, 5132)"
nnz,13508911
Density,0.015476046226568408
Read-only,True
Size,412.3M
Storage ratio,0.1


In [18]:
# Load S.npz with sparse.load_npz and display the array's summary.
# NOTE(review): absolute, user-specific path — consider a configurable data
# directory instead.
s_sparse = sparse.load_npz("/home/shahad.hardan/Documents/research_tf/preprocessed_fiddle_3/features/outcome=ARF,T=12.0,dt=1.0/S.npz")
s_sparse

0,1
Format,coo
Data Type,int64
Shape,"(14174, 96)"
nnz,158269
Density,0.11631405507737172
Read-only,True
Size,2.4M
Storage ratio,0.2


In [17]:
# Read the population file ARF_12.0h.csv into a DataFrame.
# NOTE(review): absolute, user-specific path — consider a configurable data
# directory instead.
population = pd.read_csv("/home/shahad.hardan/Documents/research_tf/preprocessed_fiddle_3/population/ARF_12.0h.csv")

In [18]:
# (rows, columns) of the population table.
population.shape

(14174, 3)

In [20]:
# Column names of the population table.
population.columns

Index(['ID', 'ARF_ONSET_HOUR', 'ARF_LABEL'], dtype='object')

In [21]:
# Tally how many rows carry each ARF_LABEL value.
Counter(population["ARF_LABEL"])

Counter({0: 12806, 1: 1368})

In [25]:
# Read icustays_MV.csv into a DataFrame.
# NOTE(review): absolute, user-specific path — consider a configurable data
# directory instead.
icustays_mv = pd.read_csv("/home/shahad.hardan/Documents/research_tf/preprocessed_fiddle_3/prep/icustays_MV.csv")

In [26]:
# (rows, columns) of the icustays_mv table.
icustays_mv.shape

(23620, 13)

In [28]:
# Count the distinct SUBJECT_ID values in icustays_mv.
np.unique(icustays_mv["SUBJECT_ID"]).size

17710