In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
# Enable imports from src
import sys
PROJ_ROOT = os.path.abspath('../')
sys.path.append(os.path.join(PROJ_ROOT, 'src'))
import glob
import h5py
import tqdm
import misc

%matplotlib inline

In [2]:
data_path = os.path.join(PROJ_ROOT, 'data/raw/Ultrasound_high_fps/train/*/*.h5')
h5files = glob.glob(data_path)
len(h5files)

252

In [3]:
data = h5py.File(h5files[6])

In [4]:
data['tissue/data'].shape

(368, 260, 139)

In [5]:
video = np.transpose(data['tissue/data'], [2, 1, 0])
fps = 1 / (data['tissue/times'][5] - data['tissue/times'][4])

In [6]:
misc.ultraSoundAnimation(video, fps=fps)

In [7]:
data.close()

In [8]:
enhanced = 1 / (1 + np.exp(-((video / 255.) - 0.6) * .7))
misc.ultraSoundAnimation(enhanced, fps=fps)

In [9]:
pal_txt = open('../../pal.txt')
line = pal_txt.readline()[:-1]
pal = np.array([int(val) for val in line.split(',')])

In [10]:
pal_enhanced = np.array([[[pal[int(video[i, j, k])] for k in range(video.shape[2])] for j in range(video.shape[1])] for i in range(video.shape[0])])

In [11]:
misc.ultraSoundAnimation(pal_enhanced, fps=fps)

In [3]:
shapes = []
for h5file in h5files:
    with h5py.File(h5file) as data:
        shapes.append(data['tissue/data'].shape)

In [5]:
max_len = min(shapes, key=lambda x: x[2])[2]
max_height = min(shapes, key=lambda x: x[1])[1]
max_width = min(shapes, key=lambda x: x[0])[0]
print(max_len, max_height, max_width)

58 180 255


In [14]:
sum([x[2] for x in shapes])

33563

In [15]:
len(shapes)

252

In [16]:
i = 0
for h5file in h5files:
    with h5py.File(h5file) as data:
        print(i)
        i += data['tissue/data'].shape[2]

0
150
295
418
554
687
810
949
1079
1220
1368
1549
1716
1853
1990
2128
2213
2304
2392
2484
2579
2676
2849
3015
3184
3352
3529
3710
3880
4041
4206
4325
4439
4555
4704
4834
4967
5108
5244
5382
5491
5595
5697
5830
5956
6134
6326
6492
6728
6831
6950
7074
7187
7301
7416
7532
7647
7769
7870
8001
8149
8313
8473
8606
8713
8835
8934
8992
9050
9108
9273
9430
9593
9697
9803
9913
10038
10177
10311
10407
10507
10607
10716
10820
10930
11012
11093
11186
11269
11346
11411
11509
11591
11673
11803
11932
12042
12177
12285
12396
12479
12590
12660
12791
12918
13044
13191
13312
13442
13547
13654
13761
13886
14007
14142
14338
14495
14659
14843
15025
15174
15297
15412
15529
15614
15698
15793
15925
16056
16189
16312
16438
16554
16681
16803
16942
17129
17316
17506
17608
17710
17817
17921
18002
18091
18209
18329
18447
18603
18757
18914
18987
19052
19111
19208
19308
19402
19483
19561
19638
19802
19910
20064
20182
20297
20410
20523
20636
20758
20863
20970
21078
21178
21286
21385
21485
21585
21680
21880
22070
22263


In [17]:
h5_idc = list(range(0, len(h5files)))
np.random.shuffle(h5_idc)

In [18]:
train_num = int(round(0.8 * len(h5files)))

In [19]:
train_num

202

In [20]:
train_idc = h5_idc[:train_num]
val_idc = h5_idc[train_num:]

In [21]:
max_train_width = max([shapes[i][0] for i in train_idc])
max_train_height = max([shapes[i][1] for i in train_idc])
num_train_frames = sum([(shapes[i][2] - 1) for i in train_idc])

In [22]:
num_train_frames

26807

In [23]:
train_path = os.path.join(PROJ_ROOT, 'data/processed/train.h5')
train_data = h5py.File(train_path, 'w')

In [24]:
train_data.create_dataset('fixed', shape=(num_train_frames, max_train_height, max_train_width, 1))
train_data.create_dataset('moving', shape=(num_train_frames, max_train_height, max_train_width, 1))

<HDF5 dataset "moving": shape (26807, 380, 537, 1), type "<f4">

In [25]:
train_data['fixed'][3:5].shape

(2, 380, 537, 1)

In [26]:
frame_idx = 0

for i in tqdm.tqdm_notebook(train_idc):
    with h5py.File(h5files[i], 'r') as data:
        video = np.transpose(data['tissue/data'], [2, 1, 0])
        height = video.shape[1]
        width = video.shape[2]
        num_frames = video.shape[0]
        video = np.array([[[float(pal[int(video[i, j, k])])
                                   for k in range(video.shape[2])] 
                                  for j in range(video.shape[1])] 
                                 for i in range(video.shape[0])])
        h_diff = max_train_height - height
        w_diff = max_train_width - width
        if h_diff % 2 == 0:
            h_add_one = 0
        else:
            h_add_one = 1
        if w_diff % 2 == 0:
            w_add_one = 0
        else:
            w_add_one = 1
            
        video = np.pad(video, [[0, 0], 
                               [h_diff // 2, h_diff // 2 + h_add_one], 
                               [w_diff // 2, w_diff // 2 + w_add_one]],
                      mode='constant')
        video /= 255.
        
        fixed = video[:-1, :, :, None]
        moving = video[1:, :, :, None]
        # print(frame_idx, num_frames, fixed.shape)
        
        train_data['fixed'][frame_idx:frame_idx + num_frames - 1] = fixed
        print(train_data['fixed'][frame_idx:frame_idx + num_frames - 1].shape[0], fixed.shape[0],
             train_data['moving'][frame_idx:frame_idx + num_frames - 1].shape[0], moving.shape[0])
        train_data['moving'][frame_idx:frame_idx + num_frames - 1] = moving
        frame_idx += (num_frames - 1)

HBox(children=(IntProgress(value=0, max=202), HTML(value='')))

80 80 80 80
171 171 171 171
183 183 183 183
186 186 186 186
134 134 134 134
99 99 99 99
165 165 165 165
125 125 125 125
164 164 164 164
79 79 79 79
205 205 205 205
92 92 92 92
80 80 80 80
138 138 138 138
124 124 124 124
82 82 82 82
96 96 96 96
132 132 132 132
107 107 107 107
106 106 106 106
181 181 181 181
112 112 112 112
163 163 163 163
149 149 149 149
122 122 122 122
211 211 211 211
129 129 129 129
155 155 155 155
103 103 103 103
57 57 57 57
99 99 99 99
152 152 152 152
119 119 119 119
156 156 156 156
177 177 177 177
154 154 154 154
103 103 103 103
159 159 159 159
121 121 121 121
129 129 129 129
101 101 101 101
81 81 81 81
115 115 115 115
121 121 121 121
88 88 88 88
76 76 76 76
108 108 108 108
205 205 205 205
136 136 136 136
177 177 177 177
109 109 109 109
101 101 101 101
164 164 164 164
147 147 147 147
122 122 122 122
106 106 106 106
114 114 114 114
103 103 103 103
191 191 191 191
113 113 113 113
130 130 130 130
107 107 107 107
91 91 91 91
84 84 84 84
149 149 149 149
57 57 57 57
123 

In [27]:
train_data.close()

In [28]:
max_val_width = max([shapes[i][0] for i in val_idc])
max_val_height = max([shapes[i][1] for i in val_idc])
num_val_frames = sum([(shapes[i][2] - 1) for i in val_idc])

In [29]:
val_path = os.path.join(PROJ_ROOT, 'data/processed/val.h5')
val_data = h5py.File(val_path, 'w')

In [30]:
val_data.create_dataset('fixed', shape=(num_val_frames, max_val_height, max_val_width, 1))
val_data.create_dataset('moving', shape=(num_val_frames, max_val_height, max_val_width, 1))

frame_idx = 0
for i in tqdm.tqdm_notebook(val_idc):
    with h5py.File(h5files[i], 'r') as data:
        video = np.transpose(data['tissue/data'], [2, 1, 0])
        height = video.shape[1]
        width = video.shape[2]
        num_frames = video.shape[0]
        video = np.array([[[float(pal[int(video[i, j, k])])
                                    for k in range(video.shape[2])] 
                                   for j in range(video.shape[1])] 
                                  for i in range(video.shape[0])])
        h_diff = max_val_height - height
        w_diff = max_val_width - width
        if h_diff % 2 == 0:
            h_add_one = 0
        else:
            h_add_one = 1
        if w_diff % 2 == 0:
            w_add_one = 0
        else:
            w_add_one = 1
            
        video = np.pad(video, [[0, 0], 
                               [h_diff // 2, h_diff // 2 + h_add_one], 
                               [w_diff // 2, w_diff // 2 + w_add_one]],
                      mode='constant')
        video /= 255.
        
        fixed = video[:-1, :, :, None]
        moving = video[1:, :, :, None]
        
        # print(frame_idx, num_frames, fixed.shape)
        
        val_data['fixed'][frame_idx:frame_idx + num_frames - 1] = fixed
        val_data['moving'][frame_idx:frame_idx + num_frames - 1] = moving
        frame_idx += (num_frames - 1)

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))




In [31]:
val_data.close()

In [None]:
with h5py.File(train_path) as data:
    for i in range(data['fixed'].shape[0]):
        if (data['fixed'][i, :, :, :] == 0.0).all():
            print(i)