In [45]:
from pathlib import Path

# Parent directory of the thesis data and image folders used in this notebook.
root = Path("/mnt/jbrockma")
# Both sub-directories are derived from `root`, so the base path is spelled out once.
data_root = root.joinpath("bachelor-thesis-data")
images_root = root.joinpath("bachelor-thesis-images")

In [46]:
from PIL import Image
import numpy as np
# Read one chest image into a NumPy array and inspect the resulting dtype.
with Image.open(images_root.joinpath("chest", "00000001_000.png")) as im:
    a = np.asarray(im)

a.dtype

dtype('uint8')

In [47]:
# Second sample image, loaded the same way so it can be stacked with `a` later.
with Image.open(images_root.joinpath("chest", "00000002_000.png")) as im:
    b = np.asarray(im)

b.dtype

dtype('uint8')

In [48]:
# Two small vectors and their stack along a new leading axis (toy example for np.stack).
x, y = np.array([1, 2, 3]), np.array([4, 5, 6])
z = np.stack([x, y], axis=0)

In [49]:
# Stacking three copies of the 2-D array `z` adds a new leading axis of length 3.
np.stack([z] * 3, axis=0)

array([[[1, 2, 3],
        [4, 5, 6]],

       [[1, 2, 3],
        [4, 5, 6]],

       [[1, 2, 3],
        [4, 5, 6]]])

In [50]:
# Same stacking applied to the two loaded images; the shape shows the new leading axis.
c = np.stack([a, b], axis=0)
c.shape

(2, 256, 256)

In [51]:
import pandas as pd

# Label table indexed by image file name; the preview shows 0/1 columns per finding.
# Built from `data_root` (defined alongside `root`) instead of re-spelling the
# "bachelor-thesis-data" directory, so the data location lives in one place only.
df = pd.read_csv(data_root / "chest-image-data.csv", index_col="file_name")
df.head()

Unnamed: 0_level_0,atelectasis,cardiomegaly,consolidation,edema,effusion,emphysema,fibrosis,hernia,infiltration,mass,nodule,pleural_thickening,pneumonia,pneumothorax
file_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
00000001_000.png,0,1,0,0,0,0,0,0,0,0,0,0,0,0
00000001_001.png,0,1,0,0,0,1,0,0,0,0,0,0,0,0
00000001_002.png,0,1,0,0,1,0,0,0,0,0,0,0,0,0
00000002_000.png,0,0,0,0,0,0,0,0,0,0,0,0,0,0
00000003_001.png,0,0,0,0,0,0,0,1,0,0,0,0,0,0


In [52]:
# Pull the label row of a single image out of the table by its file name.
row = df.loc["00000001_000.png", :]
print(row)

atelectasis           0
cardiomegaly          1
consolidation         0
edema                 0
effusion              0
emphysema             0
fibrosis              0
hernia                0
infiltration          0
mass                  0
nodule                0
pleural_thickening    0
pneumonia             0
pneumothorax          0
Name: 00000001_000.png, dtype: int64


In [53]:
# For comparison: first entry of the "test_labels" array in the local ChestMNIST archive.
mnist = np.load("/root/.medmnist/chestmnist.npz")
mnist["test_labels"][0, :]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=uint8)

In [54]:
# The same label row as a plain NumPy array, for comparison with the ChestMNIST vector.
np.asarray(row)

array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [55]:
# Stacking label vectors yields one row per image, i.e. a 2-D label matrix.
np.stack([row.to_numpy()] * 2, axis=0)

array([[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [56]:
from tqdm.notebook import tqdm
# Sanity check: look for the first test-list entry that has no row in the label table.
with open(root / "CXR8" / "test_list.txt") as f:
    # First pass only counts the lines so the progress bar knows its total.
    line_sum = len(f.readlines())
    f.seek(0)  # rewind for the actual membership pass
    for line in tqdm(f, total=line_sum):
        if line.strip() in df.index:
            continue
        # First file name without labels: report it and stop scanning.
        print(line.strip())
        break

# Marker printed after the scan, whether or not a missing entry was found.
print("hi")

  0%|          | 0/25596 [00:00<?, ?it/s]

hi


In [67]:
import concurrent.futures


# File names of the images to load, one per line of the CXR8 test list.
# Blank lines are skipped so they do not become bogus entries; the remaining
# lines are kept unmodified (callers still strip them).
with open(root / "CXR8" / "test_list.txt") as f:
    lines = [line for line in f if line.strip()]

lines_len = len(lines)

# Edge length of the square images stored in the arrays below.
WIDTH = HEIGHT = 256

# Destination arrays: one row per listed file. They are zero-initialised rather
# than created with np.empty, because rows whose image fails to load are never
# written and would otherwise contain arbitrary leftover memory.
test_images = np.zeros((lines_len, HEIGHT, WIDTH), dtype=np.uint8)
test_labels = np.zeros((lines_len, len(df.columns)), dtype=np.uint8)

chest_images_root = images_root / "chest"

def write_to_array(file_name, i):
    """Load one image and store its pixels and labels in row ``i``.

    Reads ``chest_images_root / file_name`` and writes the pixel array into
    ``test_images[i]`` and the row of ``df`` labelled ``file_name`` into
    ``test_labels[i]``.

    This is best effort: any ``Exception`` is caught and reported together with
    the file name and row index instead of being raised, so a single bad file
    does not abort a batch run.

    Args:
        file_name: Image file name; also used as the index label in ``df``.
        i: Destination row in ``test_images`` and ``test_labels``.

    Returns:
        True if both rows were written, False if an error was caught.
    """
    try:
        with Image.open(chest_images_root / file_name) as im:
            pixels = np.asarray(im)
        # Look the labels up before writing anything, so a missing label row
        # does not leave the image stored without its labels.
        labels = df.loc[file_name].to_numpy()
        test_images[i] = pixels
        test_labels[i] = labels
    except Exception as e:
        # Name the file and row: the bare exception text alone (e.g. a shape
        # mismatch) does not say which input caused it.
        print(f"{file_name!r} (row {i}): {type(e).__name__}: {e}")
        return False
    return True

# Fill the test arrays concurrently: one task per listed file, each writing row i.
with tqdm(total=lines_len) as pbar:
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Submit `write_to_array`, the loader defined in this notebook. The
        # previously referenced `grayscale_and_write_to_array` is not defined
        # anywhere here and raises NameError on a fresh kernel.
        futures = [
            executor.submit(write_to_array, line.strip(), i)
            for i, line in enumerate(lines)
        ]

        # Advance the bar from the main thread as tasks finish, instead of
        # from done-callbacks running on worker threads. The loop ends once
        # every future has completed, so no separate wait() is needed.
        for _ in concurrent.futures.as_completed(futures):
            pbar.update()

print(test_images[:5])
print(test_labels[:5])

  0%|          | 0/25596 [00:00<?, ?it/s]

[[[ 53  29  17 ...  13  14  11]
  [ 52  28  17 ...  13  14  11]
  [ 51  28  16 ...  13  14  12]
  ...
  [ 11  10   9 ... 102 173 198]
  [ 25  25  23 ... 104 177 201]
  [ 48  46  44 ... 107 181 203]]

 [[ 38  31  24 ... 126 124 116]
  [ 40  33  26 ... 117 113 105]
  [ 37  30  23 ...  90  88  83]
  ...
  [104 111 107 ...  89 143 173]
  [162 173 170 ...  92 144 172]
  [166 178 176 ...  86 133 158]]

 [[  0   0   0 ...   0   0   0]
  [  0   0   0 ...   0   0   0]
  [  0   0   0 ...   0   0   0]
  ...
  [  0   0   0 ...   0   0   0]
  [  0   0   0 ...   0   0   0]
  [  0   0   0 ...   0   0   0]]

 [[  0  25  40 ...  13  33  78]
  [  0  26  38 ...  10  26  64]
  [  0  30  37 ...  10  25  62]
  ...
  [144 118  90 ... 255 145   0]
  [162 137 107 ... 255 126   0]
  [185 159 132 ... 255 120   0]]

 [[  0   0   0 ...   0   0   0]
  [ 57  57  51 ...  57  57  53]
  [ 59  59  55 ...  62  62  57]
  ...
  [ 22  21  20 ...  41  40  33]
  [ 23  22  21 ...  44  41  36]
  [ 21  22  20 ...  41  39  34]]]
