In [None]:
import pandas as pd
import numpy as np
# Load the metadata table that indexes the pickled scans and label masks.
# Later cells read the columns `subject`, `file`, `set`, `femur_l`, `femur_r`,
# `bladder` and `prostate` from this frame.
# NOTE(review): the path is an empty string, which is not a readable file —
# presumably the real location was stripped; set it before running.
df = pd.read_csv('')

In [None]:
# Sorted array of the `file` entries recorded for subject 508.
subject_508_rows = df["subject"] == 508
np.sort(df.loc[subject_508_rows, "file"])

In [None]:
# Display the row whose index label is 0 as a quick look at a single record.
df.loc[0]

In [None]:
# Number of distinct subjects among the rows whose `set` column is 'train'.
is_train = df["set"] == "train"
len(df.loc[is_train, "subject"].unique())

In [None]:
# Randomly order the candidate slice indices and the training subjects.
# A later cell pairs the two arrays element-wise to pick one slice per subject.
#
# A seeded, local Generator is used so that the selection (and therefore the
# saved figures and the reshuffled CSV) is identical after Restart & Run All,
# without touching NumPy's global random state.
SEED = 0
N_SLICES = 23  # slice indices run from 0 to N_SLICES - 1
rng = np.random.default_rng(SEED)

# Random permutation of 0 .. N_SLICES - 1.
slices = rng.permutation(N_SLICES)
# Shuffled copy of the unique subject ids in the training split.
pats = rng.permutation(np.array(df.loc[df.set == 'train'].subject.unique()))

In [None]:
# Build the CT scan path for every (subject, slice) pair.
# zip() stops at the shorter of `pats` and `slices`, so that bounds the list length.
files = [
    'scans/img_pat_{}_ct_slice_{}.pkl'.format(pat, _slice)
    for pat, _slice in zip(pats, slices)
]

In [None]:
import matplotlib.pyplot as plt
import pickle
# Show every selected scan in grey scale on a 4 x 6 grid.
# Panels beyond len(files) are left empty.
fig, axs = plt.subplots(4, 6, figsize=(22, 15))
panels = axs.flatten()
for i, file in enumerate(files):
    # NOTE: pickle.load can execute arbitrary code; only open trusted files.
    with open(file, 'rb') as f:
        img = pickle.load(f)
    panel = panels[i]
    panel.imshow(img, cmap='gray')
    # `pats` and `slices` are index-aligned with `files`.
    panel.set_title('patient {} slice {}'.format(pats[i], slices[i]))

In [None]:
# One RGB triple per overlay class. The overlay cells multiply this table with
# masks ordered as (merged femurs, bladder, prostate).
# Shape (3, 3, 1, 1): class x RGB channel x two singleton axes that broadcast
# over the image height and width.
colours = np.array([
    [0., 0., 1.],
    [0.13, 0.4, 0.],
    [1., .1, .1],
]).reshape(3, 3, 1, 1)


# Overlay the colour-coded segmentation masks on every selected scan (6 x 4 grid).
fig, axs = plt.subplots(6, 4, figsize=(15, 20))
for i, file in enumerate(files):
    ax = axs.flatten()[i]
    # NOTE: pickle.load can execute arbitrary code; only open trusted files.
    with open(file, 'rb') as f:
        img = pickle.load(f)
    # img preprocessing: replicate the scan into three channels (channels first)
    # and shift/scale it — assumes intensities lie in [-1, 1], TODO confirm.
    img = np.stack([img] * 3, 0)
    img = (img + 1.) / 2.
    # Load the four masks and stack them once; stacking a list avoids growing an
    # array inside the loop and works for any mask size, not only 128 x 128.
    # NOTE(review): unlike the scan paths, these paths have no 'scans/' prefix —
    # confirm the masks really live in the working directory.
    masks = []
    for label in ['femur_l', 'femur_r', 'bladder', 'prostate']:
        with open('img_pat_{}_{}_slice_{}.pkl'.format(pats[i], label, slices[i]), 'rb') as f:
            masks.append(pickle.load(f))
    labs = np.stack(masks, 0).astype(np.float64)
    # Merge left and right femur into one class -> (femur, bladder, prostate).
    labs = np.stack([labs[0] + labs[1], labs[2], labs[3]], 0)
    # Add an RGB axis so each class mask can be multiplied by its colour.
    labs = np.stack([labs] * 3, 1)
    clabel = (labs * colours).sum(0)
    # Blend, move channels last for imshow, and clip explicitly: imshow would
    # otherwise clip float RGB values outside [0, 1] itself and emit a warning.
    img_n_label = np.clip(np.moveaxis(img * .5 + clabel, 0, -1), 0., 1.)
    ax.imshow(img_n_label)
    ax.set_title('patient {} slice {}'.format(pats[i], slices[i]))

In [None]:
# For each selected scan file, find the index label of its row in `df`.
# `.item()` raises unless exactly one row matches the file name.
new_idx = [df.loc[df.file == file].index.item() for file in files]

In [None]:
# Show the first rows of the original frame, before any reordering.
df.head()

In [None]:
# Build the new row order: each selected label is moved to the front, in
# selection order, while the remaining labels keep their relative order.
old_idx = df.index.tolist()
for position, label in enumerate(new_idx):
    # Take the label out of its current place and re-insert it at `position`.
    old_idx.insert(position, old_idx.pop(old_idx.index(label)))

In [None]:
# Reorder the rows of `df` by label so the selected scans come first;
# the original index labels are still attached at this point.
reshuffled_df = df.loc[old_idx]

In [None]:
# Renumber the rows 0..n-1 in their new order and discard the old labels.
# `drop=True` never materialises the old index as a column, so nothing has to
# be dropped by name afterwards (the name would only be 'index' for an unnamed
# index on a frame without an existing 'index' column). Rebinding instead of
# `inplace=True` keeps the cell safe to re-run.
reshuffled_df = reshuffled_df.reset_index(drop=True)

In [None]:
# Show the first rows of the reordered frame after the index reset.
reshuffled_df.head()

In [None]:
# Grey-scale grid of the first 24 rows of the reshuffled frame, saved as a PNG.
fig, axs = plt.subplots(4, 6, figsize=(22, 15))
for i, ax in enumerate(axs.flatten()):
    file = reshuffled_df.loc[i, 'file']
    # NOTE: pickle.load can execute arbitrary code; only open trusted files.
    with open(file, 'rb') as f:
        img = pickle.load(f)
    ax.imshow(img, cmap='gray')
    pat = reshuffled_df.loc[i, 'subject']
    # The slice number is the text between '_slice_' and the file extension.
    # It is kept under a name that does not shadow the built-in `slice`, which
    # would otherwise stay overwritten for the rest of the kernel session.
    slice_id = file.rsplit('_slice_')[1].rsplit('.')[0]
    ax.set_title('k: {} -- patient {} slice {}'.format(i, pat, slice_id))
plt.tight_layout()
plt.savefig('first_k_pats.png')

In [None]:
# Overlay grid for the first 24 rows of the reshuffled frame, saved as a PNG.
fig, axs = plt.subplots(4, 6, figsize=(22, 15))
for i, ax in enumerate(axs.flatten()):
    file = reshuffled_df.loc[i, 'file']
    # NOTE: pickle.load can execute arbitrary code; only open trusted files.
    with open(file, 'rb') as f:
        img = pickle.load(f)
    # img preprocessing: replicate the scan into three channels (channels first)
    # and shift/scale it — assumes intensities lie in [-1, 1], TODO confirm.
    img = np.stack([img] * 3, 0)
    img = (img + 1.) / 2.
    # Load the four masks from the paths stored in the frame and stack them
    # once; this works for any mask size, not only 128 x 128.
    masks = []
    for label in ['femur_l', 'femur_r', 'bladder', 'prostate']:
        with open(reshuffled_df.loc[i, label], 'rb') as f:
            masks.append(pickle.load(f))
    labs = np.stack(masks, 0).astype(np.float64)
    # Merge left and right femur into one class -> (femur, bladder, prostate).
    labs = np.stack([labs[0] + labs[1], labs[2], labs[3]], 0)
    # Add an RGB axis so each class mask can be multiplied by its colour.
    labs = np.stack([labs] * 3, 1)
    clabel = (labs * colours).sum(0)
    # Blend, move channels last for imshow, and clip explicitly: imshow would
    # otherwise clip float RGB values outside [0, 1] itself and emit a warning.
    img_n_label = np.clip(np.moveaxis(img * .5 + clabel, 0, -1), 0., 1.)
    ax.imshow(img_n_label)
    pat = reshuffled_df.loc[i, 'subject']
    # Named so that the built-in `slice` is not shadowed.
    slice_id = file.rsplit('_slice_')[1].rsplit('.')[0]
    ax.set_title('k: {} -- patient {} slice {}'.format(i, pat, slice_id))
plt.tight_layout()
plt.savefig('first_k_pat_with_seg.png')


In [None]:
# Same overlay grid as the previous figure, but with the first 24 rows shown
# in ascending order of their slice number.
fig, axs = plt.subplots(4, 6, figsize=(22, 15))
n_pats = 4 * 6
# Slice number parsed from each file name. Stored under its own name so the
# `slices` permutation created earlier in the notebook is not overwritten.
slice_numbers = [int(file.rsplit('_slice_')[1].rsplit('.')[0]) for file in reshuffled_df.file[:n_pats]]
# Row positions of the first `n_pats` rows, ordered by slice number.
idx = np.argsort(np.array(slice_numbers))
for i, ax in enumerate(axs.flatten()):
    row = idx[i]
    file = reshuffled_df.loc[row, 'file']
    # NOTE: pickle.load can execute arbitrary code; only open trusted files.
    with open(file, 'rb') as f:
        img = pickle.load(f)
    # img preprocessing: replicate the scan into three channels (channels first)
    # and shift/scale it — assumes intensities lie in [-1, 1], TODO confirm.
    img = np.stack([img] * 3, 0)
    img = (img + 1.) / 2.
    # Load the four masks of the same row and stack them once; this works for
    # any mask size, not only 128 x 128.
    masks = []
    for label in ['femur_l', 'femur_r', 'bladder', 'prostate']:
        with open(reshuffled_df.loc[row, label], 'rb') as f:
            masks.append(pickle.load(f))
    labs = np.stack(masks, 0).astype(np.float64)
    # Merge left and right femur into one class -> (femur, bladder, prostate).
    labs = np.stack([labs[0] + labs[1], labs[2], labs[3]], 0)
    # Add an RGB axis so each class mask can be multiplied by its colour.
    labs = np.stack([labs] * 3, 1)
    clabel = (labs * colours).sum(0)
    # Blend, move channels last for imshow, and clip explicitly: imshow would
    # otherwise clip float RGB values outside [0, 1] itself and emit a warning.
    img_n_label = np.clip(np.moveaxis(img * .5 + clabel, 0, -1), 0., 1.)
    ax.imshow(img_n_label)
    # Read the patient from the same (sorted) row as the image; using the panel
    # counter `i` here would label the panel with a different row's patient.
    pat = reshuffled_df.loc[row, 'subject']
    # Named so that the built-in `slice` is not shadowed.
    slice_id = file.rsplit('_slice_')[1].rsplit('.')[0]
    ax.set_title('patient {} slice {}'.format(pat, slice_id))
plt.tight_layout()
plt.savefig('first_k_pat_with_seg_ordered.png')
plt.show()

In [None]:
# Write the reordered table back to CSV without the row index.
# NOTE(review): the output path is an empty string — presumably the real
# destination was stripped; set it before running.
reshuffled_df.to_csv('', index=False)