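# Cleaning the predictions dataframe & slicing out the targets
The predictions were saved to CSV as the string representations of tensors, so they have to be parsed back into Python lists before they can be used. There was probably a better way to save the predictions.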

### Import things

In [1]:
import ast
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tifffile as tiff
from PIL import Image, ImageDraw
from torchvision.transforms import ToPILImage

In [2]:
# make a new folder for the sliced targets
# exist_ok=True keeps the cell safe to re-run when the folder already exists;
# any missing parent folder is created as well. Unlike a bare `except: pass`,
# genuine failures (e.g. no write permission) are raised instead of hidden.
os.makedirs('../test/boxed', exist_ok=True)

### Load dataset

In [3]:
# read the raw predictions table from disk
df = pd.read_csv(filepath_or_buffer='../CSVs/test_preds.csv')

In [4]:
# the labels column is not needed, so leave it out
df = df.drop(columns=['labels'])

In [5]:
# preview the first five rows
df.head(n=5)

Unnamed: 0,filename,boxes,scores
0,26dc41664_0_0.tiff,"tensor([[351.8979, 595.6012, 353.0101, 595.978...","tensor([0.1850, 0.0802], device='cuda:0')"
1,26dc41664_0_10576.tiff,"tensor([[ 0.8213, 595.5236, 2.7035, 595.982...","tensor([0.2273, 0.2135], device='cuda:0')"
2,26dc41664_0_11237.tiff,"tensor([[ 6.0920, 595.8847, 10.6282, 595.998...","tensor([0.3000, 0.2998, 0.0830, 0.0738], devic..."
3,26dc41664_0_11898.tiff,"tensor([[ 0.9095, 595.4414, 2.7767, 596.000...","tensor([0.1307, 0.1217], device='cuda:0')"
4,26dc41664_0_12559.tiff,"tensor([[ 2.4880, 595.9504, 4.4031, 596.000...","tensor([0.3689, 0.0760, 0.0659], device='cuda:0')"


### Cleaning

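The boxes and scores columns hold the string representations of tensors (e.g. `"tensor([0.1850, 0.0802], device='cuda:0')"`).  
I need to turn them into Python lists: boxes into lists of `[xmin, ymin, xmax, ymax]` coordinates and scores into lists of floats.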

    1. split by '[', get rid of index 0
    2. join by '['
    3. split again by ']', get rid of last index
    4. join by ']'
    5. get rid of the spaces
    6. replace each '\n' with a single space
    7. parse the boxes strings into Python lists; split the scores strings by ',' and convert each piece to float

In [6]:
# steps 1 - 6 for boxes
def _unwrap_boxes(text):
    """Keep only what lies between the first '[' and the last ']' of `text`.

    All spaces are removed and every newline then becomes a single space.
    The result is '' when `text` has no '[' or no ']' after it.
    """
    inner = text.partition('[')[2].rpartition(']')[0]
    return inner.replace(' ', '').replace('\n', ' ')


df['boxes'] = df['boxes'].map(_unwrap_boxes)

In [7]:
# steps 1 - 6 for scores
def _unwrap_scores(text):
    """Return the part of `text` after its first '[' and before its last ']'.

    Spaces are stripped out and each newline is then turned into one space.
    Gives '' when either bracket is missing.
    """
    after_open = ''.join(text.split('[', 1)[1:])
    before_close = ''.join(after_open.rsplit(']', 1)[:-1])
    return before_close.replace(' ', '').replace('\n', ' ')


df['scores'] = df['scores'].map(_unwrap_scores)

In [8]:
# step 7 for boxes
# The strings come from a CSV file, so they are parsed with ast.literal_eval,
# which only accepts Python literals, instead of eval, which would execute
# any expression found in the file.
def _parse_boxes(text):
    """Turn a cleaned boxes string into a list; leave empty strings untouched.

    '[a,b,c,d], [e,f,g,h]' becomes a list of two 4-element lists, while a
    single '[a,b,c,d]' becomes one flat 4-element list.
    """
    if len(text) > 0:
        return list(ast.literal_eval(text))
    # rows without any box stay '' here
    return text


df['boxes'] = df['boxes'].map(_parse_boxes)

In [9]:
# keep only the rows whose boxes entry is not the empty string,
# then renumber the remaining rows from 0
has_boxes = df['boxes'] != ''
df = df.loc[has_boxes].reset_index(drop=True)

### Save the new clean predictions dataframe

In [10]:
# turn the strings in the scores column into lists of floats
def _to_float_list(text):
    """Split `text` on ',' and convert every piece to float."""
    return list(map(float, text.split(',')))


df['scores'] = df['scores'].map(_to_float_list)

In [11]:
# write the cleaned predictions to disk without the index column
df.to_csv(path_or_buf='../CSVs/preds_clean.csv', index=False)

### Making a new dataframe of just the targets

In [12]:
# preview the first five cleaned rows
df.head(n=5)

Unnamed: 0,filename,boxes,scores
0,26dc41664_0_0.tiff,"[[351.8979, 595.6012, 353.0101, 595.9781], [35...","[0.185, 0.0802]"
1,26dc41664_0_10576.tiff,"[[0.8213, 595.5236, 2.7035, 595.9822], [3.1453...","[0.2273, 0.2135]"
2,26dc41664_0_11237.tiff,"[[6.092, 595.8847, 10.6282, 595.9988], [2.6143...","[0.3, 0.2998, 0.083, 0.0738]"
3,26dc41664_0_11898.tiff,"[[0.9095, 595.4414, 2.7767, 596.0], [3.2004, 5...","[0.1307, 0.1217]"
4,26dc41664_0_12559.tiff,"[[2.488, 595.9504, 4.4031, 596.0], [0.8694, 59...","[0.3689, 0.076, 0.0659]"


In [13]:
# A detection is kept when its score, rounded to 4 decimals, is strictly
# above this value.
SCORE_THRESHOLD = 0.9

# one dict per kept detection; turned into a dataframe afterwards
new_df = []

# Iterate over every row of df instead of a hard-coded row count, so no row
# is skipped (and no missing row is looked up) when the number of
# predictions changes.
for _, row in df.iterrows():
    # read the file name by column label rather than by position
    img_name = row['filename']
    path = os.path.join('../test/images/slices', img_name)
    img_array = tiff.imread(path)

    scores = row['scores']
    for ele in range(len(scores)):
        score = np.round(scores[ele], decimals=4)

        # with exactly one score the boxes entry is used as the box itself;
        # otherwise it is indexed to get the box of this detection
        boxes = row['boxes'] if len(scores) == 1 else row['boxes'][ele]

        if score > SCORE_THRESHOLD:
            new_df.append({
                'filename': img_name,
                'img_size': img_array.shape,
                'target': ele,  # position of the detection within its image
                'xmin': boxes[0],
                'ymin': boxes[1],
                'xmax': boxes[2],
                'ymax': boxes[3],
                'boxes': boxes,
                'scores': score,
            })

In [14]:
# one row per kept detection
targets = pd.DataFrame(data=new_df)

### Add original filenames and original size columns

In [17]:
# every other entry among the first ten sorted names in ../test/images
# NOTE(review): this slice presumably picks the original .tiff images out of
# a folder that also holds other entries - confirm against the folder contents
test_dir_entries = sorted(os.listdir('../test/images'))
originals = test_dir_entries[0:10:2]

In [40]:
# record, for each original image, its name and its size
original_sizes = []
for name in originals:
    pixels = tiff.imread(os.path.join('../test/images/', name))

    # 5-D arrays are squeezed and their first remaining axis is moved last
    # (presumably channels-first -> channels-last; verify on the data)
    if pixels.ndim == 5:
        pixels = pixels.squeeze().transpose(1, 2, 0)

    original_sizes.append({
        # drop the last five characters of the name (e.g. '.tiff')
        'filename': name[:-5],
        # every axis except the last one
        'original_size': pixels.shape[:-1],
    })

In [43]:
# one row per original image: filename and original_size
real_sizes = pd.DataFrame(data=original_sizes)

In [51]:
# the first nine characters of each slice name are used as the key of the
# image it belongs to
targets['original_filename'] = [name[:9] for name in targets['filename']]

In [15]:
# preview the first ten targets
targets.head(n=10)

Unnamed: 0,filename,img_size,target,xmin,ymin,xmax,ymax,boxes,scores
0,26dc41664_10132_13881.tiff,"(596, 661)",0,307.4098,295.7395,469.9866,422.9715,"[307.4098, 295.7395, 469.9866, 422.9715]",0.9623
1,26dc41664_10132_15203.tiff,"(596, 661)",0,236.3699,88.3256,475.1046,274.9074,"[236.3699, 88.3256, 475.1046, 274.9074]",0.9496
2,26dc41664_10132_17847.tiff,"(596, 661)",0,215.0557,264.8056,408.7051,419.6268,"[215.0557, 264.8056, 408.7051, 419.6268]",0.9452
3,26dc41664_10132_18508.tiff,"(596, 661)",0,63.1179,457.6605,266.5054,595.5453,"[63.1179, 457.6605, 266.5054, 595.5453]",0.9865
4,26dc41664_10132_3966.tiff,"(596, 661)",0,530.9509,20.2528,661.0,172.5583,"[530.9509, 20.2528, 661.0, 172.5583]",0.9115
5,26dc41664_10728_15203.tiff,"(596, 661)",0,255.5544,429.3421,479.8551,594.9636,"[255.5544, 429.3421, 479.8551, 594.9636]",0.9689
6,26dc41664_10728_17186.tiff,"(596, 661)",0,216.397,334.6497,408.0965,491.6394,"[216.397, 334.6497, 408.0965, 491.6394]",0.9629
7,26dc41664_10728_17186.tiff,"(596, 661)",1,63.8716,168.7273,223.6911,315.7713,"[63.8716, 168.7273, 223.6911, 315.7713]",0.9319
8,26dc41664_11324_13220.tiff,"(596, 661)",0,266.8625,15.8819,485.0984,306.5435,"[266.8625, 15.8819, 485.0984, 306.5435]",0.9139
9,26dc41664_11324_14542.tiff,"(596, 661)",0,0.0,282.6719,186.9622,507.3418,"[0.0, 282.6719, 186.9622, 507.3418]",0.9116


In [60]:
# attach the original image size to every target (inner join, the pandas
# default); both frames have a 'filename' column, so the result carries
# them as filename_x (targets) and filename_y (real_sizes)
targets = targets.merge(
    real_sizes,
    how='inner',
    left_on='original_filename',
    right_on='filename',
)

In [62]:
# filename_y is not needed any more
targets = targets.drop(columns=['filename_y'])

In [70]:
# give the slice file name column its plain name back
targets = targets.rename({'filename_x': 'filename'}, axis='columns')

In [93]:
# write the targets table to disk without the index column
targets.to_csv(path_or_buf='../CSVs/targets.csv', index=False)

### Slice out targets and save the images

In [21]:
# Crop every target out of its slice image and save it under ../test/boxed.
for idx in range(len(targets)):
    # fetch the row once instead of once per field
    row = targets.loc[idx]
    img = row['filename']

    path = os.path.join('../test/images/slices', img)
    img_array = tiff.imread(path)

    # box corners are truncated to integer pixel indices
    xmin = int(row['xmin'])
    ymin = int(row['ymin'])
    xmax = int(row['xmax'])
    ymax = int(row['ymax'])

    # slice the array directly (rows are y, columns are x); converting it to
    # a PIL image and back to an array first is not needed
    crop = img_array[ymin:ymax, xmin:xmax]

    # NOTE(review): `img` still carries its extension, so files are named
    # '<slice name>.tiff_tgt_<idx>.tiff'. The name is left unchanged here
    # because other code may rely on it - confirm whether the extension
    # should be stripped from the middle of the name.
    Image.fromarray(crop).save(f'../test/boxed/{img}_tgt_{idx}.tiff')