This notebook is just me working my way through fast.ai v2 courses. 

Specifically:
* The medical imaging tutorial https://docs.fast.ai/tutorial.medical_imaging.html
* The bounding boxes tutorial: https://docs.fast.ai/tutorial.datablock.html#Bounding-boxes
* useful: https://github.com/muellerzr/Practical-Deep-Learning-for-Coders-2.0/blob/master/Computer%20Vision/06_Object_Detection.ipynb
* https://www.kaggle.com/muellerzr/fastai2-starter-kernel

Shout-out to this notebook, which did the same thing in fastai v1. I copied a bunch of code from it:
https://www.kaggle.com/robertlangdonvinci/vinbigdata-chest-abnormalities-detection-fastai
<br>(If you give me an upvote, please give him an upvote too...)

In [None]:
from fastai.basics import *
from fastai.callback.all import *
from fastai.vision.all import *
from fastai.medical.imaging import *

import pydicom
import matplotlib.image as immg
import matplotlib.pyplot as plt
import matplotlib.patches as patches

In [None]:
# Handy fast.ai function to pull all image file names under a folder into a list.
# The commented-out line is the alternative that reads the full-size DICOM files instead.
#items = get_dicom_files("../input/vinbigdata-chest-xray-abnormalities-detection/train") #full images
items = get_image_files('../input/vinbigdata-resized-image-512/train') #using the 512 images
items[0:5]

In [None]:
# Another handy fast.ai function: split the items randomly into a training and a validation part.
# NOTE(review): no seed is passed to RandomSplitter, so the split presumably differs between runs — confirm.
trn,val = RandomSplitter()(items)

In [None]:
#xray_sample.pixel_array, xray_sample.pixel_array.shape

In [None]:
#xray_sample.show()

In [None]:
%%time 
# Building the DICOM dataframe takes 7-8 minutes, so the code below is kept only
# as a disabled string literal (it does not run); load the result from a pickle instead.
'''dicom_dataframe = pd.DataFrame.from_dicoms(items, window=dicom_windows.lungs, px_summ=False)

dicom_dataframe.to_pickle('dicom_dataframe_pickle.pkl')
dicom_dataframe.shape'''

In [None]:
# Extracting the DICOM information takes a long time, so it was extracted once into a pickle for easy loading.
# NOTE(review): unpickling can execute arbitrary code — only load pickle files that come from a trusted source.
dicom_dataframe = pd.read_pickle('../input/vinbigdata-chest-xray-dicom-data-frame/dicom_dataframe_pickle.pkl')
dicom_dataframe.shape # should be 15k by 29

In [None]:
# Preview the first rows of the DICOM dataframe loaded from the pickle.
dicom_dataframe.head()

In [None]:
# Load the competition's training CSV and the metadata CSV of the resized images,
# and record the folders that hold the resized train/test images.
# NOTE(review): the `dim0`/`dim1` columns of train_meta.csv are used later to rescale y/x
# coordinates — presumably original image height/width; confirm against the dataset description.
df = pd.read_csv('../input/vinbigdata-chest-xray-abnormalities-detection/train.csv')
img_dim = pd.read_csv('../input/vinbigdata-resized-image-512/train_meta.csv')
tr_img_dir = Path('../input/vinbigdata-resized-image-512/train')
ts_img_dir = Path('../input/vinbigdata-resized-image-512/test')

In [None]:
# Left-join the per-image metadata onto the training rows via `image_id`,
# so every row of `df` is kept and gains the columns of `img_dim`.
tr_df = df.merge(img_dim,on='image_id',how='left')
tr_df.head()

In [None]:
# Create a df without class 14, the "no finding" class.
# `.copy()` makes `tr_df1` an independent frame, so later column assignments do not touch `tr_df`.
tr_df1 = tr_df[tr_df['class_id']!=14].copy()
tr_df1.head()

In [None]:
# Rescale bounding boxes from original-image pixels to the resized images.
# The source coordinates and dimensions are read from the untouched `tr_df` rows that
# share `tr_df1`'s index, so re-running this cell always yields the same result
# instead of shrinking the already-rescaled boxes a second time.
RESIZED_PX = 512  # side length of the resized images
_bbox_src = tr_df.loc[tr_df1.index]
# x coordinates are scaled by `dim1`, y coordinates by `dim0`.
tr_df1['x_min'] = _bbox_src['x_min']*RESIZED_PX/_bbox_src['dim1']
tr_df1['x_max'] = _bbox_src['x_max']*RESIZED_PX/_bbox_src['dim1']
tr_df1['y_min'] = _bbox_src['y_min']*RESIZED_PX/_bbox_src['dim0']
tr_df1['y_max'] = _bbox_src['y_max']*RESIZED_PX/_bbox_src['dim0']

In [None]:
# Group the finding rows by image so that all boxes of one image can be fetched with `get_group`.
# Grouping by the bare column name (not a one-element list) keeps the group keys plain
# strings, so `df_grp.get_group('<image_id>')` works without the tuple-key deprecation
# that recent pandas versions raise for list-style keys.
df_grp = tr_df1.groupby('image_id')
# `head()` on a groupby returns the first rows of every group, not just of the frame.
df_grp.head()

In [None]:
# Take a look at all rows of one image and the different classes annotated in it.
df_grp.get_group('f8c4ffc718ece871a52ab5f63b04b41c')

In [None]:
# Draw one training image together with every bounding box annotated on it.
name = '9a5094b2563a1ef3ff50dc5c7ff71345'
b_fea = ['x_min', 'y_min', 'x_max', 'y_max']
loc = '../input/vinbigdata-resized-image-512/train/'+name+'.png'
aaa = df_grp.get_group(name)   # all annotation rows of this image
bbx = aaa.loc[:,b_fea]         # corner coordinates, one row per box
img = immg.imread(loc)         # pixel array of the image file
fig,ax = plt.subplots(figsize=(18,10))
ax.imshow(img,cmap='binary')

# One label and one red rectangle per annotation row.
for box, class_name in zip(bbx.values, aaa['class_name']):
    x_min, y_min, x_max, y_max = box
    rect = patches.Rectangle((x_min, y_min), x_max-x_min, y_max-y_min,
                             linewidth=1, edgecolor='r', facecolor='none')
    ax.text(x_min, y_min, class_name, verticalalignment='top', color='white', fontsize=12, weight='bold')
    ax.add_patch(rect)
plt.show()

# thanks again: https://www.kaggle.com/robertlangdonvinci/vinbigdata-chest-abnormalities-detection-fastai

In [None]:
# Collect the rows of class 14 (the "no finding" class) into their own independent frame.
nofinding_df = tr_df[tr_df['class_id']==14].copy()


In [None]:
# Preview the first rows of the findings-only frame `tr_df1`.
tr_df1.head()

In [None]:
# Replace missing box coordinates of the "no finding" rows with a dummy box from (0,0) to (1,1).
values = {'x_min': 0, 'y_min':0, 'x_max':1, 'y_max':1}
nofinding_df = nofinding_df.fillna(value=values)

# Stack the "no finding" rows on top of the finding rows.
frames = [nofinding_df, tr_df1]
tr_df2 = pd.concat(frames)
tr_df2.tail()
# Only this last expression is rendered as the cell output.
nofinding_df.head()

In [None]:
def get_lbl_img(train):
    """Build the ``{file name: [boxes, labels]}`` lookup used by the DataBlock getters.

    Parameters
    ----------
    train : DataFrame with the columns ``image_id``, ``class_id``, ``x_min``,
        ``y_min``, ``x_max`` and ``y_max`` (one row per annotated box).

    Returns
    -------
    dict mapping ``'<image_id>.png'`` to ``[boxes, labels]`` where ``boxes`` is a
    list of ``[x_min, y_min, x_max, y_max]`` lists (coordinates rounded to whole
    numbers) and ``labels`` is the list of integer class ids in the same order.
    Images appear in order of first occurrence in ``train``.

    The boxes are already corner coordinates, so no width/height conversion is
    needed. They are emitted in x1, y1, x2, y2 order, which is the convention
    fastai v2's bounding-box types document (x/y must not be swapped).
    """
    try:
        # `tqdm.auto` renders the notebook widget inside Jupyter and a text bar elsewhere.
        from tqdm.auto import tqdm
    except ImportError:
        # The progress bar is purely cosmetic; iterate silently without it.
        def tqdm(iterable, **kwargs):
            return iterable

    coord_cols = ['x_min', 'y_min', 'x_max', 'y_max']
    chest2bbox = {}
    # Scalar key + sort=False: group keys stay plain ids and keep first-seen order.
    per_image = train.groupby('image_id', sort=False)
    for image_id, rows in tqdm(per_image, total=per_image.ngroups):
        boxes = np.round(rows[coord_cols].to_numpy(dtype=float)).tolist()
        lbls = rows['class_id'].astype(int).tolist()
        chest2bbox[str(image_id)+'.png'] = [boxes, lbls]
    return chest2bbox

In [None]:
# Build the file name -> [boxes, labels] lookup from the combined frame (findings plus "no finding" rows).
chest2bbox = get_lbl_img(tr_df2)

In [None]:
# Download fast.ai's COCO_TINY sample and read its annotations into an
# image -> (boxes, labels) dict — presumably as a reference for the structure `chest2bbox` should have.
coco_source = untar_data(URLs.COCO_TINY)
images, lbl_bbox = get_annotations(coco_source/'train.json')
img2bbox = dict(zip(images, lbl_bbox))
# NOTE(review): this displays the whole dict as the cell output.
img2bbox

In [None]:
# Show only the first few entries: the dict holds one entry per image, and
# displaying all of it floods the notebook output.
dict(list(chest2bbox.items())[:3])

In [None]:
def get_chest_bboxes(o):
    "Boxes stored in `chest2bbox` for the image file `o` (looked up by `o.name`)."
    return chest2bbox[o.name][0]

def get_chest_labels(o):
    "Class ids stored in `chest2bbox` for the image file `o` (looked up by `o.name`)."
    return chest2bbox[o.name][1]

# Alternative getters keyed by bare file name; not used by the DataBlock below.
# The folder is wrapped in `Path` because `str / str` raises a TypeError.
getters = [lambda o: Path('../input/vinbigdata-resized-image-512/train')/o,
           lambda o: chest2bbox[o][0],
           lambda o: chest2bbox[o][1]]

# Image -> (boxes, box labels). `n_inp=1` marks only the image as model input;
# both box blocks are targets.
# Named functions (not lambdas) are used for `get_y` so the pipeline can be pickled,
# and the splitter is seeded so the validation set is the same on every run.
xray_dblk = DataBlock(blocks=(ImageBlock, BBoxBlock, BBoxLblBlock),
                      get_items=get_image_files,
                      splitter=RandomSplitter(seed=42),
                      get_y=[get_chest_bboxes, get_chest_labels],
                      item_tfms=Resize(128),
                      batch_tfms=aug_transforms(),
                      n_inp=1)

In [None]:
# Build the DataLoaders from the resized training folder and display up to 9 samples of a batch.
dls = xray_dblk.dataloaders('../input/vinbigdata-resized-image-512/train')
dls.show_batch(max_n=9)

## And the data loader is working!

The code blocks above are very messy; I will clean them up later.


## I'm having trouble working through the learner... 

Bounding boxes are a bit complex, so I have been trying to get the code from the tutorial below working, but with no luck so far. I hope to have some more time over the weekend (Feb 27-28) to get it running.

https://github.com/muellerzr/Practical-Deep-Learning-for-Coders-2.0/blob/master/Computer%20Vision/06_Object_Detection.ipynb

In [None]:
# Fetch the course repository and switch into its "Computer Vision" folder,
# presumably so that the following `from imports import *` cell finds that folder's `imports` module — confirm.
# The working directory stays changed until the later `%cd -` cell.
!git clone https://github.com/muellerzr/Practical-Deep-Learning-for-Coders-2.0.git
%cd "Practical-Deep-Learning-for-Coders-2.0/Computer Vision"

In [None]:
from imports import *

In [None]:
# Build a pretrained resnet34 body; it is passed to RetinaNet as the encoder below.
encoder = create_body(resnet34, pretrained=True)

In [None]:
# Display the class count that fastai derives from the DataLoaders.
get_c(dls) #how many classes

In [None]:
# Assemble the RetinaNet model from the encoder and the class count of the DataLoaders.
arch = RetinaNet(encoder, get_c(dls), final_bias=-4)

In [None]:
# Exploratory call: build a head with the arguments 124 and 4 and display it.
# NOTE(review): the result is not assigned or used anywhere in the visible notebook.
create_head(124, 4)

In [None]:
# Inspect the `smoothers` component of the RetinaNet model.
arch.smoothers

In [None]:
# Inspect the `classifier` component of the RetinaNet model.
arch.classifier

In [None]:
# Inspect the `box_regressor` component of the RetinaNet model.
arch.box_regressor

In [None]:
# Ratios and scales handed to the focal loss — presumably the anchor-box aspect ratios
# and relative anchor sizes; confirm against the RetinaNetFocalLoss implementation.
ratios = [1/2,1,2]
scales = [1,2**(-1/3), 2**(-2/3)]
crit = RetinaNetFocalLoss(scales=scales, ratios=ratios)

In [None]:
def _retinanet_split(m):
    "Parameter groups for the Learner: the encoder first, then all remaining RetinaNet parts together."
    rest = nn.Sequential(m.c5top6, m.p6top7, m.merges, m.smoothers, m.classifier, m.box_regressor)
    groups = L(m.encoder, rest)
    return groups.map(params)

In [None]:
# Combine data, model and loss into a Learner; `_retinanet_split` supplies the parameter groups.
learn = Learner(dls, arch, loss_func=crit, splitter=_retinanet_split)

In [None]:
# Freeze the learner — presumably everything except the last parameter group from `_retinanet_split`; confirm.
learn.freeze()

In [None]:
# Return to the previous working directory, undoing the earlier `%cd` into the cloned repository.
%cd -

In [None]:
# Training call kept for later (disabled for now):
#learn.fit_one_cycle(10, slice(1e-5, 1e-4))
# Run the learning-rate finder first.
learn.lr_find()