In [None]:
# binary classification: crack vs no crack

# unzip data

In [None]:
import os
# ml.zip and the extracted ./ml tree are resolved relative to the working
# directory, so display it here. os.getcwd() replaces the `!cd` shell call,
# which only prints the directory on Windows; the former os.chdir("./") was a
# no-op and is dropped.
os.getcwd()

In [None]:
from zipfile import ZipFile

# opening the zip file in READ mode 
with ZipFile("ml.zip", 'r') as archive:
    # The handle is called `archive`, not `zip`: a `with ... as zip` at cell
    # level would rebind the builtin zip() for the rest of the session.
    # archive.printdir()  # uncomment to list the archive contents

    # extract everything into the current working directory
    print('Extracting all the files now...')
    archive.extractall()
    print('Done!')

In [None]:
#!ls
%ls

# check images

In [None]:
import pathlib
from PIL import Image
import pandas as pd

def _image_spec_row(im_path):
    """Return [path, mode, width, height] for a single image file."""
    # Imported locally under an explicit alias: a later cell runs
    # `from fastai.vision import *`, which rebinds the global name `Image`
    # to fastai's Image class and would break this helper on a re-run.
    from PIL import Image as PILImage

    # The context manager closes the file handle immediately; a bare
    # Image.open() leaves one handle open per inspected file.
    with PILImage.open(im_path) as img:
        width, height = img.size
        return [im_path, img.mode, width, height]


def get_image_spec(im_paths):
    """Collect colour mode and pixel size for every image path.

    im_paths: iterable of image paths (a generator such as Path.glob(...) is
        fine); the paths are processed in sorted order.

    Returns a DataFrame with one row per image and the columns
    'path', 'mode', 'width', 'height'. An empty input gives an empty frame
    with the same columns.
    """
    rows = [_image_spec_row(im_path) for im_path in sorted(im_paths)]
    return pd.DataFrame(rows, columns=['path','mode', 'width','height'])

In [None]:
# check image size

im_paths = pathlib.Path('./ml/').glob('*/*/*')
image_data = get_image_spec(im_paths)
image_data.shape

In [None]:
# create a df with unique width & height,
df = image_data.drop_duplicates(['width','height'])
df

In [None]:
image_data.drop_duplicates(['mode'])

# model building

In [None]:
# auto-updating
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import os
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt

import torch
import fastai
import time
from fastai.vision import *

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [None]:
# GradCAM
from fastai.vision import *
from fastai.callbacks.hooks import *
import scipy.ndimage

class GradCam():
    """Holds the Grad-CAM heat map and guided-backprop image(s) for one input.

    Instances are normally built with `from_interp` or `from_one_img`, which
    call `get_grad_heatmap`, and are drawn with `plot`.

    The attributes label2, prob2, hmap2 and xb_grad2 exist only when a second
    label was supplied; `plot` checks for them with hasattr().
    """
    @classmethod
    def from_interp(cls,learn,interp,img_idx,ds_type=DatasetType.Valid,include_label=False):
        """Build a GradCam for item `img_idx` using an interpretation object.

        learn: Learner whose model is passed to get_grad_heatmap
        interp: object providing .data, .preds and .pred_class
        img_idx: index of the item inside the chosen dataset
        ds_type: DatasetType.Valid or DatasetType.Test; any other value makes
            this method return None
        include_label: also compute results for the actual label when it
            differs from the prediction (forced to False for the test set)
        """
        # produce heatmap and xb_grad for pred label (and actual label if include_label is True)
        if ds_type == DatasetType.Valid:
            ds = interp.data.valid_ds
        elif ds_type == DatasetType.Test:
            ds = interp.data.test_ds
            include_label=False
        else:
            return None
        
        x_img = ds.x[img_idx]
        xb,_ = interp.data.one_item(x_img)
        # de-normalised copy of the batch item, kept for display
        xb_img = Image(interp.data.denorm(xb)[0])
        probs = interp.preds[img_idx].numpy()

        pred_idx = interp.pred_class[img_idx].item() # get class idx of img prediction label
        hmap_pred,xb_grad_pred = get_grad_heatmap(learn,xb,pred_idx,size=xb_img.shape[-1])
        prob_pred = probs[pred_idx]
        
        actual_args=None
        if include_label:
            actual_idx = ds.y.items[img_idx] # get class idx of img actual label
            # a second set of results is only produced for a misclassified item
            if actual_idx!=pred_idx:
                hmap_actual,xb_grad_actual = get_grad_heatmap(learn,xb,actual_idx,size=xb_img.shape[-1])
                prob_actual = probs[actual_idx]
                actual_args=[interp.data.classes[actual_idx],prob_actual,hmap_actual,xb_grad_actual]
        
        return cls(xb_img,interp.data.classes[pred_idx],prob_pred,hmap_pred,xb_grad_pred,actual_args)
    
    @classmethod
    def from_one_img(cls,learn,x_img,label1=None,label2=None):
        '''
        Build a GradCam for a single image by running learn.predict on it.

        learn: fastai's Learner
        x_img: fastai.vision.image.Image
        label1: generate heatmap according to this label. If None (or otherwise
                falsy), the class predicted by the model is used
        label2: optionally generate an additional heatmap according to this label

        Labels are looked up with learn.data.classes.index(), so they must be
        entries of learn.data.classes.
        '''
        pred_class,pred_idx,probs = learn.predict(x_img)
        label1= str(pred_class) if not label1 else label1
        
        xb,_ = learn.data.one_item(x_img)
        # de-normalised copy of the batch item, kept for display
        xb_img = Image(learn.data.denorm(xb)[0])
        probs = probs.numpy()
        
        label1_idx = learn.data.classes.index(label1)
        hmap1,xb_grad1 = get_grad_heatmap(learn,xb,label1_idx,size=xb_img.shape[-1])
        prob1 = probs[label1_idx]
        
        label2_args = None
        if label2:
            label2_idx = learn.data.classes.index(label2)
            hmap2,xb_grad2 = get_grad_heatmap(learn,xb,label2_idx,size=xb_img.shape[-1])
            prob2 = probs[label2_idx]
            label2_args = [label2,prob2,hmap2,xb_grad2]
            
        return cls(xb_img,label1,prob1,hmap1,xb_grad1,label2_args)
    
    def __init__(self,xb_img,label1,prob1,hmap1,xb_grad1,label2_args=None):
        """Store the display image and the results for one or two labels.

        label2_args: optional [label, prob, hmap, xb_grad] list for a second
            label; when it is None/empty the *2 attributes are not created.
        """
        self.xb_img=xb_img
        self.label1,self.prob1,self.hmap1,self.xb_grad1 = label1,prob1,hmap1,xb_grad1
        if label2_args:
            self.label2,self.prob2,self.hmap2,self.xb_grad2 = label2_args
            
    def plot(self,plot_hm=True,plot_gbp=True):
        """Draw the image followed by the heat map and/or guided-backprop panels.

        plot_hm: include the heat-map overlay panel(s)
        plot_gbp: include the guided-backprop panel(s)
        If both flags are False the heat map is drawn anyway.
        """
        if not plot_hm and not plot_gbp:
            plot_hm=True
        # 1 image panel + 2 panels per label; one panel per label is dropped
        # when only one of the two views is requested
        cols = 5 if hasattr(self, 'label2') else 3
        if not plot_gbp or not plot_hm:
            cols-= 2 if hasattr(self, 'label2') else 1

        fig,row_axes = plt.subplots(1,cols,figsize=(cols*5,5))  
        col=0  # index of the next free axes; advanced after each panel
        size=self.xb_img.shape[-1]
        self.xb_img.show(row_axes[col]);col+=1
        
        label1_title = f'1.{self.label1} {self.prob1:.3f}'
        if plot_hm:
            show_heatmap(self.hmap1,self.xb_img,size,row_axes[col])
            row_axes[col].set_title(label1_title);col+=1
        if plot_gbp:
            row_axes[col].imshow(self.xb_grad1)
            row_axes[col].set_axis_off()
            row_axes[col].set_title(label1_title);col+=1
        
        if hasattr(self, 'label2'):
            label2_title = f'2.{self.label2} {self.prob2:.3f}'
            if plot_hm:
                show_heatmap(self.hmap2,self.xb_img,size,row_axes[col])
                row_axes[col].set_title(label2_title);col+=1
            if plot_gbp:
                row_axes[col].imshow(self.xb_grad2)
                row_axes[col].set_axis_off()
                row_axes[col].set_title(label2_title)
        # plt.tight_layout()
        fig.subplots_adjust(wspace=0, hspace=0)
        # fig.savefig('data_draw/both/gradcam.png')

def minmax_norm(x):
    """Rescale `x` linearly so that its minimum maps to 0 and its maximum to 1.

    A constant input (max == min) has no range to stretch; it is returned as
    all zeros instead of dividing 0 by 0 and filling the result with NaN.
    That case occurs for a heat map that is entirely zero after clamping.
    """
    lowest = np.min(x)
    span = np.max(x) - lowest
    # dividing by 1 keeps shape and dtype of the normal path; x - lowest is
    # already all zeros whenever span is 0
    return (x - lowest) / (span if span != 0 else 1)
def scaleup(x,size):
    """Upsample array `x` with scipy.ndimage.zoom.

    The zoom factor is size / x.shape[0]; it is derived from the first
    dimension only and passed to zoom as a single scalar.
    """
    zoom_factor = size / x.shape[0]
    return scipy.ndimage.zoom(x, zoom_factor)

# hook for Gradcam
def hooked_backward(m,xb,target_layer,clas):
    """Forward `xb` through `m` and back-propagate the score of class `clas`.

    Two hooks are attached to `target_layer` for the duration of the pass:
    one on its output and one (grad=True) on the gradient with respect to it.
    Only batch item 0 is back-propagated.

    Returns (hook_a, hook_g): the output hook and the gradient hook.
    """
    with hook_output(target_layer) as hook_a, hook_output(target_layer, grad=True) as hook_g:
        scores = m(xb)
        # backward on a single logit - same as one-hot backprop
        scores[0,int(clas)].backward()
    return hook_a,hook_g

def clamp_gradients_hook(module, grad_in, grad_out):
    """Backward hook that clamps negative input gradients to zero, in place.

    module: the hooked module (unused)
    grad_in: iterable of gradients w.r.t. the module inputs; entries may be None
    grad_out: gradients w.r.t. the module outputs (unused)

    The tensors in `grad_in` are modified in place; nothing is returned.
    """
    for grad in grad_in:
        # Backward hooks can receive None for inputs that carry no gradient;
        # torch.clamp_ raises on None, so those entries are skipped.
        if grad is not None:
            torch.clamp_(grad, min=0.0)
        
# hook for guided backprop
def hooked_ReLU(m,xb,clas):
    """Back-propagate the score of class `clas` with every ReLU's backward
    gradient clamped to >= 0 (guided backprop).

    m: model; xb: input batch (item 0 is back-propagated); clas: class index.
    Returns nothing - the caller reads the resulting gradient from xb.grad.
    """
    # Select the modules by type. The previous test compared str(module) with
    # "ReLU(inplace)", but newer PyTorch releases print "ReLU(inplace=True)",
    # so the comparison matched nothing and the clamping was silently skipped.
    # Selecting by type also covers ReLUs created without inplace=True.
    relu_modules = [module for module in m.modules() if isinstance(module, torch.nn.ReLU)]
    with callbacks.Hooks(relu_modules, clamp_gradients_hook, is_forward=False) as _:
        preds = m(xb)
        preds[0,int(clas)].backward()
        
def guided_backprop(learn,xb,y):
    """Return the guided-backprop gradient of class `y` w.r.t. the input.

    learn: Learner whose model (switched to eval mode) is used
    xb: input batch tensor; the caller's tensor is left untouched
    y: class index whose score is back-propagated

    Returns the gradient for batch item 0 as a numpy array.
    """
    m = learn.model.eval()
    # Follow the model's device instead of hard-coding .cuda(), so the
    # function also runs on a CPU-only machine.
    device = next(m.parameters()).device
    # detach() yields a fresh leaf tensor: requires_grad_ no longer mutates
    # the caller's tensor and there is no stale .grad to reset.
    xb = xb.detach().to(device).requires_grad_()
    hooked_ReLU(m,xb,y)
    return xb.grad[0].cpu().numpy()

def show_heatmap(hm,xb_im,size,ax=None):
    """Show image `xb_im` and overlay heat map `hm` on the same axes.

    hm: 2-D heat map, stretched over a size x size extent
    xb_im: object with a .show(ax) method that draws the underlying image
    size: side length of the overlay extent
    ax: axes to draw on; a new figure/axes pair is created when None
    """
    target_ax = plt.subplots()[1] if ax is None else ax
    xb_im.show(target_ax)
    overlay_opts = dict(alpha=0.8, extent=(0,size,size,0),
                        interpolation='bilinear', cmap='magma')
    target_ax.imshow(hm, **overlay_opts)

def get_grad_heatmap(learn,xb,y,size):
    '''
    Main function to get hmap for heatmap and xb_grad for guided backprop.

    learn: Learner; learn.model[0][-1][-1] is used as the Grad-CAM target layer
    xb: input batch tensor; only batch item 0 is used
    y: class index whose score is back-propagated
    size: side length the heat map is upsampled to before it is combined
          with the guided-backprop gradient

    Returns (hmap, xb_grad):
      hmap    - gradient-weighted sum of the target-layer activations with
                negative values set to 0, at the layer's own resolution
                (not upsampled, not normalised)
      xb_grad - min-max normalised guided-backprop gradient multiplied by the
                normalised, upsampled heat map, with the channel axis last
    '''
    m = learn.model.eval()
    # Run on whatever device the model lives on rather than assuming CUDA.
    xb = xb.to(next(m.parameters()).device)
    target_layer = m[0][-1][-1] # last layer of group 0
    hook_a,hook_g = hooked_backward(m,xb,target_layer,y)

    target_act= hook_a.stored[0].cpu().numpy()
    target_grad = hook_g.stored[0][0].cpu().numpy()

    # one weight per channel: the gradient averaged over both spatial axes
    mean_grad = target_grad.mean(1).mean(1)
    # channels are summed (an earlier variant averaged them with .mean(0))
    hmap = (target_act*mean_grad[...,None,None]).sum(0)
    hmap = np.where(hmap >= 0, hmap, 0)

    xb_grad = guided_backprop(learn,xb,y) # (3,224,224)
    # minmax norm the grad
    xb_grad = minmax_norm(xb_grad)
    hmap_scaleup = minmax_norm(scaleup(hmap,size)) # (224,224)

    # multiply xb_grad and hmap_scaleup and switch axis
    xb_grad = np.einsum('ijk, jk->jki',xb_grad, hmap_scaleup) #(224,224,3)

    return hmap,xb_grad

In [None]:
import itertools

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print the confusion matrix and draw it on the current pyplot figure.

    cm: 2-D array of counts, rows = actual class, columns = predicted class
    classes: tick labels, in the same order as the rows/columns of `cm`
    normalize: divide every row by its total so each row sums to 1;
        a row with no samples is left at 0 instead of becoming NaN
    title: figure title
    cmap: matplotlib colormap used for the cells
    """
    if normalize:
        cm = cm.astype('float')
        row_totals = cm.sum(axis=1, keepdims=True)
        # `where` skips rows whose total is 0; `out` supplies their value (0)
        cm = np.divide(cm, row_totals, out=np.zeros_like(cm), where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # cells above half of the maximum get white text for contrast
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('Actual')
    plt.xlabel('Predicted')
    # tight_layout runs last so that it also makes room for the axis labels
    plt.tight_layout()

In [None]:
## set params

# Set batch size of images 
bs = 32  #64

# set paths
# Resolved against the working directory - the same base the extraction cell
# (ml.zip -> ./ml) and the test cell ('./ml/test/') rely on - instead of a
# machine-specific absolute "D:/..." path. The paths stay absolute, as before,
# because model_save_path/<file> is handed to learn.export() further down.
data_root = Path.cwd()/"ml"
image_path = data_root/"train"
model_save_path = data_root/"models"
model_save_path.mkdir(parents=True, exist_ok=True)  # export target folder must exist

image_path.ls()


In [None]:
# numpy, torch, random, etc
def random_seed(seed_value, use_cuda):
    """Seed the Python, NumPy and PyTorch random generators.

    seed_value: integer seed applied to every generator
    use_cuda: also seed the CUDA generators of all devices

    Also switches cuDNN to deterministic mode and turns its autotuner off.
    """
    # Explicit import: no cell visible in this notebook imports `random`
    # directly, so the name was only available through a star import.
    import random

    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner can pick different kernels from run to run, which
    # defeats the seeding; PyTorch's reproducibility notes say to disable it.
    torch.backends.cudnn.benchmark = False

    if use_cuda: torch.cuda.manual_seed_all(seed_value)

# Set seed
#random_seed(0,False)
random_seed(0,True)

In [None]:
## load data
# fastai is automatically supposed to handle image format conversions- greyscale(L), RGB

# https://forums.fast.ai/t/how-to-load-images-as-grayscale/36895/6
# google: can fastai convert greyscale images to RGB

# Build the DataBunch from the folders under image_path.
# - valid_pct=0.20 with seed=0: 20% of the images are held out for validation;
#   the fixed seed is meant to keep that split the same between runs
# - ds_tfms=get_transforms(): fastai's default augmentation set
# - size=224, bs=bs: image size and batch size fed to the model
# - normalize(imagenet_stats): ImageNet statistics, since the learner built
#   below starts from a pretrained resnet34
data = ImageDataBunch.from_folder(image_path, 
                                  valid_pct=0.20,
                                  ds_tfms=get_transforms(), 
                                  size=224, 
                                  bs=bs, 
                                  num_workers=1,
                                  seed=0).normalize(imagenet_stats)

In [None]:
data

In [None]:
data.classes
data.c
len(data.train_ds)
len(data.valid_ds)

In [None]:
# count no. of examples in train & validation sets 
# train set
# np.bincount counts by class index, so position i always belongs to
# data.classes[i]. value_counts(sort=False) does not guarantee that order, and
# relabelling its index with data.classes could attach counts to the wrong class.
vc = pd.Series(np.bincount(data.train_ds.y.items, minlength=len(data.classes)),
               index=data.classes)
vc
# ratio

In [None]:
# val set
# np.bincount counts by class index, so position i always belongs to
# data.classes[i]. value_counts(sort=False) does not guarantee that order, and
# relabelling its index with data.classes could attach counts to the wrong class.
vc = pd.Series(np.bincount(data.valid_ds.y.items, minlength=len(data.classes)),
               index=data.classes)
vc
# ratio

In [None]:
data.show_batch(rows=3, figsize=(7,8))

# MODEL BUILDING: Stage 1

In [None]:
from torchvision.models import resnet34      #resnet50

# Metrics reported after every epoch: accuracy, precision and recall.
# NOTE(review): Precision()/Recall() are created with their defaults; in
# fastai v1 that is presumably binary averaging with class index 1 as the
# positive class - confirm against data.classes that this is the class the
# scores should describe (the crack class may well be index 0).
precision = Precision()
recall = Recall()
metrics = [accuracy,precision,recall]
# pretrained resnet34 backbone with the head cnn_learner adds for this data
learn = cnn_learner(data, resnet34, pretrained=True, metrics=metrics)

In [None]:
## trying to add other metrics
# https://forums.fast.ai/t/f1-score-as-metric/30370/26
# https://forums.fast.ai/t/precision-recall-understanding-averages/41019
# https://forums.fast.ai/t/understanding-metrics-and-callbacks/28172

# http://dev.fast.ai/metrics
# there is another page of metrics under docs, & definitions are different - why?


In [None]:
learn.model

In [None]:
learn.summary()

In [None]:
print(datetime.now())

In [None]:
# learn with default model + extra 1 layer
learn.fit_one_cycle(4)

In [None]:
print(datetime.now())

In [None]:
# save the basic Resnet34 model
learn.save('resnet34_epc4_val20_stage-1')  
learn.export(model_save_path/'resnet34_epc4_val20_stage-1.pkl')

####  model performance : stage 1

In [None]:
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix(figsize=(3,3), dpi=100)

In [None]:
interp.most_confused()

In [None]:
# plot images with the highest loss (biggest mistake)
interp.plot_top_losses(22, figsize=(18,18))
# interp.plot_top_losses(9,heatmap=True, figsize=(7,8))

In [None]:
# find wrongly predicted images
# https://forums.fast.ai/t/path-of-images-corresponding-to-top-losses/30506

#interp.top_losses(9)
# 22 validation items returned by interp.top_losses with their losses and
# dataset indices. These are the items the loss ranks highest, which is not
# the same thing as "wrongly predicted": a correct but unconfident prediction
# can appear here too.
losses,idxs = interp.top_losses(22)
# file paths of those items
data.valid_ds.x.items[idxs]

In [None]:
idxs

In [None]:
# run GradCAM on 1st image
# Take the index from `idxs` (top-loss cell above) instead of a hand-copied
# number, which goes stale whenever the split or the model changes.
top_idx = int(idxs[0])
x,y=data.valid_ds[top_idx]
x.show()
print(y)

img = x
gcam = GradCam.from_one_img(learn,img)
gcam.plot()

In [None]:
# 2nd image
# Second entry of `idxs` (top-loss cell above) rather than a hand-copied
# index, which goes stale whenever the split or the model changes.
x,y=data.valid_ds[int(idxs[1])]
print(y)
img = x
gcam = GradCam.from_one_img(learn,img)
gcam.plot()
# why is it not focusing on the region that has the crack?

##### model training: stage 2

In [None]:
## unfreezing & training all layers
learn.unfreeze()

In [None]:
learn.fit_one_cycle(4)

In [None]:
learn.recorder.plot_losses()

In [None]:
learn.export(model_save_path/'resnet34_epc4_val20_stage-2.pkl')
#learn.save(os.path.join(model_save_path,'/resnet34_nocrk6000_epc2_stage-2'))
learn.save('resnet34_epc4_val20_stage-2')

In [None]:
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix(figsize=(3,3), dpi=100)  # wrong predictions for crack reduce at the cost of no crack

In [None]:
learn.load('resnet34_epc4_val20_stage-1');

In [None]:
learn.unfreeze()
learn.fit_one_cycle(4)

In [None]:
learn.recorder.plot_losses()
# NOTE: this plot was observed to work when
#     i. the model name is passed to learn.load() in single quotes ('')
#    ii. the learn.load(...) statement ends with a semicolon (;)

In [None]:
# seeing doc string
doc(learn.recorder.plot_losses)

#### model training: stage 3

In [None]:
## unfreeze & train with changing learning rates
# we will re-load the previously built model (resnet34), & try some more epochs
# especially if stage 2 shows similar or slightly worse performance to previous model?

#learn.load("resnet34_epc4_val20_stage-1")
learn.load('resnet34_epc4_val20_stage-1');

In [None]:
learn.lr_find()

In [None]:
learn.recorder.plot(suggestion=True)

In [None]:
learn.unfreeze()
#learn.fit_one_cycle(4)
learn.fit_one_cycle(2, max_lr=slice(1e-6,1e-4))

In [None]:
learn.export(model_save_path/'resnet34_nocrk6000_epc2_stage-3.pkl')
#learn.save(os.path.join(model_save_path,'/resnet34_nocrk6000_epc2_stage-3'))
learn.save('resnet34_nocrk6000_epc2_stage-3')

In [None]:
## model interpretation

interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix(figsize=(3,3), dpi=100)  # wrong predictions for crack reduce at the cost of no crack

# CHECK PERFORMANCE ON NEW IMAGES

In [None]:
# level 3 model
#learn = load_learner(model_save_path, "resnet34_epc2_stage-1.pkl")
#learn = load_learner(model_save_path, "resnet34_nocrk6000_epc2_stage-3.pkl")

# level 1 model
learn.load('resnet34_epc4_val20_stage-1');

In [None]:
'''# predicting on a separate set, with labeled data
# this method can be used only if the test images are directly in the "test" directory


test_directory = '/content/gdrive/My Drive/Bridge_Crack_Image_Data-master/test/'
# need to use single quotes in the above command; double quote gave I/O error

images = os.listdir(test_directory)

from fastai.vision import image
pred = []

for i in images:
    img = image.open_image(test_directory+i)   # NameError: name 'image' is not defined  if image is not imported
    pred_class,pred_idx,outputs = learn.predict(img)
    pred.append(str(pred_class))
'''

In [None]:
# predicting on a separate set, with labeled data
# use this method if there are several folders withing test folder
test_directory = './ml/test/'   #'/content/gdrive/My Drive/Bridge_Crack_Image_Data-master/test/'

# Every file found anywhere below test_directory, as a joined path, in the
# order os.walk yields the folders.
images = [os.path.join(folder, fname)
          for folder, _subdirs, fnames in os.walk(test_directory)
          for fname in fnames]


In [None]:
images

In [None]:
from fastai.vision import image
pred = []  # predicted class name (str) per file, in the same order as `images`

# Timestamps around the whole loop; a later cell divides the elapsed time by
# len(pred) to get an average per image.
start_time = datetime.now()

for i in images:
    # `i` is already a joined path (built with os.walk above), so it is
    # opened as-is without prefixing test_directory
    img = image.open_image(i)
    pred_class,pred_idx,outputs = learn.predict(img)
    pred.append(str(pred_class))
# NOTE: `img` (the last image opened) stays in the namespace on purpose -
# the %timeit cell at the end of the notebook reuses it.

end_time = datetime.now()
#print(datetime.now())

In [None]:
print(start_time)
print(end_time)

In [None]:
#(end_time - start_time).total_seconds()/450
(end_time - start_time).total_seconds()/len(pred)

In [None]:
actual = []

for item in images:   # can be used if images are in sub-folders within test
    # Look only at the folder part below test_directory, so that neither the
    # test root nor a file name can contribute a stray 'no'/'crack' match.
    # Files lying directly in test_directory fall back to their file name.
    class_dir = os.path.relpath(os.path.dirname(item), test_directory)
    label_source = class_dir if class_dir != os.curdir else os.path.basename(item)

    # elif, not two separate ifs: 'no_crack' contains both 'no' and 'crack',
    # so independent tests appended two labels for one image and shifted
    # `actual` against `pred`.
    if 'no' in label_source:
        actual.append('no_crack')
    elif 'crack' in label_source:
        actual.append('crack')
    else:
        # silently skipping the file would misalign `actual` and `pred`
        raise ValueError(f"cannot derive a label from test image path: {item!r}")

In [None]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

class_names = ['crack','no_crack']
print(classification_report(actual, pred, target_names=class_names))

In [None]:
accuracy_score(actual, pred)

In [None]:
confusion_matrix(actual,pred,labels=class_names)

In [None]:
# labels=class_names fixes the row/column order (and the matrix size) to the
# tick labels handed to plot_confusion_matrix below, even when one class is
# missing from `actual` or `pred`.
cnf_matrix = confusion_matrix(actual, pred, labels=class_names)
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names,
                      title='Confusion matrix, without normalization')


# Checking prediction time

In [None]:
# how to check prediction time : how many images is this checking on?

%timeit pred_class,pred_idx,outputs = learn.predict(img)