## KERAS - EfficientNet Models
* https://github.com/qubvel/efficientnet

### Importing Libraries

In [1]:
from pathlib import Path
import numpy as np
from keras.preprocessing import image
import requests
import sys, os
import json
import pprint
import pandas as pd
from PIL import Image
import json
import cv2
import matplotlib.pyplot as plt
import image_preprocessing_library as lib
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
sys.path.append('..')

Using TensorFlow backend.


In [2]:
from keras.applications.imagenet_utils import decode_predictions
from efficientnet.keras import EfficientNetB0
from efficientnet.keras import EfficientNetB1
from efficientnet.keras import EfficientNetB2
from efficientnet.keras import EfficientNetB3
from efficientnet.keras import EfficientNetB4
from efficientnet.keras import EfficientNetB5
from efficientnet.keras import EfficientNetB6
from efficientnet.keras import EfficientNetB7
from efficientnet.keras import center_crop_and_resize, preprocess_input

### Models

In [3]:
# Pretrained EfficientNet models to evaluate, keyed by the model name that is later
# passed to get_model_id() and written to the results CSV.
# Every uncommented entry is instantiated with ImageNet weights when this cell runs;
# uncomment a line to include that variant in the run.
efn_models = {
#     "efficientnetb0" :  EfficientNetB0(weights='imagenet'),
#     "efficientnetb1" :  EfficientNetB1(weights='imagenet'),
#     "efficientnetb2" :  EfficientNetB2(weights='imagenet'),
#     "efficientnetb3" :  EfficientNetB3(weights='imagenet'),
#     "efficientnetb4" :  EfficientNetB4(weights='imagenet'),
#     "efficientnetb5" :  EfficientNetB5(weights='imagenet'),
#     "efficientnetb6" :  EfficientNetB6(weights='imagenet'),
    "efficientnetb7" :  EfficientNetB7(weights='imagenet')
}












Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.



### Pre Processing Sequence

In [4]:
# Pre-processing sequences to evaluate: sequence id -> ordered list of operation names.
# apply_cv_transformations() looks each name up in lib.dispatcher and applies the
# operations in list order. Commented-out entries are disabled for the current run;
# every uncommented entry is processed by run_all_model_inference().
pre_processing_seq_dict = {
#     "seq_0" : [], # for raw seq
#     "seq_1" : ["gray"],
#     "seq_2" : ["hsv"],
#     "seq_3" : ["sharpen"],
#    "seq_4" : ["gray", "bilateral_blur", "threshold_mean"],
    "seq_5" : ["gray", "bilateral_blur", "threshold_gaussian"],
    "seq_6" : ["gray", "bilateral_blur", "threshold_otsu"],
#     "seq_7" : ["median_blur"],
#     "seq_8" : ["gaussian_blur"],
#     "seq_9" : ["bilateral_blur"],
#     "seq_10" : ["fastnl_blur"],
#     "seq_11" : ["gray", "bilateral_blur", "threshold_otsu", "opening"],
#     "seq_12" : ["gray", "bilateral_blur", "threshold_otsu", "closing"],
#     "seq_13" : ["opening"],
#     "seq_14" : ["closing"],
#     "seq_15" : ["gray", "sobel"],
#     "seq_16" : ["gray", "laplacian"],
#     "seq_17" : ["gray", "canny"]
}

### Config/Map Files Load
* Change the paths below when running on the GPU machine.
* You may prefer to keep the config files/JSONs in the same directory as this notebook.

In [5]:
def _load_json_config(path):
    """Read the JSON file at ``path`` and return the decoded object."""
    # JSON text is UTF-8 by specification; pass the encoding explicitly so decoding
    # does not depend on the platform's locale default.
    with open(path, encoding="utf-8") as json_file:
        return json.load(json_file)


# imagenet <-> custom label map; get_pred_label_id() iterates it as
# {custom label name: collection of imagenet label names}
imagenet_label_map = _load_json_config('./config_jsons/imagenet_label_map.json')

# model id -> model name map (see get_model_id / get_model_name)
model_ids = _load_json_config('./config_jsons/model_ids_map.json')

# dictionary containing dataset_ids and dataset_desc
dataset_ids = _load_json_config('../../dataset/image_classification/dataset_id_map.json')

# label id -> label name map of the custom dataset (see get_label_id / get_label_name)
custom_dataset_labels = _load_json_config('./config_jsons/label_ids_map.json')

#### Consider parameterising the cell below later using papermill, or you may prefer to set the values manually

In [6]:
# Root directory of the dataset to evaluate. The code below treats every sub-directory
# directly under it as one label (see custom_labels / load_per_label_imgs_generator).
# Kept as a single variable so another dataset can be swapped in here later;
# the first experiments run on the raw dataset.
dataset_path = Path("../../dataset/image_classification/raw")
# CSV file that accumulates the per-image prediction rows of every inference run.
master_df_keras_path = "./experiment_results/final_results/master_keras_efficientnet.csv"

In [7]:
# Column names, in order, of the master results CSV; used both when building the
# per-label results DataFrame and when writing the CSV.
columns = ["model_id", "model_name", "seq_id", "seq_name", "image_name", "label_id", 
             "pred_label_id", "label_name","pred_label_name", "pred_confidence"]

In [8]:
# One-time initialisation of the master results CSV: write a header-only file when it
# does not exist yet, so that a fresh "Restart & Run All" does not fail on the first
# load_master_df_from_csv() call. An existing file (and the results already accumulated
# in it) is never overwritten.
if not os.path.exists(master_df_keras_path):
    _master_dir = os.path.dirname(master_df_keras_path)
    if _master_dir:
        os.makedirs(_master_dir, exist_ok=True)
    pd.DataFrame(columns=columns).to_csv(master_df_keras_path, index=False)

In [9]:
# Label names are the names of the sub-directories directly under dataset_path.
# Only the first os.walk() entry (the root itself) is needed, so take just that one
# instead of walking the whole dataset tree into a list.
_dataset_root_entry = next(os.walk(dataset_path), None)
if _dataset_root_entry is None:
    # os.walk() yields nothing for a missing/unreadable directory; fail with a clear message.
    raise FileNotFoundError("dataset directory not found or not readable: " + str(dataset_path))
custom_labels = _dataset_root_entry[1]

### Functions Definitions

#### OpenCV to PIL image conversion

In [10]:
def convert_to_pil_img(opencv_img):
    """Turn an OpenCV-style BGR array into a PIL image.

    A float64 array is first cast to uint8 with a plain ``astype`` (no rescaling or
    clipping is applied here). The channels are then reordered BGR -> RGB and the
    result is wrapped with ``Image.fromarray``.
    """
    bgr = opencv_img.astype(np.uint8) if opencv_img.dtype == 'float64' else opencv_img
    return Image.fromarray(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))

def convert_to_cv_img(pil_img):
    """Convert a PIL image into a 3-channel BGR numpy array for OpenCV.

    A PIL image whose mode is not "RGB" (e.g. grayscale "L", palette "P", "CMYK") is
    converted to RGB first: the RGB -> BGR reordering below is only meaningful for
    RGB-ordered channel data, and 2-D (single channel) arrays cannot be reordered at all.
    RGB images and non-PIL array-likes are passed through unchanged.

    Returns the array produced by ``cv2.cvtColor(..., cv2.COLOR_RGB2BGR)``.
    """
    if isinstance(pil_img, Image.Image) and pil_img.mode != "RGB":
        pil_img = pil_img.convert("RGB")
    rgb_arr = np.asarray(pil_img)
    return cv2.cvtColor(rgb_arr, cv2.COLOR_RGB2BGR)

In [11]:
def get_model_id(model_name):
    """Reverse lookup in ``model_ids``: return the first id whose name equals ``model_name``.

    When no entry matches, a "not found ..." message string is returned instead of an id.
    """
    _missing = object()
    found = next(
        (model_id for model_id, name in model_ids.items() if name == model_name),
        _missing,
    )
    if found is _missing:
        return "not found for model name: " + model_name
    return found

def get_model_name(id):
    """Return the model name stored under ``id`` in ``model_ids``.

    An unknown id yields a "not found ..." message string rather than an exception.
    """
    try:
        return model_ids[id]
    except KeyError:
        return "not found for model id: " + str(id)

def get_seq_operations(seq_id):
    """Return the list of operation names registered for ``seq_id``.

    Raises KeyError when ``seq_id`` is not a key of ``pre_processing_seq_dict``.
    """
    operations = pre_processing_seq_dict[seq_id]
    return operations

def get_seq_name(seq_id):
    """Return a readable name for a pre-processing sequence.

    The name is the sequence's operation names joined with " > "
    (e.g. "gray > bilateral_blur > threshold_otsu"); a sequence without
    operations gives an empty string.

    For an id that is not in ``pre_processing_seq_dict`` a "not found ..." message
    string is returned instead (same convention as the other lookup helpers).
    """
    if seq_id not in pre_processing_seq_dict:
        # str() keeps the message path from raising TypeError on non-string ids.
        return "not found for seq id: " + str(seq_id)
    return " > ".join(get_seq_operations(seq_id))

def get_label_id(label_name):
    """Reverse lookup in ``custom_dataset_labels``: first id whose name equals ``label_name``.

    Falls back to a "not found ..." message string when no label matches.
    """
    for label_id in custom_dataset_labels:
        if custom_dataset_labels[label_id] == label_name:
            return label_id
    return "not found for label: "+label_name

def get_label_name(id):
    """Return the label name stored under ``id`` in ``custom_dataset_labels``.

    An unknown id yields a "not found ..." message string rather than an exception.
    """
    fallback = "not found for id: " + str(id)
    return custom_dataset_labels.get(id, fallback)

def get_pred_label_id(pred_label_name):
    """Map a predicted ImageNet label name to the id of the matching custom label.

    The lower-cased prediction is tested with ``in`` against each custom label's entry
    in ``imagenet_label_map`` (in dict order); the first hit decides the custom label.
    Without a hit, the id of the custom label "others" is returned.
    """
    matched_label = next(
        (
            custom_label
            for custom_label, imagenet_labels in imagenet_label_map.items()
            if pred_label_name.lower() in imagenet_labels
        ),
        "others",
    )
    return get_label_id(matched_label)

def load_master_df_from_csv():
    """Read the master results CSV at ``master_df_keras_path`` into a DataFrame."""
    master_df = pd.read_csv(master_df_keras_path)
    return master_df

def dump_master_df_to_csv(master_df):
    """Overwrite the master results CSV with ``master_df``.

    Only the columns listed in ``columns`` are written, in that order, with a header
    row and without the DataFrame index.
    """
    master_df.to_csv(
        master_df_keras_path,
        index=False,
        header=True,
        columns=columns,
    )

def log_per_label_results_to_csv(model_name, label_name, pred_result_list, seq_id):
    """Append the predictions of one label to the master results CSV.

    Parameters
    ----------
    model_name : name of the model that produced the predictions (a key of ``efn_models``).
    label_name : ground-truth label shared by every image in ``pred_result_list``.
    pred_result_list : list of ``(image_name, pred_label_name, pred_confidence)`` tuples.
    seq_id : id of the pre-processing sequence that was applied to the images.

    One row per prediction is built, the current master CSV is loaded, the new rows
    are appended and the whole file is written back.
    """
    model_id = get_model_id(model_name)
    seq_name = get_seq_name(seq_id)
    label_id = get_label_id(label_name)

    # Rows are built as dicts keyed by column name so that a value can never end up
    # under the wrong header (a positional list previously swapped label_name and
    # pred_label_id relative to the order in `columns`).
    records = []
    for res in pred_result_list:
        pred_label_name = res[1]
        records.append({
            "model_id": model_id,
            "model_name": model_name,
            "seq_id": seq_id,
            "seq_name": seq_name,
            "image_name": res[0],
            "label_id": label_id,
            "pred_label_id": get_pred_label_id(pred_label_name),
            "label_name": label_name,
            "pred_label_name": pred_label_name,
            "pred_confidence": res[2],
        })

    current_label_df = pd.DataFrame(records, columns=columns)
    master_df = load_master_df_from_csv()
    # pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
    master_df = pd.concat([master_df, current_label_df], ignore_index=True)
    dump_master_df_to_csv(master_df)

In [12]:
def load_per_label_imgs_generator(seq_id):
    """Yield the pre-processed images of the dataset, one label directory at a time.

    Images are loaded per label (e.g. the whole "airplane" folder at once) rather than
    all together, to keep memory usage bounded.

    Parameters
    ----------
    seq_id : id of the pre-processing sequence applied to every image.

    Yields
    ------
    ``(label, [(image_name, image_array), ...])`` where ``image_array`` is the result
    of ``image.img_to_array`` on the pre-processed image.
    """
    for label in custom_labels:
        label_dir = dataset_path/label
        # Only the files directly inside the label directory are needed, so read the
        # first os.walk() entry instead of walking the whole sub-tree into a list.
        first_entry = next(os.walk(label_dir), None)
        if first_entry is None:
            raise FileNotFoundError("label directory not found or not readable: " + str(label_dir))
        per_label_images = []
        for image_name in first_entry[2]:
            # The context manager closes the image file as soon as the pixels have
            # been converted, instead of leaving that to garbage collection.
            with Image.open(label_dir/image_name) as img:
                processed_img = apply_cv_transformations(seq_id, img)
            per_label_images.append((image_name, image.img_to_array(processed_img)))
        yield (label, per_label_images)

def apply_cv_transformations(seq_id, pil_img):
    """Run the pre-processing sequence ``seq_id`` on a PIL image.

    The image is converted to an OpenCV array, every operation of the sequence is
    looked up in ``lib.dispatcher`` and applied in order (each one receives the output
    of the previous one), and the final array is converted back to a PIL image.
    """
    current = convert_to_cv_img(pil_img)
    for op_name in get_seq_operations(seq_id):
        current = lib.dispatcher[op_name](current)
    return convert_to_pil_img(current)
    
        
def transform_images(imgs_tuple, model):
    """Prepare named image arrays as single-image batches for ``model``.

    input  - imgs_tuple: iterable of (image_name, image_array)
    output - list of (image_name, batch), where batch is the image center-cropped and
             resized to ``model.input_shape[1]``, passed through ``preprocess_input``
             and given a leading axis of length 1
    """
    target_size = model.input_shape[1]

    def _to_batch(img_array):
        resized = center_crop_and_resize(img_array, image_size=target_size)
        return np.expand_dims(preprocess_input(resized), 0)

    return [(entry[0], _to_batch(entry[1])) for entry in imgs_tuple]


def evaluate_results(model, imgs):
    """Predict every prepared image with ``model`` and keep the top-1 class.

    input  - imgs: iterable of (img_name, batch) as produced by transform_images
    output - [(img_name, pred_label, prob)], one entry per input image
    """
    def _top1(batch):
        # decode_predictions returns, per batch item, a list of (class id, label, prob)
        _, label, label_prob = decode_predictions(model.predict(batch), top=1)[0][0]
        return (label, label_prob)

    return [(entry[0],) + _top1(entry[1]) for entry in imgs]


def run_per_label_model_inference(model, model_name, seq_id, skip_labels=None):
    """Run inference label by label for one model and one pre-processing sequence.

    For every label yielded by ``load_per_label_imgs_generator(seq_id)`` the images are
    prepared for the model, predicted, and the results are appended to the master CSV.

    Parameters
    ----------
    model : model used for prediction; its ``input_shape`` decides the image size.
    model_name : name under which the results are logged.
    seq_id : id of the pre-processing sequence to apply.
    skip_labels : optional mapping ``seq_id -> collection of label names`` that must
        not be processed for that sequence (used to resume an interrupted run).
        ``None`` (default) keeps the previously hard-coded resume point: the first
        five labels of "seq_5" are skipped - presumably because they were already
        logged before the run was interrupted; confirm before re-using.
        Pass ``{}`` to process every label.
    """
    if skip_labels is None:
        skip_labels = {"seq_5": ("airplane", "apple", "backpack", "banana", "bathtub")}
    labels_to_skip = skip_labels.get(seq_id, ())

    for label_name, label_images in load_per_label_imgs_generator(seq_id):
        # NOTE: the generator has already loaded this label's images at this point;
        # skipping only saves the prediction and logging work.
        if label_name in labels_to_skip:
            continue
        processed_imgs = transform_images(label_images, model)
        pred_res = evaluate_results(model, processed_imgs)
        log_per_label_results_to_csv(model_name, label_name, pred_res, seq_id)
    
def model_generator():
    """Yield a ``(model_name, model)`` tuple for every entry of ``efn_models``."""
    yield from efn_models.items()

def run_all_model_inference():
    """Run every model over every enabled pre-processing sequence.

    Iterates the models from ``model_generator()`` and, for each one, all sequence ids
    in ``pre_processing_seq_dict``, delegating the work to
    ``run_per_label_model_inference``. Progress is printed per model and per sequence.
    """
    for model_name, model in model_generator():
        print("Inference started for model: " + model_name)
        for seq_id in pre_processing_seq_dict:
            # no newline here: "completed!" is appended to the same output line
            print("inference started for seq_id: " + seq_id + ", ", end='')
            run_per_label_model_inference(model, model_name, seq_id)
            print("completed!")
        print("Inference completed! -----------------------------------")

### Model Evaluation

In [13]:
# Runs inference for every model in efn_models over every enabled pre-processing
# sequence and appends the per-image rows to the CSV at master_df_keras_path.
# Before starting a new experiment: change master_df_keras_path (where the master
# DataFrame is saved) and assign ids to the models in the model-id map as well.
run_all_model_inference()

Inference started for model: efficientnetb7
inference started for seq_id: seq_5, 

KeyboardInterrupt: 

Inference started for model: efficientnetb7<br>
inference started for seq_id: seq_0, completed!<br>
inference started for seq_id: seq_1, completed!<br>
inference started for seq_id: seq_2, completed!<br>
inference started for seq_id: seq_3, completed!<br>
inference started for seq_id: seq_4, completed!<br>
inference started for seq_id: seq_5,  <b><i>I STOPPED EXECUTION HERE! thoda aaraam bhi do yaar! (Hindi: "give it a little rest too, friend!")</i></b>

# Re Run this from seq_4