### The Predictor kernel is used in association with the trained model EfficientNetB3-BIRDS-99.13.h5. It requires use of the
### file class_dict.csv which contains information on the trained model like image size and pixel scale value.

### The Predictor call is of the form predictor(sdir, csv_path,  model_path, averaged=True, verbose=True) where:

- __sdir__ is the path to the directory that contains one or more images to be predicted
- __csv_path__ is the path to the class_dict csv file 
- __model_path__ is the path to the trained model
- __averaged__ is a boolean that selects the mode of operation of the kernel.
    if averaged=True then it is assumed the images to be predicted are images of the SAME bird. That is,
    if you see a bird you want to classify it is best to take several images of the bird. Then place them
    into a directory(sdir). It is best to crop the images so that the bird takes up a high percentage of the 
    pixels in the image. Then resize the images to 224 X 224. See the images in the test directory for 
    examples of proper images to use for predictions.
    If averaged=False it is assumed that the images in the sdir are of various birds and you wish to obtain an
    individual prediction for EACH image. In this case the kernel returns a dataframe of the form
    image file  species  probability. The dataframe has as many rows as there are valid images in the sdir. 
    The data below shows the dataframe returned for the images in the images to test directory
    with verbose=True and averaged=False
              image file                species  probability
         0      5.jpg     AFRICAN CROWNED CRANE    99.994695
         1      1.jpg      AFRICAN FIREFINCH       99.913812
         2      7.jpg                ANTBIRD       99.874401
         3      4.jpg  AFRICAN CROWNED CRANE       99.994004
         4      3.jpg  AFRICAN CROWNED CRANE       99.992073
         5     14.jpg  AFRICAN CROWNED CRANE       99.983764
         6      2.jpg  AFRICAN CROWNED CRANE       99.975902
      
- __verbose__ is a boolean. If set to True the kernel prints the results of the predictions. In the case where
     averaged=True it will also show the image of the predicted class. If averaged=False the kernel prints out
     the content of the dataframe
     



### Make required imports

In [None]:
import tensorflow as tf
from tensorflow.keras.models import  load_model
import numpy as np
import pandas as pd
import cv2 as cv2
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import os
import logging
logging.getLogger("tensorflow").setLevel(logging.ERROR)

### predictor function

In [None]:
def predictor(sdir, csv_path,  model_path, averaged=True, verbose=True):
    """Classify the images found in a directory with a trained Keras model.

    Parameters
    ----------
    sdir : str
        Directory holding one or more image files to classify. Entries that
        cannot be read as images are skipped (and reported when ``verbose``).
    csv_path : str
        Path to the class dictionary csv. The columns ``class``, ``height``,
        ``width`` and ``scale by`` are read; the image size and pixel scaling
        are taken from the first row.
    model_path : str
        Path to the trained model passed to ``load_model``.
    averaged : bool
        If True the per-class probabilities of all images are summed and a
        single class is selected (images are assumed to show the same bird).
        If False each image gets its own prediction. When exactly one valid
        image is found the averaged mode is used regardless of this flag.
    verbose : bool
        If True progress and results are printed; in averaged mode the
        representative image is also displayed.

    Returns
    -------
    tuple
        ``(klass, prob, img, None)`` in averaged mode, where ``prob`` is the
        mean probability (0-1) of the selected class and ``img`` is the
        preprocessed (resized, RGB, scaled) array of a representative image.
        ``(None, None, None, df)`` otherwise, where ``df`` has the columns
        ``image file``, ``species`` and ``probability`` (in percent).

    Raises
    ------
    ValueError
        If ``sdir`` contains no readable image.
    """
    # read in the csv file
    class_df = pd.read_csv(csv_path)
    img_height = int(class_df['height'].iloc[0])
    img_width = int(class_df['width'].iloc[0])
    img_size = (img_width, img_height)  # cv2.resize takes (width, height)
    scale = class_df['scale by'].iloc[0]
    # determine value to scale image pixels by: pixels become img*s2 - s1
    try:
        int(scale)
    except (ValueError, TypeError):
        # non-integer entry: the factor follows '*' and the offset follows '-'
        parts = scale.split('-')
        s1 = float(parts[1])
        s2 = float(parts[0].split('*')[1])
        if verbose:
            print (s1,s2)
    else:
        # an integer entry means the pixels are used unscaled
        s1, s2 = 0, 1
    # sorted so that results do not depend on the arbitrary os.listdir order
    path_list = [os.path.join(sdir, f) for f in sorted(os.listdir(sdir))]
    if verbose:
        print (' Model is being loaded- this will take about 10 seconds')
    model = load_model(model_path)
    image_list = []
    file_list = []
    for path in path_list:
        try:
            img = cv2.imread(path)  # yields None when the file cannot be decoded
            if img is not None:
                img = cv2.resize(img, img_size)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        except cv2.error:
            img = None
        if img is None:
            if verbose:
                print ( path, ' is an invalid image file')
            continue
        image_list.append(img * s2 - s1)
        file_list.append(os.path.basename(path))
    good_image_count = len(image_list)
    if good_image_count == 0:
        # predicting on an empty batch would only fail with an obscure error
        raise ValueError(f'no valid image files found in {sdir}')
    if good_image_count == 1:
        averaged = True
    image_array = np.array(image_list)
    # one batch prediction serves both modes: a row of class probabilities per image
    preds = np.asarray(model.predict(image_array))
    top_indices = np.argmax(preds, axis=1)  # best class index for each image
    if averaged:
        # sum the probabilities of each class then find class index with highest sum
        tsum = preds.sum(axis=0)
        index = int(np.argmax(tsum))
        klass = class_df['class'].iloc[index]  # class name that corresponds to the index
        prob = tsum[index] / good_image_count  # average probability of that class
        # representative image: the first one individually predicted as the
        # selected class; if none is, the one most confident about that class
        matches = np.flatnonzero(top_indices == index)
        if matches.size:
            chosen = int(matches[0])
        else:
            chosen = int(np.argmax(preds[:, index]))
        img = image_array[chosen]
        if verbose:
            plt.axis('off')
            plt.imshow(img)  # show the image
            print (f'predicted species is {klass} with a probability of {prob*100:6.2f} % ')
        return klass, prob, img, None
    # create individual predictions for each image
    pred_class = [class_df['class'].iloc[i] for i in top_indices]
    prob_list = [p[i] * 100 for p, i in zip(preds, top_indices)]
    Fseries = pd.Series(file_list, name='image file')
    Lseries = pd.Series(pred_class, name='species')
    Pseries = pd.Series(prob_list, name='probability')
    df = pd.concat([Fseries, Lseries, Pseries], axis=1)
    if verbose:
        print (df.head(len(df)))
    return None, None, None, df

### test on multiple images to get an averaged prediction with verbose=True
### Note the images to test directory contains 7 images: 5 are of African Crowned Crane
### and the other 2 are of different species.

In [None]:
# Inputs for the demo: image directory, class dictionary csv and trained model.
sdir = r'../input/100-bird-species/images to test'
csv_path = r'../input/100-bird-species/class_dict.csv'
model_path = r'../input/100-bird-species/EfficientNetB3-BIRDS-99.13.h5'
# Averaged mode with printing enabled: results are printed and the image of
# the predicted class is shown.
verbose, averaged = True, True
klass, prob, image, df = predictor(
    sdir, csv_path, model_path, averaged=averaged, verbose=verbose
)


### Note the returned prediction is of the class African Crowned Crane because there are 5 crane images
### and 2 non crane images so the class with the highest averaged probability is selected and displayed

### test on multiple images with verbose=False - function does not print results or show the class image
### returns the klass, its probability and an associated image of the class

In [None]:
# Same averaged run, silenced: the results are available only through the
# returned klass, prob and image values.
verbose = False
klass, prob, image, df = predictor(
    sdir, csv_path, model_path, averaged=averaged, verbose=verbose
)

### print the results returned by the function and show the image

In [None]:
# Display the image returned by the silent run and report the averaged
# prediction using the returned values.
plt.axis('off')
plt.imshow(image)
print(f'predicted species is {klass} with a probability of {prob*100:6.2f} % ')

### now demonstrate where images are assumed to be of various birds and you want to get the
### individual results for each image. With verbose=True function prints out the results
### the data is returned as the dataframe df

In [None]:
# Per-image mode with printing enabled: the function prints the dataframe of
# individual predictions and also returns it as df.
verbose, averaged = True, False
klass, prob, image, df = predictor(
    sdir, csv_path, model_path, averaged=averaged, verbose=verbose
)

### now run function with verbose=False
### the returned dataframe df can then be used to print the results


In [None]:
# Per-image mode, silenced: nothing is printed by the function, so the
# returned dataframe is printed here in full.
verbose, averaged = False, False
klass, prob, image, df = predictor(
    sdir, csv_path, model_path, averaged=averaged, verbose=verbose
)
length = len(df)
print(df.head(length))

### demonstrate use on a single image with verbose=True

In [None]:
sdir = r'../input/100-bird-species/one image to test'  # directory has 1 image
# Note: when the directory holds only one valid image, predictor switches to
# averaged mode internally, whatever value is passed for averaged.
verbose, averaged = True, False
klass, prob, image, df = predictor(
    sdir, csv_path, model_path, averaged=averaged, verbose=verbose
)
