In [None]:
import os
from os import listdir
import matplotlib.pyplot as plt
from matplotlib.image import imread
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import array_to_img
from keras.preprocessing.image import save_img
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.optimizers import SGD , Adam, RMSprop
from keras import backend
from keras.preprocessing.image import ImageDataGenerator
from keras.models import load_model
from numpy import load
from numpy import zeros
from numpy import asarray
from numpy import savez_compressed
from sklearn.model_selection import train_test_split

In [None]:
def rename_extension(path):
    """Rename every ``.jfif`` file in a folder to ``.jpg``.

    Parameters
    ----------
    path : str
        Sub-folder, relative to the base-folder placeholder below, whose
        files are renamed.

    Raises
    ------
    FileNotFoundError
        Propagated from ``os.listdir`` when the folder does not exist.

    Notes
    -----
    The kernel's working directory is deliberately left untouched: later
    cells use relative paths such as ``'data/train/'`` and would stop
    resolving if this helper changed directory.
    """
    folder = f' ...  type in your folder path my boy ... /{path}'
    for entry in os.listdir(folder):
        stem, extension = os.path.splitext(entry)
        # Compare case-insensitively so '.JFIF' files are renamed as well.
        if extension.lower() == '.jfif':
            os.rename(os.path.join(folder, entry),
                      os.path.join(folder, stem + '.jpg'))


In [None]:
# Optional step: run only when the folder holds .jfif files that must become .jpg.
# Replace the placeholder argument with the real sub-folder before running.
rename_extension('train/... son, you know what to do ... ') # in case you need to rename extensions

In [None]:
# Root folder of the training set, passed to collect_all(), which lists it and each entry inside it.
image_folder = 'data/train/' # training images go here

In [None]:
def mapper(analysis):
    """Build label <-> integer lookup tables from a list of class names.

    The list is sorted alphabetically *in place* before indices are assigned,
    so the caller's list is reordered as a side effect.

    Returns a ``(label -> index, index -> label)`` tuple of dictionaries.
    """
    analysis.sort()
    label_to_index = {label: position for position, label in enumerate(analysis)}
    index_to_label = dict(enumerate(analysis))
    return (label_to_index, index_to_label)

In [None]:
def one_hot_encoder(value, mapped_class):
    """Return a uint8 one-hot vector for ``value``.

    ``mapped_class`` maps each label to its integer position; the vector has
    one slot per entry of that mapping. An unknown label raises ``KeyError``.
    """
    class_count = len(mapped_class)
    one_hot = zeros(class_count, dtype='uint8')
    hot_index = mapped_class[value]
    one_hot[hot_index] = 1
    return one_hot

In [None]:
def collect_all(directory_string):
    """Load every image under ``directory_string`` together with one-hot labels.

    The directory is expected to hold one sub-folder per class. Every picture
    is resized to 224x224 while loading and stored as ``uint8``.

    Parameters
    ----------
    directory_string : str
        Root folder of the dataset. A trailing path separator is optional.

    Returns
    -------
    X : numpy.ndarray (uint8)
        Stacked image arrays, one entry per loaded picture
        (presumably ``(n, 224, 224, 3)`` for RGB input -- confirm with ``X.shape``).
    Y : numpy.ndarray (uint8)
        One-hot rows aligned with ``X``; the column order is the alphabetical
        class order produced by ``mapper``.
    """
    # Only sub-directories are classes. Stray files in the root (OS metadata,
    # archives, ...) would otherwise be listed as classes and crash listdir().
    class_folders = [
        entry for entry in listdir(directory_string)
        if os.path.isdir(os.path.join(directory_string, entry))
    ]
    class_count = len(class_folders)

    print("DETAILS : ")
    print("")

    if class_count > 1:
        print(f'There are {class_count} classes in this directory :')
    elif class_count == 1:
        print(f'There is {class_count} class in this directory:')
    else:
        print(f'There are {class_count} classes in this directory.')

    print(" ")

    # List each class folder once and reuse the listing for both the report
    # and the loading loop; 'Thumbs.db' is not an image, so it is not counted.
    pictures_by_class = {}
    for folder in class_folders:
        names = [
            name for name in listdir(os.path.join(directory_string, folder))
            if name != 'Thumbs.db'
        ]
        pictures_by_class[folder] = names
        print(f'{folder} : {len(names)} images.')

    # mapper() sorts its argument in place, so hand it a copy.
    mapped_class, _ = mapper(list(class_folders))

    print(' ')

    print('Preparing data for categorical classification ...')

    pictures, targets = list(), list()
    for folder, names in pictures_by_class.items():
        # The target only depends on the class, so encode it once per folder.
        target = one_hot_encoder(folder, mapped_class)
        for name in names:
            photo = load_img(os.path.join(directory_string, folder, name),
                             target_size=(224, 224))
            pictures.append(img_to_array(photo, dtype='uint8'))
            targets.append(target)

    X = asarray(pictures, dtype='uint8')
    Y = asarray(targets, dtype='uint8')

    print(' ')
    print("Done!")
    return X, Y
 # dagashi kashi  <--- download later (anime about programming)       

In [None]:
# Load every training image (X) and its one-hot label (Y) into memory.
X, Y = collect_all(image_folder)

In [None]:
# Sanity check: both arrays must have the same first dimension (one row per image);
# Y has one column per class.
print(X.shape, Y.shape)

In [None]:
# Save both arrays to one compressed .npz file. They are passed positionally,
# so NumPy stores them under the default keys 'arr_0' (X) and 'arr_1' (Y).
savez_compressed('clean_data.npz', X,Y)

### CNN MODEL

In [None]:
# f beta metric, incase of imbalanced dataset ...... 
def f_beta(y_true, y_pred, beta=2):
    """F-beta score computed per sample and averaged (for imbalanced datasets).

    Predictions are clipped to [0, 1]; true positives, false positives and
    false negatives are counted along axis 1 after rounding to hard 0/1
    decisions. ``beta`` weights recall against precision.
    """
    eps = backend.epsilon()

    def _hard_count(delta):
        # Clip to [0, 1], round to a hard decision, then count along axis 1.
        return backend.sum(backend.round(backend.clip(delta, 0, 1)), axis=1)

    clipped_pred = backend.clip(y_pred, 0, 1)

    true_pos = _hard_count(y_true * clipped_pred)
    false_pos = _hard_count(clipped_pred - y_true)
    false_neg = _hard_count(y_true - clipped_pred)

    # epsilon keeps the divisions finite when a count is zero
    precision = true_pos / (true_pos + false_pos + eps)
    recall = true_pos / (true_pos + false_neg + eps)

    beta_sq = beta ** 2
    return backend.mean(
        (1 + beta_sq) * (precision * recall) / (beta_sq * precision + recall + eps)
    )
    

In [None]:
def my_model(input_shape =(224,224,3), output_shape = 3):
    """Build and compile the CNN classifier.

    Architecture: three stages of (Conv-Conv-MaxPool-Dropout) with 16, 32 and
    64 filters, followed by a 128-unit dense layer and a softmax output.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input image, ``(height, width, channels)``.
    output_shape : int
        Number of classes, i.e. the width of the softmax layer. It must match
        the number of columns of the one-hot targets.

    Returns
    -------
    A compiled ``Sequential`` model (SGD with momentum, categorical
    cross-entropy loss, accuracy metric).
    """
    conv_options = dict(activation='relu', kernel_initializer='he_uniform', padding='same')

    model = Sequential()
    for stage, filters in enumerate((16, 32, 64)):
        if stage == 0:
            # Only the very first layer declares the input shape.
            model.add(Conv2D(filters, (3, 3), input_shape=input_shape, **conv_options))
        else:
            model.add(Conv2D(filters, (3, 3), **conv_options))
        model.add(Conv2D(filters, (3, 3), **conv_options))
        model.add(MaxPooling2D((2, 2)))
        model.add(Dropout(0.2))

    model.add(Flatten())
    model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(output_shape, activation='softmax'))

    # compile model
    # `learning_rate` replaces the deprecated `lr` argument name.
    opt = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])  # metrics=[f_beta])
    return model

In [None]:
def summary(info):
    """Plot the learning curves stored in a Keras ``History`` object.

    Draws two stacked panels on the current figure: cross-entropy loss on top
    and classification accuracy below, each with its training (blue) and
    validation (orange) curve.
    """
    panels = (
        (211, 'Cross Entropy Loss', 'loss', 'val_loss',
         ["Loss", "Validation Loss"]),
        (212, 'Classification Accuracy', 'accuracy', 'val_accuracy',
         ["Accuracy", "Validation Accuracy"]),
    )
    for position, heading, train_key, val_key, legend_entries in panels:
        plt.subplot(position)
        plt.title(heading)
        plt.plot(info.history[train_key], color='blue', label='train')
        plt.plot(info.history[val_key], color='orange', label='test')
        # The explicit legend entries take precedence over the per-line labels.
        plt.legend(legend_entries)
    

In [None]:
def load_split_dataset(X,Y):
    """Split the data 80/20 into train and test parts with a fixed seed.

    Prints the shapes of the four resulting arrays and returns them as
    ``(train_x, test_x, train_y, test_y)``.
    """
    parts = train_test_split(X, Y, test_size=0.2, random_state=3)
    train_x, test_x, train_y, test_y = parts
    print(*(part.shape for part in parts))
    return (train_x, test_x, train_y, test_y)

In [None]:
def run_learning_sequence(X, Y, epochs=120, batch_size=32, steps_per_epoch=90,
                          validation_steps=15, model_path='EITS_model.h5'):
    """Train, evaluate, save and plot the CNN on the given data.

    Parameters
    ----------
    X, Y : numpy.ndarray
        Images and their one-hot targets, as returned by ``collect_all``.
    epochs : int
        Number of training epochs.
    batch_size : int
        Batch size used by both the training and the validation iterator.
    steps_per_epoch, validation_steps : int or None
        Batches drawn per epoch / per validation pass. Pass ``None`` to use
        exactly one pass over the respective iterator (``len(iterator)``).
        The defaults keep the values this notebook was originally run with.
    model_path : str
        Where the trained model is saved.

    Side effects
    ------------
    Prints the split shapes and the final loss/accuracy, writes the model to
    ``model_path`` and draws the learning curves.
    """
    # load dataset
    train_x, test_x, train_y, test_y = load_split_dataset(X, Y)

    # create data generators: augmentation on the training set only,
    # identical 1/255 rescaling on both
    train_datagen = ImageDataGenerator(rescale=1.0/255.0, horizontal_flip=True,
                                       vertical_flip=True, rotation_range=90)
    val_datagen = ImageDataGenerator(rescale=1.0/255.0)

    # prepare iterators
    train = train_datagen.flow(train_x, train_y, batch_size=batch_size)
    val = val_datagen.flow(test_x, test_y, batch_size=batch_size)

    if steps_per_epoch is None:
        steps_per_epoch = len(train)
    if validation_steps is None:
        validation_steps = len(val)

    # Size the network from the data instead of relying on my_model()'s
    # defaults, so a dataset with a class count other than 3 still works.
    model = my_model(input_shape=train_x.shape[1:], output_shape=train_y.shape[1])

    # fitting the model
    # NOTE(review): fit_generator / evaluate_generator are deprecated in recent
    # Keras releases in favour of fit / evaluate -- migrate once the installed
    # version is confirmed.
    info = model.fit_generator(train, steps_per_epoch=steps_per_epoch,
                               validation_data=val,
                               validation_steps=validation_steps,
                               epochs=epochs, verbose=1)

    # evaluate the model
    loss, accuracy = model.evaluate_generator(val, steps=validation_steps, verbose=0)
    print('> loss = %.3f, accuracy=%.3f' %(loss, accuracy))

    # saving the model
    model.save(model_path)  # EITS => Eye In The Sky model ... hehehe

    # display learning curves
    summary(info)

In [None]:
# Train, evaluate, save the model and plot the learning curves in one go.
run_learning_sequence(X,Y)

### PREDICTIONS

In [None]:
# The training folder is reused here only to recover the class names for the
# inverse (index -> label) mapping.
directory_string = 'data/train/' # yes, this is the train set; only the inverse tags are needed

In [None]:
# Class names are the sub-directory names of the training folder. Stray files
# are ignored so the indices line up with the classes the model was trained on.
# The listing is kept out of `image_folder`: that name holds the training path
# string used by the data-loading cell and must not be rebound to a list.
analysis = [
    entry for entry in listdir(directory_string)
    if os.path.isdir(os.path.join(directory_string, entry))
]

# Only the inverse (index -> label) table is needed for predictions; indexing
# the result avoids assigning to `_`, which IPython uses for the last output.
inv_mapped_class = mapper(analysis)[1]

In [None]:
# Display the index -> class-name dictionary.
inv_mapped_class # See for yourself.

In [None]:
def prediction_to_labels(inv_mapping, prediction):
    """Convert one vector of class scores into a list of class names.

    Parameters
    ----------
    inv_mapping : dict
        Index -> label lookup, as returned by ``mapper``.
    prediction : array-like
        One score per class for a single sample.

    Returns
    -------
    list
        Labels whose score rounds to 1. When no score does (for example a
        softmax output with no class above 0.5), the highest-scoring label is
        returned instead of an empty list. An empty ``prediction`` yields ``[]``.
    """
    scores = asarray(prediction)
    if scores.size == 0:
        return []

    rounded = scores.round()
    # convert to predicted tags
    labels = [inv_mapping[i] for i in range(len(rounded)) if rounded[i] == 1.0]

    if not labels:
        # A softmax classifier always has a best guess, even below 0.5.
        labels = [inv_mapping[int(scores.argmax())]]
    return labels

In [None]:
# The approach I would take is to convert all the test images into a numpy array.

In [None]:
# Folder holding the images to classify.
test_string = 'data/test/'

In [None]:
def load_new_image(directory):
    """Load every image file in ``directory`` as a batch-of-one uint8 array.

    Parameters
    ----------
    directory : str
        Folder with the pictures to classify. A trailing path separator is
        optional.

    Returns
    -------
    numpy.ndarray (uint8)
        Array of shape ``(n_images, 1, 224, 224, 3)``; each element already
        carries the leading batch axis, so ``X_TEST[i]`` can be handed to
        ``model.predict`` as is. Pixel values are NOT rescaled here.
    """
    test_pictures = list()
    print('Converting to numpy array ...')
    # Sorted so the order of the results is reproducible and can be matched
    # back to the file names.
    for picture in sorted(listdir(directory)):
        picture_path = os.path.join(directory, picture)
        # Skip the Windows thumbnail cache and anything that is not a file.
        if picture == 'Thumbs.db' or not os.path.isfile(picture_path):
            continue

        # load the picture
        photo = load_img(picture_path, target_size=(224, 224))

        # convert to numpy array
        photo = img_to_array(photo, dtype='uint8')

        # add the batch axis expected by model.predict
        test_pictures.append(photo.reshape(1, 224, 224, 3))

    X_TEST = asarray(test_pictures, dtype='uint8')
    print(' ')
    print('Done!')
    return X_TEST

In [None]:
# Build the test array before using it: load_new_image() was only defined,
# never called, so X_TEST did not exist on a fresh kernel.
X_TEST = load_new_image(test_string)
len(X_TEST)

In [None]:
def run_prediction_sequence(X_TEST, inv_mapped_class, model_path='EITS_model.h5'):
    """Classify every sample of ``X_TEST`` and print its predicted label(s).

    Parameters
    ----------
    X_TEST : numpy.ndarray
        Raw 0-255 images as returned by ``load_new_image``; each element must
        already be a batch of one image.
    inv_mapped_class : dict
        Index -> label lookup used to turn scores into class names.
    model_path : str
        Saved model to load.
    """
    # load model
    model = load_model(model_path)

    # predict the class
    for sample in X_TEST:
        # The network was trained and validated on pixels rescaled by 1/255
        # (see the ImageDataGenerator settings); feed it the same range here.
        result = model.predict(sample / 255.0)
        # map predictions to tags
        label = prediction_to_labels(inv_mapped_class, result[0])
        print(label)

In [None]:
# Print the predicted label list for every test image, one line per image.
run_prediction_sequence(X_TEST, inv_mapped_class)  # So yeah, that's it.