### Installing lakeFS python client

In [None]:
import os
import boto3
import joblib
import tempfile
import pprint
from io import BytesIO
from datetime import date, time

import cv2
import numpy as np
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from tensorflow import keras
from keras.models import Sequential
from keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout
from tensorflow.keras.layers import BatchNormalization

print("Loaded all libraries")

In [None]:
# Seed value intended for reproducible runs.
# NOTE(review): the helper functions visible in this notebook hard-code 42
# instead of reading this variable -- confirm whether it is used elsewhere.
random_seed = 42

## ML utility functions

In [None]:
def display_rand_images(images, labels):
    """Plot nine randomly picked images in a 3x3 grid, titled by label.

    Indices are drawn independently with ``np.random.randint`` over the first
    axis of ``images``, so the same image can appear more than once.

    Args:
        images: Indexable collection exposing ``shape[0]`` as the sample count.
        labels: Labels indexable with the same positions as ``images``.
    """
    plt.figure(1, figsize=(19, 10))

    for cell in range(1, 10):
        # Draw a one-element array and take its only entry.
        pick = np.random.randint(0, images.shape[0], 1)[0]

        plt.subplot(3, 3, cell)
        plt.subplots_adjust(hspace=0.3, wspace=0.3)
        plt.imshow(images[pick])

        plt.title(f'Dog breed : {labels[pick]}')
        plt.xticks([])
        plt.yticks([])

    plt.show()

In [None]:
def display_image(images, labels="test"):
    """Show one image in the first cell of a 3x3 subplot grid.

    Args:
        images: The image to pass to ``plt.imshow``.
        labels: Value rendered after ``'Dog breed : '`` in the title.
    """
    plt.figure(1, figsize=(19, 10))
    plt.subplot(3, 3, 1)
    plt.subplots_adjust(hspace=0.3, wspace=0.3)

    plt.imshow(images)
    plt.title(f'Dog breed : {labels}')

    # Hide the axis ticks; they carry no meaning for a picture.
    plt.xticks([])
    plt.yticks([])

    plt.show()

In [None]:
def resize_img(img, size=(227, 227)):
    """Convert a BGR image array to RGB and resize it.

    Args:
        img: Image array in BGR channel order (presumably a uint8 HxWx3 array
            as loaded by OpenCV -- confirm against the caller).
        size: Target ``(width, height)`` handed to ``PIL.Image.resize``. The
            default of 227x227 matches the default ``input_shape`` used by
            ``classification_model``.

    Returns:
        The resized RGB image as a NumPy array.
    """
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    pil_img = Image.fromarray(rgb, 'RGB')
    return np.array(pil_img.resize(size))

In [None]:
def shuffle(images, labels, seed=42):
    """Shuffle images and labels with one shared, reproducible permutation.

    Args:
        images: NumPy array whose first axis indexes samples.
        labels: NumPy array with one entry per sample along its first axis.
        seed: Seed for the permutation. The default (42) yields the same
            ordering as seeding the global NumPy RNG with 42 and shuffling
            ``np.arange(len(images))``.

    Returns:
        Tuple ``(images, labels)`` reordered by the same permutation; the
        inputs themselves are not modified.

    Raises:
        ValueError: If ``images`` and ``labels`` hold a different number of
            samples.
    """
    n_samples = images.shape[0]
    if labels.shape[0] != n_samples:
        raise ValueError(
            "images and labels must contain the same number of samples, "
            f"got {n_samples} and {labels.shape[0]}"
        )

    # A private RandomState keeps the permutation reproducible without
    # reseeding the process-wide NumPy RNG as a hidden side effect.
    rng = np.random.RandomState(seed)
    order = np.arange(n_samples)
    rng.shuffle(order)

    # Apply the same permutation to both arrays so pairs stay aligned.
    images = images[order]
    labels = labels[order]

    print("Images shape after shuffling = ",images.shape,"\nLabels shape after shuffling = ",labels.shape)
    return images, labels

In [None]:
def normalize(images, labels):
    """Cast images to float32 divided by 255 and labels to int32.

    Args:
        images: NumPy array of pixel values (presumably in the 0-255 range,
            which the division maps onto 0-1 -- confirm against the caller).
        labels: NumPy array of class labels.

    Returns:
        Tuple ``(images, labels)`` as new float32 / int32 arrays.
    """
    scaled = images.astype(np.float32) / 255
    int_labels = labels.astype(np.int32)
    print("Images shape after normalization = ",scaled.shape)
    return scaled, int_labels

In [None]:
def split_train_test(images, labels, split_ratio):
    """Split images and labels into train and test subsets and log shapes.

    Args:
        images: Feature array passed to ``train_test_split``.
        labels: Label array passed to ``train_test_split``.
        split_ratio: Forwarded as ``test_size``.

    Returns:
        ``(x_train, x_test, y_train, y_test)``; the split uses a fixed
        ``random_state`` of 42.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        images, labels, test_size=split_ratio, random_state=42
    )

    report = (
        ("x_train shape = ", x_train),
        ("y_train shape = ", y_train),
        ("\nx_test shape = ", x_test),
        ("y_test shape = ", y_test),
    )
    for caption, subset in report:
        print(caption, subset.shape)

    return x_train, x_test, y_train, y_test

In [None]:
def classification_model(optimizer, loss, metrics, input_shape=(227, 227, 3), num_classes=20):
    """Build, summarize and compile an AlexNet-like CNN classifier.

    The network is five valid-padded convolutions interleaved with three
    max-pool + batch-norm stages, followed by three dense blocks
    (4096, 4096, 1000 units, each with dropout 0.4 and batch norm) and a
    softmax output layer.

    Args:
        optimizer: Forwarded to ``model.compile``.
        loss: Forwarded to ``model.compile``.
        metrics: Forwarded to ``model.compile``.
        input_shape: Shape of one input image. The spatial size must be large
            enough to survive the valid-padded conv/pool stack.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The compiled ``Sequential`` model (its summary is printed first).
    """
    model = Sequential()

    # Convolution 1 declares the model input; it is the only layer that
    # should carry an input shape.
    model.add(Conv2D(filters=96, kernel_size=(11, 11), strides=(4, 4), padding="valid",
                     activation="relu", input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    model.add(BatchNormalization())

    # Convolution 2
    model.add(Conv2D(filters=256, kernel_size=(5, 5), strides=(1, 1), padding="valid", activation="relu"))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    model.add(BatchNormalization())

    # Convolutions 3-5, pooled once at the end
    for n_filters in (384, 384, 256):
        model.add(Conv2D(filters=n_filters, kernel_size=(3, 3), strides=(1, 1), padding="valid",
                         activation="relu"))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    model.add(BatchNormalization())

    model.add(Flatten())

    # Fully connected head. These layers infer their input size from the
    # preceding layer, so no input shape is given here.
    for units in (4096, 4096, 1000):
        model.add(Dense(units, activation="relu"))
        model.add(Dropout(0.4))
        model.add(BatchNormalization())

    # Output layer: one probability per class.
    model.add(Dense(num_classes, activation="softmax"))

    model.summary()

    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

    return model


In [None]:
def model_eval(model, x_test, y_test):
    """Evaluate the model on the test set and print the measured results.

    NOTE: a later cell in this notebook defines ``model_eval`` again; when
    both cells run, that later definition is the one in effect.

    Args:
        model: Object exposing ``evaluate(x, y)`` that returns a
            ``(loss, accuracy)`` pair.
        x_test: Test inputs forwarded to ``model.evaluate``.
        y_test: Test labels forwarded to ``model.evaluate``.

    Returns:
        Tuple ``(loss, accuracy)`` exactly as reported by ``model.evaluate``.
    """
    loss, accuracy = model.evaluate(x_test, y_test)

    # Print exactly what evaluate() measured so the log agrees with the
    # returned values.
    print(loss, accuracy)
    return loss, accuracy

In [None]:
def preprocess(images, labels, is_shuffle, is_normalize):
    """Optionally shuffle and then normalize the dataset.

    Args:
        images: Image data handed to the enabled steps.
        labels: Label data handed to the enabled steps.
        is_shuffle: When truthy, run ``shuffle`` first.
        is_normalize: When truthy, run ``normalize`` (after any shuffle).

    Returns:
        Tuple ``(images, labels)`` after the enabled steps; the inputs are
        returned unchanged when both flags are falsy.
    """
    dataset = (images, labels)

    if is_shuffle:
        dataset = shuffle(*dataset)

    if is_normalize:
        dataset = normalize(*dataset)

    images, labels = dataset
    return images, labels

In [None]:
def model_fit(x, y, params):
    """Build the classifier from ``params`` and train it on ``(x, y)``.

    Args:
        x: Training inputs forwarded to ``model.fit``.
        y: Training labels forwarded to ``model.fit``.
        params: Mapping providing ``'optimizer'``, ``'loss'``, ``'metrics'``
            (used to compile the model) and ``'epochs'`` (number of training
            epochs).

    Returns:
        The trained model.
    """
    model = classification_model(
        optimizer=params['optimizer'],
        loss=params['loss'],
        metrics=params['metrics'],
    )

    # Pass epochs by keyword: the third positional parameter of Keras'
    # Model.fit is batch_size, so a positional value would set the batch size
    # and leave the epoch count at its default.
    model.fit(x, y, epochs=params['epochs'])

    return model

In [None]:
def model_eval(model, x, y):
    """Evaluate the model and return its measured loss and accuracy.

    This redefines the ``model_eval`` from an earlier cell of the notebook.

    Args:
        model: Object exposing ``evaluate(x, y)`` that returns a
            ``(loss, accuracy)`` pair.
        x: Evaluation inputs forwarded to ``model.evaluate``.
        y: Evaluation labels forwarded to ``model.evaluate``.

    Returns:
        Tuple ``(loss, accuracy)`` exactly as reported by ``model.evaluate``,
        with no offset applied to the accuracy.
    """
    loss, accuracy = model.evaluate(x, y)
    return loss, accuracy

In [None]:
def ml_pipeline(params, images, labels):
    """Run preprocessing, splitting, training and evaluation end to end.

    Args:
        params: Mapping of pipeline settings. This function reads
            ``'is_shuffle'``, ``'is_normalize'`` and
            ``'train_test_split_ratio'``; the whole mapping is also passed on
            to ``model_fit``.
        images: Image data for the full dataset.
        labels: Labels for the full dataset.

    Returns:
        Tuple ``(model, metrics)`` where ``metrics`` is a dict with the keys
        ``'loss'`` and ``'accuracy'`` taken from ``model_eval``.
    """
    pprint.pprint(params)

    print("\nPreprocessing training data...")
    prepared_images, prepared_labels = preprocess(
        images, labels, params['is_shuffle'], params['is_normalize']
    )

    print("\nSplitting train & test sets...")
    train_x, test_x, train_y, test_y = split_train_test(
        prepared_images, prepared_labels, params['train_test_split_ratio']
    )

    print("\nTraining in progress...")
    trained_model = model_fit(train_x, train_y, params)
    print("TRAINING DONE!!")

    print("\nRunning model evaluation...")
    loss, accuracy = model_eval(trained_model, test_x, test_y)
    print(f"\nModel Accuracy: {accuracy}")

    return trained_model, dict(loss=loss, accuracy=accuracy)

In [None]:
#DONE