<a href="https://colab.research.google.com/github/piyushsoni27/CNN_web/blob/master/CNN_train_test/CNN_train_transfer_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Disabled cell: the chdir below is wrapped in a string literal, so it is never
# executed; the cell merely evaluates to (and displays) that string.
"""
import os

os.chdir("drive/My Drive/Google colab projects/CNN_web")
"""

'\nimport os\n\nos.chdir("drive/My Drive/Google colab projects/CNN_web")\n'

In [0]:
# Relative path (trailing slash included) of the directory with the CIFAR-10 batch files.
patches_dir = "cifar-10-batches-py/"

In [0]:
import numpy as np
import os
import cv2
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import Model

## Download and load pre-trained *Inception V3* model

In [0]:
# Fetch the "notop" Inception V3 weights file into /tmp.
# NOTE(review): --no-check-certificate turns off TLS certificate verification for this download.
!wget --no-check-certificate \
    https://storage.googleapis.com/mledu-datasets/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5 \
    -O /tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5

In [9]:
from tensorflow.keras.applications.inception_v3 import InceptionV3

# Build Inception V3 without its classification head and without built-in
# weights, then load the weights from the locally downloaded file.
# NOTE(review): the recorded output of this cell is a ValueError. Keras documents
# a minimum input width/height of 75 for InceptionV3, so input_shape=(32, 32, 3)
# is the likely cause -- confirm, and upscale the images if the shape is raised.
local_weights_file = '/tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'
pre_trained_model = InceptionV3(
    input_shape=(32, 32, 3), include_top=False, weights=None)
pre_trained_model.load_weights(local_weights_file)

ValueError: ignored

## Functions for loading data

In [0]:
def unpickle_patch(file):
    """
    Load and return the object pickled in a single file.
    :param file: Path of the pickled file to read.
    :return: The unpickled object; str objects pickled by Python 2 are decoded as bytes.
    """
    import pickle

    # NOTE: pickle can execute arbitrary code while loading; only use on trusted files.
    with open(file, mode='rb') as stream:
        return pickle.load(stream, encoding='bytes')

def get_patch(data, labels, percent=70):
    """
    Returning a random patch (subset) of the dataset to train the CNN.
    :param data: Complete input data; its first axis indexes the samples.
    :param labels: Labels of the entire dataset, one per sample in data. Left unmodified.
    :param percent: Percent of samples to get returned in each patch.
    :return: Tuple (patch_data, patch_labels) where patch_labels[i] is the label of patch_data[i].
    :raises ValueError: If data and labels do not hold the same number of samples.
    """
    labels = np.asarray(labels)
    num_samples = data.shape[0]
    if labels.shape[0] != num_samples:
        raise ValueError(
            "data and labels must have the same number of samples, got %d and %d"
            % (num_samples, labels.shape[0]))
    #Using the percent of samples per patch to get the actual number of samples to return.
    num_elements = int(percent * num_samples / 100)
    """
    Sample indices are shuffled rather than the labels themselves: shuffling the labels in place
    would corrupt the caller's array, and label values must never be used as positions into data.
    The same indices then select both the images and their labels, keeping the pairs aligned.
    """
    selected = np.random.permutation(num_samples)[:num_elements]
    return data[selected], labels[selected]

def get_dataset_images(dataset_path, im_dim=32, num_channels=3):

    """
    This function accepts the dataset path, reads the training batches, and returns them reshaped to match the requirements of the CNN.
    :param dataset_path:Path of the directory holding the CIFAR10 batch files.
    :param im_dim:Number of rows and columns in each image. The image is expected to be square.
    :param num_channels:Number of color channels in the image.
    :return:Tuple (images, labels): images is a uint8 array of shape (N, im_dim, im_dim, num_channels)
            and labels is a uint8 array of shape (N,), where N is the total number of samples in the
            training files found (50,000 for the standard 5 files of 10,000 samples each).
    """
    images_per_file = []    #Reshaped images of every processed training file.
    labels_per_file = []    #Labels of every processed training file.
    """
    The files of the given dataset path are listed (not those of a global directory).
    os.listdir returns names in arbitrary order, so they are sorted to load the batches deterministically.
    """
    for file_name in sorted(os.listdir(dataset_path)):
        """
        Because the CIFAR10 directory does not only contain the desired training files and has some other files, it is required to filter the required files.
        Training files are named 'data_batch_' followed by a single character.
        """
        if file_name[:-1] != "data_batch_":
            continue
        print("Working on : ", file_name)
        #Decoding the batch file; os.path.join works with or without a trailing separator in dataset_path.
        data_dict = unpickle_patch(os.path.join(dataset_path, file_name))
        #The keys are bytes (b"...") because the file is unpickled with encoding='bytes'.
        images_data = np.asarray(data_dict[b"data"], dtype=np.uint8)
        """
        Each CIFAR10 row stores the whole red plane, then the green plane, then the blue plane, each in row-major order.
        The row is therefore reshaped to (channels, rows, columns) first and the channel axis is then moved last.
        Reshaping directly to (rows, columns, channels) would scramble the pixels.
        """
        images_data_reshaped = np.reshape(
            images_data, (len(images_data), num_channels, im_dim, im_dim)).transpose(0, 2, 3, 1)
        images_per_file.append(images_data_reshaped)
        labels_per_file.append(np.asarray(data_dict[b"labels"], dtype=np.uint8))
    if not images_per_file:
        #No training file found: return empty arrays that still have the documented rank and dtype.
        return (np.zeros(shape=(0, im_dim, im_dim, num_channels), dtype=np.uint8),
                np.zeros(shape=(0,), dtype=np.uint8))
    #Returning the training input data and output labels.
    return np.concatenate(images_per_file, axis=0), np.concatenate(labels_per_file, axis=0)
