<a href="https://colab.research.google.com/github/moosemaniam/IISCDeepLearning/blob/main/test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!nvidia-smi 

Mon Oct 25 03:07:25 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.74       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0    27W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
%ls /content/drive/MyDrive/datasets/caltech_data/
!unzip /content/drive/MyDrive/datasets/caltech_data/Caltech_256_Train.zip &> /dev/null
DRIVE_PATH='/content/drive/MyDrive/datasets/caltech_data/'

 [0m[01;34mcaltech2565.hd5[0m/                       [01;34mmodel_10_7.hd5[0m/
 [01;34mcaltech2566.hd5[0m/                       [01;34mmodel_20_7.hd5[0m/
 [01;34mcaltech256_7.hd5[0m/                     'Model information.gsheet'
 caltech-256-image-classification.zip   submission_22stOct_1118AM.csv
 Caltech_256_Train.zip                  submission_22stOct_1233.csv
 [01;34mcaltech256_v3.hd5[0m/                     submission_23OCT2021_0728.csv
 [01;34mcaltech256_v4.hd5[0m/                     submission_24OCT2021_2139.csv
 [01;34mcaltech256_v6.hd5[0m/                     [01;34mtb[0m/
 kaggle.json                            tensorboardLogs.zip


In [5]:
from tensorflow import keras
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation, Flatten,Conv2D
from keras.layers import BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras import regularizers
from tensorflow.keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau,TensorBoard,EarlyStopping

from keras.applications.vgg16 import VGG16
import numpy as np
from datetime import datetime
#Constants
BATCH_SIZE = 64
VALIDATION_SPLIT = 0.1
EPOCHS=30
VERSION="8"
LOAD_VERSION="7"

class CustomSaver(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        if epoch > 0 and epoch % 5 == 0:  
            self.model.save(DRIVE_PATH+"caltech256_" + VERSION + "_EPOCH_" + str(epoch)+".hd5")

assert(VERSION != LOAD_VERSION)
class Trainer:   
    def __init__(self,training_path,batch_size,model_path=None,fineTune=False):
        self.callbacks = []
    
        
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                                  patience=5, min_lr=0.0000001)
        

        tb_callback = TensorBoard(DRIVE_PATH + "tensorboard/" + "model_v{}".format(VERSION), update_freq=1)
        
        self.callbacks.append(reduce_lr)
        self.callbacks.append(tb_callback)
        self.callbacks.append(CustomSaver())

        es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=15)
       # self.callbacks.append(es)
        self.train_datagen= ImageDataGenerator(rescale=1./255, 
                                               shear_range=0.2,
        zoom_range=0.3,
        horizontal_flip=True,
        vertical_flip=True,
        width_shift_range=0.3,
        height_shift_range=0.3,
        rotation_range=40, 
        fill_mode='nearest',
        validation_split=VALIDATION_SPLIT
        ) 


        

        self.train_generator = self.train_datagen.flow_from_directory(
        path_training,
        batch_size=batch_size,
        shuffle=True,
        class_mode='categorical',
        subset='training') # set as training data

        #Lets document labels in this data set
        labels = self.train_generator.class_indices
        #Make a dict of labels,class indices
        self.labels = dict((v,k) for k,v in labels.items())
        
        self.validation_datagen= ImageDataGenerator(rescale=1./255,validation_split=VALIDATION_SPLIT )

        self.validation_generator = self.validation_datagen.flow_from_directory(
        path_training,
        batch_size=batch_size,
        shuffle=True,
        class_mode='categorical',
        subset='validation') # set as validation data

        if(model_path== None):
          #If no model specified load VGG
          print("Loading VGG + untrained model")
          self.load_VGG16_resized(input_shape=(256,256,3))
        else:
          print("Loading pretrained model from {}".format(model_path))

          
          self.model = keras.models.load_model(model_path)
          
          self.model.summary()
          self.add_regularizers_base_model()
        return

    def add_regularizers_base_model(self):
      """ Add regularizer parameter to pre-trained
      model layer. Need to compile again to have effect"""
      #Add regularizer to model https://bit.ly/3GjpodB
      self.model.get_layer('vgg16').trainable = True

      #Low regularisation coef so that effect of regularisation is minimal
      self.model.get_layer('vgg16').kernel_regularizer = keras.regularizers.l2(l2=0.00001)

      #Since we are fine-tuning an already trained model, keep learning rate very
      #Slow so that base layers don't get affected too much
      self.model.compile( loss='categorical_crossentropy',
                  optimizer=Adam(learning_rate=0.00001),
                  metrics=['acc'])
      self.model.summary()
      
        
        
    
    def load_VGG16_resized(self,input_shape):

        #Load VGG16 weights trained on imagenet. Remove classification layer
        #At the end
        vgg_model = VGG16(weights="imagenet",include_top=False, input_shape=input_shape) 
        #Make VGG layers non trainable
        for layer in vgg_model.layers[:]:
            layer.trainable = False

        

        self.model = Sequential()
        self.model.add(vgg_model) 
        self.model.add(Flatten())
        self.model.add(Dropout(0.2))
        self.model.add(BatchNormalization())
        self.model.add(Dense(256, activation='softmax'))
        self.model.compile( loss='categorical_crossentropy',
                  optimizer=Adam(learning_rate=0.0001),
                  metrics=['acc'])
        self.model.summary()


    def train(self):    

        self.history = self.model.fit(
          self.train_generator,
          steps_per_epoch=
             self.train_generator.samples/self.train_generator.batch_size,
          epochs=EPOCHS,
          validation_data=self.validation_generator,
          validation_steps=
             self.validation_generator.samples/self.validation_generator.batch_size,
          verbose=1,
          callbacks=self.callbacks)
    def save(self,name):
        self.model.save(name)
        
        
        
    def predict(self,dataPath):
        datagen = ImageDataGenerator(rescale=1./255)
        test_generator = datagen.flow_from_directory(
        dataPath,
        target_size=(256,256),
        batch_size=32,
        class_mode=None,  # only data, no labels
        shuffle=False)
        
        #This is a num_test_samples X no classes matrix
        #With probabilites for each class row wise
        probabilities = caltechClassifier.model.predict(test_generator, 1)
        
        #Get the indices of classes of highest probability
        act_pred = np.argmax(probabilities,axis=1)
        
        #Get labels from class indices
        predictions = [self.labels[k] for k in act_pred]
        
        #Return a tuple of filename and their classes
        return(tuple(zip(gen.filenames,predictions)))
        
        
path_training = "./Caltech_256_Train"
model_path = '/content/drive/MyDrive/datasets/caltech_data/caltech256_v'+ LOAD_VERSION+'.hd5'
#model_path = 'model_interim_30.hd5'
#model_path='/content/drive/MyDrive/datasets/caltech_data/caltech256_v6.hd5'

caltechClassifier = Trainer(training_path=path_training,
                            batch_size=BATCH_SIZE,
                            model_path = model_path
                            )




Found 19020 images belonging to 256 classes.
Found 1988 images belonging to 256 classes.
Loading pretrained model from /content/drive/MyDrive/datasets/caltech_data/caltech256_v7.hd5
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Functional)           (None, 8, 8, 512)         14714688  
_________________________________________________________________
flatten_3 (Flatten)          (None, 32768)             0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 32768)             0         
_________________________________________________________________
batch_normalization_3 (Batch (None, 32768)             131072    
_________________________________________________________________
dense_3 (Dense)              (None, 256)               8388864   
Total params: 23,234,624
Trainable params: 23,169,088
Non-trainable pa

In [6]:
caltechClassifier.train()
caltechClassifier.model.save(DRIVE_PATH+"caltech256_v" + VERSION + ".hd5")




Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
INFO:tensorflow:Assets written to: /content/drive/MyDrive/datasets/caltech_data/caltech256_8_EPOCH_5.hd5/assets
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
INFO:tensorflow:Assets written to: /content/drive/MyDrive/datasets/caltech_data/caltech256_8_EPOCH_10.hd5/assets
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
INFO:tensorflow:Assets written to: /content/drive/MyDrive/datasets/caltech_data/caltech256_8_EPOCH_15.hd5/assets
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
INFO:tensorflow:Assets written to: /content/drive/MyDrive/datasets/caltech_data/caltech256_8_EPOCH_20.hd5/assets
Epoch 22/30

KeyboardInterrupt: ignored

# New section

In [None]:
caltechClassifier.model.save(DRIVE_PATH+"caltech256_v" + VERSION + ".hd5")


In [7]:
!unzip /content/drive/MyDrive/datasets/caltech_data/caltech-256-image-classification.zip &> /dev/null
#Move into a directory structure that makes it easy for image generator to read
!mkdir -p caltech_test_data 
!mv test caltech_test_data
!ls caltech_test_data/test | wc -l

9177


In [14]:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sn


def predict(model,dataPath):
        datagen = ImageDataGenerator(rescale=1./255)
        test_generator = datagen.flow_from_directory(
        dataPath,
        target_size=(256,256),
        batch_size=32,
        classes=['test'],#Trick the generator into thinking there is only 1 class .. test
        shuffle=False)#Shuffle *has* to be false
        
        #This is a num_test_samples X no classes matrix
        #With probabilites for each class row wise
        probabilities = model.model.predict(test_generator)
        
        #Get the indices of classes of highest probability
        act_pred = np.argmax(probabilities,axis=1)
        
        #Get labels from class indices
        predictions = [model.labels[k] for k in act_pred]
 $       cmatrix = confusion_matrix(predictions,model.labels)
        
        #Return a tuple of filename and their classes
        return(tuple(zip(test_generator.filenames,predictions)))
results = predict(caltechClassifier,dataPath="caltech_test_data")
#sn.heatmap(cmatrix, annot=True)

Found 9177 images belonging to 1 classes.


ValueError: ignored

In [10]:
import csv

def write_results_to_csv(path,
                        results):
  with open(path+'submission_25OCT2021_1046.csv','w') as out:
      csv_out=csv.writer(out)
      csv_out.writerow(['img_path','label'])
      for row in results:
          csv_out.writerow(row)
write_results_to_csv(DRIVE_PATH,results)

In [11]:
#Run at the end
drive.flush_and_unmount()
