<a href="https://colab.research.google.com/github/moosemaniam/IISCDeepLearning/blob/main/test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!nvidia-smi 

Sun Oct 24 13:16:54 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.74       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
%ls /content/drive/MyDrive/datasets/caltech_data/
!unzip /content/drive/MyDrive/datasets/caltech_data/Caltech_256_Train.zip &> /dev/null
DRIVE_PATH='/content/drive/MyDrive/datasets/caltech_data/'

 [0m[01;34mcaltech2565.hd5[0m/                       kaggle.json
 [01;34mcaltech2566.hd5[0m/                      'Model information.gsheet'
 caltech-256-image-classification.zip   submission_22stOct_1118AM.csv
 Caltech_256_Train.zip                  submission_22stOct_1233.csv
 [01;34mcaltech256_v3.hd5[0m/                     submission_23OCT2021_0728.csv
 [01;34mcaltech256_v4.hd5[0m/                     tensorboardLogs.zip
 [01;34mcaltech256_v6.hd5[0m/


In [8]:
from tensorflow import keras
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation, Flatten,Conv2D
from keras.layers import BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras import regularizers
from tensorflow.keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau,TensorBoard,EarlyStopping

from keras.applications.vgg16 import VGG16
import numpy as np
from datetime import datetime
#Constants
BATCH_SIZE = 64
VALIDATION_SPLIT = 0.1
EPOCHS=50
VERSION="7"
LOAD_VERSION="6"

class CustomSaver(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        if epoch >0 and epoch % 10 == 0:  
            self.model.save("model_"+str(epoch)+"_" + VERSION +".hd5")

assert(VERSION != LOAD_VERSION)
class Trainer:   
    def __init__(self,training_path,batch_size,model_path=None,fineTune=False):
        self.callbacks = []
    
        
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                                  patience=5, min_lr=0.0000001)
        

        tb_callback = TensorBoard('./logs', update_freq=1)
        
        self.callbacks.append(reduce_lr)
        self.callbacks.append(tb_callback)
        self.callbacks.append(CustomSaver())

        es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=15)
       # self.callbacks.append(es)
        self.train_datagen= ImageDataGenerator(rescale=1./255, 
                                               shear_range=0.2,
        zoom_range=0.3,
        horizontal_flip=True,
        vertical_flip=True,
        width_shift_range=0.3,
        height_shift_range=0.3,
        rotation_range=40, 
        fill_mode='nearest',
        validation_split=VALIDATION_SPLIT
        ) 


        

        self.train_generator = self.train_datagen.flow_from_directory(
        path_training,
        batch_size=batch_size,
        shuffle=True,
        class_mode='categorical',
        subset='training') # set as training data

        #Lets document labels in this data set
        labels = self.train_generator.class_indices
        #Make a dict of labels,class indices
        self.labels = dict((v,k) for k,v in labels.items())
        
        self.validation_datagen= ImageDataGenerator(rescale=1./255,validation_split=VALIDATION_SPLIT )

        self.validation_generator = self.validation_datagen.flow_from_directory(
        path_training,
        batch_size=batch_size,
        shuffle=True,
        class_mode='categorical',
        subset='validation') # set as validation data

        if(model_path== None):
          #If no model specified load VGG
          print("Loading VGG + untrained model")
          self.load_VGG16_resized(input_shape=(256,256,3))
        else:
          print("Loading pretrained model from {}".format(model_path))

          
          self.model = keras.models.load_model(model_path)
          
          self.model.summary()
          self.add_regularizers_base_model()
        return

    def add_regularizers_base_model(self):
      #Add regularizer to model https://bit.ly/3GjpodB
      self.model.get_layer('vgg16').trainable = True
      self.model.get_layer('vgg16').kernel_regularizer = keras.regularizers.l2(l2=0.00001)
      self.model.compile( loss='categorical_crossentropy',
                  optimizer=Adam(learning_rate=0.00001),
                  metrics=['acc'])
      self.model.summary()
      
        
        
    
    def load_VGG16_resized(self,input_shape):
        vgg_model = VGG16(weights="imagenet",include_top=False, input_shape=input_shape) 
        for layer in vgg_model.layers[:]:
            layer.trainable = False

        

        self.model = Sequential()
        self.model.add(vgg_model) 
        self.model.add(Flatten())
        self.model.add(Dropout(0.2))
        self.model.add(BatchNormalization())
        self.model.add(Dense(256, activation='softmax'))
        self.model.compile( loss='categorical_crossentropy',
                  optimizer=Adam(learning_rate=0.0001),
                  metrics=['acc'])
        self.model.summary()


    def train(self):    

        self.history = self.model.fit(
          self.train_generator,
          steps_per_epoch=
             self.train_generator.samples/self.train_generator.batch_size,
          epochs=EPOCHS,
          validation_data=self.validation_generator,
          validation_steps=
             self.validation_generator.samples/self.validation_generator.batch_size,
          verbose=1,
          callbacks=self.callbacks)
    def save(self,name):
        self.model.save(name)
        
        
        
    def predict(self,dataPath):
        datagen = ImageDataGenerator(rescale=1./255)
        test_generator = datagen.flow_from_directory(
        dataPath,
        target_size=(256,256),
        batch_size=32,
        class_mode=None,  # only data, no labels
        shuffle=False)
        
        #This is a num_test_samples X no classes matrix
        #With probabilites for each class row wise
        probabilities = caltechClassifier.model.predict(test_generator, 1)
        
        #Get the indices of classes of highest probability
        act_pred = np.argmax(probabilities,axis=1)
        
        #Get labels from class indices
        predictions = [self.labels[k] for k in act_pred]
        
        #Return a tuple of filename and their classes
        return(tuple(zip(gen.filenames,predictions)))
        
        
path_training = "./Caltech_256_Train"
model_path = '/content/drive/MyDrive/datasets/caltech_data/caltech256_v'+ LOAD_VERSION+'.hd5'
#model_path = 'model_interim_30.hd5'
#model_path='/content/drive/MyDrive/datasets/caltech_data/caltech256_v6.hd5'

caltechClassifier = Trainer(training_path=path_training,
                            batch_size=BATCH_SIZE,
                            model_path = model_path
                            )




Found 19020 images belonging to 256 classes.
Found 1988 images belonging to 256 classes.
Loading pretrained model from /content/drive/MyDrive/datasets/caltech_data/caltech256_v6.hd5
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Functional)           (None, 8, 8, 512)         14714688  
_________________________________________________________________
flatten_3 (Flatten)          (None, 32768)             0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 32768)             0         
_________________________________________________________________
batch_normalization_3 (Batch (None, 32768)             131072    
_________________________________________________________________
dense_3 (Dense)              (None, 256)               8388864   
Total params: 23,234,624
Trainable params: 8,454,400
Non-trainable par

In [9]:
caltechClassifier.train()
caltechClassifier.model.save(DRIVE_PATH+"caltech256_" + VERSION + ".hd5")




Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
INFO:tensorflow:Assets written to: model_10_7.hd5/assets
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
INFO:tensorflow:Assets written to: model_20_7.hd5/assets
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
 42/297 [===>..........................] - ETA: 4:44 - loss: 0.7990 - acc: 0.7801

KeyboardInterrupt: ignored

In [10]:
!ls /content/drive/MyDrive/datasets/caltech_data/

 caltech2565.hd5		        model_10_7.hd5
 caltech2566.hd5		        model_20_7.hd5
 caltech-256-image-classification.zip  'Model information.gsheet'
 Caltech_256_Train.zip		        submission_22stOct_1118AM.csv
 caltech256_v3.hd5		        submission_22stOct_1233.csv
 caltech256_v4.hd5		        submission_23OCT2021_0728.csv
 caltech256_v6.hd5		        tb
 kaggle.json			        tensorboardLogs.zip


In [11]:
caltechClassifier.model.save(DRIVE_PATH+"caltech256_"+VERSION+".hd5")

INFO:tensorflow:Assets written to: /content/drive/MyDrive/datasets/caltech_data/caltech256_7.hd5/assets


In [12]:
!zip -r tensorboardLogs.zip logs
!mv tensorboardLogs.zip /content/drive/MyDrive/datasets/caltech_data/.

  adding: logs/ (stored 0%)
  adding: logs/train/ (stored 0%)
  adding: logs/train/events.out.tfevents.1635082570.bba7c3a850b3.78.0.v2 (deflated 78%)
  adding: logs/train/plugins/ (stored 0%)
  adding: logs/train/plugins/profile/ (stored 0%)
  adding: logs/train/plugins/profile/2021_10_24_13_36_37/ (stored 0%)
  adding: logs/train/plugins/profile/2021_10_24_13_36_37/bba7c3a850b3.input_pipeline.pb (deflated 57%)
  adding: logs/train/plugins/profile/2021_10_24_13_36_37/bba7c3a850b3.memory_profile.json.gz (stored 0%)
  adding: logs/train/plugins/profile/2021_10_24_13_36_37/bba7c3a850b3.xplane.pb (deflated 80%)
  adding: logs/train/plugins/profile/2021_10_24_13_36_37/bba7c3a850b3.trace.json.gz (deflated 0%)
  adding: logs/train/plugins/profile/2021_10_24_13_36_37/bba7c3a850b3.overview_page.pb (deflated 60%)
  adding: logs/train/plugins/profile/2021_10_24_13_36_37/bba7c3a850b3.tensorflow_stats.pb (deflated 71%)
  adding: logs/train/plugins/profile/2021_10_24_13_36_37/bba7c3a850b3.kernel_sta

In [12]:
!zip -r model.zip model_interim_30.hd5


zip error: Nothing to do! (try: zip -r model.zip . -i model_interim_30.hd5)


# New section

In [13]:
!unzip /content/drive/MyDrive/datasets/caltech_data/caltech-256-image-classification.zip &> /dev/null



In [14]:
!mkdir -p caltech_test_data 
!mv test caltech_test_data
!ls caltech_test_data/test

1000.jpg  2034.jpg  3068.jpg  4100.jpg	5134.jpg  6168.jpg  7200.jpg  8234.jpg
1001.jpg  2035.jpg  3069.jpg  4101.jpg	5135.jpg  6169.jpg  7201.jpg  8235.jpg
1002.jpg  2036.jpg  306.jpg   4102.jpg	5136.jpg  616.jpg   7202.jpg  8236.jpg
1003.jpg  2037.jpg  3070.jpg  4103.jpg	5137.jpg  6170.jpg  7203.jpg  8237.jpg
1004.jpg  2038.jpg  3071.jpg  4104.jpg	5138.jpg  6171.jpg  7204.jpg  8238.jpg
1005.jpg  2039.jpg  3072.jpg  4105.jpg	5139.jpg  6172.jpg  7205.jpg  8239.jpg
1006.jpg  203.jpg   3073.jpg  4106.jpg	513.jpg   6173.jpg  7206.jpg  823.jpg
1007.jpg  2040.jpg  3074.jpg  4107.jpg	5140.jpg  6174.jpg  7207.jpg  8240.jpg
1008.jpg  2041.jpg  3075.jpg  4108.jpg	5141.jpg  6175.jpg  7208.jpg  8241.jpg
1009.jpg  2042.jpg  3076.jpg  4109.jpg	5142.jpg  6176.jpg  7209.jpg  8242.jpg
100.jpg   2043.jpg  3077.jpg  410.jpg	5143.jpg  6177.jpg  720.jpg   8243.jpg
1010.jpg  2044.jpg  3078.jpg  4110.jpg	5144.jpg  6178.jpg  7210.jpg  8244.jpg
1011.jpg  2045.jpg  3079.jpg  4111.jpg	5145.jpg  6179.jpg  7211.jp

In [15]:
def predict(model,dataPath):
        datagen = ImageDataGenerator(rescale=1./255)
        test_generator = datagen.flow_from_directory(
        dataPath,
        target_size=(256,256),
        batch_size=32,
        classes=['test'],#Trick the generator into thinking there is only 1 class .. test
        shuffle=False)#Shuffle *has* to be false
        
        #This is a num_test_samples X no classes matrix
        #With probabilites for each class row wise
        probabilities = model.model.predict(test_generator)
        
        #Get the indices of classes of highest probability
        act_pred = np.argmax(probabilities,axis=1)
        
        #Get labels from class indices
        predictions = [model.labels[k] for k in act_pred]
        
        #Return a tuple of filename and their classes
        return(tuple(zip(test_generator.filenames,predictions)))
results = predict(caltechClassifier,dataPath="caltech_test_data")

Found 9177 images belonging to 1 classes.


In [16]:
import csv
def write_results_to_csv(path,
                        results):
  with open(path+'submission_24OCT2021_2139.csv','w') as out:
      csv_out=csv.writer(out)
      csv_out.writerow(['img_path','label'])
      for row in results:
          csv_out.writerow(row)
write_results_to_csv(DRIVE_PATH,results)

In [17]:
#Run at the end
drive.flush_and_unmount()
