In [None]:
import numpy as np
import pandas as pd
import os
import random 
import matplotlib.pyplot as plt
import seaborn as sns
from keras.preprocessing import image
from sklearn.model_selection import train_test_split

In [None]:
import zipfile

# Unpack the test archive first, then the training archive, into the
# current working directory (creates ./test1 and ./train as used below).
for archive_path in ('../input/dogs-vs-cats/test1.zip',
                     '../input/dogs-vs-cats/train.zip'):
    with zipfile.ZipFile(archive_path, 'r') as zip_ref:
        zip_ref.extractall('./')
    

# Reading input files to create training dataset

In [None]:

# Every entry of ./train is one training image.
filenames = os.listdir('./train')

# The label is the part of the file name before the first '.'.
# Anything that is not 'dog' is labelled 'cat'.
categories = [
    'dog' if name.split('.')[0] == 'dog' else 'cat'
    for name in filenames
]

# One row per image: file name plus its class label.
df = pd.DataFrame({'filename': filenames, 'category': categories})

# Showing some data files

In [None]:
df.head()

In [None]:
df['category'].value_counts()

Now take a random sample of 5000 images from the whole dataset into `df1`.

In [None]:
# Draw a fixed-size random subset of the full dataset.
# random_state pins the draw so the subset (and every metric computed from it)
# is the same on each Restart & Run All instead of changing per run.
df1 = df.sample(n = 5000, random_state = 42)
df1.head(10)

# Checking which categories are present in the sample, mainly to verify that the classes are balanced

In [None]:
df1['category'].value_counts()

In [None]:
df1['category'].value_counts().plot(kind = 'bar')

# Sample image

In [None]:
# Pick one training file at random and display it.
sample = random.choice(filenames)

# Store the loaded picture under its own name: assigning it to `image` would
# overwrite the imported `image` module and make this cell fail when re-run.
sample_image = image.load_img('./train/' + sample)

plt.imshow(sample_image)

# Using 20% of the sampled data for validation and 80% for training

In [None]:
# 80/20 split of the sampled frame; both parts get a fresh 0..n-1 index so
# they can be handed to flow_from_dataframe without leftover row labels.
train_df, validate_df = (
    part.reset_index(drop=True)
    for part in train_test_split(df1, test_size = 0.20, random_state = 42)
)

In [None]:
train_df.head()

In [None]:
validate_df.head()

In [None]:
train_df['category'].value_counts().plot(kind = 'bar')

In [None]:
validate_df['category'].value_counts().plot(kind = 'bar')

In [None]:
# Row counts of the two splits.
total_train, total_validate = len(train_df), len(validate_df)

print("Total number of example in training dataset : {0}".format(total_train))
print("Total number of example in validation dataset : {0}".format(total_validate))

In [None]:
from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten

from tensorflow.keras import optimizers

In [None]:
class Model:
    """Builder for the two from-scratch CNN binary classifiers of this notebook.

    Both networks share the same convolutional stack (four Conv2D + MaxPooling2D
    stages with 32, 64, 128 and 128 filters) followed by a 512-unit dense layer
    and a single sigmoid output. They differ only in whether a Dropout layer is
    inserted after flattening.

    Parameters
    ----------
    IMG_WIDTH : int
        Width of the input images in pixels.
    IMG_HEIGHT : int
        Height of the input images in pixels.
    IMG_CHANNELS : int
        Number of colour channels of the input images.
    """

    def __init__(self, IMG_WIDTH, IMG_HEIGHT, IMG_CHANNELS):

        self.IMG_WIDTH = IMG_WIDTH

        self.IMG_HEIGHT = IMG_HEIGHT

        self.IMG_CHANNELS = IMG_CHANNELS

    @property
    def input_shape(self):
        """Input shape as (height, width, channels), built from the stored sizes."""
        return (self.IMG_HEIGHT, self.IMG_WIDTH, self.IMG_CHANNELS)

    def _build_convnet(self, dropout_rate = None):
        """Assemble the shared CNN; add Dropout(dropout_rate) after Flatten if given."""
        network = Sequential()

        # Each stage convolves its input with 3x3 kernels to produce a tensor
        # of feature maps, then halves the spatial size with 2x2 max pooling.
        for stage, n_filters in enumerate((32, 64, 128, 128)):

            if stage == 0:
                # Only the first layer declares the input shape; it now comes
                # from the constructor arguments instead of a hard-coded tuple.
                network.add(Conv2D(n_filters, (3,3), activation = 'relu',
                                   input_shape = self.input_shape))
            else:
                network.add(Conv2D(n_filters, (3,3), activation = 'relu'))

            network.add(MaxPooling2D((2,2)))

        network.add(Flatten())

        if dropout_rate is not None:
            # Dropout layer for preventing the model from overfitting
            network.add(Dropout(dropout_rate))

        network.add(Dense(512, activation = 'relu'))

        # Single sigmoid unit: output is the score for the positive class.
        network.add(Dense(1, activation = 'sigmoid'))

        return network

    def create_model_one(self):
        """Return the plain CNN (no dropout), uncompiled."""
        return self._build_convnet()

    def create_model_two(self):
        """Return the same CNN with a 0.5 Dropout layer after Flatten, uncompiled."""
        return self._build_convnet(dropout_rate = 0.5)

In [None]:
# Input image geometry used for the from-scratch CNNs.
IMG_WIDTH, IMG_HEIGHT, IMG_CHANNELS = 150, 150, 3

# Builder object from which the individual networks are created.
model = Model(IMG_WIDTH, IMG_HEIGHT, IMG_CHANNELS)

# **Model 1**

In [None]:
model_1 = model.create_model_one()

model_1.summary()


# Model 1 - Since this is a two-class classification problem and the network ends with a single sigmoid unit, we use binary_crossentropy as the loss, with RMSprop as the optimizer.

In [None]:
from tensorflow.keras import optimizers

# Binary cross-entropy matches the single sigmoid output unit.
# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by newer Keras optimizers.
# The metric is named 'acc' on purpose: later cells read history['acc'] / ['val_acc'].
model_1.compile(loss = 'binary_crossentropy', optimizer = optimizers.RMSprop(learning_rate = 1e-4), metrics =['acc'])

# The data is currently in JPEG format, so to feed the images to the neural network we need to do the following steps:

# Firstly  Read the image file

# then Decode those JPEG content to RGB grids of pixels

# then do Convert these into floating-point tensors

# and finally Rescale the pixel values from range [0-255] to [0-1] as NN prefers to deal with smaller input values.

In [None]:
from keras.preprocessing.image import ImageDataGenerator
# generator is an object that acts as an iterator. Generators are built using the yield operator.

In [None]:
train_imgdatagen = ImageDataGenerator(rescale = 1./255)

valid_imgdatagen = ImageDataGenerator(rescale = 1./255)

In [None]:
# Arguments shared by the training and validation iterators of model 1.
flow_kwargs_m1 = dict(
    directory = "./train",      # both splits read their files from ./train
    x_col = 'filename',
    y_col = 'category',
    target_size = (150, 150),   # resize image to 150x150
    batch_size = 20,
    class_mode = 'binary'       # one 0/1 label per image
)

train_generator_m1 = train_imgdatagen.flow_from_dataframe(train_df, **flow_kwargs_m1)

# Validation batches come from the dedicated validation generator rather than
# the training one, so adding augmentation to the training generator later
# cannot leak into validation.
validation_generator_m1 = valid_imgdatagen.flow_from_dataframe(validate_df, **flow_kwargs_m1)

# That's correct: the training data contains 4000 images in 2 classes, and the validation data contains 1000 images in 2 classes.

# Now lets look at the output of these generators

In [None]:
# Pull exactly one batch from the training iterator and report its shapes.
data_batch, labels_batch = next(iter(train_generator_m1))

print('Data batch shape: {0}'. format(data_batch.shape))
print('Labels batch shape: {0}'. format(labels_batch.shape))

 There are 20 samples in each batch.

# MODEL-2

# how to  get more accuracy?

# Let's try to build one more predictive model using a pretrained model.

 # Why a pretrained model: the portability of learned features across different problems is a key advantage of deep learning over many older, shallow-learning approaches, and it makes deep learning very effective for small-data problems.



In [None]:
from tensorflow.keras.applications import VGG16

In [None]:
conv_base = VGG16(weights = 'imagenet', include_top = False, input_shape = (150,150,3))

conv_base.summary()

# The final feature map has a shape (4, 4, 512). That's the feature on top of which we will stick densely connected classifier. Adding a densely connected classifier on top of convolution base.

In [None]:
# Stack a small dense classifier on top of the VGG16 convolutional base:
# flatten the base's feature map, one hidden layer, one sigmoid output.
model_3 = Sequential([
    conv_base,
    Flatten(),
    Dense(256, activation = 'relu'),
    Dense(1, activation = 'sigmoid'),
])

In [None]:
model_3.summary()

# Freezing conv_base

In [None]:
print("The number of trainable weights before freezing the conv base: ", len(model_3.trainable_weights))

In [None]:
conv_base.trainable = False
print("The number of trainable weights after freezing the conv base: ", len(model_3.trainable_weights))

# Fine-tuning: freezing the layers of conv_base up to a specific one. It consists of unfreezing a few of the top layers of the frozen base used for feature extraction and jointly training both the newly added part of the model (in this case, the fully connected classifier) and these top layers. This is called fine-tuning because it slightly adjusts the more abstract representations of the model being reused, in order to make them more relevant for the problem.




In [None]:
# Re-enable training on the base as a whole, then decide layer by layer.
conv_base.trainable = True

# Flips to True at 'block5_conv1' and stays True for every later layer,
# so only block5_conv1 and the layers after it remain trainable.
set_trainable = False

for layer in conv_base.layers:

    set_trainable = set_trainable or layer.name == 'block5_conv1'

    layer.trainable = set_trainable

# Callbacks



In [None]:
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

# To prevent overfitting, we stop training once the validation loss has not improved for 10 consecutive epochs

In [None]:
earlystop = EarlyStopping(patience=10)

In [None]:
# Halve the learning rate when validation accuracy has not improved for 2 epochs.
# The floor must lie below the optimizer's starting rate: model_3 is compiled
# with 1e-5, so the previous min_lr of 1e-5 meant the rate could never drop.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc',
                                            patience=2,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=1e-7)


In [None]:
callbacks = [earlystop, learning_rate_reduction]

In [None]:
# Compile after the trainable flags were set so they take effect.
# A small learning rate keeps updates to the unfrozen pretrained layers gentle.
# `learning_rate` is the supported argument name (the `lr` alias is deprecated).
model_3.compile(loss = 'binary_crossentropy', optimizer = optimizers.RMSprop(learning_rate = 1e-5), metrics =['acc'])


# FITTING THE MODEL ONTO THE TRAINING DATA

In [None]:
from PIL import Image

In [None]:
# Train model 1. The step counts are taken from the iterators themselves so
# that one epoch is exactly one pass over train_df and validation covers all
# of validate_df once. The previous hard-coded 100 steps x batch 20 visited
# only 2000 of the training images per epoch.
history_1 = model_1.fit(
            train_generator_m1,

            epochs = 30,

            steps_per_epoch = len(train_generator_m1),

            validation_data = validation_generator_m1,

            validation_steps = len(validation_generator_m1)
            )

In [None]:
print(np.mean(history_1.history['acc']))

In [None]:
print(np.mean(history_1.history['val_acc']))

# Visualizing Loss and accuracy during training

In [None]:
# Accuracy per epoch: training (black) vs. validation (blue).
plt.plot(history_1.history['acc'], color = 'black', label = 'Training')

plt.plot(history_1.history['val_acc'], color = 'blue', label = 'Validation')

plt.title('Training and validation accuracy of model 1')

plt.xlabel('Epochs')

plt.ylabel('Accuracy')

plt.legend()

plt.show()

# Loss per epoch: same colour convention. The y-axis is the loss, not accuracy.
plt.plot(history_1.history['loss'], color = 'black', label = 'Training')

plt.plot(history_1.history['val_loss'], color = 'blue', label = 'Validation')

plt.title('Training and validation loss of model 1')

plt.xlabel('Epochs')

plt.ylabel('Loss')

plt.legend()

plt.show()

# MODEL 2 (pretrained VGG16 base; named `model_3` in the code)

In [None]:
# Training images are rescaled to [0, 1] and randomly augmented
# (rotation, shifts, shear, zoom, horizontal flip).
train_imgdatagen_m2 = ImageDataGenerator(
    rescale = 1./255,
    rotation_range = 15,
    width_shift_range = 0.1,
    height_shift_range = 0.1,
    shear_range = 0.1,
    zoom_range = 0.2,
    horizontal_flip = True
)

# Validation images are only rescaled, never augmented.
valid_imgdatagen_m2 = ImageDataGenerator(rescale = 1./255)

# Arguments common to both iterators.
flow_kwargs_m2 = dict(
    directory = "./train",
    x_col = 'filename',
    y_col = 'category',
    target_size = (150, 150),   # resize image to 150x150
    batch_size = 32,
    class_mode = 'binary'
)

train_generator_m2 = train_imgdatagen_m2.flow_from_dataframe(train_df, **flow_kwargs_m2)

validation_generator_m2 = valid_imgdatagen_m2.flow_from_dataframe(validate_df, **flow_kwargs_m2)

In [None]:
# Train the VGG16-based model with the early-stopping / LR-reduction callbacks.
# Step counts come from the iterators: the previous hard-coded values asked for
# 50 validation batches of 32 (1600 images) although validate_df is smaller,
# and 100 training batches of 32 did not cover all of train_df.
history_3 = model_3.fit(
            train_generator_m2,

            epochs = 30,

            steps_per_epoch = len(train_generator_m2),

            validation_data = validation_generator_m2,

            validation_steps = len(validation_generator_m2),

            callbacks = callbacks
            )

In [None]:
print(np.mean(history_3.history['acc']))

In [None]:
print(np.mean(history_3.history['val_acc']))

In [None]:
# Accuracy per epoch: training (black) vs. validation (blue).
plt.plot(history_3.history['acc'], color = 'black', label = 'Training')

plt.plot(history_3.history['val_acc'], color = 'blue', label = 'Validation')

plt.title('Training and validation accuracy of model 3')

plt.xlabel('Epochs')

plt.ylabel('Accuracy')

plt.legend()

plt.show()

# Loss per epoch: same colour convention. The y-axis is the loss, not accuracy.
plt.plot(history_3.history['loss'], color = 'black', label = 'Training')

plt.plot(history_3.history['val_loss'], color = 'blue', label = 'Validation')

plt.title('Training and validation loss of model 3')

plt.xlabel('Epochs')

plt.ylabel('Loss')

plt.legend()

plt.show()

# PREPARE TESTING DATASET AND VISUALIZE IT

In [None]:
from tensorflow.keras.models import load_model

In [None]:
# Persist the trained network before reloading it: no earlier cell writes
# 'model_3.h5', so loading alone fails on a fresh Restart & Run All.
model_3.save('model_3.h5')

# NOTE: this rebinds `model`, which previously held the Model builder instance;
# from here on `model` is the reloaded Keras network used for prediction.
model = load_model('model_3.h5')

In [None]:
# One row per file found in ./test1; the file name is stored in column 'id'.
test_filenames = os.listdir('./test1')

test_df = pd.DataFrame({'id' : test_filenames})

In [None]:
test_df.head()

In [None]:
# Test images get the same [0, 1] rescaling as training/validation images.
test_gen = ImageDataGenerator(rescale = 1./255)

# Unlabelled iterator over ./test1.
test_generator = test_gen.flow_from_dataframe(
    dataframe = test_df,
    directory = "./test1",
    x_col = 'id',
    y_col = None,                # no label column
    class_mode = None,           # yield image batches only
    target_size = (150, 150),    # resize image to 150x150
    batch_size = 20,
    shuffle = False,             # keep batches in test_df row order
    validate_filenames = False
)

#  PREDICTING ON TESTING DATA

In [None]:
predictions = model.predict(test_generator)

In [None]:
# Threshold the sigmoid scores at 0.5: strictly greater -> 1, otherwise 0.
pred = (np.ravel(predictions) > 0.5).astype(int).tolist()

test_df['category'] = pred

In [None]:
test_df['category'].value_counts().plot.bar()

In [None]:
# Invert the generator's {class name: index} mapping into {index: class name}
# and translate the numeric predictions into class names.
label_map = {index: name for name, index in train_generator_m2.class_indices.items()}

test_df['category'] = test_df['category'].replace(label_map)

In [None]:
test_df['category'] = test_df['category'].replace({ 'dog': 1, 'cat': 0 })

In [None]:
test_df['category'].value_counts().plot.bar()

In [None]:
# Build the submission table with both columns: the image id (file name up to
# the first '.') and the predicted label. The earlier version dropped 'id'
# together with 'category', leaving a CSV that contained only 'label'.
submission_df = pd.DataFrame({
    'id': test_df['id'].str.split('.').str[0],
    'label': test_df['category'],
})

submission_df.to_csv('submission.csv', index=False)

**What is a Pre-trained Model?**
Simply put, a pre-trained model is a model created by someone else to solve a similar problem. Instead of building a model from scratch to solve a similar problem, you use a model trained on another problem as a starting point.
By using pre-trained models which have been previously trained on large datasets, we can directly use the weights and architecture obtained and apply the learning to our problem statement. This is known as transfer learning. We transfer the learning of the pre-trained model to our specific problem statement.
A pre-trained model may not be 100% accurate in your application, but it saves the huge effort required to re-invent the wheel. Let me show this to you with a recent example.


# How can I use Pre-trained Models?
What is our objective when we train a neural network? We wish to identify the correct weights for the network by multiple forward and backward iterations. By using pre-trained models which have been previously trained on large datasets, we can directly use the weights and architecture obtained and apply the learning on our problem statement. This is known as transfer learning. We “transfer the learning” of the pre-trained model to our specific problem statement.
We should be very careful when choosing which pre-trained model to use in our case. If the problem statement we have at hand is very different from the one on which the pre-trained model was trained – the predictions we would get would be very inaccurate. For example, a model previously trained for speech recognition would work horribly if we tried to use it to identify objects.
These pre-trained networks demonstrate a strong ability to generalize to images outside the dataset via transfer learning. We make modifications in the pre-existing model by fine-tuning the model. Since we assume that the pre-trained network has been trained quite well, we would not want to modify the weights too soon and too much. While modifying we generally use a learning rate smaller than the one used for initially training the model.
******

# How can I use Pre-trained Models?

Our objective when we train a neural network is to identify the correct weights for the network by multiple forward and backward iterations. By using pre-trained models which have been previously trained on large datasets, we can directly use the weights and architecture obtained and apply the learning on our problem statement. This is known as transfer learning. We “transfer the learning” of the pre-trained model to our specific problem statement.
We should be very careful while choosing what pre-trained model you should use in your case. If the problem statement we have at hand is very different from the one on which the pre-trained model was trained – the prediction we would get would be very inaccurate. For example, a model previously trained for speech recognition would work horribly if we try to use it to identify objects using it.
These pre-trained networks demonstrate a strong ability to generalize to images outside the dataset via transfer learning. We make modifications in the pre-existing model by fine-tuning the model. Since we assume that the pre-trained network has been trained quite well, we would not want to modify the weights too soon and too much. While modifying we generally use a learning rate smaller than the one used for initially training the model.

# Ways to Fine tune the model
Feature extraction – We can use a pre-trained model as a feature extraction mechanism. What we can do is that we can remove the output layer and then use the entire network as a fixed feature extractor for the new data set.
Use the Architecture of the pre-trained model – What we can do is that we use architecture of the model while we initialize all the weights randomly and train the model according to our dataset again.
Train some layers while freeze others – Another way to use a pre-trained model is to train is partially. What we can do is we keep the weights of initial layers of the model frozen while we retrain only the higher layers. We can try and test as to how many layers to be frozen and how many to be trained.

*****