In [1]:
# This notebook contains the best-performing kernels for the Dog Breed Identification Kaggle Competition

# Downloading Dataset

Download the data from the following link

Dog Images :https://www.kaggle.com/c/dog-breed-identification/data

Download the dog dataset. Unzip the folder and place it in the repository

# Kernel - 1

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
from os import listdir, makedirs
from os.path import join, exists, expanduser
from tqdm import tqdm
from sklearn.metrics import log_loss, accuracy_score
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16
from keras.applications.vgg19 import VGG19
from keras.applications.resnet50 import ResNet50
from keras.applications import xception
from keras.applications import inception_v3
from keras.applications.vgg16 import preprocess_input, decode_predictions
from sklearn.linear_model import LogisticRegression

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


### Use top 16 classes

Using all the images would take more than the 1 hour kernel limit. Let's focus on the most frequent 16 breeds.

In [2]:
# Experiment configuration. INPUT_SIZE is not set here; it is assigned later,
# right before the images are loaded for each network.
NUM_CLASSES = 16  # number of most frequent breeds to keep
SEED = 1987       # seed for the random train/validation split and the classifiers
data_dir = 'C:/Users/Nishant/ADS/Project/Dog-Project-Kaggle Competition' # Path of your Project Folder  

# Training labels and the submission template shipped with the dataset.
labels = pd.read_csv(join(data_dir, 'labels.csv'))
sample_submission = pd.read_csv(join(data_dir, 'sample_submission.csv'))

# Sanity check: number of files in each image folder next to the number of
# rows in the matching CSV.
for folder, table in (('train', labels), ('test', sample_submission)):
    print(len(listdir(join(data_dir, folder))), len(table))

10222 10222
10357 10357


In [3]:
# The NUM_CLASSES breeds with the most training images, most frequent first.
selected_breed_list = list(labels.groupby('breed').count().sort_values(by='id', ascending=False).head(NUM_CLASSES).index)

# Keep only those breeds. .copy() gives an independent frame so the column
# assignments below do not write into a slice of the original
# (avoids pandas' SettingWithCopyWarning).
labels = labels[labels['breed'].isin(selected_breed_list)].copy()
labels['target'] = 1
labels['rank'] = labels.groupby('breed').rank()['id']

# One-hot table: one row per image id, one column per breed.
# - pivot() arguments are passed by keyword (positional use is rejected by
#   recent pandas versions).
# - pivot() sorts its index, so the rows are re-ordered to follow `labels`;
#   the boolean masks below (and the image arrays built from labels['id'])
#   are in `labels` row order and must line up with y_train row by row.
labels_pivot = (
    labels.pivot(index='id', columns='breed', values='target')
    .reindex(index=labels['id'])
    .reset_index()
    .fillna(0)
)

# Reproducible random split: roughly 80% train / 20% validation.
np.random.seed(seed=SEED)
rnd = np.random.random(len(labels))
train_idx = rnd < 0.8
valid_idx = rnd >= 0.8

# One-hot targets with columns ordered as in selected_breed_list.
y_train = labels_pivot[selected_breed_list].values
ytr = y_train[train_idx]
yv = y_train[valid_idx]

# Resizing the Image

In [4]:
def read_img(img_id, train_or_test, size):
    """Load one image from the dataset folder, resized on load.
    # Arguments
        img_id: string, image id; the file read is '<img_id>.jpg'.
        train_or_test: string 'train' or 'test', the sub-folder of `data_dir`.
        size: passed to keras' `load_img` as `target_size`.
    # Returns
        The image converted with `img_to_array`.
    """
    img_path = join(data_dir, train_or_test, '%s.jpg' % img_id)
    loaded = image.load_img(img_path, target_size=size)
    return image.img_to_array(loaded)

In [5]:
INPUT_SIZE = 224
POOLING = 'avg'

# Pre-allocate one float32 slot per selected training image.
x_train = np.zeros((len(labels), INPUT_SIZE, INPUT_SIZE, 3), dtype='float32')

# tqdm wraps the id column itself (not enumerate), so it can read the length
# and display a real progress bar with total and ETA.
for i, img_id in enumerate(tqdm(labels['id'])):
    img = read_img(img_id, 'train', (INPUT_SIZE, INPUT_SIZE))
    # preprocess_input here is the one imported from keras.applications.vgg16;
    # it is applied to a single-image batch.
    x = preprocess_input(np.expand_dims(img.copy(), axis=0))
    x_train[i] = x
print('Train Images shape: {} size: {:,}'.format(x_train.shape, x_train.size))


1777it [00:11, 151.98it/s]


Train Images shape: (1777, 224, 224, 3) size: 267,488,256


# ResNet50

Extract Resnet50 bottleneck features

In [6]:
# Split the preprocessed 224x224 images with the train/validation masks.
Xtr, Xv = x_train[train_idx], x_train[valid_idx]
print(tuple(arr.shape for arr in (Xtr, Xv, ytr, yv)))

# ImageNet ResNet50 without its classifier head; POOLING selects the pooling
# applied to the last feature maps.
resnet50_bottleneck = ResNet50(weights='imagenet', include_top=False, pooling=POOLING)
train_resnet50_bf = resnet50_bottleneck.predict(Xtr, batch_size=32, verbose=1)
valid_resnet50_bf = resnet50_bottleneck.predict(Xv, batch_size=32, verbose=1)

for message, feats in (
    ('Resnet50 train bottleneck features shape: {} size: {:,}', train_resnet50_bf),
    ('Resnet50 valid bottleneck features shape: {} size: {:,}', valid_resnet50_bf),
):
    print(message.format(feats.shape, feats.size))

((1409, 224, 224, 3), (368, 224, 224, 3), (1409, 16), (368, 16))
Resnet50 train bottleneck features shape: (1409, 2048) size: 2,885,632
Resnet50 valid bottleneck features shape: (368, 2048) size: 753,664


LogReg on Resnet50 bottleneck features

In [7]:
# Collapse each one-hot row into its column position (0 .. NUM_CLASSES-1),
# the single-label form the classifier is fitted on.
train_class_idx = (ytr * range(NUM_CLASSES)).sum(axis=1)

logreg = LogisticRegression(multi_class='multinomial', solver='lbfgs', random_state=SEED)
logreg.fit(train_resnet50_bf, train_class_idx)

# Class probabilities and hard predictions for the validation features.
valid_probs = logreg.predict_proba(valid_resnet50_bf)
valid_preds = logreg.predict(valid_resnet50_bf)

In [8]:
# Validation targets as class positions, matching how the classifier was fitted.
valid_class_idx = (yv * range(NUM_CLASSES)).sum(axis=1)
resnet50_logloss = log_loss(yv, valid_probs)
resnet50_accuracy = accuracy_score(valid_class_idx, valid_preds)
print('Validation Resnet50 LogLoss {}'.format(resnet50_logloss))
print('Validation Resnet50 Accuracy {}'.format(resnet50_accuracy))

Validation Resnet50 LogLoss 0.19952801389568334
Validation Resnet50 Accuracy 0.9402173913043478


Resnet50 Accuracy is 94%

# VGG16

Extract VGG16 bottleneck features

In [9]:
# Split the preprocessed 224x224 images with the train/validation masks.
Xtr, Xv = x_train[train_idx], x_train[valid_idx]
print(tuple(arr.shape for arr in (Xtr, Xv, ytr, yv)))

# ImageNet VGG16 without its classifier head; POOLING selects the pooling
# applied to the last feature maps.
vgg_bottleneck = VGG16(weights='imagenet', include_top=False, pooling=POOLING)
train_vgg_bf = vgg_bottleneck.predict(Xtr, batch_size=32, verbose=1)
valid_vgg_bf = vgg_bottleneck.predict(Xv, batch_size=32, verbose=1)

for message, feats in (
    ('VGG train bottleneck features shape: {} size: {:,}', train_vgg_bf),
    ('VGG valid bottleneck features shape: {} size: {:,}', valid_vgg_bf),
):
    print(message.format(feats.shape, feats.size))

((1409, 224, 224, 3), (368, 224, 224, 3), (1409, 16), (368, 16))
VGG train bottleneck features shape: (1409, 512) size: 721,408
VGG valid bottleneck features shape: (368, 512) size: 188,416


LogReg on VGG16 bottleneck features

In [10]:
# Collapse each one-hot row into its column position (0 .. NUM_CLASSES-1).
train_class_idx = (ytr * range(NUM_CLASSES)).sum(axis=1)

logregVgg16 = LogisticRegression(multi_class='multinomial', solver='lbfgs', random_state=SEED)
logregVgg16.fit(train_vgg_bf, train_class_idx)

# Class probabilities and hard predictions for the validation features.
valid_probsVgg16 = logregVgg16.predict_proba(valid_vgg_bf)
valid_predsVgg16 = logregVgg16.predict(valid_vgg_bf)

In [11]:
# Validation targets as class positions, matching how the classifier was fitted.
valid_class_idx = (yv * range(NUM_CLASSES)).sum(axis=1)
vgg16_logloss = log_loss(yv, valid_probsVgg16)
vgg16_accuracy = accuracy_score(valid_class_idx, valid_predsVgg16)
print('Validation VGG16 LogLoss {}'.format(vgg16_logloss))
print('Validation VGG16 Accuracy {}'.format(vgg16_accuracy))

Validation VGG16 LogLoss 0.35206014208503983
Validation VGG16 Accuracy 0.9184782608695652


VGG16 Accuracy is 91.84%

# VGG19

Extract VGG19 bottleneck features

In [12]:
# Split the preprocessed 224x224 images with the train/validation masks.
Xtr, Xv = x_train[train_idx], x_train[valid_idx]
print(tuple(arr.shape for arr in (Xtr, Xv, ytr, yv)))

# ImageNet VGG19 without its classifier head; POOLING selects the pooling
# applied to the last feature maps.
vgg19_bottleneck = VGG19(weights='imagenet', include_top=False, pooling=POOLING)
train_vgg19_bf = vgg19_bottleneck.predict(Xtr, batch_size=32, verbose=1)
valid_vgg19_bf = vgg19_bottleneck.predict(Xv, batch_size=32, verbose=1)

for message, feats in (
    ('VGG19 train bottleneck features shape: {} size: {:,}', train_vgg19_bf),
    ('VGG19 valid bottleneck features shape: {} size: {:,}', valid_vgg19_bf),
):
    print(message.format(feats.shape, feats.size))

((1409, 224, 224, 3), (368, 224, 224, 3), (1409, 16), (368, 16))
VGG19 train bottleneck features shape: (1409, 512) size: 721,408
VGG19 valid bottleneck features shape: (368, 512) size: 188,416


LogReg on VGG19 bottleneck features

In [13]:
# Collapse each one-hot row into its column position (0 .. NUM_CLASSES-1).
train_class_idx = (ytr * range(NUM_CLASSES)).sum(axis=1)

logregVgg19 = LogisticRegression(multi_class='multinomial', solver='lbfgs', random_state=SEED)
logregVgg19.fit(train_vgg19_bf, train_class_idx)

# Class probabilities and hard predictions for the validation features.
valid_probsVgg19 = logregVgg19.predict_proba(valid_vgg19_bf)
valid_predsVgg19 = logregVgg19.predict(valid_vgg19_bf)

In [14]:
# Validation targets as class positions, matching how the classifier was fitted.
valid_class_idx = (yv * range(NUM_CLASSES)).sum(axis=1)
vgg19_logloss = log_loss(yv, valid_probsVgg19)
vgg19_accuracy = accuracy_score(valid_class_idx, valid_predsVgg19)
print('Validation VGG19 LogLoss {}'.format(vgg19_logloss))
print('Validation VGG19 Accuracy {}'.format(vgg19_accuracy))

Validation VGG19 LogLoss 0.33411898533364837
Validation VGG19 Accuracy 0.9239130434782609


 VGG19 Accuracy is 92.39%

# Xception

In [15]:
# The images are reloaded at 299x299 and preprocessed with Xception's own
# preprocess_input.
INPUT_SIZE = 299
POOLING = 'avg'

# Pre-allocate one float32 slot per selected training image.
x_trainX = np.zeros((len(labels), INPUT_SIZE, INPUT_SIZE, 3), dtype='float32')

# tqdm wraps the id column itself (not enumerate), so it can read the length
# and display a real progress bar with total and ETA.
for i, img_id in enumerate(tqdm(labels['id'])):
    img = read_img(img_id, 'train', (INPUT_SIZE, INPUT_SIZE))
    x = xception.preprocess_input(np.expand_dims(img.copy(), axis=0))
    x_trainX[i] = x
print('Train Images shape: {} size: {:,}'.format(x_trainX.shape, x_trainX.size))

1777it [00:18, 95.51it/s]


Train Images shape: (1777, 299, 299, 3) size: 476,596,731


Extract Xception bottleneck features

In [16]:
# Split the preprocessed 299x299 images with the train/validation masks.
Xtr, Xv = x_trainX[train_idx], x_trainX[valid_idx]
print(tuple(arr.shape for arr in (Xtr, Xv, ytr, yv)))

# ImageNet Xception without its classifier head; POOLING selects the pooling
# applied to the last feature maps.
xception_bottleneck = xception.Xception(weights='imagenet', include_top=False, pooling=POOLING)
train_x_bf = xception_bottleneck.predict(Xtr, batch_size=32, verbose=1)
valid_x_bf = xception_bottleneck.predict(Xv, batch_size=32, verbose=1)

for message, feats in (
    ('Xception train bottleneck features shape: {} size: {:,}', train_x_bf),
    ('Xception valid bottleneck features shape: {} size: {:,}', valid_x_bf),
):
    print(message.format(feats.shape, feats.size))

((1409, 299, 299, 3), (368, 299, 299, 3), (1409, 16), (368, 16))
Xception train bottleneck features shape: (1409, 2048) size: 2,885,632
Xception valid bottleneck features shape: (368, 2048) size: 753,664


LogReg on Xception bottleneck features

In [17]:
# Collapse each one-hot row into its column position (0 .. NUM_CLASSES-1).
train_class_idx = (ytr * range(NUM_CLASSES)).sum(axis=1)

# NOTE: this rebinds `logreg`, the name also used for the ResNet50 classifier.
logreg = LogisticRegression(multi_class='multinomial', solver='lbfgs', random_state=SEED)
logreg.fit(train_x_bf, train_class_idx)

# Class probabilities and hard predictions for the validation features.
valid_probsX = logreg.predict_proba(valid_x_bf)
valid_predsX = logreg.predict(valid_x_bf)


In [18]:
# Validation targets as class positions, matching how the classifier was fitted.
valid_class_idx = (yv * range(NUM_CLASSES)).sum(axis=1)
xception_logloss = log_loss(yv, valid_probsX)
xception_accuracy = accuracy_score(valid_class_idx, valid_predsX)
print('Validation Xception LogLoss {}'.format(xception_logloss))
print('Validation Xception Accuracy {}'.format(xception_accuracy))

Validation Xception LogLoss 0.06829603188561884
Validation Xception Accuracy 0.9809782608695652


Xception Accuracy is 98%

-------------------------------------------

# Kernel 2

In [19]:
import numpy as np 
import pandas as pd 
import keras
from keras.applications.vgg19 import VGG19
from keras.models import Model
from keras.layers import Dense, Dropout, Flatten

import os
from tqdm import tqdm
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import cv2



First we will read in the csv's so we can see some more information on the filenames and breeds

In [20]:
# Training labels for Kernel 2.
# NOTE(review): the absolute path is machine-specific -- point it at your own project folder.
df_train = pd.read_csv('C:/Users/Nishant/ADS/Project/Dog-Project-Kaggle Competition/labels.csv')
# path of labels.csv inside your project folder

# Submission template; its 'id' column is used later to read the test images.
df_test = pd.read_csv('C:/Users/Nishant/ADS/Project/Dog-Project-Kaggle Competition/sample_submission.csv')
# path of sample_submission.csv inside your project folder

In [21]:
# Preview the first rows of the label table.
df_train.head(20)


Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever
5,002211c81b498ef88e1b40b9abf84e1d,bedlington_terrier
6,00290d3e1fdd27226ba27a8ce248ce85,bedlington_terrier
7,002a283a315af96eaea0e28e7163b21b,borzoi
8,003df8b8a8b05244b1d920bb6cf451f9,basenji
9,0042188c895a2f14ef64a918ed9c7b64,scottish_deerhound


We can see that the breed needs to be one-hot encoded for the final submission, so we will now do this

In [22]:
# Breed name of every training image, as a standalone Series.
targets_series = pd.Series(df_train['breed'])
# One indicator column per distinct breed; the column labels are reused later
# to name the columns of the submission frame.
one_hot = pd.get_dummies(data=targets_series, sparse=True)

In [23]:
# Array form of the one-hot table, indexed by row position when the training images are read.
one_hot_labels = np.asarray(one_hot)

Next we will read in all of the images for test and train, using a for loop through the values of the csv files. I have also set an im_size variable which sets the size for the image to be re-sized to, 90x90 px, you should play with this number to see how it affects accuracy.

In [24]:
# Side length (pixels) every image is resized to; also the model's input size.
im_size = 90

In [25]:
# Accumulators filled by the image-reading loops below.
# NOTE: x_train / y_train rebind names already used by Kernel 1.
x_train = []
y_train = []
x_test = []

In [26]:
# Start from empty lists so that re-running this cell does not append a
# second copy of every image and label.
x_train = []
y_train = []

# Each row of df_train is (image id, breed); row i matches one_hot_labels[i].
for i, (f, breed) in enumerate(tqdm(df_train.values)):
    img_path = 'C:/Users/Nishant/ADS/Project/Dog-Project-Kaggle Competition/train/{}.jpg'.format(f)
    # path of train folder inside your project folder
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None;
        # fail here with the offending path instead of inside cv2.resize.
        raise OSError('Could not read image: {}'.format(img_path))

    x_train.append(cv2.resize(img, (im_size, im_size)))
    y_train.append(one_hot_labels[i])

100%|███████████████████████████████████████████████████████████████████████████| 10222/10222 [01:23<00:00, 122.46it/s]


In [27]:
# Start from an empty list so that re-running this cell does not append a
# second copy of every test image.
x_test = []

for f in tqdm(df_test['id'].values):
    img_path = 'C:/Users/Nishant/ADS/Project/Dog-Project-Kaggle Competition/test/{}.jpg'.format(f)
    # path of test folder inside your project folder
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None;
        # fail here with the offending path instead of inside cv2.resize.
        raise OSError('Could not read image: {}'.format(img_path))

    x_test.append(cv2.resize(img, (im_size, im_size)))

100%|███████████████████████████████████████████████████████████████████████████| 10357/10357 [01:28<00:00, 117.39it/s]


In [28]:
# Stack the per-image lists into arrays: labels as uint8, pixels as float32
# scaled by 1/255.
# NOTE: x_test is rebound in place, so running this cell twice rescales it again.
PIXEL_SCALE = 255.
y_train_raw = np.array(y_train, dtype=np.uint8)
x_train_raw = np.array(x_train, dtype=np.float32) / PIXEL_SCALE
x_test = np.array(x_test, dtype=np.float32) / PIXEL_SCALE

We check the shape of the outputs to make sure everything went as expected.

In [29]:
# Shapes of the training images, training labels and test images, in that order.
for arr in (x_train_raw, y_train_raw, x_test):
    print(arr.shape)

(10222, 90, 90, 3)
(10222, 120)
(10357, 90, 90, 3)


We can see above that there are 120 different breeds. We can put this in a num_class variable below that can then be used when creating the CNN model.

In [30]:
# Number of breeds = number of one-hot columns; used as the size of the softmax layer.
num_class = y_train_raw.shape[1]

It is important to create a validation set so that you can gauge the performance of your model on independent data, unseen to the model in training. We do this by splitting the current training set (x_train_raw) and the corresponding labels (y_train_raw) so that we set aside 30 % of the data at random and put these in validation sets (X_valid and Y_valid).

This split needs to be improved so that it contains images from every class, with 120 separate classes some can not be represented and so the validation score is not informative.

In [31]:
# Hold out 30% of the data for validation. The split is stratified on the
# breed index (argmax of the one-hot row), so every breed appears in both the
# training and the validation set in proportion to its frequency.
X_train, X_valid, Y_train, Y_valid = train_test_split(
    x_train_raw,
    y_train_raw,
    test_size=0.3,
    random_state=1,
    stratify=y_train_raw.argmax(axis=1),
)

Now we build the CNN architecture. Here we are using a pre-trained model VGG19 which has already been trained to identify many different dog breeds (as well as a lot of other objects from the imagenet dataset; see here for more information: http://image-net.org/about-overview). The code below sets the weights argument to 'imagenet', so the pre-trained weights must be downloadable (or already cached) where you run it. If they cannot be downloaded in your environment, as can happen inside a Kaggle kernel, the only fallback is `weights=None`, which trains the network from scratch.

We then remove the final layer and instead replace it with a single dense layer with the number of nodes corresponding to the number of breed classes we have (120).

# Using VGG19

In [32]:
# Base network: VGG19 with ImageNet weights and no classifier head, sized for
# im_size x im_size colour images.
base_model = VGG19(weights='imagenet',include_top=False, input_shape=(im_size, im_size, 3))

# Freeze every layer of the base so that only the new head is trained.
for frozen_layer in base_model.layers:
    frozen_layer.trainable = False

# New head: flatten the base output, then one softmax unit per breed.
x = Flatten()(base_model.output)
predictions = Dense(num_class, activation='softmax')(x)

# This is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Early-stopping callback, kept in a list ready to be handed to model.fit(callbacks=...).
callbacks_list = [keras.callbacks.EarlyStopping(monitor='val_acc', patience=3, verbose=1)]
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         (None, 90, 90, 3)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 90, 90, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 90, 90, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 45, 45, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 45, 45, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 45, 45, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 22, 22, 128)       0         
__________

In [33]:
# Train the new head. callbacks_list (the EarlyStopping callback created with
# the model) is passed in so that it actually takes part in training.
model.fit(
    X_train,
    Y_train,
    epochs=2,
    validation_data=(X_valid, Y_valid),
    callbacks=callbacks_list,
    verbose=1,
)

Train on 7155 samples, validate on 3067 samples
Epoch 1/2
Epoch 2/2






<keras.callbacks.History at 0x2199ec76f28>

Remember, accuracy will be low if the pre-trained weights cannot be downloaded in the kernel and `weights=None` is used instead, because the weights are then trained from scratch. The code above uses `weights='imagenet'`.
Next we will make our predictions.

In [34]:
# Predict on the test images with the model trained above.
preds = model.predict(x_test, verbose=1)



In [35]:
# Column names are the breed labels generated by the one-hot encoding earlier.
col_names = one_hot.columns.values
sub = pd.DataFrame(preds, columns=col_names)
# Put the image ids from the sample submission in the first column.
sub.insert(loc=0, column='id', value=df_test['id'])
sub.head(10)

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,0.000432,0.014376,8.5e-05,0.000199,0.003398,0.006372,0.000152,0.000456,0.014755,...,0.002897,0.000293,0.002223149,0.004878,0.01300617,0.128114,0.000111,0.005594,0.002639,0.000252
1,00102ee9d8eb90812350685311fe5890,0.010025,0.011054,0.001197,0.00098,0.00088,0.001622,0.00085,0.00074,0.001548,...,0.015734,0.013045,0.001127672,0.001126,0.001402815,0.004256,0.022141,0.001812,0.034709,0.000671
2,0012a730dfa437f5f3613fb75efcd4ce,0.005741,0.01284,0.008748,0.026019,0.0014,0.015596,0.001174,0.0116,0.009769,...,0.002168,0.003151,0.006573818,0.006059,0.007069316,0.007737,0.010484,0.035557,0.013938,0.00112
3,001510bc8570bbeee98c8d80c8a95ec1,0.014196,0.050318,0.004079,0.017362,0.00131,0.027249,0.004624,0.015408,0.001825,...,0.002208,0.005994,0.001593215,0.000987,0.001795378,0.00455,0.00074,0.028329,0.002532,0.00125
4,001a5f3114548acdefa3d4da05474c2e,0.026999,0.019718,0.019079,0.026609,0.00324,0.00374,0.007579,0.000614,0.002492,...,0.001686,0.000252,0.001461654,0.000753,0.005949779,0.000804,0.002326,0.011626,0.013219,0.00183
5,00225dcd3e4d2410dd53239f95c0352f,0.000469,0.06008,0.001679,0.001781,0.000583,0.000275,0.000751,0.000906,0.009265,...,0.003914,0.000879,0.001451997,0.02405,0.003880257,0.011223,0.004553,0.02624,0.016837,0.000668
6,002c2a3117c2193b4d26400ce431eebd,0.013544,0.015975,0.01638,0.021049,0.000963,0.011972,0.011309,0.005278,0.001484,...,0.0063,0.022877,0.006666494,0.003686,0.002988879,0.017697,0.017521,0.004451,0.01806,0.010736
7,002c58d413a521ae8d1a5daeb35fc803,2.4e-05,6e-05,2e-06,2.6e-05,0.000192,7e-05,0.002018,0.000213,3e-06,...,8.1e-05,6.6e-05,4.70351e-07,2e-06,5.842896e-08,0.000167,0.251009,1e-06,0.209298,0.000351
8,002f80396f1e3db687c5932d7978b196,0.014547,0.012317,0.050596,0.026392,0.002017,0.002739,0.003309,0.002362,0.002399,...,0.00197,0.000582,0.003288176,0.000757,0.01799287,0.004378,0.003574,0.039597,0.006125,0.001191
9,0036c6bcec6031be9e62a257b1c3c442,0.006683,0.005048,0.006135,0.004311,0.001558,0.015824,0.000939,0.000266,0.006063,...,0.001022,0.000505,0.002212686,0.004147,0.03593173,0.004372,0.000542,0.002859,0.003648,0.000926


Now we reuse the same code as above, this time with VGG16.


# Using VGG16

In [36]:
# Base network: VGG16 with ImageNet weights and no classifier head, sized for
# im_size x im_size colour images.
base_model = VGG16(weights='imagenet',include_top=False, input_shape=(im_size, im_size, 3))

# Freeze every layer of the base so that only the new head is trained.
for frozen_layer in base_model.layers:
    frozen_layer.trainable = False

# New head: flatten the base output, then one softmax unit per breed.
x = Flatten()(base_model.output)
predictions = Dense(num_class, activation='softmax')(x)

# This is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Early-stopping callback, kept in a list ready to be handed to model.fit(callbacks=...).
callbacks_list = [keras.callbacks.EarlyStopping(monitor='val_acc', patience=3, verbose=1)]
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         (None, 90, 90, 3)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 90, 90, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 90, 90, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 45, 45, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 45, 45, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 45, 45, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 22, 22, 128)       0         
__________

In [37]:
# Train the new head. callbacks_list (the EarlyStopping callback created with
# the model) is passed in so that it actually takes part in training.
model.fit(
    X_train,
    Y_train,
    epochs=2,
    validation_data=(X_valid, Y_valid),
    callbacks=callbacks_list,
    verbose=1,
)

Train on 7155 samples, validate on 3067 samples
Epoch 1/2
Epoch 2/2






<keras.callbacks.History at 0x2194789c898>

In [38]:
# Predict on the test images with the model trained above.
preds = model.predict(x_test, verbose=1)



In [39]:
# Column names are the breed labels generated by the one-hot encoding earlier.
col_names = one_hot.columns.values
sub = pd.DataFrame(preds, columns=col_names)
# Put the image ids from the sample submission in the first column.
sub.insert(loc=0, column='id', value=df_test['id'])
sub.head(10)

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,0.0001088201,0.112157,2.492713e-05,0.000439,0.005048,0.001525,0.000132,0.00054,0.042559,...,0.000227,5.4e-05,0.005529162,0.013911,0.013977,0.145787,0.000338,0.002054683,0.002179,0.000265
1,00102ee9d8eb90812350685311fe5890,0.0002386292,0.000675,5.904982e-05,1.1e-05,0.000807,0.000144,0.000234,0.000325,0.00015,...,0.000232,0.003723,0.0002381381,0.000193,0.000629,0.000586,0.055937,0.0001553115,0.007895,0.000285
2,0012a730dfa437f5f3613fb75efcd4ce,0.01622809,0.02049,0.007442918,0.004689,0.00503,0.00998,0.003431,0.003835,0.00866,...,0.001382,0.002253,0.005708122,0.008067,0.028801,0.006475,0.014692,0.009404751,0.005669,0.001258
3,001510bc8570bbeee98c8d80c8a95ec1,0.008543045,0.083812,0.02298049,0.007074,0.00199,0.015656,0.008332,0.012429,0.002747,...,0.001067,0.00312,0.001198975,0.000292,0.004521,0.002342,0.001413,0.009411016,0.002331,0.002478
4,001a5f3114548acdefa3d4da05474c2e,0.03482509,0.048724,0.01045887,0.003978,0.002588,0.002494,0.004769,0.000735,0.004713,...,0.002015,0.000422,0.002327428,0.001368,0.008418,0.00103,0.004012,0.002331804,0.0054,0.003898
5,00225dcd3e4d2410dd53239f95c0352f,0.0003295404,0.093452,0.0005639387,0.004062,0.001923,0.000192,0.002035,0.001668,0.005525,...,0.024099,0.000197,0.009658925,0.009216,0.003989,0.012543,0.002463,0.005717051,0.02926,0.000624
6,002c2a3117c2193b4d26400ce431eebd,0.03944378,0.051603,0.01457914,0.001961,0.001901,0.012848,0.024937,0.004418,0.006086,...,0.000676,0.023375,0.00186406,0.000422,0.003628,0.004661,0.006083,0.00217079,0.002356,0.006403
7,002c58d413a521ae8d1a5daeb35fc803,9.46467e-07,0.000174,4.205222e-08,4e-06,0.000105,9e-06,0.000578,9.6e-05,3e-06,...,7.4e-05,3.2e-05,3.139132e-07,7e-06,2e-06,0.000233,0.532095,7.513113e-07,0.030788,0.001784
8,002f80396f1e3db687c5932d7978b196,0.001155102,0.007629,0.04484331,0.006758,0.008428,0.001664,0.004302,0.008296,0.001908,...,0.00101,0.00072,0.002890339,0.000868,0.037786,0.004499,0.005181,0.007163113,0.000403,0.000532
9,0036c6bcec6031be9e62a257b1c3c442,0.00141159,0.014303,0.002637628,0.001575,0.001414,0.002967,0.002179,0.000197,0.005855,...,0.004959,0.000119,0.02546252,0.005061,0.030527,0.007271,0.001048,0.0003186298,0.001383,0.002792


# References

In [40]:
# Dataset
# https://www.kaggle.com/c/dog-breed-identification/data


# Kernel-1 link : https://www.kaggle.com/gaborfodor/dog-breed-pretrained-keras-models-lb-0-3
# Kernel-2 link :  https://www.kaggle.com/orangutan/keras-vgg19-starter


# References
# Docker-python : https://github.com/kaggle/docker-python

# Licenses

<a rel="license" href="http://creativecommons.org/licenses/by/3.0/us/"><img alt="Creative Commons License" style="border-width:0" src="https://i.creativecommons.org/l/by/3.0/us/88x31.png" /></a><br>The text in the document by NISHANT GOHEL and KARAN BHAVSAR is licensed under <a rel="license" href="http://creativecommons.org/licenses/by/3.0/us/">Creative Commons Attribution 3.0 United States License</a>.<br><br>


The code in the document by NISHANT GOHEL and KARAN BHAVSAR is licensed under the MIT License https://opensource.org/licenses/MIT