### Importing packages

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import tensorflow as tf

### Load data

In [2]:
# Load the id -> breed label table and append the ".jpg" extension to every id
# so the `id` column holds file names (it is used as the filename column by the
# image generators built from this frame).
df_labels = (
    pd.read_csv("/kaggle/input/dog-breed-identification/labels.csv")
    .assign(id=lambda labels: labels['id'] + '.jpg')
)
df_labels.head()

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07.jpg,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97.jpg,dingo
2,001cdf01b096e06d78e9e5112d419397.jpg,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d.jpg,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62.jpg,golden_retriever


### Splitting data

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled images for validation.
#  - random_state pins the shuffle so the split (and every downstream metric)
#    is reproducible across "Restart & Run All".
#  - stratify keeps each breed's proportion the same in both subsets, which
#    guarantees no breed is absent from the validation frame (a missing breed
#    would make the validation generator infer a different class set).
df_train, df_validation = train_test_split(
    df_labels,
    test_size=0.2,
    shuffle=True,
    random_state=42,
    stratify=df_labels['breed'],
)

### Building the augmented training and validation image generators

In [4]:
from keras.preprocessing.image import ImageDataGenerator

# Training generator: scales pixel values by 1/255 and applies random
# augmentation (rotation, shifts, shear, zoom, horizontal flip).
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Validation generator: same rescaling, no augmentation.
validation_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by both dataframe iterators: where the images live, which
# columns hold file names / labels, the resize target and the batch size.
flow_kwargs = dict(
    directory='/kaggle/input/dog-breed-identification/train',
    x_col="id",
    y_col="breed",
    target_size=(150, 150),
    class_mode="categorical",
    batch_size=128,
)

training_set = train_datagen.flow_from_dataframe(dataframe=df_train, **flow_kwargs)

validation_set = validation_datagen.flow_from_dataframe(dataframe=df_validation, **flow_kwargs)

Found 8177 validated image filenames belonging to 120 classes.
Found 2045 validated image filenames belonging to 120 classes.


In [5]:
# Count the distinct breed labels; this value sizes the model's final
# softmax layer.
classes = len(df_labels['breed'].unique())
print("Total breeds: ", classes)

Total breeds:  120


### Model

In [6]:
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential

# Small CNN classifier: two 3x3 convolutions (with one max-pool in between),
# then flatten -> dropout -> 128-unit dense layer -> softmax over all breeds.
model = Sequential([
    Conv2D(32, (3, 3), input_shape=(150, 150, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(32, (3, 3), activation='relu'),
    Flatten(),
    Dropout(0.25),
    Dense(units=128, activation='relu'),
    Dense(units=classes, activation='softmax'),
])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 148, 148, 32)      896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 72, 72, 32)        9248      
_________________________________________________________________
flatten (Flatten)            (None, 165888)            0         
_________________________________________________________________
dropout (Dropout)            (None, 165888)            0         
_________________________________________________________________
dense (Dense)                (None, 128)               21233792  
_________________________________________________________________
dense_1 (Dense)              (None, 120)               1

In [7]:
# Configure training: categorical cross-entropy matches the one-hot labels
# produced by class_mode="categorical"; accuracy is tracked as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [8]:
# Train for 20 epochs, evaluating on the validation generator after each one.
# Keep the returned History object (instead of discarding it) so per-epoch
# loss/accuracy are available afterwards via `history.history`, e.g. for
# plotting learning curves.
history = model.fit(x=training_set, validation_data=validation_set, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f22181a7d50>

In [9]:
# Test images get the same 1/255 rescaling as training, with no augmentation.
test_datagen = ImageDataGenerator(
    rescale=1/255.
)

test_set = test_datagen.flow_from_directory(
    '/kaggle/input/dog-breed-identification',
    target_size=(150, 150),
    batch_size=128,
    # Restrict the scan to the `test` sub-folder by naming it as the only class.
    classes=['test'],
    # The test images are unlabelled: yield image batches only, no targets.
    class_mode=None,
    # flow_from_directory shuffles by default; disable it so batches are
    # produced in the same order as `test_set.filenames`. Otherwise the
    # predictions cannot be matched back to their image ids.
    shuffle=False,
)

Found 10357 images belonging to 1 classes.


In [10]:
# Run the trained model over every batch of the test generator.
# NOTE: row i of `y_pred` lines up with `test_set.filenames[i]` only when the
# generator yields images in file order (i.e. it does not shuffle).
y_pred = model.predict(x=test_set)

In [11]:
# Load the sample submission to use as a template: an `id` column followed by
# one probability column per breed.
df_submission = pd.read_csv(
    filepath_or_buffer='/kaggle/input/dog-breed-identification/sample_submission.csv'
)
df_submission

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
1,00102ee9d8eb90812350685311fe5890,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
2,0012a730dfa437f5f3613fb75efcd4ce,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
3,001510bc8570bbeee98c8d80c8a95ec1,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
4,001a5f3114548acdefa3d4da05474c2e,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10352,ffeda8623d4eee33c6d1156a2ecbfcf8,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
10353,fff1ec9e6e413275984966f745a313b0,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
10354,fff74b59b758bbbf13a5793182a9bbe4,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
10355,fff7d50d848e8014ac1e9172dc6762a3,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333


In [12]:
import re

# Turn the generator's relative paths (of the form "test/<id>.jpg") into bare
# image ids for the submission file.
#  - the folder prefix is anchored to the start and accepts either path
#    separator, so it is stripped only where it actually is a prefix;
#  - the extension pattern escapes the dot and is anchored to the end, so only
#    a literal trailing ".jpg" is removed (an unescaped "." matches any char).
file_list = test_set.filenames
id_list = [
    re.sub(r'\.jpg$', '', re.sub(r'^test[\\/]', '', name))
    for name in file_list
]

In [13]:
# Breed names ordered by the output index the model was trained with
# (class_indices maps breed name -> column index of the softmax output).
breed_columns = sorted(training_set.class_indices, key=training_set.class_indices.get)

# Fail loudly if the template's breed columns are not exactly the trained classes.
assert set(breed_columns) == set(df_submission.columns[1:]), \
    "submission columns do not match the model's classes"

df_submission['id'] = id_list
# Assign by column name rather than by position, so each probability lands in
# its own breed's column even if the template orders the breeds differently.
df_submission.loc[:, breed_columns] = y_pred
df_submission

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,0.000778,0.010456,0.001584,0.020046,0.004460,0.000764,0.020848,0.031571,0.005768,...,0.009700,0.000817,0.012379,0.003628,0.019346,0.004175,0.006105,0.008702,0.010559,0.025270
1,00102ee9d8eb90812350685311fe5890,0.000311,0.012927,0.001265,0.008399,0.004084,0.000665,0.004791,0.006384,0.010565,...,0.012941,0.000823,0.001661,0.004780,0.020272,0.013451,0.027438,0.005770,0.004939,0.014823
2,0012a730dfa437f5f3613fb75efcd4ce,0.002583,0.015110,0.008607,0.004603,0.011106,0.012143,0.002417,0.008066,0.008761,...,0.003767,0.010310,0.000865,0.014428,0.006181,0.011484,0.013846,0.011893,0.010097,0.004261
3,001510bc8570bbeee98c8d80c8a95ec1,0.002678,0.010223,0.005025,0.025540,0.003237,0.001350,0.031084,0.013516,0.003431,...,0.008991,0.002866,0.027735,0.002730,0.004518,0.004911,0.005014,0.009368,0.005139,0.014672
4,001a5f3114548acdefa3d4da05474c2e,0.000143,0.021761,0.000886,0.005090,0.007500,0.000837,0.001846,0.012421,0.004953,...,0.001737,0.003311,0.000749,0.006159,0.009040,0.008787,0.013418,0.007232,0.018495,0.002776
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10352,ffeda8623d4eee33c6d1156a2ecbfcf8,0.005529,0.013354,0.006102,0.030765,0.003820,0.002957,0.027233,0.011923,0.003333,...,0.010988,0.003703,0.012901,0.002912,0.006990,0.002428,0.003360,0.007028,0.004646,0.011814
10353,fff1ec9e6e413275984966f745a313b0,0.009303,0.009814,0.011813,0.013537,0.005051,0.002847,0.025217,0.012803,0.006759,...,0.017814,0.002902,0.014915,0.004332,0.004504,0.003525,0.003905,0.006195,0.003244,0.018437
10354,fff74b59b758bbbf13a5793182a9bbe4,0.000606,0.003728,0.007589,0.002903,0.021764,0.005935,0.005372,0.019444,0.013320,...,0.005316,0.021714,0.000443,0.053685,0.006973,0.016642,0.011978,0.018851,0.017428,0.003593
10355,fff7d50d848e8014ac1e9172dc6762a3,0.011282,0.014118,0.018169,0.002004,0.008311,0.019768,0.001343,0.002478,0.004113,...,0.002091,0.002425,0.000056,0.004531,0.006086,0.001304,0.001861,0.005504,0.001611,0.002998


In [14]:
# Use the image id as the index so it is written as the first CSV column
# (without an extra numeric index column), then save the submission file.
final_df = df_submission.set_index(keys='id')
final_df.to_csv(path_or_buf='Submission.csv')