In [1]:
# Настройки нейросети
img_size = 224 # Размер изображения
num_class = 120 # Кол-во классов
batch_size = 32
Epochs = 64

In [2]:
# for garbage collection
import gc

# for warnings
import warnings
warnings.filterwarnings("ignore")

# utility libraries
import os
import copy
import tqdm
import numpy as np 
import pandas as pd 
import cv2, random, time, shutil, csv
import tensorflow as tf
import math

# keras libraries
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.layers import BatchNormalization, Dense, GlobalAveragePooling2D, Lambda, Dropout, InputLayer, Input
from keras.utils import to_categorical
from keras import backend as K

In [3]:
import tensorflow.keras as keras
from keras import regularizers
from keras.models import Model
from keras.models import Sequential
from keras.layers.convolutional import Conv2D, MaxPooling2D, ZeroPadding2D
from keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.metrics import categorical_accuracy, categorical_crossentropy

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.inception_v3 import InceptionV3

In [4]:
# set image size here
data_dir = '../input/dog-breed-identification'
data_df = pd.read_csv(os.path.join(data_dir, 'labels.csv'))
class_names = sorted(data_df['breed'].unique())
print(f"No. of classes read - {len(class_names)}")
time.sleep(1)

images_list = sorted(os.listdir(os.path.join(data_dir, 'train')))
X = []
Y = []
i = 0
for image in tqdm.tqdm(images_list):
    cls_name = data_df[data_df['id'] == image[:-4]].iloc[0,1]
    cls_index = int(class_names.index(cls_name)) 

    # Reading RGB Images
    image_path = os.path.join(data_dir, 'train',image)
    orig_image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
    res_image = cv2.resize(orig_image,(img_size, img_size))
    X.append(res_image)
    Y.append(cls_index)
    i+=1

No. of classes read - 120


100%|██████████| 10222/10222 [02:07<00:00, 80.29it/s]


In [5]:
# Converting to arrays
print(len(X), len(Y))
Xarr = np.array(X)
Yarr = np.array(Y).reshape(-1,1)

# converting labels to one hot
Yarr_hot = to_categorical(Y)

del(X)
print(Xarr.shape, Yarr.shape, Yarr_hot.shape)
gc.collect()

10222 10222
(10222, 224, 224, 3) (10222, 1) (10222, 120)


68

In [6]:
X_train, X_valid, Y_train, Y_valid = train_test_split(Xarr, Yarr_hot, shuffle=True,  test_size=0.2)

In [7]:
print(X_train.shape, Y_train.shape)
print(X_valid.shape,Y_valid.shape)

(8177, 224, 224, 3) (8177, 120)
(2045, 224, 224, 3) (2045, 120)


In [8]:
del Xarr, Yarr_hot, Yarr

In [9]:

train_datagen = ImageDataGenerator(rescale=1./255
                            #rotation_range=35, #поворот
                            #horizontal_flip=True,
                            #vertical_flip=True,
                            #shear_range=15
                                )

test_datagen = ImageDataGenerator(rescale=1./255)
train_datagen.fit(X_train)
test_datagen.fit(X_valid)

# Создаем генераторы 
train_generator =train_datagen.flow(X_train, Y_train, 
                               batch_size=batch_size)

test_generator =test_datagen.flow(X_valid, Y_valid,batch_size=batch_size)


In [10]:
x,y = next(train_generator)
print(type(x))
print(x.shape)
print(y.shape)
del x,y

<class 'numpy.ndarray'>
(32, 224, 224, 3)
(32, 120)


## Создаем модель нейросети

In [11]:
shape=[img_size,img_size,3]
pretrained_model = tf.keras.applications.InceptionV3(
        weights='imagenet',
        include_top=False ,
        input_shape=shape
    )
pretrained_model.trainable = False
import tensorflow as tf   
model = tf.keras.Sequential([ 
        pretrained_model,   
        tf.keras.layers.GlobalAveragePooling2D(),
        #tf.keras.layers.Dense(256, activation='relu'),
        #tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(120, activation='softmax')
    ])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


In [12]:
#opt = tf.keras.optimizers.Adam(learning_rate=0.001)
opt=tf.keras.optimizers.SGD(lr=1e-3, momentum=0.9)
model.compile(optimizer = opt ,
              loss="categorical_crossentropy",
              metrics=["accuracy"])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inception_v3 (Functional)    (None, 5, 5, 2048)        21802784  
_________________________________________________________________
global_average_pooling2d (Gl (None, 2048)              0         
_________________________________________________________________
dense (Dense)                (None, 120)               245880    
Total params: 22,048,664
Trainable params: 245,880
Non-trainable params: 21,802,784
_________________________________________________________________


In [13]:
from keras.callbacks import ModelCheckpoint, EarlyStopping
#reduce = tf.keras.callbacks.ReduceLROnPlateau( monitor='val_loss',factor=0.2,patience=5, min_lr=0.001 )

early = EarlyStopping( patience=2,
                                          min_delta=0.001,
                                          restore_best_weights=True)
checkpoint_callback = ModelCheckpoint('model_best.hdf5',
                                      monitor='val_categorical_accuracy', 
                                      #save_best_only=True
                                    mode='auto'
                                     )

## Обучаем нейросеть

In [14]:
history = model.fit(train_generator,
                    steps_per_epoch=train_generator.n //train_generator.batch_size,
                    validation_data=test_generator,
                    validation_steps=test_generator.n //test_generator.batch_size,
                    epochs=25,
                    callbacks=[early,checkpoint_callback]
                   )


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25


In [15]:
del X_train, X_valid, Y_train, Y_valid
gc.collect()

92

# Выберем лучшую модель и получим результаты для тестов

In [16]:
images_list = sorted(os.listdir(os.path.join(data_dir, 'test')))
x_test=[]
for image in tqdm.tqdm(images_list):    
    image_path = os.path.join(data_dir, 'test',image)
    orig_image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
    res_image = cv2.resize(orig_image,(img_size, img_size))
    x_test.append(res_image)
x_test=np.array(x_test)
print(x_test.shape)

100%|██████████| 10357/10357 [01:32<00:00, 111.65it/s]


(10357, 224, 224, 3)


In [17]:
from tensorflow.keras.models import load_model
model=load_model("model_best.hdf5")

In [18]:
preds = model.predict(x_test)

In [19]:
labels_csv='../input/dog-breed-identification/labels.csv'
sample_submission_csv='../input/dog-breed-identification/sample_submission.csv'
df_train = pd.read_csv(labels_csv)
df_test = pd.read_csv(sample_submission_csv)
labels = df_train['breed']
one_hot = pd.get_dummies(labels, sparse = True)
one_hot_labels = np.asarray(one_hot)

In [20]:
sub = pd.DataFrame(preds)
col_names = one_hot.columns.values
sub.columns = col_names
sub.insert(0, 'id', df_test['id'])
sub.head(5)

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,1.118144e-32,9.488521e-25,2.6140699999999998e-20,1.811031e-35,0.0,2.709523e-30,1.321051e-37,0.0,0.0,...,0.0,5.390414e-18,2.73775e-24,0.0,0.0,0.0,7.383261e-35,0.0,3.0186820000000003e-22,1.135823e-30
1,00102ee9d8eb90812350685311fe5890,1.1449809999999999e-36,2.1021580000000003e-25,2.541121e-23,2.9561940000000002e-33,0.0,9.614055e-29,8.176366e-34,0.0,0.0,...,0.0,5.058703e-13,4.06774e-15,0.0,0.0,0.0,1.2130590000000002e-23,0.0,1.551857e-16,2.724693e-26
2,0012a730dfa437f5f3613fb75efcd4ce,8.330945e-24,2.658752e-16,4.388193e-12,1.435298e-21,4.763792e-31,3.6388120000000004e-17,5.08588e-27,1.1703200000000001e-23,2.926279e-31,...,4.522573e-29,2.645058e-10,2.184552e-14,2.448152e-24,6.063078e-22,1.958038e-35,5.9180589999999995e-24,2.3270520000000003e-23,1.781344e-14,7.726285e-18
3,001510bc8570bbeee98c8d80c8a95ec1,5.5111910000000006e-33,2.4275940000000003e-23,4.110374e-19,5.694935e-23,0.0,7.786176999999999e-19,1.15661e-35,1.8430870000000001e-22,1.73589e-30,...,6.355688e-35,5.2847400000000004e-17,9.770047e-10,0.0,5.376405e-25,3.767329e-36,2.974929e-23,2.52045e-30,7.200435e-18,2.458639e-22
4,001a5f3114548acdefa3d4da05474c2e,1.372286e-30,6.416234e-18,1.146366e-07,3.688759e-14,0.0,1.1601589999999998e-19,2.0367300000000001e-22,4.646791e-25,3.321902e-24,...,3.362306e-30,0.0002365591,2.947209e-13,7.280829e-29,1.488762e-23,3.5122209999999995e-36,1.4154169999999998e-20,1.1748900000000001e-22,1.509163e-16,7.323585e-11


In [21]:
sub.to_csv("output_rmsprop_aug.csv", index=False)