In [4]:
import pandas as pd
import numpy as np

In [5]:
# Load the dataset index CSV; later cells read its 'data set', 'filepaths' and 'labels' columns.
df = pd.read_csv('/kaggle/input/100-bird-species/birds.csv')

In [6]:
# Preview the first rows of the index table.
df.head()

In [7]:
# (rows, columns) of the full index table.
df.shape

In [8]:
# Count missing values per column.
df.isna().sum()

In [9]:
# Distinct values of the 'data set' column, i.e. the split names the rows are tagged with.
df['data set'].unique()

In [10]:
# Per-split image paths (X_*) and class labels (y_*), selected by the 'data set' column.
# Each boolean mask is computed once and reused for both columns of its split.
split_col = df['data set']
in_train = split_col == 'train'
in_valid = split_col == 'valid'
in_test = split_col == 'test'

X_train, y_train = df.loc[in_train, 'filepaths'], df.loc[in_train, 'labels']
X_valid, y_valid = df.loc[in_valid, 'filepaths'], df.loc[in_valid, 'labels']
X_test, y_test = df.loc[in_test, 'filepaths'], df.loc[in_test, 'labels']

In [11]:
# Full-row subsets of the index table, one per split name.
df_train, df_valid, df_test = (
    df.loc[df['data set'] == split_name]
    for split_name in ('train', 'valid', 'test')
)

In [12]:
# (rows, columns) of the training subset.
df_train.shape

In [13]:
# Preview the first rows of the training subset.
df_train.head()

In [14]:
# Number of distinct class labels present in the training subset.
df_train['labels'].nunique()

Check if the dataset is balanced

In [15]:
# Number of training rows per class label, most frequent first.
df_train['labels'].value_counts()

The most-represented class contains twice as many observations as the least-represented class.

In [16]:
import matplotlib.pyplot as plt
import random

In [17]:
# Preview five randomly chosen training images, each titled with its class label.
# Every image is drawn directly on its own Axes from the grid created by
# plt.subplots, so `ax` keeps holding Axes objects (imshow's return value is
# not stored back into the array) and no extra pyplot-state subplots are made.
fig, ax = plt.subplots(1, 5, figsize=(15, 5))
for axis in ax:
    # Pick an index *label* and look it up with .loc so the lookup is
    # explicitly label-based rather than positional.
    random_index = random.choice(X_train.index)
    img = plt.imread('/kaggle/input/100-bird-species/' + X_train.loc[random_index])
    axis.imshow(img)
    axis.set_title(y_train.loc[random_index])
# A single layout pass once all panels exist, instead of one per iteration.
fig.tight_layout()

In [18]:
from keras.models import Model
from keras.layers import Input, Dense, Flatten, Conv2D, MaxPooling2D, BatchNormalization, Dropout
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.applications.vgg16 import VGG16, preprocess_input

In [19]:
# Training-time augmentation pipeline. Only the options that differ from the
# ImageDataGenerator defaults are spelled out; every other argument keeps its
# library default, which is the value that was previously passed explicitly.
train_datagen = ImageDataGenerator(
    rotation_range=30,                        # random rotation range, in degrees
    zoom_range=0.2,                           # random zoom range
    preprocessing_function=preprocess_input,  # VGG16 input preprocessing
)

In [20]:
# Validation images get only the VGG16 input preprocessing; no augmentation options are set.
valid_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

In [21]:
# Batch iterator over the training image directory; images are resized to
# 224x224 to match the network's input shape.
train_generator = train_datagen.flow_from_directory(
    '../input/100-bird-species/train',
    target_size=(224, 224),
)

In [22]:
# Batch iterator over the validation image directory, resized to 224x224
# like the training images.
valid_generator = valid_datagen.flow_from_directory(
    '../input/100-bird-species/valid',
    target_size=(224, 224),
)

In [23]:
# Stop training after 5 epochs without a val_accuracy improvement and
# restore the weights from the best epoch.
early_stop = EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    restore_best_weights=True,
)

In [24]:
# Multiply the learning rate by 0.2 when val_accuracy has not improved for
# 3 epochs, never going below 1e-5.
reduce_lr = ReduceLROnPlateau(
    monitor='val_accuracy',
    mode='max',
    factor=0.2,
    patience=3,
    min_lr=1e-5,
    verbose=1,
)

In [25]:
# ImageNet-pretrained VGG16 without its classifier top, used as a fixed
# feature extractor: its weights are excluded from training.
vgg16_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
vgg16_model.trainable = False

In [26]:
# Classification head stacked on the non-trainable VGG16 base.
#
# The output width is taken from the training generator instead of being
# hard-coded, so the head always matches the number of classes actually found
# in the training directory.
num_classes = train_generator.num_classes

inp = Input(shape=(224, 224, 3))
mod = vgg16_model(inp)
flatten = Flatten()(mod)
# The hidden layer needs a non-linearity: with no activation it is a purely
# linear map, and stacking it under the softmax layer adds parameters without
# adding any expressive power over a single linear classifier.
hidden = Dense(1024, activation='relu')(flatten)
out = Dense(num_classes, activation='softmax')(hidden)
model = Model(inputs=inp, outputs=out)

In [28]:
# Multi-class setup: categorical cross-entropy loss, Adam optimizer, accuracy metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='Adam',
    metrics=['accuracy'],
)

In [29]:
# Train for up to 50 epochs; the callbacks may lower the learning rate or stop
# early based on val_accuracy. The returned History object is kept in
# `history` so the per-epoch metrics remain available for inspection/plotting.
history = model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=50,
    callbacks=[reduce_lr, early_stop],
)

In [30]:
# Persist the trained model to 'bird_species.h5' (relative path, so it lands in the current working directory).
model.save('bird_species.h5')                                                     

In [31]:
import os

from IPython.display import FileLink

# Switch to the working directory, then render a link to the saved model file
# as this cell's output.
os.chdir(r'/kaggle/working')
FileLink('bird_species.h5')