# Pretrained model with Keras 


### If this kernel is useful, <font color='red'>please upvote!</font>

This kernel is based on: [cnn-with-keras-stater](https://www.kaggle.com/ammarnassanalhajali/cnn-with-keras-stater)



### Importing Libraries

In [None]:
import numpy as np 
import pandas as pd 
import os
import gc
import sys
import matplotlib.pyplot as plt
import matplotlib.image as mplimg
from matplotlib.pyplot import imshow
from tqdm.autonotebook import tqdm

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

import keras.backend as K
from keras.models import Sequential
from keras import layers
from keras.preprocessing import image
from keras.applications.imagenet_utils import preprocess_input
from keras.layers import Input, Dense, Activation, BatchNormalization, Flatten, Conv2D
from keras.layers import AveragePooling2D, MaxPooling2D, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.models import Model
from keras.models import load_model
import tensorflow as tf

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)


In [None]:
from tensorflow.python.client import device_lib

# Report the GPUs TensorFlow can see. The stable `tf.config.list_physical_devices`
# replaces the `experimental` alias.
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    print("Name:", gpu.name, "  Type:", gpu.device_type)

# Enumerates local devices; the return value is not used in this cell.
device_lib.list_local_devices()

# `tf.test.is_gpu_available()` is deprecated; its deprecation notice recommends
# checking the physical-device list instead.
print(bool(gpus))

In [None]:
# Load the training metadata and keep a 50-row subset for quick experiments.
train_df = pd.read_csv("../input/happy-whale-and-dolphin/train.csv")
#train_df=train_df.drop_duplicates(subset=['individual_id'],keep='last')
# The former mid-cell `train_df.head()` was dropped: its result was discarded
# because it was not the last expression of the cell.
train_df_small = train_df.iloc[:50]
print(train_df_small['image'])

In [None]:
# Show the (rows, columns) of the full table and of the small subset, one per line.
print(train_df.shape, train_df_small.shape, sep="\n")

In [None]:
# Open one training image with the Keras loader and display it as the cell output.
sample_path = '../input/happy-whale-and-dolphin/train_images/002618d6f63ebc.jpg'
img = image.load_img(sample_path)
img

In [None]:
# Read the same sample image as an array and display its shape as the cell output.
sample_path = '../input/happy-whale-and-dolphin/train_images/002618d6f63ebc.jpg'
img = plt.imread(sample_path)
img.shape

In [None]:
# Collect the file names found in the training image directory and display them.
train_images_dir = '../input/happy-whale-and-dolphin/train_images'
train_images_list = os.listdir(train_images_dir)
train_images_list

In [None]:
# Disabled cell: the code below is held in a bare string literal, so none of it
# executes. It would create ./kaggle/working/small and copy the first 50 training
# images (the names in train_df_small.image) into it.
'''
if not os.path.exists('./kaggle/working/small'):
    os.makedirs('./kaggle/working/small')
import shutil
for i in range(50):
    shutil.copyfile('../input/happy-whale-and-dolphin/train_images/'+train_df_small.image[i],'./kaggle/working/small/'+train_df_small.image[i])
'''

In [None]:
# Disabled cell: kept as a bare string literal, so it does not execute. It would
# list the files in ./kaggle/working/small.
'''
train_images_list_small = os.listdir('./kaggle/working/small')
train_images_list_small
'''

In [None]:
#for i in train_images_list_small:
    #os.remove('./kaggle/working/small/'+i)

In [None]:
# Disabled cell: kept as a bare string literal, so it does not execute. It would
# scan the first 10 training images and track the smallest and largest value of
# img.shape[0], with both trackers starting at 5000.
'''
min_shape = 5000
max_shape = 5000
list0 = train_images_list[:10]
for i in list0:
    img = plt.imread('../input/happy-whale-and-dolphin/train_images/'+i)
    if img.shape[0]<min_shape: 
        min_shape = img.shape[0]
    if img.shape[0]>max_shape: 
        max_shape = img.shape[0]
print(min_shape) # 59(from 0-5000)
print(max_shape) # 5277
'''

### Functions

In [None]:
def Loading_Images(data, m, dataset, target_size=(32, 32)):
    """Load, resize and preprocess the images named in ``data['image']``.

    Each file is read from ``../input/happy-whale-and-dolphin/<dataset>/``,
    resized to ``target_size``, converted to an array and passed through
    ``preprocess_input``.

    Args:
        data: Table-like object whose ``'image'`` entry iterates over file names.
        m: Number of rows to allocate in the output; must be at least the
            number of file names in ``data['image']``.
        dataset: Sub-directory holding the images (e.g. ``"train_images"``).
        target_size: ``(height, width)`` to resize every image to. Only the
            first two entries are used. Defaults to ``(32, 32)``.

    Returns:
        Array of shape ``(m, height, width, 3)``. Rows beyond the number of
        listed images are left as zeros.
    """
    print("Loading images")
    # `load_img` expects a 2-tuple (height, width); the channel count is not
    # part of it, so slice defensively in case a 3-tuple is supplied.
    height, width = target_size[:2]
    X_train = np.zeros((m, height, width, 3))
    for count, fig in enumerate(tqdm(data['image'])):
        img = image.load_img("../input/happy-whale-and-dolphin/"+dataset+"/"+fig, target_size=(height, width))
        x = image.img_to_array(img)
        X_train[count] = preprocess_input(x)
    return X_train

def prepare_labels(y):
    """Encode labels as integers, then expand them to dense one-hot rows.

    Args:
        y: Sequence of class labels (one per sample).

    Returns:
        Tuple ``(onehot, label_encoder)`` where ``onehot`` is a float64 array
        of shape ``(n_samples, n_classes)`` and ``label_encoder`` is the
        fitted ``LabelEncoder`` (usable to map class indices back to labels).
    """
    values = np.array(y)
    label_encoder = LabelEncoder()
    # Fit the encoder and map every label to its integer class index.
    integer_encoded = label_encoder.fit_transform(values)
    # LabelEncoder yields contiguous codes 0..n_classes-1, so setting one entry
    # per row reproduces the dense output of OneHotEncoder(sparse=False). This
    # avoids the `sparse` keyword, which newer scikit-learn releases renamed to
    # `sparse_output` and then removed.
    n_samples = len(integer_encoded)
    onehot_encoded = np.zeros((n_samples, len(label_encoder.classes_)))
    onehot_encoded[np.arange(n_samples), integer_encoded] = 1.0
    return onehot_encoded, label_encoder

In [None]:
# Load every training image listed in train_df, then divide the array by 255 in place.
n_train = train_df.shape[0]
X = Loading_Images(train_df, n_train, "train_images")
np.divide(X, 255, out=X)

In [None]:
# One-hot encode the `individual_id` column; the returned encoder is kept because
# the inference cell uses it to turn predicted class indices back into IDs.
y, label_encoder = prepare_labels(train_df['individual_id'])

In [None]:
# Print the shapes of the image tensor and the label matrix (one per line),
# then trigger a garbage-collection pass.
print(X.shape, y.shape, sep="\n")
gc.collect()

In [None]:
#导入
from tensorflow.keras.applications import EfficientNetB0
#使用
base_model = EfficientNetB0(
               input_shape=(32,32,3), 
               weights=None,
               include_top=False)

layer = base_model.output
layer = Dense(1024, activation='relu')(layer)
layer = Flatten()(layer)
predictions = Dense(y.shape[1], activation='softmax')(layer)
# 得到新的模型
model = Model(inputs=base_model.input, outputs=predictions)

model.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['accuracy'])
model.summary()

In [None]:
# Train on the full training tensor (no validation data is passed), then save
# the trained model to an HDF5 file.
history = model.fit(
    x=X,
    y=y,
    batch_size=128,
    epochs=150,
    verbose=1,
)
model.save('./effb0_0.h5')

In [None]:
# Drop the training arrays and run a garbage-collection pass.
del X, y
gc.collect()

### Evaluation

In [None]:
# Plot the per-epoch training accuracy recorded by `model.fit`.
accuracy_curve = history.history['accuracy']
plt.figure(figsize=(15, 5))
plt.plot(accuracy_curve)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Model accuracy')
plt.show()

In [None]:
# Plot the per-epoch training loss recorded by `model.fit`.
loss_curve = history.history['loss']
plt.figure(figsize=(15, 5))
plt.plot(loss_curve)
plt.xlabel('Epoch')
plt.ylabel('loss')
plt.title('Model loss')
plt.show()

### Inference

In [None]:
# Collect the file names in the test image directory and report how many there are.
test = os.listdir("../input/happy-whale-and-dolphin/test_images")
n_test_images = len(test)
print(n_test_images)

In [None]:
# Build the submission table: one row per test image plus an empty
# `predictions` column that the inference cell fills in.
col = ['image']
test_df = pd.DataFrame(data=test, columns=col).assign(predictions='')
#test_df=test_df.head(n=250)

In [None]:
# Replace `model` with a model loaded from an attached input dataset.
# NOTE(review): the inference cell decodes its outputs with `label_encoder`, so
# this file is presumably trained on the same label set - confirm.
model = load_model(r'../input/happywhaleanddolphin1/effb0_0.h5')

In [None]:
# Batched inference over the test set.
#
# For each batch: load and scale the images exactly as for training, predict
# class probabilities, and build the space-separated prediction string per image.
# Labels whose probability exceeds the threshold come first, then
# 'new_individual', then the remaining top labels. The string is capped at
# `top_k` entries, because five class labels plus 'new_individual' would
# otherwise produce six.
batch_size = 5000
L = len(test_df)
top_k = 5
confidence_threshold = 0.5

for batch_start in range(0, L, batch_size):
    limit = min(batch_start + batch_size, L)
    test_df_batch = test_df.iloc[batch_start:limit]
    X = Loading_Images(test_df_batch, test_df_batch.shape[0], "test_images")
    X /= 255
    # X is already an ndarray, so it is passed directly (no extra copy).
    predictions = model.predict(X, verbose=1)

    # Column indices of the top_k probabilities per row, highest first.
    top_idx = np.argsort(predictions, axis=1)[:, -top_k:][:, ::-1]
    top_prob = predictions[np.arange(len(predictions))[:, None], top_idx]
    # Decode all class indices of the batch in a single call instead of once
    # per label inside the inner loop.
    top_labels = label_encoder.inverse_transform(top_idx.ravel()).reshape(top_idx.shape)

    batch_strings = []
    for labels, probs in zip(top_labels, top_prob):
        confident = [lab for lab, prob in zip(labels, probs) if prob > confidence_threshold]
        uncertain = [lab for lab, prob in zip(labels, probs) if not prob > confidence_threshold]
        ranked = confident + ['new_individual'] + uncertain
        batch_strings.append(' '.join(ranked[:top_k]))

    # Write by position so the assignment does not rely on a default RangeIndex.
    test_df.loc[test_df.index[batch_start:limit], 'predictions'] = batch_strings

    # Release the batch arrays before loading the next batch.
    del X, test_df_batch, predictions, top_idx, top_prob, top_labels
    gc.collect()
    

In [None]:
# Write the submission file without the index column, then preview the first rows.
test_df.to_csv(path_or_buf='submission.csv', index=False)
test_df.head()