In [None]:
# Basic libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Neural network libraries
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow import keras
from tensorflow.keras import layers, Sequential
import tensorflow.keras.layers.experimental.preprocessing as preprocessing
from tensorflow.keras.callbacks import EarlyStopping

# Reading images and creating video libraries
import cv2
import os

# Forest libraries
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from math import sqrt, inf, pi

In [None]:
# Load the competition tables; every frame is indexed by the image 'Id'.
train = pd.read_csv('../input/petfinder-pawpularity-score/train.csv').set_index('Id')

# Separate the regression target from the tabular feature columns.
targets = train['Pawpularity']
train = train.drop(columns='Pawpularity')

# `preds` starts as the sample submission and is filled in further down.
preds = pd.read_csv('../input/petfinder-pawpularity-score/sample_submission.csv').set_index('Id')
test = pd.read_csv('../input/petfinder-pawpularity-score/test.csv').set_index('Id')

In [None]:
# specify your image path
def load_imgs(idx, train=True):
    """Read one competition image from disk and return it as an RGB array.

    Parameters
    ----------
    idx : str
        Image identifier; the file name is ``idx + '.jpg'``.
    train : bool, default True
        When true the image is read from the ``train/`` folder, otherwise
        from the ``test/`` folder.

    Returns
    -------
    The image converted from OpenCV's BGR channel order to RGB.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read the file (``cv2.imread`` returns ``None``
        instead of raising, which would otherwise surface as an opaque
        ``cvtColor`` error).
    """
    if train:
        image_dir = '../input/petfinder-pawpularity-score/train/'
    else:
        image_dir = '../input/petfinder-pawpularity-score/test/'
    image_path = os.path.join(image_dir, idx + '.jpg')

    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError('Could not read image: ' + image_path)
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

In [None]:
# Sanity check: load one training image by Id and display it.
sample_img = load_imgs('0007de18844b0dbbb5e1f607da0606e0')
plt.imshow(sample_img)
plt.show()

In [None]:
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` yielding ``((images, tabular), target)`` batches.

    Images are read with ``load_imgs`` and resized to ``dim``; the tabular
    features are looked up by ID in the notebook-level ``train`` / ``test``
    DataFrames.

    Parameters
    ----------
    list_IDs : sequence
        Image IDs, in the order in which they are served.
    labels : sequence, optional
        Targets positionally aligned with ``list_IDs``. When ``None`` the
        generator runs in test mode: images come from the test folder,
        features from ``test`` and an all-zero dummy target is returned.
    batch_size : int
        Number of samples per batch.
    dim : tuple
        ``(height, width)`` of the image arrays that are returned.
    n_channels : int
        Number of image channels.
    shuffle : bool
        When true, ``on_epoch_end`` regenerates a shuffled ``self.indexes``.
        NOTE: ``__getitem__`` does not consult ``self.indexes``, so batches
        are always served in ``list_IDs`` order regardless of this flag.
    is_train : bool
        Accepted for backward compatibility but ignored; the mode is derived
        from whether ``labels`` was supplied.
    """
    def __init__(self, list_IDs, labels=None, batch_size=256,
                 dim=(512,512), n_channels=3, shuffle=True,
                 is_train=True):
        """Store the configuration and build the initial index array."""
        super().__init__()
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        # The `is_train` argument is deliberately not used here: callers in
        # this notebook rely on "no labels" meaning "test mode".
        self.is_train = (labels is not None)
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Number of full batches per epoch (a trailing partial batch is dropped)."""
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index`` as ``((images, tabular), target)``."""
        start = index * self.batch_size
        stop = start + self.batch_size
        list_IDs_temp = self.list_IDs[start:stop]

        X = self.__data_generation(list_IDs_temp)
        if self.is_train:
            X2 = train.loc[list_IDs_temp]
            y = self.labels[start:stop]
            return ((np.array(X), np.array(X2)), np.array(y))

        X2 = test.loc[list_IDs_temp]
        # A dummy target is returned so the batch has the same structure as in
        # training; it is sized to the actual number of samples in the batch.
        return ((np.array(X), np.array(X2)), np.zeros((len(list_IDs_temp),)))

    def on_epoch_end(self):
        """Rebuild ``self.indexes`` (shuffled when ``self.shuffle`` is true)."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load and resize the images for the given IDs.

        Returns a ``uint8`` array of shape ``(len(list_IDs_temp), *dim, n_channels)``.
        """
        # Size the buffer by the IDs actually requested so that a short batch
        # never contains uninitialised rows.
        X = np.empty((len(list_IDs_temp), *self.dim, self.n_channels), dtype='uint8')

        # cv2.resize expects dsize as (width, height), whereas the buffer is
        # laid out as (dim[0], dim[1]) = (rows, cols), so the order is swapped.
        dsize = (self.dim[1], self.dim[0])

        for i, ID in enumerate(list_IDs_temp):
            img = load_imgs(str(ID), train=self.is_train)
            img = cv2.resize(img, dsize=dsize, interpolation=cv2.INTER_LINEAR)
            X[i,] = np.array(img, dtype='uint8')

        return X

In [None]:
from sklearn.model_selection import train_test_split

# Split image IDs and their targets 80/20 into training and validation sets;
# the fixed random_state keeps the split reproducible.
sample_ids = np.array(targets.index)
X_train, X_val, y_train, y_val = train_test_split(
    sample_ids,
    targets,
    test_size=0.2,
    random_state=0,
)

In [None]:
dim = (512,512)
batch_size = 8

# Drop the trailing samples so every split is an exact multiple of the batch
# size (the generator only serves full batches).
X_train, X_val, y_train, y_val = (
    part[:len(part) - len(part) % batch_size]
    for part in (X_train, X_val, y_train, y_val)
)

train_dataset = DataGenerator(X_train, y_train, batch_size=batch_size, dim=dim)
# Validation and test predictions are later compared / written out in ID
# order, so these generators must never reorder their samples.
val_dataset = DataGenerator(X_val, y_val, batch_size=batch_size, dim=dim,
                            shuffle=False)
test_dataset = DataGenerator(np.array(preds.index), batch_size=1, dim=dim,
                             shuffle=False)

In [None]:
# Install the `efficientnet` package (imported further down as
# `efficientnet.tfkeras`); stdout and stderr of pip are discarded.
!pip install efficientnet &> /dev/null

In [None]:
# Disabled cell: the shell commands below are stored in a throwaway string, so
# none of them run. They would copy a SavedModel from ../input/weights into a
# local `model/` directory.
_="""!mkdir model
!mkdir model/variables
!cp ../input/weights/saved_model.pb model
!cp ../input/weights/variables.data-00000-of-00001 model/variables
!cp ../input/weights/variables.index model/variables"""

In [None]:
# Pull the first training batch: ((images, tabular), targets).
a = train_dataset[0]

In [None]:
# First image of the first batch, as produced by the generator.
first_image = a[0][0][0]
plt.imshow(first_image)

In [None]:
# Preview the horizontal flip that the model applies as its second view.
flipped_image = tf.image.flip_left_right(a[0][0][0])
plt.imshow(flipped_image)
plt.show()

In [None]:
import efficientnet.tfkeras as efn

# Run tag appended to the checkpoint directory names written by the training loop.
i = 7
# Everything in this cell (model definition and training) is placed on the first GPU.
with tf.device('/gpu:0'):
    def newLayer(inp, neurons, drop):
        """Apply BatchNorm -> Dense(neurons) -> Dropout(drop) -> ReLU to `inp`."""
        stack = (
            layers.BatchNormalization(),
            layers.Dense(neurons),
            layers.Dropout(drop),
            layers.Activation('relu'),
        )
        out = inp
        for layer in stack:
            out = layer(out)
        return out
    
    def augmentation(img):
        """Return the float32 image batch together with its left-right mirror."""
        as_float = tf.cast(img, tf.float32)
        mirrored = tf.image.flip_left_right(as_float)
        return (as_float, mirrored)
    
    def effNet(imgs):
        """Run every view in `imgs` through one shared EfficientNetB0 backbone.

        The pooled feature vectors of all views are concatenated along axis 1.
        """
        backbone = efn.EfficientNetB0(include_top=False, pooling='avg')
        features = [backbone(view) for view in imgs]
        return layers.concatenate(features, axis=1)
    
    def build_model(inp):
        """Build the two-branch regression head on `inp = (images, tabular)`.

        The image branch (original + mirrored view through EfficientNet) is
        reduced to 128 units, joined with the tabular features, and mapped to
        a single output that is clipped to the range [1, 100].
        """
        images, tabular = inp[0], inp[1]

        image_branch = effNet(augmentation(images))
        image_branch = layers.Dense(128)(image_branch)
        image_branch = layers.Dropout(0.1)(image_branch)

        merged = layers.concatenate([image_branch, tabular], axis=1)
        hidden = layers.Dense(64, activation='relu')(merged)
        score = layers.Dense(1)(hidden)
        return tf.clip_by_value(score, 1, 100)

    def old_model(inp):
        """Earlier image-only variant: EfficientNetB0 features -> one sigmoid unit.

        Only `inp[0]` (the image input) is used.
        """
        backbone = efn.EfficientNetB0(include_top=False, pooling='avg')
        features = backbone(inp[0])
        return layers.Dense(1, activation='sigmoid')(features)
    
    # Optimizers to compare, as (name, optimizer) pairs; the name is also used
    # in the checkpoint path. The trailing numbers are the author's recorded
    # scores for each run (presumably validation RMSE -- TODO confirm).
    initial_learning_rate = 0.01
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate,
        decay_steps=100,
        decay_rate=0.99,
        staircase=True)
    # `learning_rate=` is used throughout: `lr=` is the deprecated spelling
    # and is rejected by newer Keras releases.
    optimizers = [
        ('exponential', tf.keras.optimizers.SGD(learning_rate=lr_schedule)),  # 19.28
        ('adam', tf.keras.optimizers.Adam(learning_rate=0.0005)),  # 19.52
        ('adagrad', keras.optimizers.Adagrad(learning_rate=0.01)),  # 19.39
        ('adadelta', keras.optimizers.Adadelta(learning_rate=1.0, rho=0.95)),  # 19.40
        ('rmsprop', keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9)),  # 20.02
    ]
    
    # Train one freshly built model per optimizer and keep each training
    # history, paired with the optimizer name, for the comparison plot.
    histories = []
    for name, optimizer in optimizers:
        # Stop once val_loss has not improved by at least 0.01 for 3 epochs.
        earlyStopping = EarlyStopping(monitor='val_loss', patience=3,
                                      min_delta=0.01, verbose=1,
                                      restore_best_weights=False,
                                      baseline=None)

        # Output location for this run, built from the optimizer name and the
        # run tag `i`.
        checkpoint_path = f"model_{name}_{i}"
        # model = tf.keras.models.load_model(checkpoint_path)

        # Save the full model (not only the weights) whenever val_loss improves.
        cp_callback = keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                      monitor='val_loss',
                                                      save_weights_only=False,
                                                      save_best_only=True,
                                                      verbose=1)

        # Two inputs: the image tensor and one value per tabular feature column.
        inp_images = layers.Input(shape=(*dim, 3))
        inp_tabular = layers.Input(shape=(train.shape[1],))
        inp = (inp_images, inp_tabular)
        model = keras.Model(inputs=inp, outputs=build_model(inp))
        model.compile(
            optimizer=optimizer,
            loss='mse',
            metrics=[tf.keras.metrics.RootMeanSquaredError()]
        )

        history = model.fit(train_dataset, validation_data=val_dataset,
                            epochs=100, callbacks=[earlyStopping, cp_callback])
        histories.append((history, name))

In [None]:
# One panel per optimizer: training vs. validation RMSE per epoch.
# squeeze=False keeps `ax` indexable even when there is only one history.
fig, ax = plt.subplots(1, len(histories), figsize=(20, 3), squeeze=False)
ax = ax[0]

# Shared y-range for all panels.
# NOTE(review): (0, 0.24) looks chosen for RMSE on a 0-1 target scale, but the
# targets fed to the generators in this notebook are never rescaled -- confirm
# the curves are actually visible with this range.
custom_ylim = (0, 0.24)
plt.setp(ax, ylim=custom_ylim)

for k, (run_history, run_name) in enumerate(histories):
    ax[k].plot(run_history.history["root_mean_squared_error"])
    ax[k].plot(run_history.history["val_root_mean_squared_error"])
    ax[k].title.set_text(run_name)
    ax[k].set_xlabel('epoch')
    ax[k].set_ylabel('RMSE')
    ax[k].legend(['train', 'val'])
plt.show()

In [None]:
# Average the validation and test predictions of several saved models.
# NOTE(review): the names below end in `_6`, whereas the training cell writes
# `model_<optimizer>_<i>` with i = 7, and `models_path` is only used by the
# commented-out line -- confirm which saved models are meant to be loaded.
models_path = '../input/modelspawpularity/'
models = ['model_exponential_6', 'model_adam_6', 'model_adadelta_6', 'model_adagrad_6', 'model_rmsprop_6']
# models = [models_path + model for model in models]
# labels_train = np.zeros((X_train.shape[0], 1))
labels_val = np.zeros((X_val.shape[0], 1))
labels_test = np.zeros((test.shape[0], 1))
for name in models:
    print(name)
    model = tf.keras.models.load_model(name)
    # NOTE(review): the `* 100` assumes the loaded models output values on a
    # 0-1 scale (e.g. a sigmoid head) -- confirm against the saved models.
    # labels_train += model.predict(train_dataset) * 100
    labels_val += model.predict(val_dataset) * 100
    # No batch_size here: a Sequence already produces its own batches, and
    # Keras documents that batch_size must not be given for such inputs.
    labels_test += model.predict(test_dataset) * 100
# labels_train /= len(models)
labels_val /= len(models)
labels_test /= len(models)

In [None]:
# RMSE of the averaged validation predictions.
# NOTE(review): y_val is multiplied by 100 here, which only matches the
# predictions if the targets were on a 0-1 scale -- confirm.
ensemble_mse = mean_squared_error(y_val * 100, labels_val)
error = sqrt(ensemble_mse)
print('Error (ensemble):', error)

In [None]:
# Write the averaged test predictions to the submission file.
# labels_test has shape (n, 1); flatten it so a plain 1-D column is assigned.
preds['Pawpularity'] = np.ravel(labels_test)
preds.to_csv('submission.csv')

In [None]:
# Disabled cell (kept as a string, never executed). It would replace the ID
# arrays X_train / X_val with the matching rows of `train` and re-derive
# y_train / y_val from `targets`.
_="""X_train = train.loc[list(X_train)]
X_val = train.loc[list(X_val)]
y_train = targets.loc[list(X_train.index)]
y_val = targets.loc[list(X_val.index)]"""

In [None]:
# Disabled cell (kept as a string, never executed). It would add the network
# predictions as a `label` feature column.
# NOTE(review): `labels_train` is only computed in commented-out lines of the
# ensemble cell, so those would have to be restored before enabling this.
_="""X_train['label'] = labels_train
X_val['label'] = labels_val
test['label'] = labels_test"""

In [None]:
# Disabled cell (kept as a string, never executed): grid search over
# n_estimators x max_depth for a RandomForestRegressor, keeping the pair with
# the lowest validation RMSE in `best_model` / `min_error`.
_="""n_estimators = np.linspace(100, 1000, 10)
max_depths = range(1,20,2)
best_model = (None, None)
min_error = inf
for n in n_estimators:
    print('estimators:', n)
    for d in max_depths:
        print('    depth:', d)
        rf = RandomForestRegressor(n_estimators=int(n), max_features='sqrt',
                                  max_depth=d)
        rf.fit(X_train, y_train)
        pred = rf.predict(X_val)
        error = sqrt(mean_squared_error(y_val, pred))
        if error < min_error:
            best_model = (n, d)
            min_error = error
        print('        Error:', error)"""

In [None]:
# Disabled cell (kept as a string, never executed): prints the best error,
# n_estimators and depth recorded in `min_error` / `best_model`.
_="""print('Best error:', min_error)
print('Best n_estimator:', best_model[0])
print('Best depth:', best_model[1])"""

In [None]:
# Disabled cell (kept as a string, never executed): grid search over
# n_estimators x learning_rate for an XGBRegressor, keeping the pair with the
# lowest validation RMSE in `best_model` / `min_error`.
_="""n_estimators = np.linspace(100, 1000, 10)
learning_rates = np.logspace(-1, -4, 10)
best_model = (None, None)
min_error = inf
for n in n_estimators:
    print('estimators:', n)
    for l in learning_rates:
        print('    learning_rate:', l)
        xgb = XGBRegressor(n_estimators=int(n), learning_rate=l)
        xgb.fit(X_train, y_train)
        pred = xgb.predict(X_val)
        error = sqrt(mean_squared_error(y_val, pred))
        if error < min_error:
            best_model = (n, l)
            min_error = error
        print('        Error:', error)"""

In [None]:
# Disabled cell (kept as a string, never executed): prints the best error,
# n_estimators and learning rate recorded in `min_error` / `best_model`.
_="""print('Best error:', min_error)
print('Best n_estimator:', best_model[0])
print('Best learning rate:', best_model[1])"""

In [None]:
# Disabled cell (kept as a string, never executed): grid search over
# n_estimators x learning_rate x max_depth for an LGBMRegressor, keeping the
# combination with the lowest validation RMSE.
# NOTE(review): `max_depths` is not defined in this cell; it is only defined
# in the (also disabled) random-forest search cell.
_="""n_estimators = np.linspace(100, 1000, 10)
learning_rates = np.logspace(-1, -4, 10)
best_model = (None, None, None)
min_error = inf
for n in n_estimators:
    print('estimators:', n)
    for l in learning_rates:
        print('    learning_rate:', l)
        for d in max_depths:
            print('        depth:', d)
            lgbm = LGBMRegressor(n_estimators=int(n), learning_rate=l, max_depth=d)
            lgbm.fit(X_train, y_train)
            pred = lgbm.predict(X_val)
            error = sqrt(mean_squared_error(y_val, pred))
            if error < min_error:
                best_model = (n, l, d)
                min_error = error
            print('            Error:', error)"""

In [None]:
# Disabled cell (kept as a string, never executed): prints the best error,
# n_estimators, learning rate and depth recorded in `min_error` / `best_model`.
_="""print('Best error:', min_error)
print('Best n_estimator:', best_model[0])
print('Best learning rate:', best_model[1])
print('Best depth:', best_model[2])"""

In [None]:
# Disabled cell (kept as a string, never executed): grid search over
# n_estimators x learning_rate for a CatBoostRegressor, keeping the pair with
# the lowest validation RMSE in `best_model` / `min_error`.
_="""n_estimators = np.linspace(100, 1000, 10)
learning_rates = np.logspace(-1, -4, 10)
best_model = (None, None)
min_error = inf
for n in n_estimators:
    print('estimators:', n)
    for l in learning_rates:
        print('    learning_rate:', l)
        cat = CatBoostRegressor(n_estimators=int(n), learning_rate=l,
                               verbose=0)
        cat.fit(X_train, y_train)
        pred = cat.predict(X_val)
        error = sqrt(mean_squared_error(y_val, pred))
        if error < min_error:
            best_model = (n, l)
            min_error = error
        print('        Error:', error)"""

In [None]:
# Disabled cell (kept as a string, never executed): prints the best error,
# n_estimators and learning rate recorded in `min_error` / `best_model`.
_="""print('Best error:', min_error)
print('Best n_estimator:', best_model[0])
print('Best learning rate:', best_model[1])"""

In [None]:
# Disabled cell (kept as a string, never executed): would rebuild `train` and
# `targets` by concatenating the training and validation parts.
_="""train = pd.concat([X_train,X_val])
targets = pd.concat([y_train,y_val])"""

In [None]:
# Disabled cell (kept as a string, never executed): would import the sklearn
# stacking / voting ensembles and re-split `train` / `targets` 80/20 with
# random_state=1.
_="""from sklearn.ensemble import StackingRegressor, VotingRegressor

X_train, X_val, y_train, y_val = train_test_split(train, targets, 
                                                 random_state=1, test_size=0.2)"""

In [None]:
# Disabled cell (kept as a string, never executed): would fit a
# StackingRegressor (random forest, LightGBM and CatBoost base estimators with
# a random-forest final estimator; the XGBoost entry is commented out) and
# print its validation RMSE.
_="""stacking = StackingRegressor(
    estimators=[
        ('rf', RandomForestRegressor(n_estimators=800, max_features='sqrt',
                                    max_depth=7)),
        #('xgb', XGBRegressor(n_estimators=700, learning_rate=0.004641588833612782)),
        ('lgbm', LGBMRegressor(n_estimators=1000, learning_rate=0.001, max_depth=9)),
        ('cat', CatBoostRegressor(n_estimators=500, learning_rate=0.0021544346900318843,
                                 verbose=0))
    ],
    final_estimator=RandomForestRegressor(n_estimators=800, max_features='sqrt',
                                    max_depth=7)
)
stacking.fit(X_train, y_train)
pred = stacking.predict(X_val)
print('Error:', sqrt(mean_squared_error(y_val, pred)))"""

In [None]:
# Disabled cell (kept as a string, never executed): would fit a
# VotingRegressor over XGBoost, LightGBM and CatBoost (the random-forest entry
# is commented out) and print its validation RMSE.
_="""voting = VotingRegressor(
    estimators=[
        #('rf', RandomForestRegressor(n_estimators=800, max_features='sqrt',
                                    #max_depth=7)),
        ('xgb', XGBRegressor(n_estimators=700, learning_rate=0.004641588833612782)),
        ('lgbm', LGBMRegressor(n_estimators=1000, learning_rate=0.001, max_depth=9)),
        ('cat', CatBoostRegressor(n_estimators=500, learning_rate=0.0021544346900318843,
                                 verbose=0))
    ]
)
voting.fit(X_train, y_train)
pred = voting.predict(X_val)
print('Error:', sqrt(mean_squared_error(y_val, pred)))"""

In [None]:
# Disabled cell (kept as a string, never executed): would refit the stacking
# ensemble on all of `train` and overwrite submission.csv with its test
# predictions.
_="""stacking.fit(train, targets)
preds['Pawpularity'] = stacking.predict(test)
preds.to_csv('submission.csv')"""