In [None]:
import os

# Root directory of the competition data; every other path is derived from it.
PATH = '/kaggle/input/petfinder-pawpularity-score'

# Image folders for the train and test splits.
path_img_train, path_img_test = (
    os.path.join(PATH, split_dir) for split_dir in ('train', 'test')
)

# Metadata tables and the submission template.
path_train, path_test, path_submission = (
    os.path.join(PATH, csv_name)
    for csv_name in ('train.csv', 'test.csv', 'sample_submission.csv')
)

In [None]:
import pandas as pd
# Read the train and test metadata tables (same reader, same order as before).
df_train, df_test = (pd.read_csv(csv_path) for csv_path in (path_train, path_test))

# Preview the first rows of the training table as the cell output.
df_train.head()

In [None]:
# Regression target, taken from the 'Pawpularity' column.
y_train = df_train['Pawpularity'].values.astype('float32')

# Tabular features: every column except the identifier (and, for train, the target).
X_train_df = df_train.drop(columns=['Id', 'Pawpularity']).values.astype('float32')
X_test_df = df_test.drop(columns=['Id']).values.astype('float32')

In [None]:
import numpy as np
from tensorflow import keras
import tensorflow as tf
def load_images_data(img_paths, input_shape=(150, 150), dtype='float32'):
    """Load image files into a single dense array.

    Args:
        img_paths: Sized iterable of image file paths (e.g. a list or a
            pandas Series); it is measured with ``len`` and then iterated.
        input_shape: ``(height, width)`` every image is resized to; passed to
            ``load_img`` as ``target_size``.
        dtype: dtype of the returned array. Defaults to ``'float32'`` instead
            of NumPy's float64 default, which halves the memory of the image
            tensor (``img_to_array`` produces Keras' floatx, float32 unless
            reconfigured, so no precision is lost).

    Returns:
        Array of shape ``(len(img_paths), height, width, 3)`` holding the
        pixel values as returned by ``img_to_array``; no rescaling is done
        here.
    """
    height, width = input_shape[0], input_shape[1]
    # Pre-allocate once so the images are written in place rather than stacked.
    images = np.zeros((len(img_paths), height, width, 3), dtype=dtype)
    for i, img_path in enumerate(img_paths):
        img = keras.preprocessing.image.load_img(img_path, target_size=input_shape)
        images[i] = keras.preprocessing.image.img_to_array(img)
    return images

# Each row's image is stored as '<Id>.jpg' inside the matching image folder.
img_train_paths = df_train['Id'].map(
    lambda image_id: os.path.join(path_img_train, image_id) + '.jpg'
)
img_test_paths = df_test['Id'].map(
    lambda image_id: os.path.join(path_img_test, image_id) + '.jpg'
)

# Decode and resize every image into one array per split (default target size).
images_train = load_images_data(img_train_paths)
images_test = load_images_data(img_test_paths)


In [None]:
# Divide the raw pixel values by 255.
# NOTE: the arrays are rebound to the same names, so re-running this cell
# divides them again; run it exactly once per kernel session.
images_train = np.true_divide(images_train, 255.0)
images_test = np.true_divide(images_test, 255.0)

In [None]:
# Stand-alone VGG16 feature extractor (no classifier head, no pre-trained
# weights), built here only to print its layer summary; make_model_1 creates
# its own instance.
base_model = keras.applications.VGG16(weights=None, include_top=False)
base_model.summary()

In [None]:
# Detect the TPU, connect to it and initialise it.
tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()

# Instantiate the distribution strategy. `tf.distribute.TPUStrategy` is the
# stable replacement for the deprecated `tf.distribute.experimental.TPUStrategy`.
tpu_strategy = tf.distribute.TPUStrategy(tpu)



In [None]:
def make_model_1(image_shape=(150, 150, 3)):
    """Build the two-input regression model (tabular features + image).

    The tabular branch is two small ReLU dense layers; the image branch is a
    VGG16 convolutional base (no top, randomly initialised) whose output is
    flattened. Both branches are concatenated and mapped to a single output.

    To place the model's variables under a distribution strategy, call this
    function inside that strategy's ``scope()`` (as the compile cell does).

    Args:
        image_shape: ``(height, width, channels)`` of the image input. The
            default matches the default ``input_shape`` of
            ``load_images_data``; change both together.

    Returns:
        An uncompiled ``keras.models.Model`` taking ``[df_input, image_input]``
        and producing one value per sample.
    """
    base_model = keras.applications.VGG16(
        include_top=False,
        weights=None
    )

    # Tabular branch: width is taken from the module-level training matrix.
    df_input = keras.layers.Input(shape=(X_train_df.shape[1],), dtype='float32', name='df')
    dense_1 = keras.layers.Dense(10, activation='relu')(df_input)
    dense_2 = keras.layers.Dense(5, activation='relu')(dense_1)

    # Image branch: convolutional features flattened to a vector.
    image_input = keras.layers.Input(shape=tuple(image_shape), dtype='float32', name='image_input')
    x = base_model(image_input)
    x = keras.layers.Flatten()(x)

    # Regression head on the joined branches.
    concat = keras.layers.concatenate([dense_2, x], axis=-1)
    # NOTE(review): two consecutive linear Dense layers compose to a single
    # affine map; a non-linear activation may have been intended here - confirm.
    y = keras.layers.Dense(256, activation='linear')(concat)
    out_put = keras.layers.Dense(1, activation='linear')(y)

    model = keras.models.Model([df_input, image_input], out_put)
    return model

In [None]:
from tensorflow.keras import backend as K

def root_mean_squared_error(y_true, y_pred):
    """Loss: square root of the mean squared difference over all elements."""
    squared_error = K.square(y_pred - y_true)
    return K.sqrt(K.mean(squared_error))
# Build and compile inside the strategy scope so the model's variables are
# created under the TPU strategy.
with tpu_strategy.scope():
    model_1 = make_model_1()
    model_1.compile(
        # `learning_rate` replaces the deprecated `lr` argument name.
        optimizer=keras.optimizers.RMSprop(learning_rate=0.001),
        loss=root_mean_squared_error,
        metrics=["mse"],
    )

my_callbacks = [
    # Without restore_best_weights the model keeps the weights from the last
    # epoch, i.e. `patience` epochs after the best validation loss.
    keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
]

model_1.fit([X_train_df, images_train], y_train, epochs=100, batch_size=128, steps_per_epoch=32, validation_split=0.2, callbacks=my_callbacks)

In [None]:
# Test-set predictions of the two-input network (tabular features first, images second).
pred_2nn = model_1.predict(x=[X_test_df, images_test])

In [None]:
# from tensorflow import keras


# def make_model():
#     df_input = keras.layers.Input(shape=(X_train_df.shape[1],), dtype='float32', name='df')
#     dense_1 = keras.layers.Dense(10, activation='relu')(df_input)
#     dense_2 = keras.layers.Dense(5, activation='relu')(dense_1)
    
#     image_input = keras.layers.Input(shape=(150,150,3), dtype='float32', name='image_input')
#     x = keras.layers.Conv2D(32, (3, 3), activation='relu')(image_input)
#     x = keras.layers.MaxPooling2D((2, 2))(x)
#     x = keras.layers.Conv2D(64, (3, 3), activation='relu')(x)
#     x = keras.layers.MaxPooling2D((2, 2))(x)
#     x = keras.layers.Conv2D(128, (3, 3), activation='relu')(x)
#     x = keras.layers.MaxPooling2D((2, 2))(x)
#     x = keras.layers.Conv2D(128, (3, 3), activation='relu')(x)
#     x = keras.layers.MaxPooling2D((2, 2))(x)

#     x = keras.layers.Flatten()(x)
    
#     concat = keras.layers.concatenate([dense_2, x], axis=-1)
#     y = keras.layers.Dense(256, activation='linear')(concat)
#     out_put = keras.layers.Dense(1, activation='linear')(y)
    
#     model = keras.models.Model([df_input, image_input], out_put)
#     return model

# model = make_model()

In [None]:
# from tensorflow.keras import backend as K
# def root_mean_squared_error(y_true, y_pred):
#         return K.sqrt(K.mean(K.square(y_pred - y_true))) 

# model.compile(optimizer = "rmsprop", loss = root_mean_squared_error, 
#               metrics =["mse"])

# my_callbacks = [
#     keras.callbacks.EarlyStopping(patience=5)
# ]

# model.fit([X_train_df, images_train], y_train, epochs=100, validation_split=0.2, callbacks=my_callbacks)

In [None]:
# pred1 = model.predict([X_test_df, images_test])
# pred1

## XGBoost for tabular data

In [None]:
import xgboost
from sklearn.model_selection import RandomizedSearchCV
import time

# `eta` is XGBoost's alias for `learning_rate`; the original passed both with
# different values (0.3 and 0.05), leaving the effective rate ambiguous.
# Only `learning_rate` is kept.
xgb_model = xgboost.XGBRegressor(n_estimators=100, learning_rate=0.05)

# Candidate learning rates for a hyper-parameter search; not consumed by any
# code in this cell.
params = {
    'learning_rate': [0.001, 0.005, 0.01, 0.05, 0.1, 0.5]
}

xgb_model.fit(X_train_df, y_train)
pred_xgb = xgb_model.predict(X_test_df)

## LightGBM

In [None]:
import lightgbm as lgb
from sklearn.model_selection import RandomizedSearchCV
import time


# Gradient-boosted trees on the tabular features only; fit() returns the
# fitted estimator, so construction and training are chained.
lgb_model = lgb.LGBMRegressor(
    n_estimators=300,
    learning_rate=0.001,
).fit(X_train_df, y_train)
pred_lgb = lgb_model.predict(X_test_df)
# # A parameter grid for LightGBM
# params = {
#     'n_estimators':[100, 200, 300, 500, 750],
#     'learning_rate':[0.001, 0.005, 0.01, 0.05, 0.1, 0.5], 
# }

# reg = lgb.LGBMRegressor(nthread=-1)

# # run randomized search
# n_iter_search = 100
# random_search = RandomizedSearchCV(reg, param_distributions=params,
#                                    n_iter=n_iter_search, cv=5, iid=False, scoring='neg_root_mean_squared_error')

# start = time.time()
# random_search.fit(X_train_df, y_train)
# print("RandomizedSearchCV took %.2f seconds for %d candidates"
#       " parameter settings." % ((time.time() - start), n_iter_search))

# print(random_search.best_estimator_)
# print(random_search.best_score_)

In [None]:
# Display the test predictions of the two-input neural network.
pred_2nn

In [None]:
# Display the test predictions of the LightGBM model.
pred_lgb

In [None]:
# Display the test predictions of the XGBoost model.
pred_xgb

In [None]:
import pandas as pd

# Equal-weight average of the three models' predictions, each flattened to 1-D.
nn_flat = pred_2nn.reshape(-1)
lgb_flat = pred_lgb.reshape(-1)
xgb_flat = pred_xgb.reshape(-1)
pred = (nn_flat + lgb_flat + xgb_flat) / 3

# Fill the submission template with the blended predictions and write it out.
submission = pd.read_csv(path_submission)
submission['Pawpularity'] = pred
submission.to_csv('submission.csv', index=False)