In [None]:
import sklearn
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.applications.resnet50 import ResNet50
from sklearn.model_selection import KFold
from tensorflow.keras.layers.experimental.preprocessing import RandomRotation, RandomContrast
import os

In [None]:
# Load the three competition tables: the training table, the test table and
# the sample submission that is later filled in with predictions.
train, test, sub = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/petfinder-pawpularity-score/train.csv",
        "../input/petfinder-pawpularity-score/test.csv",
        "../input/petfinder-pawpularity-score/sample_submission.csv",
    )
)

In [None]:
# Derive the JPEG path of every image from its Id, for both tables.
train["file_path"] = [
    "../input/petfinder-pawpularity-score/train/" + identifier + ".jpg"
    for identifier in train["Id"]
]
test["file_path"] = [
    "../input/petfinder-pawpularity-score/test/" + identifier + ".jpg"
    for identifier in test["Id"]
]

In [None]:
# Display the column labels of the training table.
train.columns

In [None]:
# Names of the tabular feature columns that are fed to the model next to
# each image (the model's tabular input width is len(columns)).
columns = [
    'Subject Focus',
    'Eyes',
    'Face',
    'Near',
    'Action',
    'Accessory',
    'Group',
    'Collage',
    'Human',
    'Occlusion',
    'Info',
    'Blur',
]

In [None]:
# image_size: side length (pixels) every image is resized to.
# batch_size: number of samples per batch in the tf.data pipelines.
image_size, batch_size = 128, 256

In [None]:
def preprocess(image_url, tabular):
    """Turn one training row into ``((image, features), target)``.

    Args:
        image_url: Path of the JPEG file to read.
        tabular: Row whose first element is the regression target and whose
            remaining elements are the tabular features.

    Returns:
        A pair ``((image, features), target)``: ``image`` is the decoded
        3-channel picture as float32 divided by 255 and resized to
        ``image_size`` x ``image_size``; ``features`` is ``tabular[1:]``;
        ``target`` is ``tabular[0]`` cast to float32.
    """
    target = tf.cast(tabular[0], tf.float32)
    features = tabular[1:]

    raw_bytes = tf.io.read_file(image_url)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    pixels = tf.cast(pixels, tf.float32) / 255.0
    # NOTE(review): a central fraction of 1.0 is expected to leave the image
    # as is -- confirm whether a smaller crop was intended.
    pixels = tf.image.central_crop(pixels, 1.0)
    pixels = tf.image.resize(pixels, (image_size, image_size))
    return (pixels, features), target

def build_tabular_model(inputs):
    """Run the tabular features through a dense stack and re-append the inputs.

    Args:
        inputs: Keras tensor holding the tabular features.

    Returns:
        Keras tensor that concatenates the output of the dense stack with
        the untouched ``inputs``.
    """
    hidden = inputs

    # Widening half of the stack; the 128-unit layer is an extra layer that
    # was added on top of the first two.
    for units in (12, 64, 128):
        hidden = keras.layers.Dense(units, activation='relu')(hidden)

    hidden = keras.layers.Dropout(0.3)(hidden)
    hidden = keras.layers.BatchNormalization()(hidden)

    # Narrowing half of the stack.
    for units in (128, 64):
        hidden = keras.layers.Dense(units, activation='relu')(hidden)

    return keras.layers.Concatenate()([hidden, inputs])
def get_model():
    """Build the two-input regression model (image branch + tabular branch).

    The image branch applies random rotation and random contrast, feeds the
    result through a frozen backbone loaded from disk and global-average-pools
    its output. The tabular branch is produced by ``build_tabular_model``.
    Both branches are concatenated and passed through the dense head.

    Returns:
        An uncompiled ``tf.keras.Model`` taking ``[image, tabular]`` inputs
        and producing a single value per sample.
    """
    image_inputs = tf.keras.Input((image_size, image_size, 3))
    # Keras documents ``shape`` as a tuple, so pass a 1-tuple rather than a
    # bare integer.
    tabular_inputs = tf.keras.Input((len(columns),))

    # Backbone loaded from a local file (presumably ImageNet weights, judging
    # by the file name); frozen so that only the layers added here train.
    resnet = keras.models.load_model('../input/d/aeryss/keras-pretrained-models/ResNet50V2_NoTop_ImageNet.h5')
    resnet.trainable = False

    # Augmentation: rotation first, then contrast.
    contrast = RandomContrast(factor=0.1)
    rotation = RandomRotation(factor=0.3)
    image_x = resnet(contrast(rotation(image_inputs)))
    image_x = tf.keras.layers.GlobalAveragePooling2D()(image_x)

    tabular_x = build_tabular_model(tabular_inputs)

    # Join the two branches along axis 1 (the feature axis).
    x = tf.keras.layers.Concatenate(axis=1)([image_x, tabular_x])
    # NOTE(review): the Dense layers of the head have no activation argument,
    # so apart from dropout the head is a linear map -- confirm this is
    # intended.
    x = tf.keras.layers.Dense(1024)(x)
    x = keras.layers.Dropout(0.1)(x)
    x = tf.keras.layers.Dense(64)(x)
    output = tf.keras.layers.Dense(1)(x)

    # Functional API: the model is defined by its input and output tensors.
    return tf.keras.Model(inputs=[image_inputs, tabular_inputs], outputs=[output])

def block(x, filters, kernel_size, repetitions, pool_size=2, strides=2):
    """Apply a run of same-padded ReLU convolutions followed by one max-pool.

    Args:
        x: Input Keras tensor.
        filters: Number of filters of every convolution.
        kernel_size: Kernel size of every convolution.
        repetitions: How many convolutions to stack before pooling.
        pool_size: Pool size of the final max-pooling layer.
        strides: Strides of the final max-pooling layer.

    Returns:
        The Keras tensor produced by the max-pooling layer.
    """
    for _ in range(repetitions):
        conv = tf.keras.layers.Conv2D(filters, kernel_size, activation='relu', padding='same')
        x = conv(x)
    pool = tf.keras.layers.MaxPooling2D(pool_size, strides)
    return pool(x)

def rmse(y_true, y_pred):
    """Root-mean-square error between ``y_true`` and ``y_pred``.

    The mean is taken over all elements, so the result is a scalar tensor.
    """
    residual = y_true - y_pred
    mean_squared = tf.reduce_mean(residual ** 2)
    return tf.sqrt(mean_squared)

In [None]:
# Build one model up front only to draw its architecture diagram. The call
# is not the last expression of the cell, so its return value is not
# rendered inline.
model = get_model()
tf.keras.utils.plot_model(model, show_shapes=True)

models = []
historys = []
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
# When True, only the fold whose index equals ``best_fold`` is trained.
train_best_fold = True
best_fold = 0
for index, (train_indices, val_indices) in enumerate(kfold.split(train)):
    if train_best_fold and index != best_fold:
        continue

    # The target column goes first: ``preprocess`` splits it off as the label.
    x_train = train.loc[train_indices, "file_path"]
    tabular_train = train.loc[train_indices, ["Pawpularity"] + columns]
    x_val = train.loc[val_indices, "file_path"]
    tabular_val = train.loc[val_indices, ["Pawpularity"] + columns]

    # Path the checkpoint of this fold is saved to.
    checkpoint_path = "model_%d.h5" % (index)
    checkpoint = tf.keras.callbacks.ModelCheckpoint(checkpoint_path, save_best_only=True)
    # patience (1000) exceeds the number of epochs (11), so early stopping
    # cannot trigger within this run.
    early_stop = tf.keras.callbacks.EarlyStopping(min_delta=1e-4, patience=1000)
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(factor=0.3, patience=2, min_lr=1e-7)
    callbacks = [early_stop, checkpoint, reduce_lr]
    optimizer = tf.keras.optimizers.Adam(1e-3)

    # cache() must come BEFORE shuffle(): a cache placed after shuffle/batch
    # replays exactly the same batches in the same order on every epoch,
    # which silently disables per-epoch reshuffling.
    train_ds = (
        tf.data.Dataset.from_tensor_slices((x_train, tabular_train))
        .map(preprocess)
        .cache()
        .shuffle(512)
        .batch(batch_size)
        .prefetch(2)
    )
    # No shuffling for validation, so caching whole batches is fine here.
    val_ds = (
        tf.data.Dataset.from_tensor_slices((x_val, tabular_val))
        .map(preprocess)
        .batch(batch_size)
        .cache()
        .prefetch(2)
    )

    model = get_model()
    model.compile(loss="mse", optimizer=optimizer, metrics=["mae", rmse, "mape"])
    # The datasets are already batched; ``batch_size`` must not be passed to
    # fit() together with a tf.data.Dataset.
    history = model.fit(train_ds, epochs=11, validation_data=val_ds, callbacks=callbacks)

    # One figure per metric pair, plus the learning-rate schedule.
    for metrics in [("rmse", "val_rmse"), ("loss", "val_loss"), ("mae", "val_mae"), ("mape", "val_mape"), ["lr"]]:
        pd.DataFrame(history.history, columns=metrics).plot()
        plt.show()

    # Restore the weights saved by the checkpoint callback before keeping
    # the model for inference.
    model.load_weights(checkpoint_path)
    historys.append(history)
    models.append(model)

In [None]:
def preprocess_test_data(image_url, tabular):
    """Turn one test row into ``((image, features), dummy_label)``.

    Args:
        image_url: Path of the JPEG file to read.
        tabular: Tabular features of the row (no target column).

    Returns:
        A pair ``((image, tabular), 0)``: ``image`` is the decoded 3-channel
        picture as float32 divided by 255 and resized to ``image_size`` x
        ``image_size``; ``0`` is a placeholder label.
    """
    image_string = tf.io.read_file(image_url)
    image = tf.image.decode_jpeg(image_string, channels=3)
    image = tf.cast(image, tf.float32) / 255.0
    # NOTE(review): a central fraction of 1.0 is expected to leave the image
    # as is -- confirm whether a smaller crop was intended.
    image = tf.image.central_crop(image, 1.0)
    image = tf.image.resize(image, (image_size, image_size))
    # The 0 is not used for prediction, but it is needed in this scenario:
    # without it the tabular tensor would be treated as the label.
    return (image, tabular), 0

# Batched and cached inference pipeline over the test table.
test_ds = (
    tf.data.Dataset.from_tensor_slices((test["file_path"], test[columns]))
    .map(preprocess_test_data)
    .batch(batch_size)
    .cache()
    .prefetch(2)
)

# True: predict with a single model. False: average the predictions of
# every trained model.
use_best_result = False
if not use_best_result:
    total_results = [fold_model.predict(test_ds).reshape(-1) for fold_model in models]
    results = np.mean(total_results, axis=0).reshape(-1)
else:
    if train_best_fold:
        # Only one fold was trained, so it is the only candidate.
        best_model = models[0]
    else:
        # Pick the fold that reached the lowest validation RMSE in any
        # epoch; ties keep the earliest fold because the comparison is strict.
        best_fold = 0
        best_score = 10e8
        epoch_scores = (
            (fold, score)
            for fold, history in enumerate(historys)
            for score in history.history["val_rmse"]
        )
        for fold, score in epoch_scores:
            if score < best_score:
                best_score, best_fold = score, fold
        print("Best Score:%.2f Best Fold: %d"%(best_score, best_fold + 1))
        best_model = models[best_fold]
    results = best_model.predict(test_ds).reshape(-1)

In [None]:
# Store the predictions in the sample-submission frame and write it out
# without the index column.
sub["Pawpularity"] = results
sub.to_csv("submission.csv", index=False)