In [None]:
import os
import gc
import random
import numpy as np 
import pandas as pd 
import lightgbm as lgb
import tensorflow as tf
from tensorflow.keras import Sequential,Model
from tensorflow.keras.layers import Dense,Conv2D,Flatten,Dropout, Input, Concatenate, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold,train_test_split
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# ---- Global configuration ----
# Reproducibility: seed the NumPy, Python and TensorFlow RNGs with one value.
seed = 91
for _seed_rng in (np.random.seed, random.seed, tf.random.set_seed):
    _seed_rng(seed)

# Data / input-pipeline settings used by the cells below.
img_size, Batch_size = 299, 32  # square side images are resized to; samples per batch
Q = 20                          # number of quantile bins for the stratification label
AUTOTUNE = tf.data.experimental.AUTOTUNE  # passed to tf.data map()/prefetch() calls


In [None]:
# Read the data (file names / ID handling can be adapted here).
df_test = pd.read_csv("/kaggle/input/petfinder-pawpularity-score/test.csv")
df = pd.read_csv("/kaggle/input/petfinder-pawpularity-score/train.csv")

# Keep a copy of the raw test ids before the "Id" column is overwritten with file paths.
Id = df_test["Id"].copy()

# Turn each id into the full path of its JPEG file.
df["Id"] = df["Id"].map(lambda image_id : "/kaggle/input/petfinder-pawpularity-score/train/" + image_id + ".jpg")
df_test["Id"] = df_test["Id"].map(lambda image_id : "/kaggle/input/petfinder-pawpularity-score/test/" + image_id + ".jpg")

# Bucket the target into Q quantile bins labelled 0..Q-1 (used to stratify the folds).
df['stratify_label'] = pd.qcut(df['Pawpularity'], q = Q, labels = range(Q))

In [None]:
# Preview the first rows of the training table (Id now holds image file paths).
df.head()

In [None]:
# Exploratory analysis: histogram of the target with its mean and median marked.
sns.set(rc={'figure.figsize':(15,5)})
fig, ax = plt.subplots()
sns.histplot(data=df, x='Pawpularity', bins=100, ax=ax)
paw_mean = df['Pawpularity'].mean()
paw_median = df['Pawpularity'].median()
ax.axvline(paw_mean, c='red', ls='-', lw=3, label='Mean Pawpularity')
ax.axvline(paw_median, c='blue', ls='-', lw=3, label='Median Pawpularity')
ax.set_title('Distribution of Pawpularity Scores', fontsize=20, fontweight='bold')
ax.legend()
plt.show()

In [None]:
# Augmenting the image
def image_preprocess(is_labelled, pixel_max=255.0):
    """Build the random-augmentation function applied inside the tf.data pipeline.

    The colour jitter is applied on a [0, 1] scale and the result is scaled
    back to [0, pixel_max]. This matters because `tf.image.random_brightness`
    adds its delta directly to float pixel values (0.05 on a 0-255 image is
    practically invisible), and the hue/saturation ops are documented as
    well defined only for values in [0, 1].

    Args:
        is_labelled: if True the returned function takes ``(image, label)``
            and returns ``(augmented_image, label)``; otherwise it takes and
            returns the image only.
        pixel_max: upper bound of the incoming pixel range. The default of
            255.0 assumes float images in [0, 255], i.e. decoded 8-bit JPEGs
            cast to float32 as this notebook's `image_read` produces.

    Returns:
        A function suitable for `tf.data.Dataset.map`.
    """
    def augment(image):
        image = tf.image.random_flip_left_right(image)  # horizontal flip
        image = image / pixel_max                        # bring pixels to [0, 1]
        image = tf.image.random_saturation(image, 0.95, 1.05)  # saturation jitter
        image = tf.image.random_contrast(image, 0.95, 1.05)    # contrast jitter
        image = tf.image.random_brightness(image, 0.05)        # brightness jitter
        # Contrast/brightness can push values slightly outside [0, 1];
        # clip so the hue conversion sees a valid range.
        image = tf.clip_by_value(image, 0.0, 1.0)
        image = tf.image.random_hue(image, 0.05)               # hue jitter
        image = tf.clip_by_value(image, 0.0, 1.0)
        return image * pixel_max                         # back to the input scale
    
    def can_be_augmented(img, label):
        # Labels pass through untouched; only the image is augmented.
        return augment(img), label
    
    return can_be_augmented if is_labelled else augment



def image_read(is_labelled):
    """Build the function that loads one image file for the tf.data pipeline.

    The loader reads the file at the given path, decodes it as a 3-channel
    JPEG, casts it to float32, resizes it to ``img_size x img_size`` and
    passes it through the EfficientNet ``preprocess_input``.

    Args:
        is_labelled: if True the returned function takes ``(path, label)``
            and returns ``(image, label)``; otherwise it takes a path and
            returns the image only.
    """
    def load_image(file_path):
        raw_bytes = tf.io.read_file(file_path)
        pixels = tf.cast(tf.image.decode_jpeg(raw_bytes, channels=3), tf.float32)
        pixels = tf.image.resize(pixels, (img_size, img_size))
        return tf.keras.applications.efficientnet.preprocess_input(pixels)

    def load_image_with_label(file_path, target):
        # Labelled records keep their label next to the decoded image.
        return load_image(file_path), target

    if is_labelled:
        return load_image_with_label
    return load_image


# Creating the dataset
def create_dataset(df, batch_size, is_labelled = False, augment = False, shuffle = False):
    """Build a batched, prefetched tf.data pipeline from a table of image paths.

    Args:
        df: DataFrame with an "Id" column of image file paths and, when
            `is_labelled` is True, a "Pawpularity" column with the targets.
        batch_size: number of samples per batch.
        is_labelled: yield ``(images, labels)`` batches instead of images only.
        augment: apply the random augmentation from `image_preprocess`.
        shuffle: reshuffle the records on every iteration over the dataset.

    Returns:
        A `tf.data.Dataset` of batches.
    """
    image_read_fn = image_read(is_labelled)
    image_preprocess_fn = image_preprocess(is_labelled)
    
    if is_labelled:
        dataset = tf.data.Dataset.from_tensor_slices((df["Id"].values, df["Pawpularity"].values))
    else:
        dataset = tf.data.Dataset.from_tensor_slices((df["Id"].values))

    if shuffle:
        # Shuffle the lightweight (path, label) records *before* decoding:
        # the buffer then holds short strings instead of decoded float images,
        # so it can span the whole table and give a full shuffle cheaply.
        dataset = dataset.shuffle(max(len(df), 1), reshuffle_each_iteration=True)

    dataset = dataset.map(image_read_fn, num_parallel_calls=AUTOTUNE)
    if augment:
        dataset = dataset.map(image_preprocess_fn, num_parallel_calls=AUTOTUNE)
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(AUTOTUNE)
    return dataset

In [None]:
# Hold-out split (the cut point could be tuned): the first 9000 rows,
# roughly 90% of the table, are used for training and the rest for validation.
n_train_rows = 9000
trn = df.iloc[:n_train_rows]
val = df.iloc[n_train_rows:]  # starts exactly where `trn` ends, so no row is dropped
# Each of the three datasets gets its own pipeline settings:
# training is augmented and shuffled, validation and test are neither.
train = create_dataset(trn, Batch_size, is_labelled = True, augment = True, shuffle = True)
validation = create_dataset(val, Batch_size, is_labelled = True, augment = False, shuffle = False)
test = create_dataset(df_test, Batch_size, is_labelled = False, augment = False, shuffle=False)

In [None]:
def unfreeze_model(model):
    """Make every layer of `model` trainable except BatchNormalization layers.

    Sets ``layer.trainable`` in place for each entry of ``model.layers``:
    BatchNormalization layers become non-trainable, all others trainable.
    Returns None.
    """
    batch_norm_cls = tf.keras.layers.BatchNormalization
    for layer in model.layers:
        layer.trainable = not isinstance(layer, batch_norm_cls)

In [None]:
# Path of the saved EfficientNetB7 Keras model file.
img_mod = "/kaggle/input/keras-applications-models/EfficientNetB7.h5"
# Commented-out alternatives (other backbone files / construction via keras.applications):
#img_mod = "/kaggle/input/keras-applications-models/DenseNet169.h5"
#efnet = tf.keras.applications.EfficientNetB0(weights='/kaggle/input/keras-applications-models/EfficientNetB0.h5', 
                                             #include_top=False, input_shape=(224, 224, 3), pooling='avg')                        
                
# Load the saved Keras model from the .h5 file.
# NOTE(review): whether this file stores the network with or without its
# classification head, and which input size it expects, is not visible here;
# confirm it matches the (img_size, img_size, 3) input used by the model below.
efnet = tf.keras.models.load_model(img_mod)
# Freeze the backbone so its weights are not updated during training.
efnet.trainable = False
#unfreeze_model(efnet)  
#base_model.layers[-2].output
#tf.keras.utils.plot_model(efnet, show_shapes=True)

In [None]:
# Full network: input -> frozen backbone -> small regression head.
# NOTE(review): the output unit uses ReLU, so predictions are clamped at >= 0
# and the unit has zero gradient whenever its pre-activation is negative;
# confirm this is intended rather than a linear output.
model_stages = (
    Input(shape=(img_size, img_size, 3)),
    efnet,
    BatchNormalization(),                  # batch-normalisation layer
    Dropout(0.3),                          # dropout
    Dense(units = 64, activation="relu"),  # fully connected layer
    Dense(units = 1, activation="relu"),   # single-value output
)
model = Sequential()
for stage in model_stages:
    model.add(stage)

# Last expression of the cell: renders the architecture diagram.
tf.keras.utils.plot_model(model, show_shapes=True)

In [None]:
# Set Callbacks
def model_checkpoint(fold):
    """Create the ModelCheckpoint callback for one fold.

    The callback writes weights only to ``feature_model_{fold}.h5`` and keeps
    just the best epoch, i.e. the lowest ``val_root_mean_squared_error``.

    Args:
        fold: fold identifier interpolated into the checkpoint file name.
    """
    weights_file = f'feature_model_{fold}.h5'
    options = dict(
        monitor = 'val_root_mean_squared_error',
        mode = 'min',
        save_best_only = True,
        save_weights_only = True,
        verbose = 1,
    )
    return tf.keras.callbacks.ModelCheckpoint(weights_file, **options)

In [None]:
# Learning-rate schedule: starts at 1e-3 and is multiplied by 0.96 every
# 100 optimizer steps (staircase=True makes the decay happen in discrete steps).
lr_schedule = ExponentialDecay(initial_learning_rate=1e-3,
                               decay_steps=100,
                               decay_rate=0.96,
                               staircase=True)

# Adam optimizer driven by the schedule above; MSE loss, RMSE reported as metric.
adam_optimizer = tf.keras.optimizers.Adam(learning_rate = lr_schedule)
rmse_metric = tf.keras.metrics.RootMeanSquaredError()
model.compile(optimizer = adam_optimizer, loss="mse", metrics=[rmse_metric])

# Best validation RMSE of each fold, appended to by the training loop.
all_val_rmse = []

In [None]:
    # Stratified K-fold training.
    # Every fold builds its own train/validation pipelines from the fold
    # indices, restarts from the same initial weights with a fresh optimizer,
    # and records the best validation RMSE it reached.
    N_FOLDS = 5
    kfold = StratifiedKFold(n_splits = N_FOLDS, shuffle = True, random_state = seed)
    all_val_rmse = []  # reset here so re-running this cell does not accumulate old folds
    initial_weights = model.get_weights()  # snapshot taken before any fold trains
    for fold, (train_index, val_index) in enumerate(kfold.split(df.index, df['stratify_label'])):
        print(f'\n===== Fold {fold}\n')

        # Pre model.fit cleanup. `model` is reused across folds, so only
        # garbage from the previous fold is collected; the Keras session is
        # not cleared.
        gc.collect()

        # Datasets for this fold only: the rows selected by the splitter.
        fold_train = create_dataset(df.iloc[train_index], Batch_size,
                                    is_labelled = True, augment = True, shuffle = True)
        fold_validation = create_dataset(df.iloc[val_index], Batch_size,
                                         is_labelled = True, augment = False, shuffle = False)

        # Start each fold from the same state: restore the initial weights and
        # recompile so the Adam moments and the step counter that drives the
        # learning-rate decay start from zero again.
        model.set_weights(initial_weights)
        model.compile(loss="mse",
                      optimizer = tf.keras.optimizers.Adam(learning_rate = lr_schedule),
                      metrics=[tf.keras.metrics.RootMeanSquaredError()])

        # Fit Model
        # No steps_per_epoch / validation_steps: Keras then walks each finite
        # dataset completely and restarts it every epoch. A step count that
        # differs from the dataset length makes the input run out mid-training.
        predictor = model.fit(fold_train,
                      epochs=10,
                      callbacks = [model_checkpoint(fold)],
                      validation_data = fold_validation,
                      verbose = 1)

        # Validation Information
        best_val_rmse = min(predictor.history['val_root_mean_squared_error'])
        all_val_rmse.append(best_val_rmse)
        print(f'\nValidation RMSE: {best_val_rmse}\n')

    # Summary
    print(f'Final Mean RMSE for {N_FOLDS} Fold CV Training: {np.mean(all_val_rmse)}')

In [None]:
#early_stopping = EarlyStopping(min_delta=1e-4,patience =5,restore_best_weights=True)

#predictor = model.fit(train,epochs=20, 
                      #validation_data = validation, #验证数据
                      #callbacks=[early_stopping])

In [None]:
# Predict on the test pipeline and write the submission file
# with the raw test ids next to the predicted scores.
pred = model.predict(test)
final = pd.DataFrame({'Id': Id})
final['Pawpularity'] = pred
final.to_csv('submission.csv', index=False)

In [None]:
final.head(8) # Inspect the first eight predictions