# I tried implementing this with MobileNet, which no one else seems to be doing.
### I'll include an explanatory notebook when I get better results.

# 1. Import Library

In [None]:
import os
import random

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import lightgbm as lgb
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential,Model
from tensorflow.keras.preprocessing.image import load_img, ImageDataGenerator
from tensorflow.keras.utils import Sequence
from tensorflow.keras.layers import Dense,Conv2D,Flatten,Dropout, Input, Concatenate, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from PIL import Image
from sklearn.model_selection import train_test_split, KFold
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from tqdm.notebook import tqdm
from catboost import CatBoostRegressor

In [None]:
def seed_everything(seed=123):
    """Seed the Python, NumPy and TensorFlow random number generators.

    Args:
        seed: Integer seed applied to every generator. Defaults to 123,
            the value this notebook has always used.
    """
    np.random.seed(seed)
    random.seed(seed)
    tf.random.set_seed(seed)
    # Lower the verbosity of TensorFlow's native (C++) logging.
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2'
    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up,
    # so this presumably only affects child processes -- confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)


seed_everything()

# 2.Datasets

In [None]:
# Dataset locations and tables.
# After loading, the "Id" column of the train and test tables is rewritten
# from a bare image id to the full path of the matching .jpg file.

train_dir = "/kaggle/input/petfinder-pawpularity-score/train/"
test_dir = "/kaggle/input/petfinder-pawpularity-score/test/"

train_table = pd.read_csv("/kaggle/input/petfinder-pawpularity-score/train.csv")
test_table = pd.read_csv("/kaggle/input/petfinder-pawpularity-score/test.csv")
sample = pd.read_csv("/kaggle/input/petfinder-pawpularity-score/sample_submission.csv")

train_table["Id"] = train_dir + train_table["Id"] + ".jpg"
test_table["Id"] = test_dir + test_table["Id"] + ".jpg"

train_table

In [None]:
# Peek at the first test image path (row label 0 of the "Id" column).
test_table.loc[0, "Id"]

# 3.Generator Class
### using ImageDataGenerator

In [None]:
class MY_GENERATOR(keras.utils.Sequence):
    """Keras ``Sequence`` that loads images from disk one batch at a time.

    Every image is resized to ``size`` and run through
    ``mobilenet_v2.preprocess_input``. With ``augmentation`` enabled a random
    ``ImageDataGenerator`` transform is applied as well.

    Args:
        img_pathes: Sequence of image file paths.
        y: Targets aligned with ``img_pathes``.
        batch_size: Number of samples per batch.
        size: Passed to ``load_img`` as ``target_size``.
        augmentation: Whether to apply random augmentation to each image.
    """

    def __init__(self, img_pathes, y, batch_size=1, size=(224,224),augmentation=True):
        super().__init__()
        self.img_pathes = img_pathes
        self.y = np.array(y)
        self.batch_size = batch_size
        self.size = size
        self.augmentation = augmentation
        self.datagen = None
        self.data_generator()

    def __len__(self):
        """Return the number of batches per epoch, including a final partial one."""
        # Round up so the trailing samples are served instead of being
        # dropped (and so evaluation covers every sample).
        return int(np.ceil(len(self.img_pathes) / self.batch_size))

    def __getitem__(self, idx):
        """Return ``(images, targets)`` for batch number ``idx``."""
        start = idx * self.batch_size
        stop = start + self.batch_size
        X_batch = self.load_imgs(self.img_pathes[start:stop])
        y_batch = self.y[start:stop]
        return X_batch, y_batch

    def on_epoch_end(self):
        """Run at the end of each epoch: shuffle paths and targets together."""
        order = np.random.permutation(len(self.img_pathes))
        self.img_pathes = [self.img_pathes[i] for i in order]
        self.y = self.y[order]

    def load_imgs(self, pathes):
        """Load, resize and preprocess ``pathes``; return a float32 array."""
        imgs = [
            self.preprocessing(np.array(load_img(path, target_size=self.size)))
            for path in pathes
        ]
        return np.array(imgs).astype(np.float32)

    def preprocessing(self, img):
        """Return ``img`` scaled for MobileNetV2, randomly augmented if enabled."""
        if self.augmentation:
            # ``flow`` expects a batch; the datagen applies the random
            # transform and then its ``preprocessing_function``.
            batch = np.expand_dims(img, 0)
            return next(self.datagen.flow(batch))[0]
        # The un-augmented path must apply the same MobileNetV2 scaling,
        # otherwise the network is fed raw 0-255 pixels. Work on a float copy
        # so the caller's array is never modified in place.
        return tf.keras.applications.mobilenet_v2.preprocess_input(
            np.array(img, dtype=np.float32)
        )

    def data_generator(self):
        """Create the ``ImageDataGenerator`` used for augmentation."""
        datagen = ImageDataGenerator(
               #rescale=1./255,
               rotation_range=10,  # rotate (mind the background); maybe up to ~50?
               width_shift_range=10,  # horizontal shift; maybe ~50 as well?
               height_shift_range=10,  # vertical shift; maybe ~50 as well?
               shear_range=5,  # shear; maybe around 30?
               zoom_range=0,  # unusual, but something like [1,2] might be better
               horizontal_flip=True,  # left-right flip
               vertical_flip=False,  # up-down flip; probably not useful
               preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input
               )
        self.datagen = datagen

### test Generator

In [None]:
# Sanity check: show one training image as loaded, after MobileNetV2
# preprocessing, and after ten passes through the generator's preprocessing.
train_gen = MY_GENERATOR(train_table["Id"].values, train_table["Pawpularity"].values)

img = load_img(train_table["Id"].values[100], target_size=(224,224))
mobilenet_v2_preprocessed_img = tf.keras.applications.mobilenet_v2.preprocess_input(np.array(img))

plt.figure(figsize=(12,5))
panels = [(img, "original image"),
          (mobilenet_v2_preprocessed_img, "mobilenet_v2.preprocess_input")]
for position, (picture, caption) in enumerate(panels, start=1):
    plt.subplot(1,2,position)
    plt.imshow(picture)
    plt.axis("off")
    plt.title(caption)
plt.show()

print("-----ImageDataGenerator-----")
imgs = [train_gen.preprocessing(img) for _ in range(10)]

plt.figure(figsize=(20,6))
for i, augmented in enumerate(imgs):
    plt.subplot(1,10,i+1)
    plt.imshow(augmented)
    plt.axis("off")
plt.show()

# 4. Model (MobileNetV2)

# 5.Training_load_model_K-Fold_ver

In [None]:
def _add_regression_head(base_model):
    """Freeze ``base_model`` and return a compiled single-output regressor on top of it."""
    base_model.trainable = False
    # Branch off the second-to-last layer, i.e. drop the base model's final
    # layer (presumably the pooled features just before the classifier).
    x = base_model.layers[-2].output
    x = Dropout(0.2)(x)
    x = Dense(128, activation="relu")(x)
    x = Dense(1)(x)
    model = Model(inputs=base_model.input, outputs=x)
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-5), loss="mean_squared_error", metrics=[keras.metrics.RootMeanSquaredError()])
    return model


def MobileNet_model():
    """Build the regressor on a MobileNetV2 created with ``weights="imagenet"``.

    Returns:
        A compiled Keras ``Model`` with a single linear output.
    """
    return _add_regression_head(keras.applications.mobilenet_v2.MobileNetV2(weights="imagenet"))


def MobileNet_model_no_internet_ver(weights_path="../input/mobilenetv2-imagenet/MobileNetV2_imagenet.h5"):
    """Build the regressor on a MobileNetV2 loaded from a saved model file.

    Args:
        weights_path: Path of the saved base model handed to
            ``keras.models.load_model``. Defaults to the dataset path this
            notebook has always used.

    Returns:
        A compiled Keras ``Model`` with a single linear output.
    """
    return _add_regression_head(keras.models.load_model(weights_path))

#model = MobileNet_model()
#model = MobileNet_model_no_internet_ver()
#model.summary()

In [None]:
def Lunch_Kflod_training(imgs_path, y, model=None, n_splits=5, epochs=20, batch_size=16):
    """Train one MobileNetV2 regressor per K-fold split and checkpoint each one.

    Args:
        imgs_path: Array of image paths; must support integer-array indexing.
        y: Array of targets aligned with ``imgs_path``.
        model: Ignored. A fresh model is built for every fold; the parameter
            is kept only so existing calls keep working.
        n_splits: Number of folds.
        epochs: Maximum number of epochs per fold.
        batch_size: Batch size for every generator.

    Returns:
        List of checkpoint file names, one per fold, in fold order.
    """
    monitor = "val_root_mean_squared_error"
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=2021)
    save_model_names = []

    for i, (train_id, vali_id) in enumerate(kfold.split(imgs_path, y)):
        #make kfold datasets
        train_path, y_train = imgs_path[train_id], y[train_id]
        vali_path, y_vali = imgs_path[vali_id], y[vali_id]
        checkpoint_name = f'MV2_checkpoint_fold{i+1}.h5'
        #set model
        model = MobileNet_model_no_internet_ver()
        # Both callbacks watch the same metric and both minimise it.
        early_stopping = EarlyStopping(patience=2, monitor=monitor, mode="min", restore_best_weights=True)
        model_checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_name,
                                           monitor=monitor,
                                           mode='min', save_best_only=True)
        #set generator
        train_gen = MY_GENERATOR(train_path, y_train, batch_size=batch_size, augmentation=True)
        # NOTE(review): validation batches are augmented too; consider a
        # non-augmented validation generator once it is confirmed that
        # MY_GENERATOR preprocesses un-augmented images identically.
        vali_gen = MY_GENERATOR(vali_path, y_vali, batch_size=batch_size, augmentation=True)

        #training
        hist = model.fit(train_gen, epochs=epochs,
                         validation_data=vali_gen,
                         callbacks=[early_stopping, model_checkpoint])

        #evaluate (without augmentation)
        train_eval = model.evaluate(MY_GENERATOR(train_path, y_train, batch_size=batch_size, augmentation=False))
        vali_eval = model.evaluate(MY_GENERATOR(vali_path, y_vali, batch_size=batch_size, augmentation=False))

        #plot_history
        plt.plot(hist.history["root_mean_squared_error"], label="root_mean_squared_error")
        plt.plot(hist.history["val_root_mean_squared_error"], label="val_root_mean_squared_error", linestyle="--")
        plt.title(f"Kfold_{i+1}\ntrain_RMSE:{train_eval[1]:.3f}\nvali_RMSE:{vali_eval[1]:.3f}")
        plt.legend()
        plt.show()

        save_model_names.append(checkpoint_name)

    return save_model_names

In [None]:
#save_ft_model_names = Lunch_Kflod_training(train_table["Id"].values, train_table["Pawpularity"].values, model)

In [None]:
def catboost_model():
    """Return an unfitted ``CatBoostRegressor`` with this notebook's fixed hyper-parameters."""
    return CatBoostRegressor(
        loss_function='RMSE',
        eval_metric='RMSE',
        iterations=777,
        grow_policy='SymmetricTree',
        depth=6,
        l2_leaf_reg=2.0,
        random_strength=1.0,
        learning_rate=0.05,
        task_type='CPU',
        devices='0',
        verbose=0,
        random_state=2021,
    )

def img_yield(pathes, batch_size=8, size=(224,224)):
    """Yield MobileNetV2-preprocessed image batches covering every path.

    Args:
        pathes: Sequence of image file paths.
        batch_size: Maximum number of images per yielded batch.
        size: Passed to ``load_img`` as ``target_size``.

    Yields:
        One array per batch. The last batch holds the remaining images when
        ``len(pathes)`` is not a multiple of ``batch_size``, so the total
        number of rows always equals ``len(pathes)``.
    """
    # Step by batch_size up to len(pathes) so the trailing partial batch is
    # included; dropping it leaves fewer feature rows than targets.
    for start in range(0, len(pathes), batch_size):
        batch = [
            tf.keras.applications.mobilenet_v2.preprocess_input(
                np.array(load_img(path, target_size=size))
            )
            for path in pathes[start:start + batch_size]
        ]
        yield np.array(batch)
        
def RMSE(y_true, y_pred):
    """Return the square root of ``mean_squared_error(y_true, y_pred)``."""
    mse = mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)

def Lunch_kflod_catboost(imgs_path, table, y, model_names):
    """Fit one CatBoost regressor per fold on CNN features plus tabular columns.

    For every fold the saved Keras model ``model_names[i]`` is loaded, the
    output of its second-to-last layer is used as image features, these are
    stacked column-wise with the matching rows of ``table``, and a CatBoost
    model is fitted, scored on both splits and saved.

    Args:
        imgs_path: Array of image paths (integer-array indexable).
        table: 2-D array of tabular features aligned with ``imgs_path``.
        y: Array of targets aligned with ``imgs_path``.
        model_names: Saved Keras model files, one per fold, in fold order.

    Returns:
        List of saved CatBoost model file names, one per fold.
    """
    splitter = KFold(n_splits=5, shuffle=True, random_state=2021)
    catmodel_names = []
    for i, (train_id, vali_id) in enumerate(splitter.split(imgs_path, y)):
        print(f"-----fold:{i+1}-----")
        # Per-fold slices of paths, tabular rows and targets.
        train_path, train_rows, y_train = imgs_path[train_id], table[train_id,:], y[train_id]
        vali_path, vali_rows, y_vali = imgs_path[vali_id], table[vali_id,:], y[vali_id]

        # Feature extractor: the fold's CNN cut at its second-to-last layer.
        fold_model = keras.models.load_model(model_names[i])
        extractor = Model(fold_model.input, fold_model.layers[-2].output)
        train_features = extractor.predict(img_yield(train_path))
        vali_features = extractor.predict(img_yield(vali_path))

        # Image features first, tabular columns after.
        X_train = np.hstack([train_features, train_rows])
        X_vali = np.hstack([vali_features, vali_rows])

        cb_model = catboost_model()
        cb_model.fit(X_train, y_train)

        print("---train_RMSE_score---")
        print(RMSE(y_train, cb_model.predict(X_train)))
        print("---validation_RMSE_score---")
        print(RMSE(y_vali, cb_model.predict(X_vali)))

        # Persist the fold's regressor and remember its file name.
        cat_name = f"cb_model_fold{i+1}.cat"
        cb_model.save_model(cat_name)
        catmodel_names.append(cat_name)
    return catmodel_names

In [None]:
#save_catmodel_names = Lunch_kflod_catboost(train_table["Id"].values, 
#                                           train_table.drop(["Id","Pawpularity"],axis=1).values, 
#                                           train_table["Pawpularity"].values, 
#                                           save_ft_model_names)

In [None]:
def test_input_image(pathes):
    """Load every image in ``pathes`` at 224x224 and return them MobileNetV2-preprocessed.

    A progress bar is shown while the files are read.
    """
    raw_images = []
    for path in tqdm(pathes):
        raw_images.append(np.array(load_img(path, target_size=(224,224))))
    preprocess = keras.applications.mobilenet_v2.preprocess_input
    return np.array([preprocess(picture) for picture in np.array(raw_images)])

def test_kfold(save_ft_model_names, save_catmodel_names):
    """Predict the test set once per fold with each (CNN, CatBoost) pair.

    Reads the module-level ``test_table`` for image paths and tabular columns.

    Args:
        save_ft_model_names: Saved Keras model files, one per fold.
        save_catmodel_names: Saved CatBoost model files, in the same order.

    Returns:
        List with one prediction array per fold.
    """
    imgs = test_input_image(test_table["Id"])
    kfold_predict = []
    for model_name, cat_model_name in zip(save_ft_model_names, save_catmodel_names):
        # CNN cut at its second-to-last layer acts as the feature extractor.
        fold_model = keras.models.load_model(model_name)
        extractor = Model(fold_model.input, fold_model.layers[-2].output)
        regressor = CatBoostRegressor()
        regressor.load_model(cat_model_name)
        # Image features first, tabular columns after.
        features = extractor.predict(imgs)
        X_test = np.hstack([features, test_table.drop("Id",axis=1).values])
        kfold_predict.append(regressor.predict(X_test))
    return kfold_predict

In [None]:
#kfold_predict = test_kfold(save_ft_model_names, save_catmodel_names)
#kfold_predict

# All training

In [None]:
# Final model: fit on the full training table. No validation data is passed,
# so early stopping watches the training RMSE.
model = MobileNet_model_no_internet_ver()
train_gen = MY_GENERATOR(
    train_table["Id"].values,
    train_table["Pawpularity"].values,
    batch_size=32,
    augmentation=True,
)
early_stopping = EarlyStopping(monitor="root_mean_squared_error", patience=1, restore_best_weights=True)
hist = model.fit(train_gen, callbacks=[early_stopping], epochs=30)

# Training-RMSE curve per epoch.
plt.plot(hist.history["root_mean_squared_error"], label="root_mean_squared_error")
plt.legend()
plt.show()

In [None]:
### catboost alldata training
# Image features come from the trained CNN cut at its second-to-last layer;
# they are stacked column-wise with the tabular columns of the training table.
ft_model = Model(model.input, model.layers[-2].output)
feature_pred = ft_model.predict(img_yield(train_table["Id"].values))
y_train = train_table["Pawpularity"].values
X_train = np.hstack([feature_pred, train_table.drop(["Id","Pawpularity"],axis=1).values])
cb_model = catboost_model()
cb_model.fit(X_train, y_train)

In [None]:
### test data predict!
def test_input_image(pathes):
    """Load every image in ``pathes`` at 224x224 and return them MobileNetV2-preprocessed.

    A progress bar is shown while the files are read.
    """
    raw_images = []
    for path in tqdm(pathes):
        raw_images.append(np.array(load_img(path, target_size=(224,224))))
    preprocess = keras.applications.mobilenet_v2.preprocess_input
    return np.array([preprocess(picture) for picture in np.array(raw_images)])

# Load and preprocess every test image.
imgs = test_input_image(test_table["Id"])
# Image features from the feature-extractor model.
test_feature_pred = ft_model.predict(imgs)
# Stack image features with the tabular columns (everything except "Id").
X_test = np.hstack([test_feature_pred, test_table.drop("Id",axis=1).values])
pred = cb_model.predict(X_test)
# Display the predictions as the cell output.
pred

In [None]:
# Re-read the test table so "Id" holds the raw ids again rather than the
# file paths written into it earlier, then pair each id with its prediction.
test_table = pd.read_csv("/kaggle/input/petfinder-pawpularity-score/test.csv")
sub = pd.DataFrame({"Id": test_table["Id"], "Pawpularity": pred})

In [None]:
# Write the submission file without the index column.
sub.to_csv("submission.csv", index = False)
# Display the submission table as the cell output.
sub

# Thank you for reading this notebook to the end!