In [None]:
from keras.preprocessing import image
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import math
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import ParameterGrid
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error

from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input as prep_vgg
from tensorflow.keras.applications.inception_resnet_v2 import preprocess_input as prep_inc
from tensorflow.keras.applications.nasnet import preprocess_input as prep_nas
from tensorflow.keras.models import Model
#from tensorflow.keras.applications import VGG16
#from tensorflow.keras.applications import InceptionResNetV2
from tensorflow.keras.applications import NASNetLarge

# Seed handed to the stochastic estimators in this notebook (PCA, random forest)
# so that repeated runs produce the same models.
RANDOM_STATE = 42

import os
# NOTE(review): presumably suppresses the OpenMP (KMP) runtime settings dump in
# the kernel log — confirm. It is set after TensorFlow has already been imported.
os.environ["KMP_SETTINGS"] = "false"

In [None]:
# Root directory of the competition data; image folders and CSVs live below it.
ds_dir = "/kaggle/input/petfinder-pawpularity-score"

# Training metadata. The "Pawpularity" column is the regression target.
train_csv_path = f"{ds_dir}/train.csv"
input_df = pd.read_csv(train_csv_path)
y = input_df["Pawpularity"]

# Show a preview, then leave the number of training rows as the cell output.
display(input_df.head())
len(input_df)

In [None]:
def extract_features(df:pd.DataFrame, model, target_size, preprocess_input, dataset):
    """Run every image listed in ``df`` through ``model`` and collect the outputs.

    Args:
        df: Frame with an "Id" column. Each id names the image file
            ``{ds_dir}/{dataset}/{Id}.jpg``.
        model: Object exposing ``predict(batch)``; it is called once per image
            with a batch of size one.
        target_size: Size forwarded unchanged to ``image.load_img``.
        preprocess_input: Callable applied to the single-image batch before
            it is handed to ``model.predict``.
        dataset: Sub-directory of ``ds_dir`` holding the images (e.g. "train").

    Returns:
        A DataFrame with one row per row of ``df`` (same order, same index)
        and one column per value of the flattened model output.
    """
    # Allocated lazily: the feature width is only known after the first prediction.
    feature_matrix = None

    # Iterate by *position*, not by index label. Using the label as the row
    # number only works for a default 0..n-1 index and fails (IndexError or
    # rows written to the wrong place) for filtered/shuffled/split frames.
    # Reading the "Id" column directly also avoids the dtype upcasting that
    # iterrows() performs on mixed-type rows.
    for position, image_id in enumerate(df["Id"]):
        img_path = f"{ds_dir}/{dataset}/{image_id}.jpg"
        loaded = image.load_img(img_path, target_size=target_size)

        # The model expects a batch axis, so wrap the single image as (1, ...).
        batch = np.expand_dims(image.img_to_array(loaded), axis=0)
        feature_vector = np.asarray(model.predict(preprocess_input(batch))).ravel()

        if feature_matrix is None:
            feature_matrix = np.zeros((len(df.index), feature_vector.size))
        feature_matrix[position, :] = feature_vector

        if position % 1000 == 0:
            print("image #",position," done ", sep="")

    # The caller's index is re-attached here so features line up with ``df``.
    return pd.DataFrame(feature_matrix, index=df.index)

In [None]:
def train_configuration(X, y, reg_model, n_components=50):
  """Fit a PCA projection on ``X`` and train ``reg_model`` on the projected data.

  Args:
    X: Feature matrix to reduce and train on.
    y: Regression targets, one per row of ``X``.
    reg_model: Estimator exposing ``fit(X, y)``; it is fitted in place.
    n_components: Number of principal components to keep. Defaults to 50,
      the value that was previously hard-coded.

  Returns:
    ``(reg_model, pca)``: the fitted estimator and the fitted PCA. New data
    must go through ``pca.transform`` before being passed to
    ``reg_model.predict``.
  """
  pca = PCA(n_components=n_components, random_state=RANDOM_STATE)
  # fit and transform are kept as separate calls to preserve the exact
  # numerical path of the original implementation.
  pca.fit(X)
  X_reduced = pca.transform(X)
  reg_model.fit(X_reduced, y)
  return reg_model, pca


In [None]:
# NASNetLarge used as a fixed feature extractor: the classification head is
# left out (include_top=False) and pooling="avg" is requested for the output.
# Weights come from a local file rather than being downloaded.
nasnet_weights_path = '../input/tf-keras-pretrained-model-weights/No Top/NASNet-Large-no-top.h5'
model_nas = NASNetLarge(
    include_top=False,
    pooling="avg",
    weights=nasnet_weights_path,
)
# model_nas.summary()  # uncomment to inspect the architecture

In [None]:
# NASNet features for every training image (one predict call per image, so
# this cell is slow on the full training set).
features_nas = extract_features(
    df=input_df,
    model=model_nas,
    target_size=(331, 331, 3),
    preprocess_input=prep_nas,
    dataset="train",
)
# Optional cache of the extracted features:
# features_nas.to_hdf("mvi_store.h5", "features_nas")

In [None]:
from sklearn.model_selection import GridSearchCV
import sklearn.metrics as metrics

# Random-forest hyper-parameter grid: 4 x 3 x 5 = 60 candidate settings.
# max_features counts PCA components, so every value has to stay at or below
# the number of components kept by train_configuration.
rf_params = {
    "n_estimators": range(100, 260, 50),
    "max_depth": range(6, 12, 2),
    "max_features": range(10, 20, 2),
}

rf = RandomForestRegressor(random_state=RANDOM_STATE)
rf_search = GridSearchCV(
    rf,
    rf_params,
    verbose=3,
    n_jobs=-1,
    scoring="neg_root_mean_squared_error",
)

# The search object is passed in as the regressor, so the returned reg_model
# is the fitted GridSearchCV itself and predicts with its best estimator.
reg_model, pca = train_configuration(features_nas, y, rf_search)

# best_score_ is the mean CV score of the selected candidate. Python's max()
# over cv_results_['mean_test_score'] returns NaN whenever the first
# candidate's score is NaN (a failed fit). The score is a negated RMSE, so
# values closer to zero are better.
print("best crossval score:", rf_search.best_score_)

In [None]:
# Test-set metadata; only the "Id" column is needed to locate the images.
test_csv_path = f"{ds_dir}/test.csv"
test_df = pd.read_csv(test_csv_path)

# Show a preview, then leave the number of test rows as the cell output.
display(test_df.head())
len(test_df)

In [None]:
# Same extractor and input size as for the training images, read from "test".
features_test = extract_features(
    df=test_df,
    model=model_nas,
    target_size=(331, 331, 3),
    preprocess_input=prep_nas,
    dataset="test",
)

In [None]:
# Project the test features with the PCA fitted on the training data, then
# predict with the tuned regressor.
features_test_reduced = pca.transform(features_test)
y_pred = reg_model.predict(features_test_reduced)

# Submission frame: "Id" first, "Pawpularity" second.
res_df = pd.DataFrame({"Pawpularity": y_pred})
res_df.insert(loc=0, column="Id", value=test_df["Id"])
res_df.head()

In [None]:
# Write the predictions to submission.csv; index=False keeps the row index out
# of the file so only the frame's own columns are written.
res_df.to_csv("submission.csv",index=False)