In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib
import matplotlib.pyplot as plt
import sklearn
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.metrics import f1_score, roc_curve, auc, mean_squared_error
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials, space_eval
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.calibration import CalibratedClassifierCV

%matplotlib inline
matplotlib.rcParams['figure.figsize'] = (8, 8)
sns.set_style('whitegrid')

import PIL
import PIL.Image
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import EfficientNetB2
from tensorflow.keras.models import Sequential
from sklearn.model_selection import StratifiedKFold, KFold

In [None]:
### Source:
# https://colab.research.google.com/github/keras-team/keras-io/blob/master/examples/vision/ipynb/image_classification_efficientnet_fine_tuning.ipynb

In [None]:
# Side length (pixels) of the square model input and the prediction batch size.
image_size = 224
batch_size = 128

import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0

# Try to attach to a TPU; when the resolver raises ValueError fall back to
# the mirrored (CPU/GPU) distribution strategy.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
    print("Device:", tpu.master())
    strategy = tf.distribute.TPUStrategy(tpu)
except ValueError:
    print("Not connected to a TPU runtime. Using CPU/GPU strategy")
    strategy = tf.distribute.MirroredStrategy()


# Load the test metadata and attach the on-disk path of every image.
test = pd.read_csv("../input/petfinder-pawpularity-score/test.csv")
test["file_path"] = test["Id"].map(
    lambda image_id: "../input/petfinder-pawpularity-score/test/" + image_id + ".jpg"
)
test.head()

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers

# Image augmentation pipeline that get_model() applies to the image input:
# random rotation, translation, flip and contrast, in that order.
img_augmentation = Sequential(name="img_augmentation")
img_augmentation.add(layers.RandomRotation(factor=0.15))
img_augmentation.add(layers.RandomTranslation(height_factor=0.1, width_factor=0.1))
img_augmentation.add(layers.RandomFlip())
img_augmentation.add(layers.RandomContrast(factor=0.1))

from tensorflow.keras.applications import EfficientNetB0

In [None]:
# Names of the metadata columns fed to the tabular branch of the model.
tabular_columns = [
    'Subject Focus',
    'Eyes',
    'Face',
    'Near',
    'Action',
    'Accessory',
    'Group',
    'Collage',
    'Human',
    'Occlusion',
    'Info',
    'Blur',
]

def rmse(y_true, y_pred):
    """Root-mean-squared error between targets and predictions.

    Args:
        y_true: Target values; cast to float32 before the subtraction.
        y_pred: Predicted values.

    Returns:
        Scalar tensor: square root of the mean squared residual.
    """
    residual = tf.cast(y_true, tf.float32) - y_pred
    return tf.sqrt(tf.reduce_mean(residual ** 2))

def get_tabular_prediciton_model(inputs):
    """Build the dense branch applied to the tabular metadata input.

    Stacks ``depth`` fully connected layers (``width`` units each, ReLU
    activation, L2 kernel regularisation) on top of ``inputs``. After every
    third layer the running activation is concatenated with the raw
    ``inputs`` again.

    Args:
        inputs: Keras tensor holding the tabular features.

    Returns:
        Keras tensor produced by the last step of the stack.
    """
    width = 32
    depth = 3
    activation = "relu"
    kernel_regularizer = keras.regularizers.l2()
    # The stack starts directly from the raw inputs. No layer is created
    # outside the loop, so every Dense layer built here is part of the
    # returned graph (nothing is allocated and then discarded).
    x = inputs
    for i in range(depth):
        x = keras.layers.Dense(
            width,
            activation=activation,
            kernel_regularizer=kernel_regularizer
        )(x)
        # Skip connection back to the raw features after each group of three layers.
        if (i + 1) % 3 == 0:
            x = keras.layers.Concatenate()([x, inputs])
    return x


def get_model():
    """Assemble and compile the two-input (image + tabular) regression model.

    The image branch runs the input through ``img_augmentation`` and a frozen
    EfficientNetB0 backbone (weights read from the local ``b0-weights``
    dataset), followed by global average pooling and dropout. The tabular
    branch is built by ``get_tabular_prediciton_model``. Both branches are
    concatenated and mapped to a single output unit.

    Returns:
        A compiled ``tf.keras.Model`` named ``"EfficientNet"`` whose inputs
        are ``[image_inputs, tabular_inputs]``. It is compiled with Adam
        (learning rate 1e-3), the ``rmse`` loss and the ``mae`` / ``mape``
        metrics.
    """
    image_inputs = layers.Input(shape=(image_size, image_size, 3))
    image_x = img_augmentation(image_inputs)
    backbone = EfficientNetB0(
        include_top=False,
        input_tensor=image_x,
        weights="../input/b0-weights/efficientnetb0_notop.h5",
        input_shape=(image_size, image_size, 3),
    )
    # Freeze the pretrained weights
    backbone.trainable = False
    # Rebuild top
    image_x = layers.GlobalAveragePooling2D(name="avg_pool")(backbone.output)
    top_dropout_rate = 0.2
    image_x = layers.Dropout(top_dropout_rate, name="top_dropout")(image_x)

    # Pass the shape as an explicit tuple keyword instead of relying on a
    # bare positional int being coerced into a shape.
    tabular_inputs = tf.keras.Input(shape=(len(tabular_columns),))
    tabular_x = get_tabular_prediciton_model(tabular_inputs)

    x = tf.keras.layers.Concatenate(axis=1)([image_x, tabular_x])
    outputs = layers.Dense(1)(x)

    optimizer = tf.keras.optimizers.Adam(1e-3)
    model = tf.keras.Model(inputs=[image_inputs, tabular_inputs], outputs=[outputs], name="EfficientNet")
    model.compile(
        optimizer=optimizer, loss=rmse, metrics=["mae", "mape"]
    )
    return model

    
def preprocess_test_data(image_url, tabular):
    """Load one test image and pair it with its tabular features.

    Args:
        image_url: Path of the JPEG file to read.
        tabular: Tabular feature values belonging to the same sample.

    Returns:
        ``((image, tabular), 0)`` where ``image`` is the decoded 3-channel
        picture resized to ``(image_size, image_size)``. The trailing ``0``
        is a placeholder label.
    """
    image_string = tf.io.read_file(image_url)
    image = tf.image.decode_jpeg(image_string, channels=3)
    # central_fraction=1.0 requests the whole frame, so no cropping is
    # expected here; the call is kept to leave the pipeline unchanged.
    image = tf.image.central_crop(image, 1.0)
    image = tf.image.resize(image, (image_size, image_size))
    # The 0 is not used for prediction; it is only there so that the tabular
    # features are not treated as the label in this scenario.
    return (image, tabular), 0


# Rebuild the architecture and restore the trained weights.
model_up = get_model()
model_up.load_weights("../input/b0-weights/pet_trained_weights_v6.h5")
# Alternative (disabled): load a fully serialised model instead of weights only.
# model_up = tf.keras.models.load_model('../input/b0-weights/pet_model_v3.h5', custom_objects = {"rmse": rmse})

# Input pipeline over (image path, tabular features) pairs of the test set.
ds_try = (
    tf.data.Dataset
    .from_tensor_slices((test["file_path"], test[tabular_columns]))
    .map(preprocess_test_data)
    .batch(batch_size)
    .cache()
    .prefetch(2)
)
# Flatten the model output into a 1-D vector of predictions.
res_0 = model_up.predict(ds_try).reshape(-1)
res_0

In [None]:
# Second model: a random forest fitted on the metadata columns of train.csv.
train_prev = pd.read_csv("../input/petfinder-pawpularity-score/train.csv")
test_prev = pd.read_csv("../input/petfinder-pawpularity-score/test.csv")
train_x = train_prev.drop(['Id'], axis=1)
test_x = test_prev.drop(['Id'], axis=1)
# `criterion` is left at the library default (squared error): the explicit
# 'mse' spelling is rejected by newer scikit-learn releases, while the default
# selects the same criterion on old and new versions alike.
# A fixed random_state makes the forest, and thus the submission, reproducible.
model_prev = RandomForestRegressor(
    max_depth=4,
    max_features=6,
    n_estimators=1000,
    random_state=42,
)
model_prev.fit(
    train_x.drop(['Pawpularity'], axis=1),
    train_x.loc[:, ['Pawpularity']].values.ravel(),
)
res_1 = model_prev.predict(test_x)

# Weighted blend of the CNN predictions (res_0) and the forest predictions (res_1).
w = 0.96
predictions = w*res_0 + (1.0-w)*res_1
# predictions = res_0

test['Pawpularity'] = predictions
test[['Id', 'Pawpularity']].to_csv('submission.csv', index=False)
display(test[['Id', 'Pawpularity']].head())