good leif


In [None]:
import os
import random

import pandas as pd
import numpy as np

import tensorflow as tf
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models,Sequential
import cv2, numpy as np
import os

from kaggle_secrets import UserSecretsClient
# Fetch the notebook user's Google Cloud credential from Kaggle secrets and
# hand it to TensorFlow.
# NOTE(review): presumably needed so TensorFlow can read gs:// paths when
# running on TPU — confirm against the kaggle_secrets documentation.
user_secrets = UserSecretsClient()
user_credential = user_secrets.get_gcloud_credential()
user_secrets.set_tensorflow_credential(user_credential)

In [None]:
# Put the attached `swintransformertf` directory on the import path so the
# `swintransformer` package inside it can be imported.
import sys
sys.path.append('../input/swintransformertf')
from swintransformer import SwinTransformer

In [None]:
# Cross-compatible TPU / GPU / multi-GPU detection (the original comment's
# "TensorFlow 24" presumably means TensorFlow 2.4).
# Try to connect to a TPU first; if that raises ValueError, fall back to a
# MirroredStrategy. Everything built under `strategy.scope()` is then
# distributed over `strategy.num_replicas_in_sync` replicas.

try: # detect TPUs
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect() # TPU detection
    strategy = tf.distribute.TPUStrategy(tpu)
except ValueError: # detect GPUs
    strategy = tf.distribute.MirroredStrategy() # for GPU or multi-GPU machines
    #strategy = tf.distribute.get_strategy() # default strategy that works on CPU and single GPU
    #strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() # for clusters of multi-GPU machines

print("Number of accelerators: ", strategy.num_replicas_in_sync)

In [None]:
# Root folder of the competition data as mounted in the notebook.
_COMPETITION_DIR = '../input/petfinder-pawpularity-score'

# CSV tables (image ids plus tabular columns).
TRAIN_DATA_PATH = f'{_COMPETITION_DIR}/train.csv'
TEST_DATA_PATH = f'{_COMPETITION_DIR}/test.csv'

# Folders holding the `<Id>.jpg` image files.
TRAIN_DIRECTORY = f'{_COMPETITION_DIR}/train'
TEST_DIRECTORY = f'{_COMPETITION_DIR}/test'

# Saved Keras EfficientNetB0 model file.
IMG_MODEL = '../input/keras-applications-models/EfficientNetB0.h5'

In [None]:
# Parameters for processing tabular data
TARGET_NAME = 'Pawpularity'  # label column selected from the training table
VAL_SIZE = 0.25  # fraction of labeled rows held out for validation
SEED = 5  # passed to set_seed() and used as the split's random_state

In [None]:
# TensorFlow settings and training parameters
AUTOTUNE = tf.data.experimental.AUTOTUNE  # used for map parallelism and prefetch buffer size
IMG_SIZE = 224  # images are resized (with padding) to IMG_SIZE x IMG_SIZE
BATCH_SIZE = 8  # batch size applied in get_dataset()
DROPOUT_RATE = 0.2  # rate of the 'top_dropout' layer in the model head
LEARNING_RATE = 1e-4  # initial learning rate of the ExponentialDecay schedule
DECAY_STEPS = 100  # decay_steps of the ExponentialDecay schedule
DECAY_RATE = 0.96  # decay_rate of the ExponentialDecay schedule
EPOCHS = 20  # number of epochs passed to model.fit()
PATIENCE = 5  # patience of the EarlyStopping callback

## Functions

In [None]:
def set_seed(seed=42):
    """Seed every random number generator the notebook relies on.

    Seeds NumPy, Python's ``random`` module and TensorFlow (in that order),
    then exports ``PYTHONHASHSEED`` and ``TF_DETERMINISTIC_OPS`` to the
    process environment.

    :param seed: Random seed
    :return: None
    """
    for seed_fn in (np.random.seed, random.seed, tf.random.set_seed):
        seed_fn(seed)
    os.environ.update({
        'PYTHONHASHSEED': str(seed),
        'TF_DETERMINISTIC_OPS': '1',
    })


def set_display():
    """Configure matplotlib and pandas display defaults for the notebook.

    Charts use the 'fivethirtyeight' style at 12x8 with font size 14;
    DataFrames are shown without column/row truncation and with floats
    formatted to four decimal places.
    """
    # Chart appearance
    plt.style.use('fivethirtyeight')
    plt.rcParams.update({'figure.figsize': (12, 8), 'font.size': 14})
    # DataFrame rendering
    for option in ('display.max_columns', 'display.max_rows'):
        pd.set_option(option, None)
    pd.set_option('display.float_format', '{:.4f}'.format)


def id_to_path(img_id: str, dir: str):
    """Build the path of the JPEG file that belongs to an image id.

    :param img_id: Image Id (file name without extension)
    :param dir: Path to the directory with images
    :return: ``dir`` joined with ``<img_id>.jpg``
    """
    file_name = '{}.jpg'.format(img_id)
    return os.path.join(dir, file_name)


@tf.function
def get_image(path: str) -> tf.Tensor:
    """Read one JPEG file and turn it into a model-ready tensor.

    The file is decoded as a 3-channel image, resized with padding to
    IMG_SIZE x IMG_SIZE, cast to int32 and passed through the EfficientNet
    ``preprocess_input`` function.

    :param path: Path to image file
    :return: Tensor with preprocessed image
    """
    raw_bytes = tf.io.read_file(path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    padded = tf.image.resize_with_pad(decoded, IMG_SIZE, IMG_SIZE)
    pixels = tf.cast(padded, dtype=tf.int32)
    return tf.keras.applications.efficientnet.preprocess_input(pixels)


@tf.function
def process_dataset(path: str, label: int) -> tuple:
    """Map a (path, label) pair to a (preprocessed image, label) pair.

    :param path: Path to image file
    :param label: Label belonging to the image; returned unchanged
    :return: Tuple of the tensor produced by get_image() and the label
    """
    image = get_image(path)
    return image, label


@tf.function
def get_dataset(x, y=None) -> tf.data.Dataset:
    """Build a batched, prefetched dataset from image paths.

    With labels, every element is an (image, label) pair; without labels
    every element is just the image.

    :param x: Input data for the model (array of file paths)
    :param y: Optional target values aligned with ``x``
    :return: TensorFlow Dataset object, batched by BATCH_SIZE
    """
    # Choose the slice source and the per-element loader once, then share
    # the map/batch/prefetch tail between the labeled and unlabeled cases.
    if y is None:
        source, loader = tf.data.Dataset.from_tensor_slices(x), get_image
    else:
        source, loader = tf.data.Dataset.from_tensor_slices((x, y)), process_dataset

    mapped = source.map(loader, num_parallel_calls=AUTOTUNE)
    batched = mapped.batch(BATCH_SIZE)
    return batched.prefetch(buffer_size=AUTOTUNE)


def plot_history(hist, metric=None):
    """Plot training and validation curves for the loss and one metric.

    The previous version always read the 'categorical_crossentropy' entries,
    which are not recorded unless that metric is passed to ``compile()``,
    and labeled the panels as MSE / RMSE regardless of the actual loss.

    :param hist: History object returned by ``model.fit()``; only its
        ``history`` dict is read.
    :param metric: Key of the training metric to draw below the loss (its
        validation counterpart is looked up as ``val_<metric>``). When
        omitted, the first recorded key that is neither the loss, a ``val_``
        entry nor the learning rate is used; if there is none, only the loss
        is plotted.
    :raises KeyError: If ``metric`` is given but was not recorded.
    """
    records = hist.history

    if metric is None:
        not_metrics = {'loss', 'lr', 'learning_rate'}
        metric = next(
            (key for key in records
             if key not in not_metrics and not key.startswith('val_')),
            None,
        )
    elif metric not in records:
        raise KeyError(
            f'Metric {metric!r} was not recorded; '
            f'available keys: {sorted(records)}'
        )

    panels = ['loss'] if metric is None else ['loss', metric]

    # Epochs to plot along x axis
    x_axis = range(1, len(records['loss']) + 1)

    # squeeze=False keeps `axes` two-dimensional even for a single panel.
    _, axes = plt.subplots(nrows=len(panels), ncols=1, sharex=True,
                           squeeze=False)

    for ax, key in zip(axes[:, 0], panels):
        ax.plot(x_axis, records[key], 'bo', label='Training')
        val_key = f'val_{key}'
        # Validation entries exist only when fit() received validation data.
        if val_key in records:
            ax.plot(x_axis, records[val_key], 'ro', label='Validation')
        ax.set_title(key.replace('_', ' ').capitalize())
        ax.legend()

    axes[-1, 0].set_xlabel('Epochs')

    plt.tight_layout()
    plt.show()

## Data Processing

In [None]:
# Fix the random seeds and apply the chart / DataFrame display settings.
set_seed(SEED)
set_display()

In [None]:
# Train data set: load the labeled table, report its shape and preview the
# first rows.
data_train = pd.read_csv(TRAIN_DATA_PATH)
print(f'Train data shape: {data_train.shape}')
data_train.head()

In [None]:
# Test data set: load the table to predict for, report its shape and preview
# the first rows.
data_test = pd.read_csv(TEST_DATA_PATH)
print(f'Test data shape: {data_test.shape}')
data_test.head()

In [None]:
# Add a 'path' column that points each Id at its image file.
data_train['path'] = data_train['Id'].apply(
    lambda image_id: id_to_path(img_id=image_id, dir=TRAIN_DIRECTORY))
data_test['path'] = data_test['Id'].apply(
    lambda image_id: id_to_path(img_id=image_id, dir=TEST_DIRECTORY))

# Hold out VAL_SIZE of the labeled rows (shuffled, seeded) for validation.
labeled_rows = data_train[['path', TARGET_NAME]]
train_subset, valid_subset = train_test_split(
    labeled_rows,
    test_size=VAL_SIZE,
    random_state=SEED,
    shuffle=True,
)

In [None]:
# Create TensorFlow datasets

# Scores are bucketed into ten-point classes. `score // 10` yields index 10
# for a score of exactly 100, which tf.one_hot(depth=10) encodes as an
# all-zero row (no class at all), so the index is clamped to the last class.
NUM_CLASSES = 10


def _one_hot_labels(scores):
    """Return float32 one-hot class labels for a Series of scores."""
    class_ids = np.minimum(scores // 10, NUM_CLASSES - 1)
    return tf.one_hot(class_ids, depth=NUM_CLASSES, dtype=tf.float32)


train_ds = get_dataset(x=train_subset['path'],
                       y=_one_hot_labels(train_subset[TARGET_NAME]))
valid_ds = get_dataset(x=valid_subset['path'],
                       y=_one_hot_labels(valid_subset[TARGET_NAME]))
test_ds = get_dataset(x=data_test['path'])

print(train_ds)

In [None]:
# Swin Transformer classifier built under the distribution strategy.
# NOTE(review): `model` is rebound to an EfficientNetB0-based network right
# after this block, so the network assembled here is never compiled or
# trained — confirm whether this build is still wanted.
with strategy.scope():
    # Cast pixels to float32 and apply "torch"-mode ImageNet preprocessing.
    img_adjust_layer = tf.keras.layers.Lambda(
        lambda pixels: tf.keras.applications.imagenet_utils.preprocess_input(
            tf.cast(pixels, tf.float32), mode="torch"),
        input_shape=[224, 224, 3],
    )
    pretrained_model = SwinTransformer(
        'swin_large_224',
        num_classes=10,
        include_top=False,
        pretrained=True,
        use_tpu=True,
    )

    swin_layers = [
        img_adjust_layer,
        pretrained_model,
        tf.keras.layers.Dense(10, activation='softmax'),
    ]
    model = tf.keras.Sequential(swin_layers)

# Frozen EfficientNetB0 loaded from the saved model file; only the layers
# stacked on top of it are trainable.
# NOTE(review): this model is created outside `strategy.scope()`, so it is
# not distributed over the detected accelerators — confirm that is intended.
backbone = tf.keras.models.load_model(IMG_MODEL)
backbone.trainable = False

model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3)),
        tf.keras.layers.experimental.preprocessing.RandomFlip(mode="horizontal_and_vertical"),
        backbone,
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(DROPOUT_RATE, name='top_dropout'),
        tf.keras.layers.Dense(32, activation='elu'),
        # Ten softmax outputs: the datasets carry depth-10 one-hot labels and
        # the model is compiled with categorical_crossentropy, which a single
        # linear output unit cannot match.
        tf.keras.layers.Dense(10, activation='softmax'),
    ]
)
model.summary()

In [None]:
# Configure training: Adam with a fixed learning rate, categorical
# cross-entropy loss, accuracy reported per epoch.
# NOTE(review): the fixed 1e-5 rate is used here; LEARNING_RATE and
# `lr_schedule` are not wired into this optimizer.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5, epsilon=1e-8)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)
model.summary()

In [None]:
# Exponentially decaying learning-rate schedule in staircase mode.
decay_settings = dict(
    initial_learning_rate=LEARNING_RATE,
    decay_steps=DECAY_STEPS,
    decay_rate=DECAY_RATE,
    staircase=True,
)
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(**decay_settings)

In [None]:
# Compile the model


In [None]:
# Print the layer-by-layer summary of the current model.
model.summary()

In [None]:

#model = tf.keras.models.Sequential(
   # [
 #       tf.keras.layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3)),
   #    tf.keras.layers.experimental.preprocessing.RandomFlip(mode='horizontal'),
    #    model,
    #    tf.keras.layers.BatchNormalization(),
     #   tf.keras.layers.Dropout(DROPOUT_RATE, name='top_dropout'),
    #    tf.keras.layers.Dense(512, activation='relu'),
    #    tf.keras.layers.Dense(10, activation='softmax')
    #]
#)
#model.summary()

In [None]:
# Learning-rate schedule: exponential decay in staircase mode.
schedule_settings = dict(
    initial_learning_rate=LEARNING_RATE,
    decay_steps=DECAY_STEPS,
    decay_rate=DECAY_RATE,
    staircase=True,
)
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(**schedule_settings)

# Callback that watches 'val_loss' with a patience of PATIENCE epochs and
# restores the best weights when it stops the run.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=PATIENCE,
    restore_best_weights=True,
)

from tensorflow.keras import backend as K
def rmse(y_true, y_pred):
    """Root mean squared error computed after scaling both inputs by 100.

    :param y_true: Target values
    :param y_pred: Predicted values
    :return: Square root of the mean squared difference of the scaled inputs
    """
    scaled_error = y_true * 100 - y_pred * 100
    return K.sqrt(K.mean(K.square(scaled_error)))

#model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
 #                   loss=rmse,
 #                   metrics=[rmse])

In [None]:
# Train for up to EPOCHS epochs. The `early_stop` callback was created above
# but never handed to fit(), so training always ran the full schedule and the
# best weights were never restored; it is passed explicitly here.
history = model.fit(
    train_ds,
    validation_data=valid_ds,
    epochs=EPOCHS,
    callbacks=[early_stop],
    verbose=2,
)

In [None]:
# Chart the per-epoch training and validation curves recorded by model.fit().
plot_history(history)

## Inference

In [None]:
# Fresh copies of the tables for the tabular model. The shared path
# constants are used instead of repeating the literal file paths, so the
# locations are defined in one place only.
train = pd.read_csv(TRAIN_DATA_PATH)
test = pd.read_csv(TEST_DATA_PATH)

In [None]:
# Target column for the tabular model.
label = 'Pawpularity'

# Tabular columns fed to the model as input features.
features = [
    'Subject Focus',
    'Eyes',
    'Face',
    'Near',
    'Action',
    'Accessory',
    'Group',
    'Collage',
    'Human',
    'Occlusion',
    'Info',
    'Blur',
]

In [None]:
# Feature matrix for the rows to predict.
x_test = test[features]

# Split features and the target (rescaled by 1/100) into a training part and
# a very small validation part.
x, x_val, y, y_val = train_test_split(
    train[features],
    train[label] / 100,
    test_size=0.0033,
    random_state=42,
)
print(x, y)

In [None]:
from xgboost import XGBRegressor

In [None]:
from sklearn.metrics import mean_absolute_error

# Gradient-boosted regressor on the tabular features; the validation split is
# used as the early-stopping evaluation set.
# NOTE(review): newer xgboost releases may expect `early_stopping_rounds` on
# the constructor instead of fit() — confirm against the installed version.
my_model = XGBRegressor(n_estimators=1500, learning_rate=0.003, n_jobs=12)
my_model.fit(
    x,
    y,
    eval_set=[(x_val, y_val)],
    early_stopping_rounds=5,
    verbose=2,
)

# Validation error reported on the original 0-100 scale.
predictions = my_model.predict(x_val)
validation_mae = mean_absolute_error(predictions * 100, y_val * 100)
print("Mean Absolute Error: " + str(validation_mae))

In [None]:
# Tabular-model predictions for the test rows, multiplied by 100 to undo the
# 1/100 scaling applied to the training target.
pred2 = my_model.predict(x_test)*100
pred2

In [None]:
# Predict popularity score for the test images.
# `use_multiprocessing` / `workers` are not passed: Keras documents them as
# applying to generator / Sequence inputs only, and test_ds is a tf.data
# dataset.
raw_pred = np.asarray(model.predict(test_ds))

if raw_pred.ndim == 2 and raw_pred.shape[1] > 1:
    # Class-probability output over equal-width score buckets: decode to an
    # expected score using each bucket's midpoint.
    # NOTE(review): midpoint decoding is a modelling choice — confirm.
    n_buckets = raw_pred.shape[1]
    bucket_midpoints = (np.arange(n_buckets) + 0.5) * (100 / n_buckets)
    pred1 = raw_pred @ bucket_midpoints
else:
    # Single-output model: flatten (N, 1) to (N,) so it lines up with pred2
    # instead of broadcasting to an (N, N) matrix, then rescale to 0-100.
    pred1 = raw_pred.reshape(-1) * 100

# Weighted blend of the tabular and image predictions, stored as a column
# named TARGET_NAME. Rebinding TARGET_NAME itself to the array made the
# later `data_test[['Id', TARGET_NAME]]` selection fail.
data_test[TARGET_NAME] = pred2 * 0.61 + pred1 * 0.39






In [None]:
# Write the Id / prediction columns to the submission file and preview them.
submission = data_test[['Id', TARGET_NAME]]
submission.to_csv('submission.csv', index=False)
submission.head()