In [None]:
!pip install pycaret
!pip install pandas_profiling
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import mean_squared_error
import pandas_profiling as pp
from pycaret.regression import * #for regression

In [None]:
# Load the competition metadata tables and render a profiling report for
# the training table as this cell's output.
DATA_DIR = '../input/petfinder-pawpularity-score/'
train_data, test_data = (
    pd.read_csv(DATA_DIR + csv_name) for csv_name in ('train.csv', 'test.csv')
)
pp.ProfileReport(train_data)

In [None]:
# Columns that PyCaret is told to treat as categorical rather than numeric.
categorical_columns = [
    'Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
    'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur',
]

# Configure the PyCaret regression experiment. All arguments are passed by
# keyword, so they are grouped here by concern rather than by original order.
regression_setup = setup(
    # --- data and split ---
    data=train_data,
    target='Pawpularity',
    ignore_features=['Id'],
    categorical_features=categorical_columns,
    train_size=0.7,
    # --- imputation and categorical handling ---
    numeric_imputation='mean',
    combine_rare_levels=True,
    rare_level_threshold=0.05,
    handle_unknown_categorical=True,
    unknown_categorical_method='most_frequent',
    # --- scaling and transformations ---
    normalize=True,
    normalize_method='minmax',
    transformation=True,
    transform_target=True,
    # --- feature and row filtering ---
    remove_multicollinearity=True,
    multicollinearity_threshold=0.95,
    remove_outliers=True,
    outliers_threshold=0.05,
    # --- notebook behaviour ---
    silent=True,
    profile=True,
)

In [None]:
# Rank the candidate estimators by MSE (noted by the author as the
# competition metric), using PyCaret's turbo mode.
compare_models(sort='MSE', turbo=True)

In [None]:
# NOTE(review): the result is stored as `lasso`, but the estimator ID passed
# to create_model is 'ada'. The name is kept because later cells read it.
base_model = create_model(estimator='ada', fold=10)  # 10-fold cross-validation
tuned_model = tune_model(base_model)
lasso = ensemble_model(tuned_model)

# Open PyCaret's evaluation view for the final ensembled model.
evaluate_model(lasso)

In [None]:
# Call predict_model without `data`, so no external frame is supplied here.
# NOTE(review): presumably this scores the hold-out split created by setup() — confirm.
lasso_holdout_pred = predict_model(lasso)

In [None]:
# Score the competition test table with the trained model and preview the
# first 100 rows of the result.
pawpularity_prediction =  predict_model(lasso, data=test_data)
pawpularity_prediction.head(100)

# Prepare the submission file.
# NOTE(review): the disabled template below still uses the 'SalePrice' column
# and the 'submission_house.csv' file name, which look like leftovers from a
# different (house-price) task. Adapt both to the 'Pawpularity' target before
# enabling these lines.
# pawpularity_prediction.rename(columns={'Label':'SalePrice'}, inplace=True)
# pawpularity_prediction[['Id','SalePrice']].to_csv('submission_house.csv', index=False)

In [None]:
import os
import pandas as pd
import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt
from shutil import copyfile


# Sort the training images into one folder per Pawpularity decade so that a
# directory-based image loader can infer a class label from the folder name.
OUTPUT_PATH = '/kaggle/working/'
IMAGES_ROOT = os.path.join(OUTPUT_PATH, 'data/images')

# One class folder per decade of score; the last folder also holds a score of 100.
BUCKET_NAMES = [
    '0-9', '10-19', '20-29', '30-39', '40-49',
    '50-59', '60-69', '70-79', '80-89', '90-100',
]


def pawpularity_bucket(score):
    """Return the class-folder name for a Pawpularity score.

    Scores are binned by decade with inclusive lower bounds, so 10 maps to
    '10-19', 19 maps to '10-19' and 20 maps to '20-29'. Anything at or above
    90 (including 100) maps to '90-100'; negative values are clamped into
    the first bucket.

    Args:
        score: numeric Pawpularity value.

    Returns:
        One of the entries of BUCKET_NAMES.
    """
    decade = int(score) // 10
    return BUCKET_NAMES[min(max(decade, 0), len(BUCKET_NAMES) - 1)]


# makedirs(exist_ok=True) creates missing parents and tolerates folders that
# already exist, so this cell can be re-run in the same session.
for bucket_name in BUCKET_NAMES:
    os.makedirs(os.path.join(IMAGES_ROOT, bucket_name), exist_ok=True)

classifier_metadata = pd.read_csv('../input/petfinder-pawpularity-score/train.csv')

# Build the Id -> score lookup once instead of filtering the frame per file.
score_by_id = dict(zip(classifier_metadata['Id'], classifier_metadata['Pawpularity']))

for dirname, _, filenames in os.walk('../input/petfinder-pawpularity-score/train'):
    for filename in filenames:
        # The image Id is the file name up to its first dot.
        image_id = filename.split('.')[0]
        score = score_by_id.get(image_id)
        if score is None or pd.isna(score):
            # No usable label for this file: leave it out of the class folders.
            continue
        copyfile(
            os.path.join(dirname, filename),
            os.path.join(IMAGES_ROOT, pawpularity_bucket(score), filename),
        )

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
import os

# Delete every image under the class folders whose buffered leading bytes do
# not contain a "JFIF" marker, then report how many files were removed.
# Only the final count is printed; individual file names are not logged.
jfif_marker = tf.compat.as_bytes("JFIF")  # loop-invariant, computed once

num_skipped = 0
for folder_name in ("0-9", "10-19", "20-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80-89", "90-100"):
    folder_path = os.path.join("/kaggle/working/data/images/", folder_name)
    for fname in os.listdir(folder_path):
        fpath = os.path.join(folder_path, fname)
        # The context manager closes the handle on every path, and a failing
        # open() surfaces its own error instead of tripping over an unbound
        # or stale file object during cleanup.
        with open(fpath, "rb") as fobj:
            is_jfif = jfif_marker in fobj.peek(10)

        if not is_jfif:
            num_skipped += 1
            # Delete corrupted image
            os.remove(fpath)

print("Deleted %d images" % num_skipped)

In [None]:
image_size = (180, 180)
batch_size = 32

# Arguments shared by both subsets. The training and validation loaders are
# given the same directory, split fraction and seed, and differ only in
# which `subset` they request.
shared_loader_args = dict(
    directory="/kaggle/working/data/images/",
    validation_split=0.2,
    seed=1337,
    image_size=image_size,
    batch_size=batch_size,
)

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    subset="training", **shared_loader_args
)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    subset="validation", **shared_loader_args
)

In [None]:
import matplotlib.pyplot as plt

# Show the first nine images of one training batch in a 3x3 grid, each
# titled with its integer label.
plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
    for cell in range(1, 10):
        ax = plt.subplot(3, 3, cell)
        picture = images[cell - 1].numpy().astype("uint8")
        plt.imshow(picture)
        plt.title(int(labels[cell - 1]))
        plt.axis("off")

In [None]:
# Augmentation pipeline: a random horizontal flip followed by a random
# rotation with factor 0.1.
data_augmentation = keras.Sequential(
    layers=[layers.RandomFlip("horizontal"), layers.RandomRotation(0.1)]
)

In [None]:
# Run the augmentation pipeline nine times on the same batch and plot the
# first image of each result, to show how much the augmentation varies.
plt.figure(figsize=(10, 10))
for images, _ in train_ds.take(1):
    for cell in range(1, 10):
        augmented_images = data_augmentation(images)
        ax = plt.subplot(3, 3, cell)
        first_augmented = augmented_images[0].numpy().astype("uint8")
        plt.imshow(first_augmented)
        plt.axis("off")

In [None]:
# Wrap both datasets with a prefetch buffer of 32.
train_ds, val_ds = (
    dataset.prefetch(buffer_size=32) for dataset in (train_ds, val_ds)
)

In [None]:
def make_model(input_shape, num_classes):
    """Build the convolutional image classifier used by this notebook.

    The network applies the module-level ``data_augmentation`` pipeline,
    scales inputs by 1/255, runs two plain convolution stages, then four
    separable-convolution stages (128, 256, 512 and 728 filters), each
    downsampled by max pooling and summed with a strided 1x1 projection of
    the previous stage's output. A final 1024-filter separable convolution,
    global average pooling and dropout feed the dense output layer.

    Args:
        input_shape: shape passed to ``keras.Input`` (batch axis excluded).
        num_classes: number of target classes. Exactly 2 yields a single
            sigmoid unit; any other value yields ``num_classes`` softmax units.

    Returns:
        An uncompiled ``keras.Model`` mapping the input to class scores.
    """
    inputs = keras.Input(shape=input_shape)

    # Augmentation followed by input scaling.
    net = data_augmentation(inputs)
    net = layers.Rescaling(1.0 / 255)(net)

    # Entry stage 1: strided convolution.
    net = layers.Conv2D(32, 3, strides=2, padding="same")(net)
    net = layers.BatchNormalization()(net)
    net = layers.Activation("swish")(net)

    # Entry stage 2.
    net = layers.Conv2D(64, 3, padding="same")(net)
    net = layers.BatchNormalization()(net)
    net = layers.Activation("swish")(net)

    shortcut = net  # tensor that feeds the next residual projection

    for filters in (128, 256, 512, 728):
        # Two identical activation -> separable conv -> batch-norm units.
        for _ in range(2):
            net = layers.Activation("swish")(net)
            net = layers.SeparableConv2D(filters, 3, padding="same")(net)
            net = layers.BatchNormalization()(net)

        net = layers.MaxPooling2D(3, strides=2, padding="same")(net)

        # The strided 1x1 convolution matches the shortcut to the pooled
        # tensor's spatial size and channel count before the sum.
        projected = layers.Conv2D(filters, 1, strides=2, padding="same")(shortcut)
        net = layers.add([net, projected])
        shortcut = net

    net = layers.SeparableConv2D(1024, 3, padding="same")(net)
    net = layers.BatchNormalization()(net)
    net = layers.Activation("swish")(net)

    net = layers.GlobalAveragePooling2D()(net)

    # Binary problems get one sigmoid unit; everything else gets softmax.
    units, activation = (1, "sigmoid") if num_classes == 2 else (num_classes, "softmax")

    net = layers.Dropout(0.5)(net)
    outputs = layers.Dense(units, activation=activation)(net)
    return keras.Model(inputs, outputs)


# Instantiate the 10-class network for 3-channel inputs at `image_size`
# and draw its layer graph with tensor shapes.
model = make_model(input_shape=(*image_size, 3), num_classes=10)
keras.utils.plot_model(model, show_shapes=True)

In [None]:
epochs = 50

# Checkpoint file name is templated on the epoch number.
callbacks = [keras.callbacks.ModelCheckpoint(filepath="save_at_{epoch}.h5")]

# Learning rate starts at 1e-2 and is multiplied by 0.9 per 10000 steps.
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-2, decay_steps=10000, decay_rate=0.9
)

# Adam with the AMSGrad variant enabled and gradient-norm clipping at 1.
opt = keras.optimizers.Adam(learning_rate=lr_schedule, amsgrad=True, clipnorm=1)

# Labels are integer class indices, hence the sparse cross-entropy loss.
model.compile(
    optimizer=opt,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

model.fit(
    x=train_ds,
    epochs=epochs,
    callbacks=callbacks,
    validation_data=val_ds,
)