In [None]:
print("\n... IMPORTS STARTING ...\n")
print("\n\tVERSION INFORMATION")
# Machine Learning and Data Science Imports
import tensorflow as tf; print(f"\t\t– TENSORFLOW VERSION: {tf.__version__}");
import tensorflow_addons as tfa; print(f"\t\t– TENSORFLOW ADDONS VERSION: {tfa.__version__}");
import pandas as pd; pd.options.mode.chained_assignment = None;
import numpy as np; print(f"\t\t– NUMPY VERSION: {np.__version__}");
import sklearn; print(f"\t\t– SKLEARN VERSION: {sklearn.__version__}");
from sklearn.preprocessing import RobustScaler, PolynomialFeatures
from sklearn.model_selection import GroupKFold, KFold;

!pip install -q keras-cv-attention-models
import keras_cv_attention_models

# Built In Imports
from kaggle_datasets import KaggleDatasets
from multiprocessing import cpu_count
from collections import Counter
from datetime import datetime
from glob import glob
import warnings
import requests
import imageio
import IPython
import sklearn
import urllib
import zipfile
import pickle
import random
import shutil
import string
import math
import time
import gzip
import ast
import sys
import io
import os
import gc
import re

# Visualization Imports
from matplotlib.colors import ListedColormap
import matplotlib.patches as patches
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm; tqdm.pandas();
import plotly.express as px
import seaborn as sns
from PIL import Image
import matplotlib; print(f"\t\t– MATPLOTLIB VERSION: {matplotlib.__version__}");
import plotly
import PIL
import cv2


def seed_it_all(seed=7):
    """Seed every random number generator this notebook relies on.

    Sets ``PYTHONHASHSEED`` in the process environment and then seeds
    Python's ``random`` module, NumPy's global generator and TensorFlow's
    global generator with the same value.

    Args:
        seed: Seed shared by all generators (defaults to 7).
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    # Seeding order: standard library, then NumPy, then TensorFlow.
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)

    
# Reaching this line means the statements above ran without raising.
print("\n\n... IMPORTS COMPLETE ...\n")

# Seed the random number generators with seed_it_all's default seed so that
# repeated runs start from the same state.
print("\n... SEEDING FOR DETERMINISTIC BEHAVIOUR ...\n")
seed_it_all()

In [None]:
print(f"\n... ACCELERATOR SETUP STARTING ...\n")

# Detect hardware and pick the matching distribution strategy.
try:
    # TPU detection. No parameters necessary if TPU_NAME environment variable is set. On Kaggle this is always the case.
    TPU = tf.distribute.cluster_resolver.TPUClusterResolver()
except ValueError:
    # The resolver raises ValueError when no TPU can be located; fall back to CPU/GPU.
    TPU = None

if TPU:
    print(f"\n... RUNNING ON TPU - {TPU.master()}...")
    tf.config.experimental_connect_to_cluster(TPU)
    tf.tpu.experimental.initialize_tpu_system(TPU)
    # tf.distribute.TPUStrategy is the stable replacement for the deprecated
    # tf.distribute.experimental.TPUStrategy alias.
    strategy = tf.distribute.TPUStrategy(TPU)
else:
    print(f"\n... RUNNING ON CPU/GPU ...")
    # Yield the default distribution strategy in Tensorflow
    #   --> Works on CPU and single GPU.
    strategy = tf.distribute.get_strategy()

# What Is a Replica?
#    --> A single Cloud TPU device consists of FOUR chips, each of which has TWO TPU cores.
#    --> Therefore, for efficient utilization of Cloud TPU, a program should make use of each of the EIGHT (4x2) cores.
#    --> Each replica is essentially a copy of the training graph that is run on each core and
#        trains a mini-batch containing 1/8th of the overall batch size
N_REPLICAS = strategy.num_replicas_in_sync

print(f"... # OF REPLICAS: {N_REPLICAS} ...\n")

print(f"\n... ACCELERATOR SETUP COMPLTED ...\n")

In [None]:
print("\n... DATA ACCESS SETUP STARTED ...\n")

# Resolve where the competition data and the two auxiliary breed datasets live.
if not TPU:
    # Without a TPU the datasets are read from the local Kaggle input mount.
    DATA_DIR = "/kaggle/input/petfinder-pawpularity-score"
    CAT_DIR = "/kaggle/input/cat-breeds-dataset"
    DOG_DIR = "/kaggle/input/stanford-dogs-dataset"
    save_locally = None
else:
    # With a TPU the datasets are addressed through their GCS paths.
    DATA_DIR = KaggleDatasets().get_gcs_path('petfinder-pawpularity-score')
    CAT_DIR = KaggleDatasets().get_gcs_path('cat-breeds-dataset')
    DOG_DIR = KaggleDatasets().get_gcs_path('stanford-dogs-dataset')
    save_locally = tf.saved_model.SaveOptions(experimental_io_device='/job:localhost')

# Report the resolved location of each dataset.
for _path_message in (
    f"\n... DATA DIRECTORY PATH IS:\n\t--> {DATA_DIR}",
    f"\n... CAT DIRECTORY PATH IS:\n\t--> {CAT_DIR}",
    f"\n... DOG DIRECTORY PATH IS:\n\t--> {DOG_DIR}",
):
    print(_path_message)

# List the immediate children of each dataset directory, one header per dataset.
_listing_targets = (
    ("\n... IMMEDIATE CONTENTS OF DATA DIRECTORY IS:", DATA_DIR),
    ("\n... IMMEDIATE CONTENTS OF CAT DIRECTORY IS:", CAT_DIR),
    ("\n... IMMEDIATE CONTENTS OF DOG DIRECTORY IS:", DOG_DIR),
)
for _header, _directory in _listing_targets:
    print(_header)
    _children = tf.io.gfile.glob(os.path.join(_directory, "*"))
    for file in _children:
        print(f"\t--> {file}")

print("\n\n... DATA ACCESS SETUP COMPLETED ...\n")

In [None]:
print("\n... XLA OPTIMIZATIONS STARTING ...\n")

print("\n... CONFIGURE JIT (JUST IN TIME) COMPILATION ...\n")
# Enable XLA optimizations through TensorFlow's JIT switch
# (noted in this notebook as roughly a 10% speedup for @tf.function calls).
tf.config.optimizer.set_jit(True)

print("\n... XLA OPTIMIZATIONS COMPLETED ...\n")

In [None]:
print("\n... BASIC DATA SETUP STARTING ...\n\n")


def _with_image_paths(frame, image_subdir):
    """Add an ``img_path`` column built from each row's ``Id``.

    Every path has the form ``<DATA_DIR>/<image_subdir>/<Id>.jpg``. The frame
    is modified in place and returned for convenience.
    """
    def _path_for(image_id):
        return os.path.join(DATA_DIR, image_subdir, image_id+".jpg")

    frame["img_path"] = frame.Id.apply(_path_for)
    return frame


# Training metadata plus the location of every training image.
print("\n... TRAIN DATAFRAME ..\n")
TRAIN_CSV = os.path.join(DATA_DIR, "train.csv")
train_df = _with_image_paths(pd.read_csv(TRAIN_CSV), "train")
display(train_df)

# Test metadata plus the location of every test image.
print("\n... TEST DATAFRAME ..\n")
TEST_CSV = os.path.join(DATA_DIR, "test.csv")
test_df = _with_image_paths(pd.read_csv(TEST_CSV), "test")
display(test_df)

# Submission template, loaded unchanged.
print("\n... SAMPLE SUBMISSION DATAFRAME ..\n")
SS_CSV = os.path.join(DATA_DIR, "sample_submission.csv")
ss_df = pd.read_csv(SS_CSV)
display(ss_df)

# Set Other Variables
print("\n... SETTING OTHER VARIABLES ..\n")

INPUT_SHAPE = (224, 224, 3)                            # (height, width, channels) fed to the model
N_CLASSES = train_df["Pawpularity"].nunique()          # number of distinct Pawpularity values
REPLICA_BATCH_SIZE = 32                                # examples per replica
OVERALL_BATCH_SIZE = REPLICA_BATCH_SIZE * N_REPLICAS   # examples per global step
AUTO = tf.data.experimental.AUTOTUNE

print("\n\n... BASIC DATA SETUP FINISHING ...\n")

In [None]:
# Build Keras' EfficientNetB0 using only its default constructor arguments.
# NOTE(review): the variable name suggests the defaults load ImageNet weights
# together with the classification head -- confirm against the Keras docs.
effnetb0_imagenet = tf.keras.applications.EfficientNetB0()
# Print the layer-by-layer summary of the instantiated model.
effnetb0_imagenet.summary()

In [None]:
def flatten_l_o_l(nested_list):
    """Concatenate the items of every inner iterable into one flat list.

    Only a single level of nesting is removed and item order is preserved.

    Args:
        nested_list: An iterable whose elements are themselves iterable.

    Returns:
        A new list holding the items of each inner iterable, in order.
    """
    flattened = []
    for inner in nested_list:
        flattened.extend(inner)
    return flattened

def tf_load_image(image, resize_to=INPUT_SHAPE):
    """Read a JPEG file from disk and resize it to the requested shape.

    Args:
        image: Path of the JPEG file to load; it is handed to
            ``tf.io.read_file``, so despite its name this is a path and not
            pixel data.
        resize_to: Target shape whose last entry is the channel count and
            whose leading entries are the spatial size. Defaults to
            ``INPUT_SHAPE``.

    Returns:
        The decoded image, resized to ``resize_to[:-1]`` and decoded with
        ``resize_to[-1]`` channels.
    """
    encoded = tf.io.read_file(image)
    # Take the channel count from `resize_to` rather than the global
    # INPUT_SHAPE so that a caller-supplied shape is honoured in full.
    image = tf.image.decode_jpeg(encoded, channels=resize_to[-1])
    image = tf.image.resize(image, size=resize_to[:-1])
    return image

def rotate_and_crop(images):
    """Randomly rotate image(s) and crop away the empty corners the rotation leaves.

    One rotation angle is drawn per call from a zero-mean normal distribution
    with a standard deviation of 10 degrees and passed to
    ``tfa.image.rotate``. The largest axis-aligned rectangle that lies fully
    inside the rotated frame is then cropped around the centre and resized
    back to ``(INPUT_SHAPE[0], INPUT_SHAPE[1])``.

    Args:
        images: A `Tensor` representing an image(s). The crop geometry is
            computed from ``INPUT_SHAPE``, so the spatial size is expected to
            already be ``(INPUT_SHAPE[0], INPUT_SHAPE[1])``.

    Returns:
        The rotated, centre-cropped and resized image(s).
    """

    def _largest_rotated_rect(w, h, angle):
        """
        Given a rectangle of size wxh that has been rotated by 'angle' (in
        radians), computes the width and height of the largest possible
        axis-aligned rectangle within the rotated rectangle.
        Original JS code by 'Andri' and Magnus Hoff from Stack Overflow
        Converted to Python by Aaron Snoswell

        Source: http://stackoverflow.com/questions/16702966/rotate-image-and-crop-out-black-borders

        For square inputs this matches the referenced port exactly; the
        non-square branches below keep the result inside the rotated frame.
        """
        # Use a signed integer: the angle is negative about half of the time
        # and casting a negative float to an unsigned type is not well
        # defined. With two's complement, `& 3` still maps -1 to quadrant 3.
        quadrant = tf.cast(tf.math.floor(angle / (math.pi/2)), dtype=tf.int32)
        quadrant = tf.bitwise.bitwise_and(quadrant, tf.constant(3, dtype=quadrant.dtype))
        in_even_quadrant = tf.bitwise.bitwise_and(quadrant, tf.constant(1, dtype=quadrant.dtype))==tf.constant(0, dtype=quadrant.dtype)
        sign_alpha = tf.cond(in_even_quadrant, lambda: angle, lambda: math.pi-angle)
        # Fold the angle into [0, pi).
        alpha = (sign_alpha % math.pi + math.pi) % math.pi

        # Bounding box of the rotated rectangle.
        bb_w = w * tf.math.cos(alpha) + h * tf.math.sin(alpha)
        bb_h = w * tf.math.sin(alpha) + h * tf.math.cos(alpha)

        portrait = w<h

        # gamma is the angle between the bounding-box diagonal and the
        # bounding-box edge that the rectangle's longer side projects onto
        # (the vertical edge for portrait input, the horizontal one otherwise).
        gamma = tf.cond(portrait, lambda: tf.math.atan2(bb_w, bb_h), lambda: tf.math.atan2(bb_h, bb_w))

        delta = math.pi - alpha - gamma

        length = tf.cond(portrait, lambda: h, lambda: w)

        # Law of sines in the triangle spanned by a bounding-box corner, the
        # rectangle vertex on the adjacent edge and the point where the
        # diagonal meets the rectangle's longer side.
        d = length * tf.math.cos(alpha)
        a = d * tf.math.sin(alpha) / tf.math.sin(delta)

        # Decompose `a` along / across the edge gamma is measured from, then
        # map the two components onto the horizontal (x) and vertical (y) axes.
        along_edge = a * tf.math.cos(gamma)
        across_edge = a * tf.math.sin(gamma)
        x = tf.cond(portrait, lambda: across_edge, lambda: along_edge)
        y = tf.cond(portrait, lambda: along_edge, lambda: across_edge)

        return (bb_w - 2 * x, bb_h - 2 * y)

    # Desired output dimensions: Python ints for resizing, float tensors for the geometry.
    target_height, target_width = INPUT_SHAPE[0], INPUT_SHAPE[1]
    output_height, output_width = tf.constant(target_height, dtype=tf.float32), tf.constant(target_width, dtype=tf.float32)

    # Angle in radians, drawn from N(0, 10 degrees).
    rotation_radians = (math.pi/180)*tf.random.normal(shape=(), stddev=10)
    images = tfa.image.rotate(images, rotation_radians, interpolation='BILINEAR')

    # Center crop to omit black noise on the edges.
    # The helper's signature is (w, h, angle), so width goes first.
    lrr_width, lrr_height = _largest_rotated_rect(output_width, output_height, rotation_radians)
    lrr_offset_w, lrr_offset_h = tf.cast(tf.math.round((output_width-lrr_width)/2), dtype=tf.int32), tf.cast(tf.math.round((output_height-lrr_height)/2), dtype=tf.int32)
    lrr_width, lrr_height = tf.cast(tf.math.round(lrr_width), dtype=tf.int32), tf.cast(tf.math.round(lrr_height), dtype=tf.int32)

    images = tf.image.crop_to_bounding_box(images, lrr_offset_h, lrr_offset_w, target_height=lrr_height, target_width=lrr_width)
    # tf.image.resize needs an integer size, so pass the ints from INPUT_SHAPE
    # instead of the float tensors used for the geometry above.
    images = tf.image.resize(images, (target_height, target_width))

    return images


def simple_augmentation(images, labels):
    """Apply a chain of light random augmentations to the images.

    In order: horizontal flip, cutout, saturation jitter, hue jitter and
    brightness jitter. The labels pass through untouched.

    Args:
        images: Image tensor to augment.
        labels: Labels belonging to `images`.

    Returns:
        A tuple ``(augmented_images, labels)``.
    """
    # Side of the square cutout patch: twice a thirtieth of the input height.
    cutout_side = 2*(INPUT_SHAPE[0]//30)
    cutout_size = (cutout_side, cutout_side)

    # Mirror left/right at random.
    flipped = tf.image.random_flip_left_right(images)

    # rotate_and_crop(images) is currently disabled at this point of the chain.

    # Blank out one randomly placed patch.
    occluded = tfa.image.random_cutout(flipped, mask_size=cutout_size)

    # Mild colour jitter: saturation, then hue, then brightness.
    recoloured = tf.image.random_saturation(occluded, 0.975, 1.025)
    recoloured = tf.image.random_hue(recoloured, 0.0125)
    recoloured = tf.image.random_brightness(recoloured, 0.125)

    return recoloured, labels

In [None]:
def _load_image_with_path(image_path):
    """Pair the loaded image with the path it was read from."""
    return tf_load_image(image_path), image_path


# Training pipeline: load -> cache -> batch -> prefetch.
train_img_ds = tf.data.Dataset.from_tensor_slices(train_df.img_path.values)
train_ds = (
    train_img_ds
    .map(_load_image_with_path, num_parallel_calls=tf.data.AUTOTUNE)
    .cache()
    .batch(OVERALL_BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# Test pipeline: same steps, without caching.
test_img_ds = tf.data.Dataset.from_tensor_slices(test_df.img_path.values)
test_ds = (
    test_img_ds
    .map(_load_image_with_path, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(OVERALL_BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# all_train_preds = effnetb0_imagenet.predict(train_ds)
# all_train_preds = tf.argmax(all_train_preds, axis=-1)


In [None]:
def _top1_classes_per_batch(dataset):
    """Classify every batch of `dataset` with `effnetb0_imagenet`.

    Args:
        dataset: Iterable yielding ``(images, paths)`` batches.

    Returns:
        A tuple ``(preds, paths)`` of two lists with one entry per batch:
        the arg-max class index of each image (cast to int16) and the batch
        of image paths those predictions belong to.
    """
    batch_preds, batch_paths = [], []
    for img, path in tqdm(dataset):
        class_scores = effnetb0_imagenet.predict(img)
        batch_preds.append(tf.cast(tf.argmax(class_scores, axis=-1), tf.int16))
        batch_paths.append(path)
    return batch_preds, batch_paths


all_train_preds, all_train_paths = _top1_classes_per_batch(train_ds)

# The test predictions must be computed on test_ds; iterating train_ds here
# would fill the test lists with training images and paths.
all_test_preds, all_test_paths = _top1_classes_per_batch(test_ds)

In [None]:
# Correlate only the numeric/boolean columns: train_df also holds string
# columns (Id, img_path), and DataFrame.corr() raises on non-numeric data in
# recent pandas releases. Selecting the columns explicitly works on old and
# new pandas alike.
# NOTE(review): no ImageNet-prediction column is added to train_df in the
# cells visible here -- confirm it exists before reading the result that way.
train_df.select_dtypes(include=["number", "bool"]).corr()["Pawpularity"]

## **As we can see, the predicted ImageNet classes are more correlated with Pawpularity than any of the other given features**