In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import plotly.express as px
from tqdm import tqdm
import seaborn as sns 
import random
import cv2
import math
from sklearn.model_selection import StratifiedKFold
from lightgbm import LGBMRegressor

import tensorflow as tf
from tensorflow import keras 
from tensorflow.keras.utils import Sequence
from tensorflow.keras import Model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import Input, Sequential, layers
import tensorflow.keras.backend as K

import sys
sys.path.append('../input/swintransformertf')
from swintransformer import SwinTransformer

In [None]:
# Probe for a TPU. A ValueError from the resolver is treated as "no TPU".
tpu = None
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print(f'Running on TPU {tpu.master()}')
except ValueError:
    tpu = None

if not tpu:
    # No TPU: use whatever strategy tf.distribute currently reports.
    strategy = tf.distribute.get_strategy()
else:
    # Connect to the TPU cluster and initialise it before creating the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)

AUTO = tf.data.experimental.AUTOTUNE
REPLICAS = strategy.num_replicas_in_sync
print(f'REPLICAS: {REPLICAS}')

In [None]:
# Tabular data: training metadata, test metadata and the submission template.
train = pd.read_csv('../input/petfinder-pawpularity-score/train.csv')
test = pd.read_csv('../input/petfinder-pawpularity-score/test.csv')
sample_submission = pd.read_csv('../input/petfinder-pawpularity-score/sample_submission.csv')

# Photo data: directories that hold the .jpg files.
train_image = '../input/petfinder-pawpularity-score/train/'
test_image = '../input/petfinder-pawpularity-score/test/'

# Full path of every image: <directory><Id>.jpg
train['file_path'] = [f'{train_image}{image_id}.jpg' for image_id in train['Id']]
test['file_path'] = [f'{test_image}{image_id}.jpg' for image_id in test['Id']]

# Names of the tabular metadata columns.
features = [
    'Subject Focus', 'Eyes', 'Face', 'Near', 'Action',
    'Accessory', 'Group', 'Collage', 'Human', 'Occlusion',
    'Info', 'Blur',
]

# Views used when working with the tabular information only.
train_features = train[features]
Y_features = train['Pawpularity'].astype(int)
test_features = test[features]

In [None]:
# Shape of one image passed to layers.Input: 224 x 224 pixels, 3 channels.
# Must agree with IMAGE_SIZE in the configuration cell.
input_shape = (224,224, 3)

In [None]:
# Training configuration.
# NOTE: OPTIMIZER is a single shared instance, created once at notebook level.
OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=0.02)
BATCH_SIZE = 32
EPOCHS = 3
FOLD = 5  # number of cross-validation folds
SEED = 42  # random_state of the StratifiedKFold split
IMAGE_SIZE = [224,224]  # 512,512  224,224

# Swin Transformer hyper-parameters.
patch_size = (2, 2)  # 2-by-2 sized patches
dropout_rate = 0.03  # Dropout rate
num_heads = 8  # Attention heads
embed_dim = 64  # Embedding dimension

num_mlp = 256  # MLP layer size

qkv_bias = True  # Convert embedded patches to query, key, and values with a learnable additive value
window_size = 2  # Size of attention window
shift_size = 2  # Size of shifting window (the model cell passes shift_size=0, so this value is not used there)
image_dimension = 256  # Original image size / input size of the transformer model (not used by any active code in this notebook)

# Number of patches along each image axis.
num_patch_x = IMAGE_SIZE[0] // patch_size[0]
num_patch_y = IMAGE_SIZE[1] // patch_size[1]

In [None]:
# Patch-grid size along each axis, one value per line.
print(num_patch_x, num_patch_y, sep='\n')

In [None]:
# Coarse popularity bins for the exploration plots: ceil(Pawpularity / 33),
# with every value that is not below 4 set to 4.0.
# The capped Series is assigned back explicitly. The previous
# `train[col].where(..., inplace=True)` was chained in-place assignment, which
# is not guaranteed to update `train` (it does not under pandas copy-on-write).
train['income_pawpularity'] = (
    np.ceil(train['Pawpularity'] / 33).where(lambda bins: bins < 4, 4.0)
)

# One axis per metadata flag plus the popularity bin, coloured by bin.
fig = px.parallel_categories(
    train,
    dimensions=features + ['income_pawpularity'],
    color='income_pawpularity',
)
fig.show()

In [None]:
# Distribution of Pawpularity, split by popularity bin.
sns.displot(data=train, x="Pawpularity", hue="income_pawpularity", element="step")

In [None]:
def show_img(full_path, pawpularity_file):
    """Plot images in a three-column grid, each titled with its score.

    Args:
        full_path: Sequence of image file paths.
        pawpularity_file: Sequence of scores aligned with ``full_path``.

    Raises:
        FileNotFoundError: If an image cannot be read from disk.
    """
    plt.figure(figsize=(25, 30))

    # Keep the 3x3 layout for up to nine images and add rows beyond that,
    # instead of failing on the tenth subplot.
    n_cols = 3
    n_rows = max(3, math.ceil(len(full_path) / n_cols))

    for position, (path, score) in enumerate(zip(full_path, pawpularity_file), start=1):
        image = cv2.imread(path)
        if image is None:
            # cv2.imread returns None instead of raising when it cannot read a file.
            raise FileNotFoundError(f'Could not read image: {path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # imread yields BGR
        plt.subplot(n_rows, n_cols, position)
        plt.title('pawpularity: ' + str(score))
        plt.imshow(image)
        plt.axis("off")
        
# Show up to nine sample images for each popularity bin (1..4).
for i_income in range(1, 5):
    bin_rows = train.loc[train['income_pawpularity'] == i_income]
    # Label-based sampling on the filtered frame: no mixing of .iloc with index
    # labels, no failure when a bin holds fewer than nine rows, and a fixed
    # seed so the figure is reproducible.
    sampled = bin_rows.sample(n=min(9, len(bin_rows)), random_state=SEED)

    show_img(sampled['file_path'].tolist(), sampled['Pawpularity'].tolist())

In [None]:
# Pairs of training image Ids treated as near-duplicates, keyed by (Id, Id).
# The next cell gives the first image of each pair the mean Pawpularity of the
# pair and drops the second image.
# The value is presumably an image-similarity score; the merge cell does not
# use it, and how it was computed is not shown in this notebook.
dups = {('b148cbea87c3dcc65a05b15f78910715','e09a818b7534422fb4c688f12566e38f'): 0.9921875,
        ('43ab682adde9c14adb7c05435e5f2e0e', '9a0238499efb15551f06ad583a6fa951'): 1.0,
        ('dbf25ce0b2a5d3cb43af95b2bd855718', 'e359704524fa26d6a3dcd8bfeeaedd2e'): 1.0,
        ('5a642ecc14e9c57a05b8e010414011f2', 'c504568822c53675a4f425c8e5800a36'): 0.9921875,
        ('08440f8c2c040cf2941687de6dc5462f', 'bf8501acaeeedc2a421bac3d9af58bb7'): 1.0,
        ('01430d6ae02e79774b651175edd40842', '6dc1ae625a3bfb50571efedc0afc297c'): 1.0,
        ('1feb99c2a4cac3f3c4f8a4510421d6f5', '264845a4236bc9b95123dde3fb809a88'): 0.9921875,
        ('43bd09ca68b3bcdc2b0c549fd309d1ba', '6ae42b731c00756ddd291fa615c822a1'): 0.9921875,
        ('13d215b4c71c3dc603cd13fc3ec80181', '373c763f5218610e9b3f82b12ada8ae5'): 1.0,
        ('3877f2981e502fe1812af38d4f511fd2', '902786862cbae94e890a090e5700298b'): 1.0,
        ('5ef7ba98fc97917aec56ded5d5c2b099', '67e97de8ec7ddcda59a58b027263cdcc'): 1.0,
        ('871bb3cbdf48bd3bfd5a6779e752613e', '988b31dd48a1bc867dbc9e14d21b05f6'): 0.99609375,
        ('68e55574e523cf1cdc17b60ce6cc2f60', '9b3267c1652691240d78b7b3d072baf3'): 0.99609375,
        ('72b33c9c368d86648b756143ab19baeb', '763d66b9cf01069602a968e573feb334'): 0.99609375,
        ('2b737750362ef6b31068c4a4194909ed', '41c85c2c974cc15ca77f5ababb652f84'): 0.98828125,
        ('851c7427071afd2eaf38af0def360987', 'b49ad3aac4296376d7520445a27726de'): 1.0,
        ('9f5a457ce7e22eecd0992f4ea17b6107', 'b967656eb7e648a524ca4ffbbc172c06'): 0.91796875,
        ('5a5c229e1340c0da7798b26edf86d180', 'dd042410dc7f02e648162d7764b50900'): 0.9921875,
        ('a9513f7f0c93e179b87c01be847b3e4c', 'b86589c3e85f784a5278e377b726a4d4'): 0.9921875,
        ('1059231cf2948216fcc2ac6afb4f8db8', 'bca6811ee0a78bdcc41b659624608125'): 0.96875,
        ('87c6a8f85af93b84594a36f8ffd5d6b8', 'd050e78384bd8b20e7291b3efedf6a5b'): 1.0,
        ('5da97b511389a1b62ef7a55b0a19a532', '8ffde3ae7ab3726cff7ca28697687a42'): 1.0,
        ('03d82e64d1b4d99f457259f03ebe604d', 'dbc47155644aeb3edd1bd39dba9b6953'): 0.98828125,
        ('38426ba3cbf5484555f2b5e9504a6b03', '6cb18e0936faa730077732a25c3dfb94'): 1.0,
        ('54563ff51aa70ea8c6a9325c15f55399', 'b956edfd0677dd6d95de6cb29a85db9c'): 0.984375,
        ('0c4d454d8f09c90c655bd0e2af6eb2e5', 'fe47539e989df047507eaa60a16bc3fd'): 1.0,
        ('78a02b3cb6ed38b2772215c0c0a7f78e', 'c25384f6d93ca6b802925da84dfa453e'): 0.99609375}

In [None]:
# Merge each near-duplicate pair: the first image gets the (truncated) mean
# Pawpularity of the pair, the second image is dropped.
index_id2 = []
for id1, id2 in dups:
    rows_id1 = train.index[train['Id'] == id1]
    rows_id2 = train.index[train['Id'] == id2]
    if rows_id1.empty or rows_id2.empty:
        # Pair already merged (the cell was re-run) or not present in this
        # frame: skip it instead of failing on an empty lookup.
        continue
    pawp1 = train.loc[rows_id1[-1], 'Pawpularity']
    pawp2 = train.loc[rows_id2[-1], 'Pawpularity']
    train.loc[rows_id1, 'Pawpularity'] = int(np.mean([pawp1, pawp2]))
    index_id2.append(rows_id2[0])

# drop=True: do not keep the old index as a stray 'index' column.
train = train.drop(index=index_id2).reset_index(drop=True)
train.shape

In [None]:
# Assign every row to one of FOLD validation folds, stratified by Pawpularity.
skfolds = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=SEED)

fold_splits = skfolds.split(train, train.Pawpularity)
for num_fold, (_, val_index) in enumerate(fold_splits):
    # Rows in the validation part of this split receive the fold number.
    train.loc[val_index, 'fold'] = int(num_fold)

In [None]:
class Dataset(Sequence):
    """Keras ``Sequence`` that reads images from disk and serves them in batches.

    Args:
        x_set: DataFrame with a ``file_path`` column (one image per row).
        y_set: Optional targets aligned row by row with ``x_set``. Each batch
            of targets is divided by 100. ``None`` yields image-only batches.
        batch_size: Number of rows per batch; the last batch may be smaller.
        image_transform: Truthy to run ``image_augmentation`` on every image.
        flip: Truthy to enable the random flips.
        rotate: Truthy to enable the random 90-degree rotation.
        RGB: Truthy to enable the random saturation/contrast changes.
        comix: Truthy to blend images and targets inside every full batch
            (see ``cut_or_mix``). Ignored when ``y_set`` is ``None``.
    """

    def __init__(self, x_set, y_set=None, batch_size=32, image_transform=None,
                       flip=None, rotate=None, RGB=None, comix=None):
        super().__init__()

        self.x = x_set
        self.y = y_set
        self.batch_size = batch_size
        self.image_transform = image_transform
        self.flip = flip
        self.rotate = rotate
        self.RGB = RGB
        self.comix = comix

    def __len__(self):
        """Number of batches, counting a final partial batch."""
        return math.ceil(len(self.x) / self.batch_size)

    def image_augmentation(self, image=None, flip=False, rotate=False, RGB=False):
        """Apply the enabled random augmentations to one image and return it."""
        if flip:
            p_flip = tf.random.uniform([], 0, 1.0, dtype=tf.float32)

            if p_flip >= .8:
                image = tf.image.random_flip_left_right(image)
            elif p_flip >= .5:
                image = tf.image.random_flip_up_down(image)
            # p_flip < .5: leave the image as it is

        if rotate:
            p_rotate = tf.random.uniform([], 0, 1.0, dtype=tf.float32)

            if p_rotate > .25:
                # Rotate counter-clockwise by 90 degrees
                image = tf.image.rot90(image, k=1)
            # The former k=4 branch was a full turn (no change) and was removed.

        if RGB:
            p_pixel = tf.random.uniform([], 0, 1.0, dtype=tf.float32)

            if p_pixel >= .8:
                image = tf.image.random_saturation(image, lower=.7, upper=1.3)
            elif p_pixel >= .5:
                image = tf.image.random_contrast(image, lower=.8, upper=1.2)
            # p_pixel < .5: leave the image as it is

        return image

    def cut(self, bs_x, bs_y, prob=1.0):
        """Blend every image with a random partner from the same batch.

        Each output is ``(1 - a) * item_j + a * item_k`` with ``a`` drawn
        uniformly from [0, 1), or 0 when the blend is skipped (probability
        ``1 - prob``). Images and labels use the same weight ``a``.

        Returns:
            ``(images, labels)`` as two lists.
        """
        bs_x = np.asarray(bs_x)
        bs_y = np.asarray(bs_y)
        n_items = len(bs_x)

        imgs, labs = [], []
        for j in range(n_items):
            apply = float(tf.random.uniform([], 0, 1)) <= prob
            # The partner may be any item of the batch (previously hard-coded to 0..23).
            k = int(tf.random.uniform([], 0, n_items, dtype=tf.int32))
            a = float(tf.random.uniform([], 0, 1)) * float(apply)

            imgs.append((1 - a) * bs_x[j] + a * bs_x[k])
            # Weight the label like the image; a plain mean altered the label
            # even when the image itself was left untouched (a == 0).
            labs.append((1 - a) * bs_y[j] + a * bs_y[k])

        return imgs, labs

    def mix(self, bs_x, bs_y, prob=1.0):
        """Paste a random rectangle from a partner image into every image.

        The label is blended with the share of the image area that the pasted
        rectangle covers after clipping at the image borders.

        Returns:
            ``(images, labels)`` as two lists.
        """
        bs_x = np.asarray(bs_x)
        bs_y = np.asarray(bs_y)
        n_items, img_h, img_w = bs_x.shape[:3]

        imgs, labs = [], []
        for j in range(n_items):
            apply = float(tf.random.uniform([], 0, 1)) <= prob
            k = int(tf.random.uniform([], 0, n_items, dtype=tf.int32))
            # Random centre of the rectangle
            x = int(tf.random.uniform([], 0, img_w, dtype=tf.int32))
            y = int(tf.random.uniform([], 0, img_h, dtype=tf.int32))

            b = float(tf.random.uniform([], 0, 1))
            side_ratio = math.sqrt(1 - b) if apply else 0.0
            box_h = int(img_h * side_ratio)
            box_w = int(img_w * side_ratio)

            ya = max(0, y - box_h // 2)
            yb = min(img_h, y + box_h // 2)
            xa = max(0, x - box_w // 2)
            xb = min(img_w, x + box_w // 2)

            # Copy image j and overwrite the rectangle with the partner's pixels
            img = bs_x[j].copy()
            img[ya:yb, xa:xb, :] = bs_x[k, ya:yb, xa:xb, :]
            imgs.append(img)

            # Label weight = fraction of the image replaced by the partner
            a = (yb - ya) * (xb - xa) / (img_h * img_w)
            labs.append((1 - a) * bs_y[j] + a * bs_y[k])

        return imgs, labs

    def cut_or_mix(self, bs_x, bs_y, p=0.5):
        """Randomly leave the batch unchanged, or apply ``cut`` or ``mix``.

        With probability 1/2 the batch is returned as is. Otherwise ``cut`` is
        used with probability ``p`` and ``mix`` with probability ``1 - p``.
        """
        # Plain Python branching: cut/mix are Python-side code, so tf.cond
        # added nothing here.
        if int(tf.random.uniform([], 0, 2, dtype=tf.int32)) == 0:
            return bs_x, bs_y

        if float(tf.random.uniform([], minval=0, maxval=1, dtype=tf.float32)) < p:
            return self.cut(bs_x, bs_y)
        return self.mix(bs_x, bs_y)

    def image_refinement(self, path_image):
        """Load one image as float32 RGB, resize it and optionally augment it.

        Raises:
            FileNotFoundError: If the image cannot be read from disk.
        """
        image = cv2.imread(path_image)
        if image is None:
            # cv2.imread returns None instead of raising when it cannot read a file.
            raise FileNotFoundError(f'Could not read image: {path_image}')
        # imread yields BGR. COLOR_RGBA2RGB only strips an alpha channel and
        # does not swap channels, so it left the data in BGR order.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (IMAGE_SIZE[0], IMAGE_SIZE[1]))
        image = tf.cast(image, tf.float32)

        if self.image_transform:
            image = self.image_augmentation(image=image, flip=self.flip,
                                            rotate=self.rotate, RGB=self.RGB)

        return image

    def normalize_y(self, batch_y):
        """Scale targets by 1/100."""
        batch_y = batch_y / 100
        return batch_y

    def __getitem__(self, idx):
        """Return batch ``idx``: ``(images, targets)``, or only images when ``y_set`` is None."""
        start = idx * self.batch_size
        stop = start + self.batch_size

        # Positional slicing, independent of the frame's index labels
        batch_x = self.x.iloc[start:stop]
        images = np.array([self.image_refinement(path) for path in batch_x['file_path']])

        if self.y is None:
            return images

        targets = self.y.iloc[start:stop] if hasattr(self.y, 'iloc') else self.y[start:stop]
        batch_y = np.array(self.normalize_y(targets))

        # Blend only when explicitly enabled (the default None means "off") and
        # only on full batches.
        if self.comix and len(batch_x) == self.batch_size:
            images, batch_y = self.cut_or_mix(images, batch_y)

        return np.array(images), np.array(batch_y)

# Modeling

# Swin-Transformer Stuff

In [None]:
def window_partition(x, window_size):
    """Split a (batch, height, width, channels) tensor into square windows.

    Returns:
        Tensor of shape (-1, window_size, window_size, channels) holding the
        non-overlapping windows.
    """
    _, height, width, channels = x.shape
    rows = height // window_size
    cols = width // window_size
    # (batch, rows, window, cols, window, C) -> bring the two grid axes together
    grid = tf.reshape(x, shape=(-1, rows, window_size, cols, window_size, channels))
    grid = tf.transpose(grid, (0, 1, 3, 2, 4, 5))
    return tf.reshape(grid, shape=(-1, window_size, window_size, channels))


def window_reverse(windows, window_size, height, width, channels):
    """Reassemble windows into a (-1, height, width, channels) tensor.

    Inverse of ``window_partition`` for the same ``window_size``.
    """
    rows = height // window_size
    cols = width // window_size
    # (batch, rows, cols, window, window, C) -> interleave grid and window axes
    grid = tf.reshape(
        windows, shape=(-1, rows, cols, window_size, window_size, channels)
    )
    grid = tf.transpose(grid, perm=(0, 1, 3, 2, 4, 5))
    return tf.reshape(grid, shape=(-1, height, width, channels))


class DropPath(layers.Layer):
    """Stochastic depth: randomly zero whole samples of a branch while training.

    Args:
        drop_prob: Probability of dropping a sample. ``None`` or 0 disables
            the layer.
    """

    def __init__(self, drop_prob=None, **kwargs):
        super(DropPath, self).__init__(**kwargs)
        self.drop_prob = drop_prob

    def _drop(self, x):
        """Zero random samples of ``x`` and rescale the survivors by 1 / keep_prob."""
        keep_prob = 1 - self.drop_prob
        batch_size = tf.shape(x)[0]
        # One random value per sample, broadcast over all remaining axes
        mask_shape = (batch_size,) + (1,) * (x.shape.rank - 1)
        random_tensor = keep_prob + tf.random.uniform(mask_shape, dtype=x.dtype)
        path_mask = tf.floor(random_tensor)
        return tf.math.divide(x, keep_prob) * path_mask

    def call(self, x, training=None):
        """Return ``x`` unchanged at inference; apply the random drop in training.

        Without the ``training`` check the mask was also applied by
        ``predict``/``evaluate``, which made inference outputs random.
        """
        if not self.drop_prob:
            return x
        if training is None or isinstance(training, bool):
            return self._drop(x) if training else x
        # `training` supplied as a tensor: choose the branch inside the graph
        return tf.cond(tf.cast(training, tf.bool), lambda: self._drop(x), lambda: x)

# Window based multi-head self-attention

In [None]:
class WindowAttention(layers.Layer):
    """Multi-head self-attention over the tokens of one window.

    A learned relative-position bias is added to the attention scores, and an
    optional additive mask can be supplied per window.
    """

    def __init__(
        self, dim, window_size, num_heads, qkv_bias=True, dropout_rate=0.0, **kwargs
    ):
        super(WindowAttention, self).__init__(**kwargs)
        self.dim = dim
        self.window_size = window_size
        self.num_heads = num_heads
        self.scale = (dim // num_heads) ** -0.5
        self.qkv = layers.Dense(dim * 3, use_bias=qkv_bias)
        self.dropout = layers.Dropout(dropout_rate)
        self.proj = layers.Dense(dim)

    def build(self, input_shape):
        """Create the bias table and the index that maps token pairs into it."""
        win_h = self.window_size[0]
        win_w = self.window_size[1]

        # One bias per head for every possible relative offset inside a window
        self.relative_position_bias_table = self.add_weight(
            shape=((2 * win_h - 1) * (2 * win_w - 1), self.num_heads),
            initializer=tf.initializers.Zeros(),
            trainable=True,
        )

        # Coordinates of every token in the window, flattened to (2, tokens)
        grid = np.stack(np.meshgrid(np.arange(win_h), np.arange(win_w), indexing="ij"))
        flat = grid.reshape(2, -1)
        # Pairwise offsets, shifted to start at 0 and folded into a single index
        offsets = (flat[:, :, None] - flat[:, None, :]).transpose([1, 2, 0])
        offsets[:, :, 0] += win_h - 1
        offsets[:, :, 1] += win_w - 1
        offsets[:, :, 0] *= 2 * win_w - 1
        pair_index = offsets.sum(-1)

        self.relative_position_index = tf.Variable(
            initial_value=tf.convert_to_tensor(pair_index), trainable=False
        )

    def call(self, x, mask=None):
        """Attend within each window; ``x`` is unpacked as (windows, tokens, channels)."""
        _, tokens, channels = x.shape
        head_dim = channels // self.num_heads

        # Project to q/k/v and move the q/k/v axis to the front
        qkv = tf.reshape(
            self.qkv(x), shape=(-1, tokens, 3, self.num_heads, head_dim)
        )
        qkv = tf.transpose(qkv, perm=(2, 0, 3, 1, 4))
        query = qkv[0] * self.scale
        key_t = tf.transpose(qkv[1], perm=(0, 1, 3, 2))
        value = qkv[2]
        scores = query @ key_t

        # Look up the bias of every token pair and add it to the scores
        tokens_per_window = self.window_size[0] * self.window_size[1]
        flat_index = tf.reshape(self.relative_position_index, shape=(-1,))
        bias = tf.gather(self.relative_position_bias_table, flat_index)
        bias = tf.reshape(bias, shape=(tokens_per_window, tokens_per_window, -1))
        bias = tf.transpose(bias, perm=(2, 0, 1))
        scores = scores + tf.expand_dims(bias, axis=0)

        if mask is not None:
            # Add the per-window mask, then fold the window axis back into the batch
            num_windows = mask.get_shape()[0]
            mask_float = tf.cast(
                tf.expand_dims(tf.expand_dims(mask, axis=1), axis=0), tf.float32
            )
            scores = (
                tf.reshape(
                    scores, shape=(-1, num_windows, self.num_heads, tokens, tokens)
                )
                + mask_float
            )
            scores = tf.reshape(scores, shape=(-1, self.num_heads, tokens, tokens))

        weights = keras.activations.softmax(scores, axis=-1)
        weights = self.dropout(weights)

        # Weighted sum of values, heads merged back into the channel axis
        attended = weights @ value
        attended = tf.transpose(attended, perm=(0, 2, 1, 3))
        attended = tf.reshape(attended, shape=(-1, tokens, channels))
        attended = self.proj(attended)
        return self.dropout(attended)


In [None]:
class SwinTransformer(layers.Layer):
    """One Swin Transformer block: windowed attention and an MLP, each with a residual.

    NOTE: this class shadows the ``SwinTransformer`` imported from the
    ``swintransformer`` package at the top of the notebook.

    Args:
        dim: Channel dimension of the tokens.
        num_patch: ``(height, width)`` of the patch grid.
        num_heads: Number of attention heads.
        window_size: Side length of the attention window.
        shift_size: Window shift; 0 disables shifting and the attention mask.
        num_mlp: Width of the hidden MLP layer.
        qkv_bias: Whether the q/k/v projection uses a bias.
        dropout_rate: Rate used for dropout and for ``DropPath``.
    """

    def __init__(
        self,
        dim,
        num_patch,
        num_heads,
        window_size=7,
        shift_size=0,
        num_mlp=1024,
        qkv_bias=True,
        dropout_rate=0.0,
        **kwargs,
    ):
        super(SwinTransformer, self).__init__(**kwargs)

        self.dim = dim  # number of input dimensions
        self.num_patch = num_patch  # number of embedded patches
        self.num_heads = num_heads  # number of attention heads
        self.window_size = window_size  # size of window
        self.shift_size = shift_size  # size of window shift
        self.num_mlp = num_mlp  # number of MLP nodes

        # Clamp the window to the patch grid BEFORE building the attention
        # layer, so WindowAttention is created with the window size that
        # `call` actually uses. Previously the clamp ran after construction.
        if min(self.num_patch) < self.window_size:
            self.shift_size = 0
            self.window_size = min(self.num_patch)

        self.norm1 = layers.LayerNormalization(epsilon=1e-5)
        self.attn = WindowAttention(
            dim,
            window_size=(self.window_size, self.window_size),
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            dropout_rate=dropout_rate,
        )
        self.drop_path = DropPath(dropout_rate)
        self.norm2 = layers.LayerNormalization(epsilon=1e-5)

        self.mlp = keras.Sequential(
            [
                layers.Dense(num_mlp),
                layers.Activation(keras.activations.gelu),
                layers.Dropout(dropout_rate),
                layers.Dense(dim),
                layers.Dropout(dropout_rate),
            ]
        )

    def build(self, input_shape):
        """Pre-compute the additive attention mask used with shifted windows."""
        if self.shift_size == 0:
            self.attn_mask = None
        else:
            height, width = self.num_patch
            h_slices = (
                slice(0, -self.window_size),
                slice(-self.window_size, -self.shift_size),
                slice(-self.shift_size, None),
            )
            w_slices = (
                slice(0, -self.window_size),
                slice(-self.window_size, -self.shift_size),
                slice(-self.shift_size, None),
            )
            # Label each of the 3 x 3 regions with its own id
            mask_array = np.zeros((1, height, width, 1))
            count = 0
            for h in h_slices:
                for w in w_slices:
                    mask_array[:, h, w, :] = count
                    count += 1
            mask_array = tf.convert_to_tensor(mask_array)

            # mask array to windows
            mask_windows = window_partition(mask_array, self.window_size)
            mask_windows = tf.reshape(
                mask_windows, shape=[-1, self.window_size * self.window_size]
            )
            # Token pairs with different region ids get -100, all others 0
            attn_mask = tf.expand_dims(mask_windows, axis=1) - tf.expand_dims(
                mask_windows, axis=2
            )
            attn_mask = tf.where(attn_mask != 0, -100.0, attn_mask)
            attn_mask = tf.where(attn_mask == 0, 0.0, attn_mask)
            self.attn_mask = tf.Variable(initial_value=attn_mask, trainable=False)

    def call(self, x):
        """Apply the block to ``x``, unpacked as (batch, patches, channels)."""
        height, width = self.num_patch
        _, num_patches_before, channels = x.shape
        x_skip = x
        x = self.norm1(x)
        x = tf.reshape(x, shape=(-1, height, width, channels))
        if self.shift_size > 0:
            # Roll the grid so the windows straddle the previous window borders
            shifted_x = tf.roll(
                x, shift=[-self.shift_size, -self.shift_size], axis=[1, 2]
            )
        else:
            shifted_x = x

        x_windows = window_partition(shifted_x, self.window_size)
        x_windows = tf.reshape(
            x_windows, shape=(-1, self.window_size * self.window_size, channels)
        )
        attn_windows = self.attn(x_windows, mask=self.attn_mask)

        attn_windows = tf.reshape(
            attn_windows, shape=(-1, self.window_size, self.window_size, channels)
        )
        shifted_x = window_reverse(
            attn_windows, self.window_size, height, width, channels
        )
        if self.shift_size > 0:
            # Undo the roll
            x = tf.roll(
                shifted_x, shift=[self.shift_size, self.shift_size], axis=[1, 2]
            )
        else:
            x = shifted_x

        # Residual around attention, then residual around the MLP
        x = tf.reshape(x, shape=(-1, height * width, channels))
        x = self.drop_path(x)
        x = x_skip + x
        x_skip = x
        x = self.norm2(x)
        x = self.mlp(x)
        x = self.drop_path(x)
        x = x_skip + x
        return x


# Model training and evaluation
## Extract and embed patches

In [None]:
# https://keras.io/examples/vision/swin_transformers/ - these three classes are also defined there; read the article.

# Extract patches from the image

class PatchExtract(layers.Layer):
    """Cut a batch of images into non-overlapping patches and flatten each one.

    Original author note: "takes the output layer of new_base as input". No
    ``new_base`` exists in this notebook; here the layer is fed the rescaled
    input image.

    Args:
        patch_size: Pair giving the patch size along the two image axes.
    """

    def __init__(self, patch_size, **kwargs):
        super(PatchExtract, self).__init__(**kwargs)
        self.patch_size_x = patch_size[0]
        # Second entry of patch_size (was patch_size[0], which silently ignored
        # it for non-square patches).
        self.patch_size_y = patch_size[1]

    def call(self, images):
        """Return a (batch, number_of_patches, values_per_patch) tensor."""
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=(1, self.patch_size_x, self.patch_size_y, 1),
            strides=(1, self.patch_size_x, self.patch_size_y, 1),
            rates=(1, 1, 1, 1),
            padding="VALID",
        )
        patch_dim = patches.shape[-1]
        # Count patches along both grid axes; squaring the first axis was only
        # correct for square grids.
        patch_rows = patches.shape[1]
        patch_cols = patches.shape[2]
        return tf.reshape(patches, (batch_size, patch_rows * patch_cols, patch_dim))

# Embed the patches extracted from the image

class PatchEmbedding(layers.Layer):
    """Project each patch to ``embed_dim`` and add a learned position embedding."""

    def __init__(self, num_patch, embed_dim, **kwargs):
        super(PatchEmbedding, self).__init__(**kwargs)
        self.num_patch = num_patch
        self.proj = layers.Dense(embed_dim)
        self.pos_embed = layers.Embedding(input_dim=num_patch, output_dim=embed_dim)

    def call(self, patch):
        # One position id per patch: 0 .. num_patch - 1
        positions = tf.range(self.num_patch)
        projected = self.proj(patch)
        return projected + self.pos_embed(positions)

# Merge neighbouring patches of the image

class PatchMerging(tf.keras.layers.Layer):
    """Merge every 2 x 2 group of neighbouring patches into one token.

    The four patches of a group are concatenated along the channel axis (4 * C)
    and projected to ``2 * embed_dim`` channels.

    Args:
        num_patch: ``(height, width)`` of the incoming patch grid.
        embed_dim: Embedding dimension; the output has ``2 * embed_dim`` channels.
        **kwargs: Forwarded to ``Layer.__init__`` (e.g. ``name``), as in the
            other layers of this notebook.
    """

    def __init__(self, num_patch, embed_dim, **kwargs):
        super(PatchMerging, self).__init__(**kwargs)
        self.num_patch = num_patch
        self.embed_dim = embed_dim
        self.linear_trans = layers.Dense(2 * embed_dim, use_bias=False)

    def call(self, x):
        height, width = self.num_patch
        _, _, C = x.get_shape().as_list()
        x = tf.reshape(x, shape=(-1, height, width, C))
        # The four members of every 2 x 2 neighbourhood
        x0 = x[:, 0::2, 0::2, :]
        x1 = x[:, 1::2, 0::2, :]
        x2 = x[:, 0::2, 1::2, :]
        x3 = x[:, 1::2, 1::2, :]
        x = tf.concat((x0, x1, x2, x3), axis=-1)
        x = tf.reshape(x, shape=(-1, (height // 2) * (width // 2), 4 * C))
        return self.linear_trans(x)

In [None]:
# Image -> [0, 1] pixels -> patches -> embedded tokens
input = layers.Input(input_shape)
scaled = layers.experimental.preprocessing.Rescaling(1./255)(input)
patches = PatchExtract(patch_size)(scaled)
tokens = PatchEmbedding(num_patch_x * num_patch_y, embed_dim)(patches)

# One Swin block without window shift, followed by 2 x 2 patch merging
swin_block = SwinTransformer(
    dim=embed_dim,
    num_patch=(num_patch_x, num_patch_y),
    num_heads=num_heads,
    window_size=window_size,
    shift_size=0,
    num_mlp=num_mlp,
    qkv_bias=qkv_bias,
    dropout_rate=dropout_rate,
)
tokens = swin_block(tokens)
merged = PatchMerging((num_patch_x, num_patch_y), embed_dim=embed_dim)(tokens)

# Regression head: average over tokens, two small dense layers, one output
x = layers.GlobalAveragePooling1D()(merged)
x = layers.Dense(16, activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.Dense(8, activation='elu')(x)
x = layers.BatchNormalization()(x)
output = layers.Dense(1)(x)

In [None]:
# Wrap the graph in a Model and train it with MSE loss, reporting RMSE as 'rmse'.
model = keras.Model(inputs=input, outputs=output)

model.compile(
    optimizer=OPTIMIZER,
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')],
)

In [None]:
def scheduler(epoch, lr):
    """Learning-rate schedule: constant for the first four epochs, then decaying.

    Args:
        epoch: Zero-based epoch index.
        lr: Current learning rate.

    Returns:
        ``lr`` unchanged while ``epoch < 4``; afterwards ``lr * exp(-0.1)`` as
        a Python float (not a tensor, which some Keras versions reject as a
        schedule result).
    """
    if epoch < 4:
        return lr
    return float(lr * math.exp(-0.1))

In [None]:
def train_model(model, FOLD, image_transform=None,flip=None, rotate=None, RGB=None, comix=None):
    """Fit ``model`` on each cross-validation fold in turn.

    For fold ``n`` the rows of the global ``train`` frame with ``fold != n``
    are used for fitting and the rows with ``fold == n`` for validation.

    NOTE(review): the same ``model`` object is fitted for every fold without
    being re-initialised, so weights carry over from one fold to the next and
    later folds validate on rows the model has already been trained on.

    NOTE(review): no checkpoint is written. The ``ModelCheckpoint`` that used
    to be created here was never passed to ``fit`` and has been removed as
    dead code.

    Args:
        model: Compiled Keras model.
        FOLD: Number of folds to iterate over.
        image_transform, flip, rotate, RGB, comix: Augmentation switches
            forwarded to the training ``Dataset``; validation data is never
            augmented.

    Returns:
        The ``History`` of the last ``model.fit`` call, or ``None`` when
        ``FOLD`` is 0.
    """
    model_fit = None  # stays None when there are no folds to run

    for fold_n in range(FOLD):
        print('Fold #{}'.format(fold_n+1))

        reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2)
        scheduler_lr = tf.keras.callbacks.LearningRateScheduler(scheduler)
        callbacks = [reduce_lr, scheduler_lr]

        train_data = train[train.fold != fold_n]
        val_data = train[train.fold == fold_n]

        train_dataset = Dataset(train_data, train_data.Pawpularity, BATCH_SIZE,
                                image_transform, flip, rotate, RGB, comix)

        # Validation batches: plain images, no augmentation or blending
        val_dataset = Dataset(val_data, val_data.Pawpularity, BATCH_SIZE,
                              image_transform=False, flip=False, rotate=False,
                              RGB=False, comix=False)

        model_fit = model.fit(train_dataset, validation_data=val_dataset,
                              epochs=EPOCHS, callbacks=callbacks)

    return model_fit

In [None]:
# Train with flips, rotation and batch blending enabled; colour jitter stays off.
history = train_model(
    model,
    FOLD,
    image_transform=True,
    flip=True,
    rotate=True,
    RGB=False,
    comix=True,
)

In [None]:
from array import array             

In [None]:
# Display the test metadata frame (including the file_path column added earlier).
test

In [None]:
# Split the test frame into at most seven consecutive, non-empty chunks.
# np.array_split(test, 7) produced empty chunks when `test` has fewer than
# seven rows, and an empty chunk yields a Dataset with zero batches.
chunk_len = max(1, math.ceil(len(test) / 7))
chunks = [test.iloc[start:start + chunk_len] for start in range(0, len(test), chunk_len)]

In [None]:
# Predict chunk by chunk and join the flattened outputs in chunk order.
chunk_preds = []
for chunk in chunks:
    test_dataset = Dataset(
        chunk,
        batch_size=BATCH_SIZE,
        image_transform=False,
        flip=False,
        rotate=False,
        RGB=False,
        comix=False,
    )
    pred_cnn = model.predict(test_dataset)
    chunk_preds.append(pred_cnn.reshape(-1))

# The leading empty float array keeps the result dtype and the empty case unchanged.
pred = np.concatenate([np.array([])] + chunk_preds, axis=0)

In [None]:
# Number of predictions; expected to equal the number of test rows.
pred.shape

In [None]:
# Raw model outputs, on the scale of the training targets (Pawpularity / 100).
pred

In [None]:
# Predictions scaled back by 100, undoing the /100 applied in Dataset.normalize_y.
pred * 100

In [None]:
# Assumes sample_submission lists the same rows in the same order as `test`.
if len(pred) != len(sample_submission):
    raise ValueError(
        f'Got {len(pred)} predictions for {len(sample_submission)} submission rows'
    )

# Undo the /100 target scaling and keep the scores inside the 1..100
# Pawpularity range, which an unconstrained regression output can leave.
sample_submission['Pawpularity'] = np.clip(pred * 100, 1, 100)
sample_submission.to_csv("submission.csv", index=False)

In [None]:
# Display the submission frame that was written to submission.csv.
sample_submission