In [None]:
import pandas as pd 
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

# Tabular Data

In [None]:
# Load the training metadata, discard the image id, and split target from features.
df = pd.read_csv('../input/petfinder-pawpularity-score/train.csv').drop(columns=['Id'])
response = df['Pawpularity']  # response variable for regression
X = df.drop(columns=['Pawpularity'])  # every remaining metadata column

* Unique labels: every distinct row of metadata values is treated as one label

In [None]:
# De-duplicate whole rows of X (axis=0), giving one row per distinct metadata combination.
labels=np.unique(X,axis=0)

In [None]:
# Display the distinct metadata rows.
labels

In [None]:
# (number of distinct rows, number of metadata columns)
labels.shape

* Class Map

In [None]:
# Lookup table: metadata row (as a hashable tuple) -> integer class id (its position in `labels`).
class_map = dict(zip(map(tuple, labels), range(len(labels))))

* Map each training row to its class index

In [None]:
# Translate every training row into its class id through class_map.
Y = np.array([class_map[tuple(X.loc[row])] for row in tqdm(range(X.shape[0]))])

In [None]:
# Histogram of class ids, to see how unevenly the classes are populated.
fig, ax = plt.subplots()
sns.histplot(Y, ax=ax)
ax.set_xlabel('label')
plt.show()

In [None]:
# unique: sorted distinct class ids found in Y; counts: how many samples carry each of them.
unique, counts = np.unique(Y, return_counts=True)

In [None]:
# Number of distinct classes present in Y.
counts.shape

In [None]:
# Per-class sample counts.
counts

* The classes are imbalanced. For now I simply apply class weights when computing the losses; a better solution may exist.

In [None]:
def weight_map(count):
    """Return the balanced loss weight for a class that has `count` samples.

    The weight equals n_samples / (n_classes * count), where n_samples is the
    length of the global `Y` and n_classes the length of the global `unique`.
    """
    n_samples = Y.shape[0]
    n_classes = unique.shape[0]
    inverse_frequency = 1 / count
    return inverse_frequency * (n_samples / n_classes)

In [None]:
# One balanced weight per class, in the same order as `unique` / `counts`.
class_weights = [weight_map(class_count) for class_count in counts]

In [None]:
# Horizontal bar chart: one bar per class id, bar length is that class's loss weight.
plt.figure(figsize=(12,12))
plt.barh(unique,class_weights)

# With barh the y axis carries the class ids and the x axis the weights,
# so the y label must name the classes, not the weights.
plt.ylabel('class label')
plt.xlabel('weights')
plt.show()

# Prepare images and label dataset

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds

In [None]:
# Root folder of the competition data; the loader infers one integer label per sub-folder.
pth = '../input/petfinder-pawpularity-score'

# One image per batch, resized to 128x128, read in a fixed (unshuffled) order.
# NOTE(review): later cells pair these images with the rows of train.csv purely by
# position -- confirm that the loader's file order matches the csv row order.
ds = tf.keras.preprocessing.image_dataset_from_directory(
    pth,
    shuffle=False,
    seed=0,
    batch_size=1,
    image_size=(128, 128),
)


In [None]:
# Keep only the images whose folder label is non-zero and strip the batch axis (batch_size is 1).
# presumably label 0 is the test folder and the kept images are the training ones -- verify.
dataset = [x[0] for x, type_ in tqdm(ds) if not (type_ == 0)]

In [None]:
# Pack the per-image tensors into a single tensor with a new leading sample axis.
dataset = tf.stack(dataset, axis=0)

In [None]:
# Sanity check: the number of images should equal the number of labels.
dataset.shape,Y.shape

# Learning Label Embedding From Images

* Reference : [Label Embedding Network](https://arxiv.org/abs/1710.10393)

In [None]:
from tensorflow.keras import layers,optimizers,losses
from tensorflow.keras.applications.efficientnet import EfficientNetB0,preprocess_input

In [None]:
class Model(tf.keras.Model):
    """Label-embedding classifier built on a frozen EfficientNetB0 backbone.

    Components:
      * emb  -- a learnable (num_class x num_class) embedding, one vector per class.
      * eff  -- EfficientNetB0 without its top, globally average-pooled, not trainable.
      * out1 -- classification head fed with the backbone features.
      * out2 -- second classification head fed with gradient-stopped features; its
                temperature-softened probabilities are returned for training `emb`.

    Args:
        img_size: side length of the (square, 3-channel) input images.
        num_class: number of classes; also the width of each label embedding.
        tau: temperature dividing the out2 logits before the softened softmax.
    """

    def __init__(self,img_size,num_class,tau):
        super().__init__()
        self.emb=layers.Embedding(num_class,num_class)

        # pooling='avg' makes the backbone return one pooled feature vector per image.
        self.eff=EfficientNetB0(
            include_top=False,
            pooling='avg',
            input_shape=(img_size,img_size,3),
        )
        self.eff.trainable=False

        self.out1=self._make_head(num_class)
        self.out2=self._make_head(num_class)

        self.tau=tau

    @staticmethod
    def _make_head(num_class):
        """Build one classification head: Dense(2048, relu) followed by Dense(num_class)."""
        return tf.keras.Sequential([
            layers.Dense(2048,activation='relu'),
            layers.Dense(num_class),
        ])

    def call(self,inputs):
        '''
        inputs:[x,y,y_ohe]
        x:(batch,h,w,c) images
        y:(batch,) integer class ids
        y_ohe: one-hot encoding of y

        returns (o1,o2,emb,soft_target,o2_prob,tau2_prob,mask):
        o1, o2      -- logits of the two heads
        emb         -- label embedding looked up for y
        soft_target -- softmax of emb
        o2_prob     -- softmax of o2
        tau2_prob   -- softmax of o2/tau, gradient stopped
        mask        -- 1.0 where argmax(o2) equals argmax(y_ohe), else 0.0, gradient stopped
        '''
        images,class_ids,class_ids_ohe=inputs

        features=self.eff(preprocess_input(images))

        # Label embedding and the soft target derived from it.
        label_emb=self.emb(class_ids)
        soft_target=tf.nn.softmax(label_emb,axis=-1)

        # The second head sees the features without passing gradients back to them.
        logits_main=self.out1(features)
        logits_aux=self.out2(tf.stop_gradient(features))

        aux_prob=tf.nn.softmax(logits_aux,axis=-1)
        softened=tf.nn.softmax(logits_aux/self.tau,axis=-1)
        aux_prob_tau=tf.stop_gradient(softened)

        # Flag the samples that the second head classifies correctly.
        predicted=tf.argmax(logits_aux,axis=-1)
        expected=tf.argmax(class_ids_ohe,axis=-1)
        mask=tf.stop_gradient(tf.cast(tf.equal(predicted,expected),tf.float32))

        return logits_main,logits_aux,label_emb,soft_target,aux_prob,aux_prob_tau,mask

In [None]:
# Hyper-parameters of the label-embedding training.
tau=2  # temperature dividing the out2 logits before the softened softmax (Model.call)
alpha=0.9  # threshold of the regularisation term in step(): true-class probability above it is penalised
beta=0.5  # mixes the hard-label loss (beta) and the soft-label loss (1-beta) on out1

lr=1e-3  # learning rate passed to Adam
batch_size=64  # samples per training batch

epochs=100  # iterations of the outer loop in train()

In [None]:
# Pair image i with class id i (by position) and batch the pairs.
# Note: this rebinds `ds`, which previously held the directory loader.
ds=tf.data.Dataset.from_tensor_slices((dataset,tf.convert_to_tensor(Y))).batch(batch_size)

In [None]:
# Network for 128x128 images with one output per class id found in Y.
model=Model(128,unique.shape[0],tau)

In [None]:
# Adam optimizer using the learning rate configured above.
opt=optimizers.Adam(learning_rate=lr)

In [None]:
def ce(labels,logits,class_weights,mask=None):
    """Class-weighted softmax cross-entropy against hard (integer) labels.

    Args:
        labels: integer class ids, one per sample.
        logits: unnormalised scores with one column per class.
        class_weights: sequence holding one weight per class; its length sets the one-hot depth.
        mask: optional per-sample 0/1 weights. When given, the loss is averaged over the
            masked-in samples only; otherwise it is the mean over all samples.

    Returns:
        Scalar loss tensor.
    """
    y_ohe=tf.one_hot(labels,depth=len(class_weights))
    # The one-hot row picks out the weight of each sample's own class.
    weights=tf.math.multiply(class_weights,y_ohe)
    weights=tf.reduce_sum(weights,axis=-1)
    loss=tf.nn.softmax_cross_entropy_with_logits(labels=y_ohe,logits=logits)*weights
    # Identity test: `mask != None` would go through the tensor's overloaded `!=` operator.
    if mask is not None:
        # The epsilon keeps the division finite when no sample is masked in.
        loss=tf.reduce_sum(loss*mask)/tf.reduce_sum(mask+1e-8)
    else:
        loss=tf.reduce_mean(loss)
    return loss

In [None]:
def soft_ce(labels,soft_labels,logits,class_weights,mask=None):
    """Class-weighted softmax cross-entropy against soft (probability) targets.

    Args:
        labels: integer class ids, used only to look up each sample's class weight.
        soft_labels: target distribution per sample, one column per class.
        logits: unnormalised scores with one column per class.
        class_weights: sequence holding one weight per class; its length sets the one-hot depth.
        mask: optional per-sample 0/1 weights. When given, the loss is averaged over the
            masked-in samples only; otherwise it is the mean over all samples.

    Returns:
        Scalar loss tensor.
    """
    y_ohe=tf.one_hot(labels,depth=len(class_weights))
    # The one-hot row picks out the weight of each sample's own class.
    weights=tf.math.multiply(class_weights,y_ohe)
    weights=tf.reduce_sum(weights,axis=-1)

    loss=tf.nn.softmax_cross_entropy_with_logits(labels=soft_labels,logits=logits)*weights

    # Identity test: `mask != None` would go through the tensor's overloaded `!=` operator.
    if mask is not None:
        # The epsilon keeps the division finite when no sample is masked in.
        loss=tf.reduce_sum(loss*mask)/tf.reduce_sum(mask+1e-8)
    else:
        loss=tf.reduce_mean(loss)
    return loss

In [None]:
@tf.function
def step(x,y,opt):
    """Run one optimisation step on a batch and return the individual loss terms.

    Uses the notebook globals `model`, `class_weights`, `alpha` and `beta`.

    Args:
        x: batch of images.
        y: integer class ids of the batch.
        opt: optimizer that applies the gradients.

    Returns:
        (l_o1_y, l_o1_emb, l_o2_y, l_o2_emb, l_re): hard-label and soft-label losses of
        head 1, hard-label loss of head 2, the embedding loss, and the regularisation term.
    """
    n_class=len(class_weights)
    target_ohe=tf.one_hot(y,depth=n_class)

    with tf.GradientTape() as tape:
        outputs=model([x,y,target_ohe])
        o1,o2,emb,soft_target,o2_prob,tau2_prob,mask=outputs

        # Head 1: hard labels, plus the (gradient-stopped) soft targets from the embedding.
        l_o1_y=ce(y,o1,class_weights)
        l_o1_emb=soft_ce(y,tf.stop_gradient(soft_target),o1,class_weights)

        # Head 2: hard labels only.
        l_o2_y=ce(y,o2,class_weights)

        # Embedding: pulled towards head 2's softened output, only where the mask is 1.
        l_o2_emb=soft_ce(y,tau2_prob,emb,class_weights,mask)

        # Regularisation: penalise true-class probability of head 2 in excess of alpha.
        true_class_prob=tf.reduce_sum(o2_prob*target_ohe,axis=-1)
        l_re=tf.reduce_sum(tf.nn.relu(true_class_prob-alpha))

        head1_loss=beta*l_o1_y + (1-beta)*l_o1_emb
        loss=head1_loss + l_o2_y + l_o2_emb + l_re

    variables=model.trainable_weights
    gradients=tape.gradient(loss,variables)
    opt.apply_gradients(zip(gradients,variables))

    return l_o1_y,l_o1_emb,l_o2_y,l_o2_emb,l_re

In [None]:
def train():
    """Train the global `model` on the global dataset `ds` for `epochs` epochs.

    Restores the latest checkpoint from ./ckpt when one exists, runs `step` on every
    batch of every epoch, shows the current loss terms on the progress bar, and saves
    a checkpoint (keeping only the newest) on every fifth epoch, starting with epoch 0.
    """
    ckpt = tf.train.Checkpoint(model=model)
    ckpt_manager = tf.train.CheckpointManager(ckpt,'./ckpt', max_to_keep=1)
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint)
        print('---ckpt restored----')

    loss_names=('l_o1_y','l_o1_emb','l_o2_y','l_o2_emb','l_re')
    for epoch in range(epochs):
        loop=tqdm(ds)
        # Iterate over ALL batches: a stray `break` here used to end each epoch
        # after its first batch, so most of the data was never trained on.
        for x,y in loop:
            losses=step(x,y,opt)
            # One postfix entry per loss term, so the values stay separated and readable.
            loop.set_postfix(
                epoch=epoch,
                **{name:f'{float(value):.4f}' for name,value in zip(loss_names,losses)}
            )
        if epoch%5==0:
            ckpt_manager.save()

In [None]:
# Run the training loop (it resumes from ./ckpt when a checkpoint is present).
train()

# Regression

In [None]:
from lightgbm import LGBMRegressor

In [None]:
# Look up the learned label embedding of every training sample's class id.
embs=model.emb(Y) #(9912, nclass)

In [None]:
# Gradient-boosted regressor with default settings.
lgbm=LGBMRegressor()

In [None]:
# Regress the Pawpularity target on the label embeddings.
lgbm.fit(embs.numpy(),np.array(response))

In [None]:
# Heat map of the embedding matrix: row i is the learned vector of class i,
# which the author reads as the similarity of label i to every label.
sns.heatmap(model.emb.weights[0],cmap="YlGnBu") #similarity between labels

# Submit

In [None]:
# Read the test metadata; keep the ids for the submission and leave only the feature columns in df.
# Note: this rebinds `df`, which previously held the training metadata.
df = pd.read_csv('../input/petfinder-pawpularity-score/test.csv')
Id = df.pop('Id')

In [None]:
# Matrix of the known labels: row i is the metadata combination whose class id is i.
class_matrix = np.array(list(class_map))

In [None]:
# Display the matrix of known label rows.
class_matrix

In [None]:
def cosine_similarity(y, matrix=None):
    """Cosine similarity between the vector `y` and every row of a label matrix.

    Args:
        y: 1-D sequence with one value per matrix column.
        matrix: 2-D array of candidate rows. Defaults to the notebook-level
            `class_matrix`, which keeps the original one-argument call working.

    Returns:
        1-D array with one similarity per matrix row. A small epsilon in the
        denominator makes all-zero vectors yield 0 instead of a division by zero.
    """
    if matrix is None:
        matrix = class_matrix
    matrix = np.asarray(matrix)
    vector = np.asarray(y)
    denominator = np.linalg.norm(matrix, axis=-1) * np.linalg.norm(vector) + 1e-8
    return matrix @ vector / denominator

In [None]:
# Predict the score of every test row from the embedding of its metadata class.
pred=[]
for row in df.itertuples(index=False,name=None):
    if row in class_map:
        class_id=class_map[row]
    else:
        # This combination never occurred in training: fall back to the known label
        # with the HIGHEST cosine similarity, breaking ties at random (unseeded).
        sim=cosine_similarity(row)
        best=np.flatnonzero(sim==np.max(sim))
        class_id=class_map[tuple(class_matrix[np.random.choice(best)])]

    # Look the class id up as a batch of one so the regressor receives a (1, nclass) array.
    x=model.emb(np.array([class_id]))
    prediction=lgbm.predict(x.numpy())

    pred.append(prediction)

pred=np.array(pred)  # one row per test sample, one column holding the prediction

In [None]:
# Two-column submission frame: the image id and its float32 predicted score.
submit = pd.DataFrame({
    'Id': Id,
    'Pawpularity': pred[:, 0].astype('float32'),
})

In [None]:
# Write the submission file without the DataFrame index column.
submit.to_csv('./submission.csv',index=False)