**Load the dataset**

In [None]:
import tensorflow as tf
import pandas as pd
from sklearn import preprocessing

In [None]:
# Read the three source tables in one pass: anime metadata, the user/anime
# interaction list, and the lookup table for watching-status codes.
anime, animelist, watching_status = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/anime-recommendation-database-2020/anime.csv',
        '../input/anime-recommendation-database-2020/animelist.csv',
        '../input/anime-recommendation-database-2020/watching_status.csv',
    )
)

In [None]:
# Work on a 10,000-row random subset of the interaction table. The fixed
# random_state makes the subset (and everything derived from it) reproducible
# across kernel restarts.
animelist_sample = animelist.sample(n=10000, random_state=42)

# Attach anime metadata to every sampled interaction; the left join keeps all
# sampled rows even when no anime row has a matching MAL_ID.
anime_df = pd.merge(animelist_sample, anime, how='left', left_on='anime_id', right_on='MAL_ID')

# Swap the numeric watching-status codes for their textual descriptions.
# NOTE(review): the leading space in ' description' presumably mirrors the CSV
# header exactly - confirm before "fixing" it.
anime_df['watching_status'] = anime_df['watching_status'].replace(
    list(watching_status['status']),
    list(watching_status[' description']),
)

In [None]:
# Keep only the interaction columns and the anime metadata used for modelling.
# .copy() turns the selection into an independent frame so the assignments
# below do not write into a slice of the merged frame (SettingWithCopyWarning).
anime_df = anime_df[[
    'user_id', 'anime_id', 'rating', 'watching_status', 'watched_episodes', 'Score',
    'Genres', 'Type', 'Episodes', 'Premiered', 'Producers', 'Licensors',
    'Studios', 'Source', 'Duration', 'Rating', 'Ranked', 'Popularity', 'Members',
    'Favorites', 'Watching', 'Completed', 'On-Hold', 'Dropped', 'Plan to Watch',
]].copy()

# 'Score' and 'Ranked' hold the placeholder string 'Unknown' for some rows;
# map it to 0.0 first so both columns can be converted to numeric dtype.
# NOTE(review): 0.0 is an arbitrary stand-in - confirm it is an acceptable
# value for 'Ranked' in particular.
anime_df[['Score', 'Ranked']] = (
    anime_df[['Score', 'Ranked']]
    .replace('Unknown', 0.0)
    .apply(pd.to_numeric)
)

**Preprocessing**

In [None]:
# Continuous (numeric) feature columns - these are standardised later.
con = [
    'watched_episodes',
    'Score',
    'Ranked',
    'Popularity',
    'Members',
    'Favorites',
    'Watching',
    'Completed',
    'On-Hold',
    'Dropped',
    'Plan to Watch',
]

# Categorical feature columns - these are ordinal-encoded later and each one
# gets its own embedding table in the model.
cat = [
    'user_id',
    'anime_id',
    'watching_status',
    'Genres',
    'Type',
    'Episodes',
    'Premiered',
    'Producers',
    'Licensors',
    'Studios',
    'Source',
    'Duration',
    'Rating',
]

In [None]:
# Standardise the continuous columns (StandardScaler: zero mean, unit variance
# per column). The fitted scaler is kept in `con_scaler` so the identical
# transform can later be applied to rows that were not part of this frame.
# NOTE(review): the scaler is fitted on the whole sample, i.e. before the
# train/validation/test split - held-out rows contribute to the statistics.
con_scaler = preprocessing.StandardScaler()
anime_df[con] = con_scaler.fit_transform(anime_df[con])

In [None]:
# Ordinal-encode the categorical columns: every distinct value of a column is
# replaced by an integer code, which is what the embedding lookups consume.
# The fitted encoder is kept in `cat_encoder` so the category -> code mapping
# (cat_encoder.categories_) stays available for new data or for decoding.
cat_encoder = preprocessing.OrdinalEncoder()
anime_df[cat] = cat_encoder.fit_transform(anime_df[cat])

**Build and train the model**

In [None]:
# Number of distinct encoded values per categorical column, in `cat` order;
# used as the vocabulary size of the matching embedding table.
emb_counts = [anime_df[column_name].unique().shape[0] for column_name in cat]

In [None]:
# Features are a (continuous, categorical) pair; the label is the user's rating.
features_ds = tf.data.Dataset.from_tensor_slices((
    tf.cast(anime_df[con].values, tf.float32),
    tf.cast(anime_df[cat].values, tf.int32),
))
labels_ds = tf.data.Dataset.from_tensor_slices(
    tf.cast(anime_df['rating'].values, tf.int32)
)

# Shuffle ONCE with a fixed order. With the default
# reshuffle_each_iteration=True the dataset is re-shuffled on every pass, so
# the take()/skip() splits below would pick different rows each epoch and the
# train / validation / test sets would leak into each other. The buffer covers
# the whole dataset so the shuffle is uniform.
ds = tf.data.Dataset.zip((features_ds, labels_ds)).shuffle(
    buffer_size=len(anime_df),
    seed=42,
    reshuffle_each_iteration=False,
)

# 80% train, 10% validation, 10% test - same proportions and positions as
# before: the first 20% is held out and split in half.
n_holdout = int(len(ds) * 0.2)
n_valid = int(n_holdout * 0.5)

ds_valid = ds.take(n_valid)
ds_test = ds.skip(n_valid).take(n_holdout - n_valid)

# Only the training rows are re-shuffled between epochs; this second shuffle
# sits after the split, so it cannot mix in held-out rows.
ds_train = ds.skip(n_holdout).shuffle(buffer_size=len(ds) - n_holdout)

In [None]:
# Sanity check: show the first (features, label) element of the dataset.
# `ds` is not batched at this point, so despite the variable names this is a
# single example rather than a batch.
for features_batch, labels_batch in ds.take(1):
    print(features_batch, labels_batch, sep='\n')

In [None]:
def MLP(arch, activation='relu', out_activation=None):
    """Build a stack of Dense layers as a `tf.keras.Sequential` model.

    Args:
        arch: Sequence of layer widths. Every entry except the last becomes a
            hidden Dense layer; the last entry is the output layer width.
        activation: Activation used by the hidden layers.
        out_activation: Activation used by the output layer (None = linear).

    Returns:
        A `tf.keras.Sequential` containing one Dense layer per entry of `arch`.
    """
    hidden_layers = [
        tf.keras.layers.Dense(width, activation=activation)
        for width in arch[:-1]
    ]
    output_layer = tf.keras.layers.Dense(arch[-1], activation=out_activation)

    return tf.keras.Sequential(hidden_layers + [output_layer])


class SecondOrderFeatureInteraction(tf.keras.layers.Layer):
    """Pairwise dot-product interactions between a list of feature vectors.

    Args:
        self_interaction: When True, the dot product of each vector with
            itself is part of the output in addition to the cross terms.
    """

    def __init__(self, self_interaction=False):
        super().__init__()
        self.self_interaction = self_interaction

    def call(self, inputs):
        """Return the flattened upper-triangular pairwise dot products.

        Args:
            inputs: Python list of tensors that can be stacked along axis 1
                (assumed to be `(batch, dim)` each, all with the same `dim`).

        Returns:
            Tensor of shape `(batch, out_dim)`, where `out_dim` is
            `n * (n + 1) // 2` with self-interactions and `n * (n - 1) // 2`
            without, `n` being `len(inputs)`.
        """
        num_features = len(inputs)
        batch_size = tf.shape(inputs[0])[0]

        # Stack the vectors and take all pairwise dot products in one matmul;
        # the result holds one square matrix of products per batch row.
        stacked = tf.stack(inputs, axis=1)
        dot_products = tf.matmul(stacked, stacked, transpose_b=True)

        # The product matrix is symmetric, so keep the upper triangle only.
        ones = tf.ones_like(dot_products)
        mask = tf.linalg.band_part(ones, 0, -1)
        out_dim = num_features * (num_features + 1) // 2

        if not self.self_interaction:
            # Remove the main diagonal (each vector dotted with itself).
            mask = mask - tf.linalg.band_part(ones, 0, 0)
            out_dim = num_features * (num_features - 1) // 2

        # tf.boolean_mask documents a boolean mask, so cast the 0/1 float mask
        # explicitly instead of relying on implicit numeric handling.
        selected = tf.boolean_mask(dot_products, tf.cast(mask, tf.bool))
        return tf.reshape(selected, (batch_size, out_dim))


class DLRM(tf.keras.Model):
    """Rating model built from embeddings, a bottom MLP, pairwise feature
    interactions and a top MLP.

    Args:
        embedding_sizes: One vocabulary size per categorical column; an
            Embedding table is created for each entry, in order.
        embedding_dim: Output width shared by all embedding tables.
        arch_bot: Layer widths of the MLP applied to the continuous features.
            Its output is stacked with the embedding outputs inside the
            interaction layer, so its last width presumably has to equal
            `embedding_dim`.
        arch_top: Layer widths of the MLP that produces the prediction.
        self_interaction: Passed through to SecondOrderFeatureInteraction.
    """

    def __init__(self, embedding_sizes, embedding_dim, arch_bot, arch_top, self_interaction):
        super().__init__()

        embedding_tables = []
        for vocab_size in embedding_sizes:
            embedding_tables.append(tf.keras.layers.Embedding(vocab_size, embedding_dim))
        self.emb = embedding_tables

        self.bot_nn = MLP(arch_bot, out_activation='relu')
        self.top_nn = MLP(arch_top, out_activation='relu')
        self.interaction_op = SecondOrderFeatureInteraction(self_interaction)

    def call(self, input):
        """Run the forward pass.

        Args:
            input: Pair `(dense, categorical)`. The categorical tensor is
                unstacked along axis 1, giving one column per embedding table.

        Returns:
            Output of the top MLP, clipped to the range [0.0, 10.0].
        """
        input_dense, input_cat = input

        # Look up each categorical column in its own embedding table.
        cat_columns = tf.unstack(input_cat, axis=1)
        emb_x = []
        for table, column in zip(self.emb, cat_columns):
            emb_x.append(table(column))

        dense_x = self.bot_nn(input_dense)

        # Pairwise interactions across all embeddings plus the dense
        # projection, concatenated with the dense projection itself.
        interactions = self.interaction_op(emb_x + [dense_x])
        top_input = tf.concat([dense_x, interactions], axis=1)

        return tf.clip_by_value(self.top_nn(top_input), 0.0, 10.0)

In [None]:
# Instantiate the recommender: 4-dimensional embeddings, a bottom MLP with
# widths 8 -> 4 for the continuous features, and a single-unit top layer.
model = DLRM(
    embedding_sizes=emb_counts, embedding_dim=4,
    arch_bot=[8, 4], arch_top=[1],
    self_interaction=False,
)

# Regression setup: default Adam, mean-squared-error loss, and mean absolute
# error reported as the extra metric.
model.compile(optimizer="adam", loss="mse", metrics=["mae"])

In [None]:
BATCH_SIZE = 64

# EarlyStopping monitors `val_loss` by default, which only exists when
# validation data is supplied. Without `validation_data` the callback can
# never trigger (Keras just warns that the metric is unavailable) and
# restore_best_weights has nothing to restore.
history = model.fit(
    ds_train.batch(BATCH_SIZE),
    validation_data=ds_valid.batch(BATCH_SIZE),
    callbacks=[
        tf.keras.callbacks.EarlyStopping(patience=6, restore_best_weights=True)
    ],
    epochs=10,
    verbose=2
)

In [None]:
# evaluate() returns [loss, *metrics] in compile order. The model is compiled
# with MSE loss and the "mae" metric, so the second value is the mean absolute
# error, not an accuracy.
results = model.evaluate(ds_test.batch(BATCH_SIZE))
print(f'Loss {results[0]}, MAE {results[1]}')