In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively list every file found under the Kaggle input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for full_path in (os.path.join(dirname, filename) for filename in filenames):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import tensorflow as tf

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import LogisticRegression

from tensorflow import keras
from keras import layers
from keras.callbacks import Callback
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences 

from textblob import TextBlob
from textblob import Word

import re
import string


In [None]:
# Extract every zip archive of the competition dataset from the read-only input
# directory into the current working directory (-q keeps unzip quiet).
# The extracted train.csv / test.csv / sample_submission.csv are read by the next cell.
!unzip -q "/kaggle/input/jigsaw-toxic-comment-classification-challenge/*.zip"

# List the working directory to confirm what was extracted.
!dir

In [None]:
# Names of the CSV files expected in the current working directory.
train_data_file = "train.csv"
test_data_file = "test.csv"
submission_file = "sample_submission.csv"

# Read the three tables in the same order as the file names above.
train_data, test_data, submission_result = map(
    pd.read_csv, (train_data_file, test_data_file, submission_file)
)

# Preprocessing 

In [None]:
# Set up the parameters shared by the tokenizer, the embedding matrices and the models.
max_len = 120  # number of token ids every comment is padded/truncated to
embedding_dim = 300  # width of each word vector (output_dim of the Embedding layers)
vocabulary_size = 20000 #35000  (alternative value that is currently disabled)
num_tokens = vocabulary_size+1 #including 0; this is the number of rows of each embedding matrix

In [None]:
# preprocess comment texts

# Characters that survive cleaning: the ASCII printable set of the string module.
_PRINTABLE_CHARS = frozenset(string.printable)

# (regex, replacement) pairs applied strictly in this order. Whole-word special
# cases come first so the generic suffix rules below cannot mangle them
# (e.g. "let's" must become "let us" before the "'s" -> " is" rule runs).
_CONTRACTION_RULES = (
    # specific
    (r"won't", "will not"),
    (r"can\'t", "can not"),
    # or could be 'are not' etc
    (r"ain\'t", "is not"),
    (r"shan\'t", "shall not"),
    (r"let\'s", "let us"),
    # general
    (r"n\'t", " not"),
    (r"\'re", " are"),
    (r"\'s", " is"),
    # or could be \'d --> had
    (r"\'d", " would"),
    (r"\'ll", " will"),
    (r"\'t", " not"),
    (r"\'ve", " have"),
    (r"\'m", " am"),
    # replace the rest \' with ' '
    (r"\'", " "),
)

# Hand-picked fixes for glued-together or repeated words; applied as plain
# substring replacements in insertion order.
_CORRECTIONS = {
    "youfuck": "you fuck",
    "fucksex": "fuck sex",
    "bitchbot": "bitch bot",
    "offfuck": "fuck off",
    "donkeysex": "donkey sex",
    "securityfuck": "security fuck",
    "ancestryfuck": "ancestry fuck",
    "turkeyfuck": "turkey fuck",
    "faggotgay": "faggot gay",
    "fuckbot": "fuck bot",
    "assfuckers": "ass fucker",
    "ckckck": "cock",
    "fuckfuck": "fuck",
    "lolol": "lol",
    "pussyfuck": "fuck",
    "gaygay": "gay",
    "haha": "ha",
    "sucksuck": "suck",
}


def preprocess(corpus):
    """Normalise one comment before tokenisation.

    Steps, in order:
      1. drop every character that is not in ``string.printable``
         (this removes all non-ASCII characters);
      2. lower-case the text;
      3. expand English contractions ("can't" -> "can not", "let's" -> "let us",
         "it's" -> "it is", ...) and turn any remaining apostrophe into a space;
      4. apply the substring corrections of ``_CORRECTIONS``.

    Parameters
    ----------
    corpus : str
        Raw comment text.

    Returns
    -------
    str
        The cleaned, lower-cased text.
    """
    corpus = ''.join(ch for ch in corpus if ch in _PRINTABLE_CHARS)
    corpus = corpus.lower()

    # Every rule's result is fed into the next one, so no rule is silently dropped.
    for pattern, replacement in _CONTRACTION_RULES:
        corpus = re.sub(pattern, replacement, corpus)

    for old, new in _CORRECTIONS.items():
        corpus = corpus.replace(old, new)

    return corpus

In [None]:
# Tokenizer limited by num_words = vocabulary_size + 1.
# The filter string removes punctuation, tabs, newlines and every digit,
# so numbers are ignored.
tokenizer = Tokenizer(
    num_words=vocabulary_size+1,
    filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n0123456789',
    lower=True,
    split=' ',
)

# Comment texts of the training and the test set.
X_train_raw = train_data["comment_text"]
X_test_raw = test_data["comment_text"]

# The six label columns to predict.
bad_comment_cat = [
    'toxic', 'severe_toxic', 'obscene',
    'threat', 'insult', 'identity_hate',
]
Y_train = train_data[bad_comment_cat]

In [None]:
# Clean every comment; str() makes non-string cells (e.g. NaN) safe to process.
# NOTE(review): X_*_raw are overwritten with their cleaned versions and the tokenizer
# is fitted in place, so re-running this cell processes already-processed data.
def _clean(text):
    return preprocess(str(text))

X_train_raw = X_train_raw.apply(_clean)
X_test_raw = X_test_raw.apply(_clean)
# example X_train_raw.loc[126]

# Build the vocabulary from the training comments first, then from the test comments.
for cleaned_texts in (X_train_raw, X_test_raw):
    tokenizer.fit_on_texts(cleaned_texts)


def _to_padded_sequences(texts):
    """Convert texts to integer sequences of length max_len (long ones cut at the front)."""
    sequences = tokenizer.texts_to_sequences(texts)
    return pad_sequences(sequences, maxlen=max_len, truncating="pre")


X_train = _to_padded_sequences(X_train_raw)
X_test = _to_padded_sequences(X_test_raw)

# Shuffle the training data and hold out 10% of it for validation.
x_train, x_val, y_train, y_val = train_test_split(
    X_train, Y_train, train_size=0.9, random_state=199
)
print(x_train.shape," ",y_train.shape," ",x_val.shape," ",y_val.shape)

# RNN models

In [None]:
def get_weights(embedding_vectors,embedding_dim):
    """Build the embedding matrix for the fitted tokenizer's vocabulary.

    Reads the module-level ``num_tokens`` and ``tokenizer``. Row ``i`` of the
    result is the pretrained vector of the word whose tokenizer index is ``i``;
    rows of words without a vector stay zero.

    A word missing from ``embedding_vectors`` and shorter than 20 characters is
    looked up once more under the top suggestion of TextBlob's spellcheck.
    Words that still have no vector are counted and the count is printed.

    Parameters
    ----------
    embedding_vectors : mapping with ``.get``
        Maps a word to its pretrained vector.
    embedding_dim : int
        Width of each vector, i.e. the number of columns of the result.

    Returns
    -------
    numpy.ndarray of shape (num_tokens, embedding_dim)
    """
    weights = np.zeros((num_tokens,embedding_dim))
    n_missed = 0

    # word_index starts at 1; the loop stops at the first index outside the
    # matrix, relying on word_index being ordered by word frequency.
    for token, index in tokenizer.word_index.items():
        if index >= num_tokens:
            break

        found = embedding_vectors.get(token)
        if found is None and len(token) < 20:
            # Second chance: retry under the most likely spelling correction.
            corrected = Word(token).spellcheck()[0][0]
            found = embedding_vectors.get(str(corrected))

        if found is None:
            n_missed += 1
            continue

        weights[index] = found

    print(f"The number of missed words is {n_missed}")

    return weights

In [None]:
# read the pretrained model fastText
# The .vec file is text: the first line is a header, every following line is
# "<word> <v1> <v2> ..." with fields separated by single spaces.
embedding_vectors_fasttext = {}
# The encoding is given explicitly so parsing does not depend on the platform's locale.
with open("/kaggle/input/fasttext-crawl-300d-2m/crawl-300d-2M.vec","r",encoding="utf-8") as file:
    file.readline()  # skip the header line
    for line in file:
        # Split on the first literal space only: a bare split() would also split on
        # Unicode whitespace that may occur inside (or as) a token and corrupt the vector.
        word , vector = line.rstrip("\n").split(" ",1)
        vector = np.fromstring(vector,"float32",sep=" ")
        embedding_vectors_fasttext[word] = vector

In [None]:
# assign vectors to words using the pretrained model fasttext
# The shared embedding_dim constant is passed instead of a repeated literal 300, so the
# matrix width always matches the output_dim used by the Embedding layers.
embedding_weights_fasttext = get_weights(embedding_vectors_fasttext,embedding_dim=embedding_dim)

In [None]:
# read the pretrained model GloVe
# Every line of the file is "<word> <v1> <v2> ..." with fields separated by single spaces
# (there is no header line to skip).
embedding_vectors_glove = {}
# The encoding is given explicitly so parsing does not depend on the platform's locale.
with open("/kaggle/input/glove6b/glove.6B.300d.txt","r",encoding="utf-8") as file:
    for line in file:
        # Split on the first literal space only, which is the format's field delimiter;
        # a bare split() would also split on any Unicode whitespace inside a token.
        word , vector = line.rstrip("\n").split(" ",1)
        vector = np.fromstring(vector,"float32",sep=" ")
        embedding_vectors_glove[word] = vector

In [None]:
# assign vectors to words using the pretrained model GloVe
# The shared embedding_dim constant is passed instead of a repeated literal 300, so the
# matrix width always matches the output_dim used by the Embedding layers.
embedding_weights_glove = get_weights(embedding_vectors_glove,embedding_dim=embedding_dim)

In [None]:
def build_gru_model_glove():
    """Build and compile the bidirectional-GRU classifier initialised with GloVe vectors.

    Reads the module-level ``max_len``, ``num_tokens``, ``embedding_dim`` and
    ``embedding_weights_glove``.

    Architecture: Embedding (trainable, initialised from GloVe) -> SpatialDropout1D(0.3)
    -> Bidirectional GRU (42 units per direction, sequences returned)
    -> GlobalMaxPooling1D -> Dense(6, sigmoid).

    Returns
    -------
    keras.Model
        Compiled with Adam, binary cross-entropy and the AUC metric.
    """
    inputs = layers.Input(shape=(max_len,))

    x = layers.Embedding(input_dim=num_tokens,
                         output_dim=embedding_dim,
                         embeddings_initializer=keras.initializers.Constant(embedding_weights_glove),
                         trainable=True)(inputs)

    x = layers.SpatialDropout1D(0.3)(x)

    # The two directions are configured separately: only the backward GRU uses
    # a relu activation and input dropout.
    forward_layer = layers.GRU(42,return_sequences=True)
    backward_layer = layers.GRU(42,activation="relu",dropout=0.1,return_sequences=True,go_backwards=True)
    x = layers.Bidirectional(forward_layer,backward_layer=backward_layer)(x)

    x = layers.GlobalMaxPooling1D()(x)

    # One sigmoid unit per label column.
    outputs = layers.Dense(units=6,activation='sigmoid')(x)

    model = keras.models.Model(inputs=inputs, outputs=outputs, name="GRU_model_glove")

    model.compile(optimizer=tf.optimizers.Adam(),
                  loss=tf.losses.BinaryCrossentropy(),
                  metrics=['AUC'])

    return model

# The builder has its own name so that this cell can be re-run: binding the model
# to the builder's name would leave no builder to call on a second run.
GRU_model_glove = build_gru_model_glove()
GRU_model_glove.summary()

In [None]:
# Train the GloVe model for two epochs, validating on the held-out split.
history = GRU_model_glove.fit(
    x_train, y_train,
    epochs=2, batch_size=128,
    validation_data=(x_val,y_val),
)

In [None]:
def build_gru_model_fasttext():
    """Build and compile the bidirectional-GRU classifier on frozen fastText vectors.

    Reads the module-level ``max_len``, ``num_tokens``, ``embedding_dim`` and
    ``embedding_weights_fasttext``.

    Architecture: Embedding (frozen, initialised from fastText) -> SpatialDropout1D(0.3)
    -> Bidirectional GRU (64 units per direction, sequences returned)
    -> concatenation of global average and global max pooling -> Dense(6, sigmoid).

    Returns
    -------
    keras.Model
        Compiled with Adam, binary cross-entropy and the AUC metric.
    """
    inputs = layers.Input(shape=(max_len,))

    x = layers.Embedding(input_dim=num_tokens,
                         output_dim=embedding_dim,
                         embeddings_initializer=keras.initializers.Constant(embedding_weights_fasttext),
                         trainable=False)(inputs)

    x = layers.SpatialDropout1D(0.3)(x)

    # The two directions are configured separately: only the backward GRU uses
    # a relu activation and input dropout.
    forward_layer = layers.GRU(64,return_sequences=True)
    backward_layer = layers.GRU(64,activation="relu",dropout=0.3,return_sequences=True,go_backwards=True)
    x = layers.Bidirectional(forward_layer,backward_layer=backward_layer)(x)

    avg_pool = layers.GlobalAveragePooling1D()(x)
    max_pool = layers.GlobalMaxPooling1D()(x)
    x = layers.concatenate([avg_pool,max_pool])

    # One sigmoid unit per label column.
    outputs = layers.Dense(units=6,activation='sigmoid')(x)

    model = keras.models.Model(inputs=inputs, outputs=outputs, name="GRU_model")

    model.compile(optimizer=tf.optimizers.Adam(),
                  loss=tf.losses.BinaryCrossentropy(),
                  metrics=['AUC'])

    return model

# The builder has its own name so that this cell can be re-run: binding the model
# to the builder's name would leave no builder to call on a second run.
GRU_model_fasttext = build_gru_model_fasttext()
GRU_model_fasttext.summary()
history = GRU_model_fasttext.fit(x_train, y_train,
                                 epochs=2, batch_size=32,
                                 validation_data=(x_val,y_val))

# Ensemble

In [None]:
# Stack the two GRU models: for every label, a logistic regression maps the two
# base-model probabilities to the final probability.
# NOTE(review): the stackers are fitted on predictions for x_train, the same rows the
# base models were trained on; consider fitting them on held-out predictions instead.
model_nums = 2
base_models = (GRU_model_fasttext, GRU_model_glove)

# Base-model predictions, laid out as (model, sample, label).
size1 = x_train.shape[0]
y_train_pred = np.zeros((model_nums,size1,6),dtype="float32")
for slot, base_model in enumerate(base_models):
    y_train_pred[slot] = base_model.predict(x_train)

size2 = X_test.shape[0]
y_test_pred = np.zeros((model_nums,size2,6),dtype="float32")
for slot, base_model in enumerate(base_models):
    y_test_pred[slot] = base_model.predict(X_test)

y_pred = np.zeros((size2,6),dtype="float32")

for label_idx, label in enumerate(bad_comment_cat):
    # Feature matrix of shape (samples, models) for this label.
    train_features = np.ascontiguousarray(y_train_pred[:,:,label_idx].T)
    test_features = np.ascontiguousarray(y_test_pred[:,:,label_idx].T)

    stacker = LogisticRegression()
    stacker.fit(train_features,y_train[label])

    # Column 1 of predict_proba is the probability of the positive class.
    y_pred[:,label_idx] = stacker.predict_proba(test_features)[:,1]

# Submission

In [None]:
# Overwrite the six label columns of the sample submission with the ensemble's
# predictions and save the result without the index column.
submission_result[bad_comment_cat] = y_pred
submission_result.to_csv("submission.csv",index=False)

# Previous Models

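The idea below (translating non-English comments into English) has not been implemented; the snippet is kept for reference only.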
```
# Not all reviews are English. 
# It is better to translate all of them into English in order to make the full use of data.
!pip install --quiet google_trans_new
from google_trans_new import google_translator  
translator = google_translator()  
#translator.translate(X_train_raw[i],lang_tgt="en")
```

## An RNN model with pretrained word2vec and LSTM

```
import spacy 
nlp = spacy.load('en_core_web_lg')
nlp.meta['vectors']

# Turn every training comment into one vector of length 300.
# spaCy first maps every word of a comment to a vector of length 300 and then
# averages them into the vector of the whole comment. Other ways of building
# sentence vectors (e.g. TF-IDF weighting) would be possible as well.

n_train_comments = len(train_data.index)
x_train_spacy = np.zeros(shape=(n_train_comments,300,1),dtype=np.float32)
for i in train_data.index:
    comment_vector = nlp(train_data.loc[i,'comment_text']).vector
    x_train_spacy[i] = np.reshape(comment_vector,(300,1))

# Define the model (architecture)
def model_spacy():
    """Build and compile the LSTM classifier that consumes spaCy comment vectors.

    Input shape is (300, 1). Layers: LSTM(128, sequences returned, L2 recurrent
    regulariser) -> Dense(1) -> Flatten -> Dense(6, sigmoid).

    Returns
    -------
    keras.Model
        Compiled with Adam, binary cross-entropy and the AUC metric.
    """
    initializer = keras.initializers.HeNormal()

    model = keras.models.Sequential(name='with_pretraining_word2vec')
    model.add(keras.Input(shape=(300,1),name='input'))
    model.add(layers.LSTM(units=128, dropout=0.2,recurrent_regularizer=keras.regularizers.L2(0.1),
                          return_sequences=True, kernel_initializer=initializer, name='layer_1'))
    #recurrent_dropout=0.2
    model.add(layers.Dense(units=1,name='layer_2'))
    model.add(layers.Flatten())
    model.add(layers.Dense(units=6,activation='sigmoid',kernel_initializer=initializer,name='output'))

    model.compile(optimizer=tf.optimizers.Adam(),loss=tf.losses.BinaryCrossentropy(),metrics=['AUC'])

    # Return the compiled model, not this builder function.
    return model
    
model_spacy = model_spacy()
model_spacy.summary()

# x_train_spacy has one row per row of train_data, so it is paired with the full
# label frame Y_train (y_train only holds the 90% training split).
model_spacy.fit(x_train_spacy,Y_train,epochs=12,batch_size=512)

# Vectorise the test comments the same way as the training comments.
x_test_spacy = np.zeros(shape=(len(test_data.index),300,1),dtype=np.float32)
for i in test_data.index:
    x_test_spacy[i] = np.reshape(nlp(test_data.loc[i,'comment_text']).vector,(300,1))

submission_result[bad_comment_cat] = model_spacy.predict(x_test_spacy)
submission_result.info()
submission_result.to_csv("submission.csv",index=False)
```

## A Capsule model with fastText


```
#from https://github.com/bojone/Capsule/blob/master/Capsule_Keras.py
#! -*- coding: utf-8 -*-
# refer: https://kexue.fm/archives/5112

from keras import activations
from keras import backend as K
from keras.engine.topology import Layer

def squash(x, axis=-1):
    """Rescale x along axis by sqrt(n) / (0.5 + n), where n is its squared norm plus epsilon."""
    squared_norm = K.sum(K.square(x), axis, keepdims=True) + K.epsilon()
    factor = K.sqrt(squared_norm)/ (0.5 + squared_norm)
    return factor * x


#define our own softmax function instead of K.softmax
def softmax(x, axis=-1):
    """Softmax of x along axis; the per-axis maximum is subtracted before exponentiating."""
    shifted = x - K.max(x, axis=axis, keepdims=True)
    exponentials = K.exp(shifted)
    normaliser = K.sum(exponentials, axis=axis, keepdims=True)
    return exponentials/normaliser


#A Capsule Implement with Pure Keras
class Capsule(Layer):
    """Capsule layer with iterative routing.

    Projects the input capsules with a learned kernel, then refines the
    coupling between input and output capsules for ``routings`` iterations.

    Parameters
    ----------
    num_capsule : number of output capsules.
    dim_capsule : size of each output capsule vector.
    routings : number of routing iterations run in ``call`` (default 3).
    share_weights : if True, one kernel is shared by all input capsules;
        otherwise every input capsule gets its own kernel.
    activation : 'squash' selects this file's ``squash`` function; any other
        value is resolved through ``keras.activations.get``.
    """

    def __init__(self, num_capsule, dim_capsule, routings=3, share_weights=True, activation='squash', **kwargs):
        """Store the configuration; remaining keyword arguments go to the base Layer."""
        super(Capsule, self).__init__(**kwargs)
        self.num_capsule = num_capsule
        self.dim_capsule = dim_capsule
        self.routings = routings
        self.share_weights = share_weights
        if activation == 'squash':
            self.activation = squash
        else:
            self.activation = activations.get(activation)

    def build(self, input_shape):
        """Create the projection kernel ``self.W`` from the input shape.

        The last input axis is the input capsule size. With shared weights the
        kernel's first axis is 1; otherwise it equals the number of input
        capsules (the second-to-last input axis).
        """
        super(Capsule, self).build(input_shape)
        input_dim_capsule = input_shape[-1]
        if self.share_weights:
            self.W = self.add_weight(name='capsule_kernel',
                                     shape=(1, input_dim_capsule,
                                            self.num_capsule * self.dim_capsule),
                                     initializer='glorot_uniform',
                                     trainable=True)
        else:
            input_num_capsule = input_shape[-2]
            self.W = self.add_weight(name='capsule_kernel',
                                     shape=(input_num_capsule,
                                            input_dim_capsule,
                                            self.num_capsule * self.dim_capsule),
                                     initializer='glorot_uniform',
                                     trainable=True)

    def call(self, u_vecs):
        """Project the input capsules and run the routing iterations.

        Returns ``self.activation`` applied to the output capsules computed in
        the last routing iteration.
        """
        # Project every input capsule onto all output capsules at once.
        if self.share_weights:
            u_hat_vecs = K.conv1d(u_vecs, self.W)
        else:
            u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

        batch_size = K.shape(u_vecs)[0]
        input_num_capsule = K.shape(u_vecs)[1]
        # Split the projected axis into (num_capsule, dim_capsule).
        u_hat_vecs = K.reshape(u_hat_vecs, (batch_size, input_num_capsule,
                                            self.num_capsule, self.dim_capsule))
        u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
        #final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]

        # Routing logits start at zero.
        b = K.zeros_like(u_hat_vecs[:,:,:,0]) #shape = [None, num_capsule, input_num_capsule]
        for i in range(self.routings):
            # Coupling coefficients: softmax of the logits over axis 1 (num_capsule).
            c = softmax(b, 1)
            # o = K.batch_dot(c, u_hat_vecs, [2, 2])
            # Weighted sum of the projections over the input capsules.
            o = tf.einsum('bin,binj->bij', c, u_hat_vecs)
            if K.backend() == 'theano':
                o = K.sum(o, axis=1)
            # On every iteration but the last, the logits are recomputed (replaced,
            # not accumulated) from the L2-normalised outputs.
            if i < self.routings - 1:
                o = K.l2_normalize(o, -1)
                # b = K.batch_dot(o, u_hat_vecs, [2, 3])
                b = tf.einsum('bij,binj->bin', o, u_hat_vecs)
                if K.backend() == 'theano':
                    b = K.sum(b, axis=1)

        return self.activation(o)

    def compute_output_shape(self, input_shape):
        """Return (None, num_capsule, dim_capsule); the input shape is not consulted."""
        return (None, self.num_capsule, self.dim_capsule)
```

```
def squash(x, axis=-1):
    """Divide x by its L2 norm along axis (epsilon is added under the square root)."""
    # Earlier variant, disabled because s_squared_norm is really small:
    # s_squared_norm = K.sum(K.square(x), axis, keepdims=True) + K.epsilon()
    # scale = K.sqrt(s_squared_norm)/ (0.5 + s_squared_norm)
    # return scale * x
    squared_norm = K.sum(K.square(x), axis, keepdims=True)
    norm = K.sqrt(squared_norm + K.epsilon())
    return x / norm
```

```
def capsule_model_fasttext():
    """Build and compile the BiGRU + Capsule classifier on frozen fastText vectors.

    Reads the module-level ``max_len``, ``num_tokens``, ``embedding_dim`` and
    ``embedding_weights_fasttext``.

    Layers: Embedding (frozen) -> SpatialDropout1D(0.3) -> Bidirectional GRU(64)
    -> Capsule(8 capsules of size 16, 6 routings) -> Flatten -> Dropout(0.2)
    -> Dense(6, sigmoid). Compiled with Adam, binary cross-entropy and AUC.
    """
    token_ids = layers.Input(shape=(max_len,))

    embedding = layers.Embedding(
        input_dim=num_tokens,
        output_dim=embedding_dim,
        embeddings_initializer=keras.initializers.Constant(embedding_weights_fasttext),
        trainable=False,
    )
    hidden = embedding(token_ids)
    hidden = layers.SpatialDropout1D(0.3)(hidden)

    # Separate GRUs per direction so that each can have its own dropout rate.
    gru_forward = layers.GRU(64,dropout=0.2,return_sequences=True)
    gru_backward = layers.GRU(64,dropout=0.3,return_sequences=True,go_backwards=True)
    hidden = layers.Bidirectional(gru_forward,backward_layer=gru_backward)(hidden)

    hidden = Capsule(num_capsule=8, dim_capsule=16, routings=6, share_weights=True)(hidden)

    hidden = layers.Flatten()(hidden)
    hidden = layers.Dropout(0.2)(hidden)

    probabilities = layers.Dense(units=6,activation='sigmoid')(hidden)

    model = keras.models.Model(inputs=token_ids, outputs=probabilities, name="capsule_model_fasttext")
    model.compile(
        optimizer=tf.optimizers.Adam(),
        loss=tf.losses.BinaryCrossentropy(),
        metrics=['AUC'],
    )

    return model
```

```
# Build the model. NOTE: this rebinds the builder's name to the model it returns,
# so the builder cannot be called again without re-running its definition.
capsule_model_fasttext = capsule_model_fasttext()
capsule_model_fasttext.summary()
```
```
# Train the capsule model for four epochs, validating on the held-out split.
history = capsule_model_fasttext.fit(
    x_train, y_train,
    epochs=4, batch_size=64,
    validation_data=(x_val,y_val),
)
```
 

## BiLSTM+BiGRU+GloVe


```
def GRU_model_glove():
    """Build and compile the LSTM + BiGRU classifier on frozen GloVe vectors.

    Reads the module-level ``max_len``, ``num_tokens``, ``embedding_dim`` and
    ``embedding_weights_glove``.

    Layers: Embedding (frozen) -> SpatialDropout1D(0.3) -> LSTM(32)
    -> Bidirectional GRU(40) -> average and max pooling concatenated
    -> Dense(6, sigmoid). Compiled with Adam, binary cross-entropy and AUC.
    """
    token_ids = layers.Input(shape=(max_len,))

    embedding = layers.Embedding(
        input_dim=num_tokens,
        output_dim=embedding_dim,
        embeddings_initializer=keras.initializers.Constant(embedding_weights_glove),
        trainable=False,
    )
    hidden = embedding(token_ids)

    hidden = layers.SpatialDropout1D(0.3)(hidden)
    hidden = layers.LSTM(32,return_sequences=True)(hidden)
    bi_gru = layers.Bidirectional(
        layers.GRU(40,dropout=0.2,recurrent_dropout=0.2,return_sequences=True)
    )
    hidden = bi_gru(hidden)

    # Summarise the sequence by both its mean and its maximum over time.
    pooled_mean = layers.GlobalAveragePooling1D()(hidden)
    pooled_max = layers.GlobalMaxPooling1D()(hidden)
    hidden = layers.concatenate([pooled_mean,pooled_max])

    probabilities = layers.Dense(units=6,activation='sigmoid')(hidden)

    model = keras.models.Model(inputs=token_ids, outputs=probabilities, name="GRU_model_glove")
    model.compile(
        optimizer=tf.optimizers.Adam(),
        loss=tf.losses.BinaryCrossentropy(),
        metrics=['AUC'],
    )

    return model
```
```
# Build the GloVe model (the builder's name is rebound to the model), show its
# layers and train it for six epochs with validation on the held-out split.
GRU_model_glove = GRU_model_glove()
GRU_model_glove.summary()
history = GRU_model_glove.fit(
    x_train, y_train,
    epochs=6, batch_size=128,
    validation_data=(x_val,y_val),
)
```

## BiGRU+fastText

```
def GRU_model_fasttext():
    """Build and compile the BiGRU classifier on frozen fastText vectors.

    Reads the module-level ``max_len``, ``num_tokens``, ``embedding_dim`` and
    ``embedding_weights_fasttext``.

    Layers: Embedding (frozen) -> SpatialDropout1D(0.3) -> Bidirectional GRU(64)
    -> average and max pooling concatenated -> Dense(6, sigmoid).
    Compiled with Adam, binary cross-entropy and AUC.
    """
    token_ids = layers.Input(shape=(max_len,))

    embedding = layers.Embedding(
        input_dim=num_tokens,
        output_dim=embedding_dim,
        embeddings_initializer=keras.initializers.Constant(embedding_weights_fasttext),
        trainable=False,
    )
    hidden = embedding(token_ids)
    hidden = layers.SpatialDropout1D(0.3)(hidden)

    # Only the backward direction applies input dropout.
    gru_forward = layers.GRU(64,return_sequences=True)
    gru_backward = layers.GRU(64,dropout=0.3,return_sequences=True,go_backwards=True)
    hidden = layers.Bidirectional(gru_forward,backward_layer=gru_backward)(hidden)

    # Summarise the sequence by both its mean and its maximum over time.
    pooled_mean = layers.GlobalAveragePooling1D()(hidden)
    pooled_max = layers.GlobalMaxPooling1D()(hidden)
    hidden = layers.concatenate([pooled_mean,pooled_max])

    probabilities = layers.Dense(units=6,activation='sigmoid')(hidden)

    model = keras.models.Model(inputs=token_ids, outputs=probabilities, name="GRU_model_fasttext")
    model.compile(
        optimizer=tf.optimizers.Adam(),
        loss=tf.losses.BinaryCrossentropy(),
        metrics=['AUC'],
    )

    return model
```

```
# Build the fastText model (the builder's name is rebound to the model), show its
# layers and train it for four epochs with validation on the held-out split.
GRU_model_fasttext = GRU_model_fasttext()
GRU_model_fasttext.summary()
history = GRU_model_fasttext.fit(
    x_train, y_train,
    epochs=4, batch_size=128,
    validation_data=(x_val,y_val),
)
```


## Ensemble

```
# Stack three base models: for every label, a logistic regression maps the three
# base-model probabilities to the final probability.
model_nums = 3
size1 = x_train.shape[0]

# Every base model is evaluated with .predict(), which processes the data in
# batches and returns a NumPy array; calling a model directly would push the
# whole dataset through it as a single batch.
y_train_pred = np.zeros((model_nums,size1,6),dtype="float32")
y_train_pred[0] = GRU_model_fasttext.predict(x_train)
y_train_pred[1] = GRU_model_glove.predict(x_train)
y_train_pred[2] = capsule_model_fasttext.predict(x_train)

size2 = X_test.shape[0]
y_test_pred = np.zeros((model_nums,size2,6),dtype="float32")
y_test_pred[0] = GRU_model_fasttext.predict(X_test)
y_test_pred[1] = GRU_model_glove.predict(X_test)
y_test_pred[2] = capsule_model_fasttext.predict(X_test)

y_pred = np.zeros((size2,6),dtype="float32")

for i in range(6):
    lg = LogisticRegression()
    # Feature matrix of shape (samples, models) for label i.
    temp = np.zeros((size1,model_nums),dtype="float32")
    for j in range(model_nums):
        temp[:,j] = y_train_pred[j,:,i]
    lg.fit(temp,y_train[bad_comment_cat[i]])

    temp = np.zeros((size2,model_nums),dtype="float32")
    for j in range(model_nums):
        temp[:,j] = y_test_pred[j,:,i]
    # Column 1 of predict_proba is the probability of the positive class.
    y_pred[:,i] = lg.predict_proba(temp)[:,1]
```

Other ensemble models (from an earlier version): a fixed-weight average of the two models' predictions.
```
# Share of the blend given to the fastText model; the GloVe model gets the rest.
ensemble_weight = 0.7
y_pred_fasttext = GRU_model_fasttext.predict(X_test)
y_pred_glove = GRU_model_glove.predict(X_test)
y_pred = ensemble_weight*y_pred_fasttext + (1-ensemble_weight)*y_pred_glove
```

Combine the two pretrained embedding models by concatenating their vectors for every word:
```
# Place the fastText and the GloVe vector of every word side by side (axis=1).
embedding_weights = np.concatenate((embedding_weights_fasttext,embedding_weights_glove),axis=1)

def model():
    """Build and compile the BiGRU classifier on the concatenated embeddings.

    Reads the module-level ``max_len``, ``num_tokens`` and ``embedding_weights``.

    Layers: Embedding (frozen) -> SpatialDropout1D(0.4) -> Bidirectional GRU(128)
    -> average and max pooling concatenated -> Dense(12, relu) -> Dense(6, sigmoid).
    Compiled with Adam (clipvalue=1), binary cross-entropy and AUC.
    """
    token_ids = layers.Input(shape=(max_len,))

    embedding = layers.Embedding(
        input_dim=num_tokens,
        output_dim=embedding_weights.shape[1],
        embeddings_initializer=keras.initializers.Constant(embedding_weights),
        trainable=False,
    )
    hidden = embedding(token_ids)

    hidden = layers.SpatialDropout1D(0.4)(hidden)

    bi_gru = layers.Bidirectional(layers.GRU(128,dropout=0.2,return_sequences=True))
    hidden = bi_gru(hidden)
    # layers.CuDNNGRU

    # Disabled alternative with a separately configured GRU per direction:
    #forward_layer = layers.GRU(40,dropout=0.1,return_sequences=True)
    #backward_layer = layers.GRU(40,dropout=0.2,return_sequences=True,go_backwards=True)
    #x = layers.Bidirectional(forward_layer,backward_layer=backward_layer)(x)

    # Summarise the sequence by both its mean and its maximum over time.
    pooled_mean = layers.GlobalAveragePooling1D()(hidden)
    pooled_max = layers.GlobalMaxPooling1D()(hidden)
    hidden = layers.concatenate([pooled_mean,pooled_max])

    hidden = layers.Dense(units=12,activation='relu')(hidden)
    probabilities = layers.Dense(units=6,activation='sigmoid')(hidden)

    mixed_model = keras.models.Model(inputs=token_ids, outputs=probabilities, name="mixed_model")
    mixed_model.compile(
        optimizer=tf.optimizers.Adam(clipvalue=1),
        loss=tf.losses.BinaryCrossentropy(),
        metrics=['AUC'],
    )

    return mixed_model

model = model()
model.summary()
```

```
# Stop training once the validation AUC has not improved for three epochs.
callback = keras.callbacks.EarlyStopping(monitor='val_auc', mode='max', patience=3)

history = model.fit(
    x_train, y_train,
    epochs=4, batch_size=64,
    validation_data=(x_val,y_val),
    callbacks=[callback],
)
```