# Finetuning BERT (JRSSA paper)

In [None]:
# Colab magic selecting TensorFlow 1.x; later cells use TF1-only APIs
# (tf.keras.backend.get_session, tf.gfile).
%tensorflow_version 1.x

In [None]:
!test -d bert_repo || git clone https://github.com/google-research/bert bert_repo

import re
import random
import os
import sys
import json
import pprint
import logging
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub

from tensorflow import keras
from keras.layers import Dot
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint

from sklearn.model_selection import train_test_split
from google.colab import auth, drive

# Put the cloned BERT repo first on the import path (once, even if the cell is
# re-run) so that `modeling`, `tokenization` and `extract_features` resolve to it.
if 'bert_repo' not in sys.path:
    sys.path.insert(0, 'bert_repo')

from modeling import BertModel, BertConfig
from tokenization import FullTokenizer, convert_to_unicode
from extract_features import InputExample, convert_examples_to_features



# Get the 'tensorflow' logger and replace its handler list with an empty one.
# NOTE(review): presumably done to avoid duplicated log lines in the notebook output -- confirm.
log = logging.getLogger('tensorflow')
log.handlers = []

Access your Google account

## Step 1: Get the pre-trained BERT model

In [None]:
!wget https://storage.googleapis.com/bert_models/2018_10_18/uncased_L-12_H-768_A-12.zip
!unzip uncased_L-12_H-768_A-12.zip

In [None]:
!git clone https://github.com/gaphex/bert_experimental

sys.path.insert(0, 'bert_experimental')

from bert_experimental.finetuning.text_preprocessing import build_preprocessor
from bert_experimental.finetuning.bert_layer import BertLayer
from bert_experimental.finetuning.modeling import BertConfig, BertModel, build_bert_module


In [None]:
BERT_DIR = "/content/uncased_L-12_H-768_A-12/" #@param {type:"string"}

# Build the module in ./bert-module/ (the path BertLayer is given below) from
# the checkpoint's config, vocabulary and weights.
# os.path.join yields the same paths as before for the default value, and also
# works when BERT_DIR is entered in the form without a trailing slash.
build_bert_module(os.path.join(BERT_DIR, "bert_config.json"),
                  os.path.join(BERT_DIR, "vocab.txt"),
                  os.path.join(BERT_DIR, "bert_model.ckpt"),
                  "./bert-module/")

## Step 2: Load the training data

Import the data from Google Cloud Storage

In [None]:
#auth google
#from google.colab import auth
#auth.authenticate_user()

In [None]:
#Directory = 'NNKeras' #@param {type:"string"}

# bucket with Train file.
#!mkdir {Directory}
#TASK_DATA_DIR = Directory

#BUCKET = 'bert_mingle' #@param {type:"string"}

#!gsutil cp -R gs://{BUCKET}/{Directory} .
#print('***** Task data directory: {} *****'.format(TASK_DATA_DIR))
#!ls $TASK_DATA_DIR

Format the train and dev sets

In [None]:
# Load the tab-separated training set.
df_train = pd.read_csv("NNKeras/train_121.tsv", sep='\t')

# Column 0 is the target. np.float64 replaces the `np.float` alias, which was
# deprecated in NumPy 1.20 and removed in 1.24; the resulting dtype is the same.
quality = np.array(df_train.iloc[:, 0], dtype=np.float64)
# Column vector of shape (n_samples, 1).
trY = quality.reshape(-1, 1)

# Columns 3 and 4 hold the two texts of each pair.
# NOTE(review): looks like the MRPC-style layout (label, id1, id2, text1, text2) -- confirm.
texts1 = df_train.iloc[:, 3].tolist()
texts2 = df_train.iloc[:, 4].tolist()

trX1 = np.array(texts1)
trX2 = np.array(texts2)
#trX1, devX1, trX2, devX2, trY, devY = train_test_split(texts1, texts2, labels, shuffle=True, test_size=0.2)

In [None]:
# Load the tab-separated development (validation) set.
df_dev = pd.read_csv("NNKeras/dev_121.tsv", sep='\t')

# Column 0 is the target. np.float64 replaces the `np.float` alias, which was
# deprecated in NumPy 1.20 and removed in 1.24; the resulting dtype is the same.
quality = np.array(df_dev.iloc[:, 0], dtype=np.float64)
# Column vector of shape (n_samples, 1).
devY = quality.reshape(-1, 1)

# Columns 3 and 4 hold the two texts of each pair.
texts1 = df_dev.iloc[:, 3].tolist()
texts2 = df_dev.iloc[:, 4].tolist()

devX1 = np.array(texts1)
devX2 = np.array(texts2)

## Step 3: Build the network

In [None]:
def cosine_similarity(tensor_pair):
    """Return the cosine similarity of two tensors along their last axis.

    `tensor_pair` is a sequence of exactly two tensors. Each one is
    L2-normalised over the last axis; the result is the sum of their
    element-wise product over that axis, kept as a trailing axis of size 1.
    """
    left, right = (tf.math.l2_normalize(t, axis=-1) for t in tensor_pair)
    return tf.reduce_sum(left * right, axis=-1, keepdims=True)

In [None]:
# Two string inputs: one string per example for each side of the text pair.
inp1 = tf.keras.Input(shape=(1,), dtype=tf.string)
inp2 = tf.keras.Input(shape=(1,), dtype=tf.string)

# A single BertLayer instance is applied to both inputs below, so both branches
# go through the same encoder object.
# NOTE(review): n_tune_layers=2 / tune_embeddings=False presumably restrict
# fine-tuning to the top two transformer layers with frozen embeddings --
# confirm against bert_experimental's BertLayer.
encoder = BertLayer(bert_path="./bert-module/", seq_len=48, do_preprocessing=True,
                             verbose=False, pooling="mean", trainable=True, n_tune_layers=2,tune_embeddings=False)


# Each branch has its own Dense(50, relu) projection (two separate layer
# instances, so these weights are not shared), followed by 10% dropout.
h1 = tf.keras.layers.Dense(50, activation='relu')(encoder(inp1))
h1 = tf.keras.layers.Dropout(0.1)(h1)

h2 = tf.keras.layers.Dense(50, activation='relu')(encoder(inp2))
h2 = tf.keras.layers.Dropout(0.1)(h2)

# The model output is the cosine similarity of the two projections. Both
# vectors are non-negative (ReLU), so the similarity cannot be negative.
pred = tf.keras.layers.Lambda(cosine_similarity, name='similarity')([h1, h2])
model = tf.keras.models.Model(inputs=[inp1, inp2], outputs=[pred]) 


In [None]:
# Adam with a learning rate of 2e-5; the similarity output is trained against
# the target with binary cross-entropy, and accuracy is reported.
adam = tf.keras.optimizers.Adam(learning_rate=2e-5)
model.compile(optimizer=adam, loss="binary_crossentropy", metrics=["accuracy"])

In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

## Step 4: Train the network

In [None]:
import logging

# Only let WARNING and above through from the 'tensorflow' logger.
tf_logger = logging.getLogger("tensorflow")
tf_logger.setLevel(logging.WARNING)

In [None]:
# Stop training once val_loss has gone 2 epochs without improving.
es_callback = keras.callbacks.EarlyStopping(monitor='val_loss', patience=2, verbose=1)
# Checkpoint the model to an HDF5 file during training.
saver = keras.callbacks.ModelCheckpoint("bert_tuned_V1.hdf5")
model.summary()
# Train with both callbacks. `saver` used to be created but never handed to
# fit(), so bert_tuned_V1.hdf5 was never written.
model.fit([trX1, trX2], trY,
          validation_data=([devX1, devX2], devY),
          batch_size=32, epochs=5,
          callbacks=[es_callback, saver])


In [None]:
from tensorflow.keras.utils import plot_model
# Draw the model architecture, with tensor shapes shown.
plot_model(model,show_shapes=True)

## Step 5: Test the model

In [None]:
# Load the tab-separated test set and report its (rows, columns) shape.
df_test = pd.read_csv("NNKeras/test_121.tsv", sep='\t')
print(df_test.shape)

# Column 0 is the target. np.float64 replaces the `np.float` alias, which was
# deprecated in NumPy 1.20 and removed in 1.24; the resulting dtype is the same.
quality = np.array(df_test.iloc[:, 0], dtype=np.float64)
# Column vector of shape (n_samples, 1).
testY = quality.reshape(-1, 1)

# Columns 3 and 4 hold the two texts of each pair.
texts1 = df_test.iloc[:, 3].tolist()
texts2 = df_test.iloc[:, 4].tolist()

testX1 = np.array(texts1)
testX2 = np.array(texts2)

In [None]:
# Feed the network the test data it has never seen and look at its
# performance on them: score[0] is the loss, score[1] the accuracy metric.
test_inputs = [testX1, testX2]
score = model.evaluate(test_inputs, testY)
print('Test score:', score[0])
print('Test accuracy:', score[1])


## Step 6: Save the model

In [None]:
from tensorflow.python.framework.graph_util import convert_variables_to_constants
from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference

def freeze_keras_model(model, export_path=None, clear_devices=True):
    """
    Freezes a Keras model into a pruned computation graph with constant weights.

    @param model The Keras model to be frozen.
    @param export_path Optional file path; when not None, the serialized frozen graph is written there.
    @param clear_devices Remove the device directives from the graph for better portability.
    @return The frozen graph definition.
    """

    def _op_name(tensor):
        # Drop the output index suffix: "scope/op:0" -> "scope/op".
        return tensor.name.rsplit(":", maxsplit=1)[0]

    session = tf.keras.backend.get_session()
    keras_graph = session.graph

    with keras_graph.as_default():
        placeholder_dtypes = [inp.dtype.as_datatype_enum for inp in model.inputs]
        input_names = [_op_name(inp) for inp in model.inputs]
        output_names = [_op_name(out) for out in model.outputs]

        graph_def = keras_graph.as_graph_def()
        if clear_devices:
            for node in graph_def.node:
                node.device = ""

        # Prune the graph down to what is needed between the model inputs and
        # outputs, then replace the variables by constants holding the values
        # they currently have in the session.
        graph_def = optimize_for_inference(
            graph_def, input_names, output_names, placeholder_dtypes, False)
        graph_def = convert_variables_to_constants(session, graph_def, output_names)

        if export_path is not None:
            with tf.gfile.GFile(export_path, "wb") as out_file:
                out_file.write(graph_def.SerializeToString())

        return graph_def

In [None]:
# Freeze the trained model and write the serialized graph to frozen_graph_121_mod2.pb.
frozen_graph = freeze_keras_model(model, export_path="frozen_graph_121_mod2.pb")

In [None]:
#!gsutil cp -R gs://mann_bucket/model/frozen_graph_121_mod2.pb .

In [None]:
#!gsutil cp -r 'frozen_graph_121_mod2.pb' gs://mann_bucket/model