# Multilingual Twitter sentiment analysis with BERT

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
!pip install bert-tensorflow

Collecting bert-tensorflow
[?25l  Downloading https://files.pythonhosted.org/packages/a6/66/7eb4e8b6ea35b7cc54c322c816f976167a43019750279a8473d355800a93/bert_tensorflow-1.0.1-py2.py3-none-any.whl (67kB)
[K     |████████████████████████████████| 71kB 3.3MB/s 
Installing collected packages: bert-tensorflow
Successfully installed bert-tensorflow-1.0.1


In [5]:
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import io

from datetime import datetime

import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score

#import tf_metrics
tf.logging.set_verbosity(tf.logging.INFO)

W0731 07:09:46.743827 140125504415616 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/bert/optimization.py:87: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.



In [6]:
# Set the output directory for saving model file

OUTPUT_DIR = 'output'#@param {type:"string"}
tf.gfile.MakeDirs(OUTPUT_DIR)
print('Model output directory: {}'.format(OUTPUT_DIR))


Model output directory: output


#Data

In [0]:
path = '/content/drive/My Drive/clean_data/'
languages = ['Hungarian', 'Portuguese', 'Bosnian', 'Croatian', 'Polish', 'Russian', 'Serbian', 'Slovak', 'Slovenian', 'English', 'German', 'Swedish']

In [0]:
def load_langauge_pretraining(train_lang, test_lang):
    
    # Load train languages
    df_train = pd.DataFrame()
    for lang in train_lang:
        df = pd.read_csv(path + train_lang + '.csv')
        df.drop('Unnamed: 0', axis=1, inplace=True)
        df.dropna(axis=0, inplace=True)
        df_train = pd.concat([df_train, df], ignore_index=True)
        print(df.shape)
    
    df_train.reset_index(drop=True, inplace=True)
    print(df_train.shape)
        
    
    # Load test language
    df_train = pd.read_csv(path + train_lang + '.csv')
    df_train.drop('Unnamed: 0', axis=1, inplace=True)
    df_train.dropna(axis=0, inplace=True)
    df_train.reset_index(drop=True, inplace=True)
    print(df_test.shape)
    
    train = df_train
    test = df_test
    return train, test

In [0]:
def load_language(lang):
    
    df = pd.read_csv(path + lang + '.csv')
    df.drop('Unnamed: 0', axis=1, inplace=True)
    df.dropna(axis=0, inplace=True)
    df.reset_index(drop=True, inplace=True)
    train_ix, test_ix = train_test_split(df.index, test_size=0.3)
    train = df.iloc[train_ix]
    test = df.iloc[test_ix]
    return train, test



In [0]:
DATA_COLUMN = 'Text'
LABEL_COLUMN = 'HandLabels'
label_list = ['Positive', 'Negative', 'Neutral']

In [11]:
# This is a path to an uncased (all lowercase) version of BERT
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_multi_cased_L-12_H-768_A-12/1"

def create_tokenizer_from_hub_module():
    with tf.Graph().as_default():
        bert_module = hub.Module(BERT_MODEL_HUB)
        tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
        with tf.Session() as sess:
            vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"], tokenization_info["do_lower_case"]])
    return bert.tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=do_lower_case)

tokenizer = create_tokenizer_from_hub_module()

I0731 07:09:59.389066 140125504415616 saver.py:1499] Saver not created because there are no variables in the graph to restore
W0731 07:10:02.536997 140125504415616 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/bert/tokenization.py:125: The name tf.gfile.GFile is deprecated. Please use tf.io.gfile.GFile instead.



#Creating a model
1. Load BERT
2. Create a single new layer for finetunning


In [0]:
# define evaluation metrics average F1 score
def avg_f1_score(y_true, y_pred, encoder_dict):
    scores = f1_score(y_true, y_pred, average=None)
    # get average F1 for postive and negative F1 scores
    f1_negative = scores[0] # Negative
    f1_positive = scores[2] # Positive
    return (f1_negative + f1_positive) / 2.0

In [0]:
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels, num_labels):
    bert_module = hub.Module(BERT_MODEL_HUB, trainable=True)
    bert_inputs = dict(input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids)
    bert_outputs = bert_module(inputs=bert_inputs, signature="tokens", as_dict=True)

    # Use "pooled_output" for classification tasks on an entire sentence.
    # Use "sequence_outputs" for token-level output.
    output_layer = bert_outputs["pooled_output"]

    hidden_size = output_layer.shape[-1].value

    # Create our own layer to tune for politeness data.
    output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())
    
    with tf.variable_scope("loss"):

        # Dropout helps prevent overfitting
        output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        # Convert labels into one-hot encoding
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

        predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))
        # If we're predicting, we want predicted labels and the probabiltiies.
        if is_predicting:
            return (predicted_labels, log_probs)

        # If we're train/eval, compute loss between predicted and actual label
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)
        return (loss, predicted_labels, log_probs)


Next we'll wrap our model function in a `model_fn_builder` function that adapts our model to work for training, evaluation, and prediction.

In [0]:
# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps, num_warmup_steps):

    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)

        # TRAIN and EVAL
        if not is_predicting:

            (loss, predicted_labels, log_probs) = create_model(is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)
            train_op = bert.optimization.create_optimizer(loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)

            # Calculate evaluation metrics. 
            def metric_fn(label_ids, predicted_labels):
                accuracy = tf.metrics.accuracy(label_ids, predicted_labels)

                return {
                    "eval_accuracy": accuracy,
                }

            eval_metrics = metric_fn(label_ids, predicted_labels)

            if mode == tf.estimator.ModeKeys.TRAIN:
                return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
            else:
                return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metrics)
        else:
            (predicted_labels, log_probs) = create_model(is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

            predictions = {
                'probabilities': log_probs,
                'labels': predicted_labels
            }
            return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # Return the actual model function in the closure
    return model_fn


In [0]:
# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 2.0
# Warmup is a period of time where hte learning rate 
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 100

In [0]:
def fit_and_evaluate(train, test):
    # Use the InputExample class from BERT's run_classifier code to create examples from the data
    train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this example
                                                                       text_a = x[DATA_COLUMN], 
                                                                       text_b = None, 
                                                                       label = x[LABEL_COLUMN]), axis = 1)

    test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None, 
                                                                       text_a = x[DATA_COLUMN], 
                                                                       text_b = None, 
                                                                       label = x[LABEL_COLUMN]), axis = 1)
    
    # We'll set sequences to be at most 128 tokens long.
    MAX_SEQ_LENGTH = 128
    # Convert our train and test features to InputFeatures that BERT understands.
    train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
    test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
    
    
    # Compute # train and warmup steps from batch size
    num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
    num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)
    
    # Specify output directory and number of checkpoint steps to save
    run_config = tf.estimator.RunConfig(
        model_dir=OUTPUT_DIR,
        save_summary_steps=SAVE_SUMMARY_STEPS,
        save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)
    
    
    model_fn = model_fn_builder(
        num_labels=len(label_list),
        learning_rate=LEARNING_RATE,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps)

    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        config=run_config,
        params={"batch_size": BATCH_SIZE})

    # Create an input function for training.
    train_input_fn = bert.run_classifier.input_fn_builder(
        features=train_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=True,
        drop_remainder=False)
    
    
    print(f'Beginning Training!')
    current_time = datetime.now()
    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
    print("Training took time ", datetime.now() - current_time)
    
    
    test_input_fn = run_classifier.input_fn_builder(
        features=test_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)
    
    estimator.evaluate(input_fn=test_input_fn, steps=None)

In [0]:
# EXPERIMENT 1

languages = ['Bosnian', 'Bolgarian']
# for each language evaluate BERT on only 1 language dataset
for lang in languages:
    print(lang + '----------------------------------------------')
    train, test = load_language(lang)
    fit_and_evaluate(train, test)

Bosnian----------------------------------------------


W0731 07:10:04.288084 140125504415616 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/bert/run_classifier.py:774: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead.

I0731 07:10:04.289157 140125504415616 run_classifier.py:774] Writing example 0 of 26673
I0731 07:10:04.290577 140125504415616 run_classifier.py:461] *** Example ***
I0731 07:10:04.291525 140125504415616 run_classifier.py:462] guid: None
I0731 07:10:04.292448 140125504415616 run_classifier.py:464] tokens: [CLS] Su ##kob demo ##nst ##rana ##ta i poli ##cije u Ki ##jev ##u [SEP]
I0731 07:10:04.293177 140125504415616 run_classifier.py:465] input_ids: 101 12271 62545 30776 40300 20906 10213 177 91929 13303 189 28941 24873 10138 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
I0731 07:10:04.2

Beginning Training!


I0731 07:10:28.988588 140125504415616 estimator.py:1145] Calling model_fn.
I0731 07:10:31.870772 140125504415616 saver.py:1499] Saver not created because there are no variables in the graph to restore
W0731 07:10:32.004801 140125504415616 deprecation.py:506] From <ipython-input-13-0b1934a6b7e1>:23: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
W0731 07:10:32.041664 140125504415616 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/bert/optimization.py:27: The name tf.train.get_or_create_global_step is deprecated. Please use tf.compat.v1.train.get_or_create_global_step instead.

W0731 07:10:32.043170 140125504415616 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/bert/optimization.py:32: The name tf.train.polynomial_decay is deprecated. Please use tf.comp