In [3]:
!pip install transformers

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/f9/54/5ca07ec9569d2f232f3166de5457b63943882f7950ddfcc887732fc7fb23/transformers-4.3.3-py3-none-any.whl (1.9MB)
[K     |████████████████████████████████| 1.9MB 17.6MB/s 
Collecting tokenizers<0.11,>=0.10.1
[?25l  Downloading https://files.pythonhosted.org/packages/71/23/2ddc317b2121117bf34dd00f5b0de194158f2a44ee2bf5e47c7166878a97/tokenizers-0.10.1-cp37-cp37m-manylinux2010_x86_64.whl (3.2MB)
[K     |████████████████████████████████| 3.2MB 42.6MB/s 
[?25hCollecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)
[K     |████████████████████████████████| 890kB 49.2MB/s 
Building wheels for collected packages: sacremoses
  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone
  Created wheel for sacremoses: filename=sacremoses-0.0.43-cp37-none-any.whl size=893262 sha256=0961

In [4]:
# Load Huggingface transformers
from transformers import TFBertModel,  BertConfig, BertTokenizerFast
# Then what you need from tensorflow.keras
from tensorflow.keras.layers import Input, Dropout, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
from tensorflow.keras.utils import to_categorical
# And pandas for data import (note: sklearn is mentioned here but is not actually imported in this cell)
import pandas as pd

In [6]:
# Attach Google Drive to the Colab runtime; files then appear under /content/drive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [7]:
# Directory on the mounted Google Drive that holds the training/testing CSVs.
# The trailing slash matters: file names are appended to it by plain string
# concatenation when the CSVs are read.
consumo_dir = '/content/drive/MyDrive/0AccTeam/datasets_consumo/'

In [8]:
# Read the training and testing splits from the dataset directory on Drive.
data = pd.read_csv(filepath_or_buffer=consumo_dir + "data_training.csv")
data_test = pd.read_csv(filepath_or_buffer=consumo_dir + "data_testing.csv")

In [9]:
# Hugging Face checkpoint identifier shared by the config, tokenizer and model
# loading cells.
MODEL_NAME = 'bert-base-uncased'

In [10]:
# Sequence length in tokens: used both as the width of the model's input layer
# and as the tokenizer's max_length (truncation limit).
MAX_LENGTH = 100

In [11]:
# Fetch the pretrained BERT configuration with hidden-state outputs switched off
# (the keyword overrides the attribute on the loaded config).
config = BertConfig.from_pretrained(MODEL_NAME, output_hidden_states=False)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=433.0, style=ProgressStyle(description_…




In [12]:
# Instantiate the fast BERT tokenizer for the same checkpoint as the model
tokenizer = BertTokenizerFast.from_pretrained(MODEL_NAME, config=config)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=466062.0, style=ProgressStyle(descripti…




In [13]:
# Instantiate the TensorFlow BERT model from the pretrained checkpoint,
# using the configuration prepared above
transformer_model = TFBertModel.from_pretrained(
    pretrained_model_name_or_path=MODEL_NAME,
    config=config,
)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=536063208.0, style=ProgressStyle(descri…




Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [14]:
# Load the MainLayer: take the first Keras layer of the loaded model (shown as
# "bert (TFBertMainLayer)" in the model summary) so it can be reused as a layer
# inside a custom Keras model.
bert = transformer_model.layers[0]

In [15]:
# Declare the symbolic model input: a fixed-length vector of int32 token ids,
# wrapped in a dict keyed by the input's name
input_ids = Input(name='input_ids', dtype='int32', shape=(MAX_LENGTH,))
inputs = dict(input_ids=input_ids)

In [16]:
# Use the BERT main layer as a layer of the Keras graph and keep element [1]
# of its output (presumably the pooled sequence representation -- the summary
# reports it as shape (None, 768)).
bert_model = bert(inputs)[1]
# Dropout rate comes from the BERT config. Note that the layer is invoked with
# an explicit training=False, i.e. it is called in inference mode.
dropout = Dropout(rate=config.hidden_dropout_prob, name='pooled_output')
pooled_output = dropout(bert_model, training=False)

In [17]:
# Attach one linear classification head per task on top of the pooled output.
# Each head has one unit per distinct label value in the training frame and
# its kernel is initialised with the truncated-normal range from the BERT config.
food = Dense(
    units=data['food_label'].nunique(),
    kernel_initializer=TruncatedNormal(stddev=config.initializer_range),
    name='food',
)(pooled_output)
service = Dense(
    units=data['service_label'].nunique(),
    kernel_initializer=TruncatedNormal(stddev=config.initializer_range),
    name='service',
)(pooled_output)
outputs = dict(food=food, service=service)

In [18]:
# Wire the input dict and the two output heads into a single Keras model
model = Model(
    name='BERT_MultiLabel_MultiClass',
    inputs=inputs,
    outputs=outputs,
)

In [19]:
# Take a look at the model: prints each layer with its output shape,
# parameter count and the layer it is connected to.
model.summary()

Model: "BERT_MultiLabel_MultiClass"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_ids (InputLayer)          [(None, 100)]        0                                            
__________________________________________________________________________________________________
bert (TFBertMainLayer)          TFBaseModelOutputWit 109482240   input_ids[0][0]                  
__________________________________________________________________________________________________
pooled_output (Dropout)         (None, 768)          0           bert[0][1]                       
__________________________________________________________________________________________________
food (Dense)                    (None, 2)            1538        pooled_output[0][0]              
_________________________________________________________________________

In [20]:
#######################################
### ------- Train the model ------- ###

In [21]:
# Configure the Adam optimizer: learning rate, epsilon, learning-rate decay
# and gradient clipping by norm.
# NOTE(review): `decay` is the legacy Keras optimizer argument -- confirm it is
# still accepted by the TensorFlow version in use.
optimizer = Adam(learning_rate=5e-5, epsilon=1e-8, decay=0.01, clipnorm=1.0)

In [22]:
# Set loss and metrics, one entry per output head (keys match the head names).
# The Dense heads have no activation, so the losses consume raw logits.
loss = {
    'food': CategoricalCrossentropy(from_logits=True),
    'service': CategoricalCrossentropy(from_logits=True),
}
# CategoricalAccuracy computes accuracy, so it is labelled 'accuracy' (not
# 'recall') to keep the names shown in the training/evaluation logs truthful.
metric = {
    'food': CategoricalAccuracy('accuracy'),
    'service': CategoricalAccuracy('accuracy'),
}

In [23]:
# Bind the optimizer, the per-head losses and the per-head metrics to the model
model.compile(optimizer=optimizer, loss=loss, metrics=metric)

In [24]:
# One-hot encode the training labels of each task for the categorical losses
y_food = to_categorical(y=data['food_label'])
y_service = to_categorical(y=data['service_label'])

In [25]:
# Tokenize the input (takes some time).
# padding='max_length' pads every sequence to exactly MAX_LENGTH tokens, so the
# resulting tensor always matches the (MAX_LENGTH,) input declared for the
# model. padding=True would only pad to the longest text in the batch, which
# is shorter than MAX_LENGTH whenever no text reaches the truncation limit.
# Only input_ids are returned because the model takes no other inputs.
x = tokenizer(
    text=data['texto'].to_list(),
    add_special_tokens=True,
    max_length=MAX_LENGTH,
    truncation=True,
    padding='max_length',
    return_tensors='tf',
    return_token_type_ids=False,
    return_attention_mask=False,
    verbose=True)

In [26]:
# Fit the model
history = model.fit(
    x={'input_ids': x['input_ids']},
    y={'food': y_food, 'service': y_service},
    validation_split=0.2,
    batch_size=64,
    epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [27]:
# Persist the trained model to Google Drive as a single HDF5 (.h5) file.
# NOTE(review): the graph contains the transformers BERT main layer, which is
# not a built-in Keras layer -- reloading this file presumably requires passing
# that class via `custom_objects`; confirm when loading.
model.save('/content/drive/MyDrive/0AccTeam/finalModel.h5')

In [28]:
import tensorflow as tf

In [None]:
# Reload the saved model from Drive.
# The graph contains the transformers BERT main layer, which is not a built-in
# Keras layer; its class must be supplied through custom_objects, otherwise
# deserialization cannot resolve the layer name stored in the H5 file.
# The class is taken from the `bert` layer object already in the notebook.
model = tf.keras.models.load_model(
    '/content/drive/MyDrive/0AccTeam/finalModel.h5',
    custom_objects={type(bert).__name__: type(bert)},
)

In [29]:
# Ready test data.
# The one-hot width is fixed to the number of distinct labels in the TRAINING
# frame (the same quantity that sizes the model heads), so the targets keep the
# right shape even if the test split happens to lack the highest class.
test_y_food = to_categorical(
    data_test['food_label'],
    num_classes=data['food_label'].nunique())
test_y_service = to_categorical(
    data_test['service_label'],
    num_classes=data['service_label'].nunique())
# padding='max_length' pads every sequence to exactly MAX_LENGTH tokens so the
# tensor matches the model's fixed-width input regardless of how long the
# longest test text is (padding=True pads only to the longest text).
test_x = tokenizer(
    text=data_test['texto'].to_list(),
    add_special_tokens=True,
    max_length=MAX_LENGTH,
    truncation=True,
    padding='max_length',
    return_tensors='tf',
    return_token_type_ids=False,
    return_attention_mask=False,
    verbose=True)
# Run evaluation: returns the losses and metrics configured at compile time
model_eval = model.evaluate(
    x={'input_ids': test_x['input_ids']},
    y={'food': test_y_food, 'service': test_y_service}
)

