In [None]:
pip install transformers

Collecting transformers
  Downloading transformers-4.10.2-py3-none-any.whl (2.8 MB)
[K     |████████████████████████████████| 2.8 MB 8.3 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[K     |████████████████████████████████| 636 kB 47.6 MB/s 
[?25hCollecting sacremoses
  Downloading sacremoses-0.0.45-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 49.5 MB/s 
Collecting huggingface-hub>=0.0.12
  Downloading huggingface_hub-0.0.17-py3-none-any.whl (52 kB)
[K     |████████████████████████████████| 52 kB 1.5 MB/s 
[?25hCollecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 45.2 MB/s 
Installing collected packages: tokenizers, sacremoses, pyyaml, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Found existing installation: Py

In [None]:
import torch

# Pick the compute device once; later cells move tensors to `device`.
if not torch.cuda.is_available():
    # CPU fallback.
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
else:
    # CUDA is usable: select it and report what was found.
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))


No GPU available, using the CPU instead.


In [None]:
import pandas as pd

In [None]:
# Load the technology training set; the sample shown by the next cell has an
# index column plus `labels` and `spanish_text`.
# NOTE(review): absolute Drive path — presumably requires Google Drive to be
# mounted at /content/drive; confirm before running elsewhere.
df = pd.read_csv("/content/drive/MyDrive/ticket_suggestion_spanish/technology_train_spanish.csv")

In [None]:
df.sample(10)

Unnamed: 0,labels,spanish_text
167,ticket_yes,¿Me atraparán si descargo una película en BitT...
89,ticket_no,Parece que no puedo encontrar mi tarjeta dold ...
271,ticket_yes,¿Qué es la URL?
195,ticket_yes,¿Recibo alguna notificación por quejarme de mi...
206,ticket_no,¿Cuáles son sus horas de servicio?
116,ticket_no,¿Escuchaste sobre Sam?
259,ticket_yes,Mi nuevo escritorio de Windows 10 dice que Win...
78,ticket_yes,El sistema operativo de mi teléfono funciona d...
214,ticket_yes,¿Puedo usar AirPort Express con una PC con Win...
219,ticket_yes,¿Cómo me doy de baja del correo electrónico de...


In [None]:
def to_classes(labels):
  """Convert a ticket label string into its integer class id.

  Returns 0 when `labels` equals 'ticket_no' and 1 for every other value.
  """
  return 0 if labels == 'ticket_no' else 1

In [None]:
# Write the integer class ids to a new `label` column. The next cell drops the
# string `labels` column, and every later cell reads `df.label` / `df['label']`,
# so overwriting `labels` here would leave the frame without any target column.
df['label'] = df.labels.apply(to_classes)

In [None]:
# Remove the `labels` column from `df` in place (axis=1 selects columns).
df.drop(labels='labels',axis=1,inplace=True)

In [None]:
df.sample(10)

Unnamed: 0,labels,spanish_text
5,0,Transfiere mil rupias a mi cuenta bancaria.
111,1,Mi computadora portátil está infectada con dem...
106,1,"Este proyector está roto, ya no proyecta una i..."
89,0,Parece que no puedo encontrar mi tarjeta dold ...
220,0,¿Qué es COVID-19?
199,0,¿Cómo actualizo mis datos personales?
160,1,¿Qué hay de malo en usar una red Wi-Fi pública?
82,0,¿Qué haces por tus pasatiempos?
133,1,La cámara web de mi computadora portátil no fu...
149,0,Debo hacer que repitas eso.


In [None]:
from transformers import BertTokenizer

# Load the BERT tokenizer for the Spanish checkpoint named below.
# `do_lower_case=True` lower-cases the text before tokenization (see the
# tokenized sample a few cells further down).
print('Loading BERT tokenizer...')
tokenizer = BertTokenizer.from_pretrained('dccuchile/bert-base-spanish-wwm-uncased', do_lower_case=True)


Loading BERT tokenizer...


In [None]:
# Pull the raw texts and their integer class ids out of the frame as arrays.
sentences = df.spanish_text.values
labels = df.label.values

In [None]:
# Print the original sentence.
print(' Original: ', sentences[0])

# Print the sentence split into tokens.
print('Tokenized: ', tokenizer.tokenize(sentences[0]))

# Print the sentence mapped to token ids.
print('Token IDs: ', tokenizer.convert_tokens_to_ids(tokenizer.tokenize(sentences[0])))


 Original:  Buenos dias
Tokenized:  ['buenos', 'dias']
Token IDs:  [2931, 12873]


In [None]:
# Find the longest tokenized sentence (special tokens included) so a padding
# length can be chosen for the dataset.
max_len = 0

for sent in sentences:
    # Encoding adds the `[CLS]` and `[SEP]` tokens around the word pieces.
    input_ids = tokenizer.encode(sent, add_special_tokens=True)

    # Keep the running maximum.
    if len(input_ids) > max_len:
        max_len = len(input_ids)

print('Max sentence length: ', max_len)

Max sentence length:  26


In [None]:
from torch.utils import data

class TicketSuggestionDataset(data.Dataset):
  """Map-style dataset that tokenizes one text per item for sequence classification.

  Args:
    text: indexable collection of raw sentences.
    target: indexable collection of integer class ids, aligned with `text`.
    tokenizer: object exposing `encode_plus` (e.g. a Hugging Face tokenizer).
    max_len: fixed length every encoded item is padded or truncated to.
  """

  def __init__(self,text,target, tokenizer, max_len):
    self.text = text
    self.target = target
    self.tokenizer = tokenizer
    self.max_len = max_len

  def __len__(self):
    """Number of examples (one per entry of `text`)."""
    return len(self.text)

  def __getitem__(self,item):
    """Encode example `item`.

    Returns:
      dict with the raw `text`, 1-D `input_ids` and `attention_mask` tensors,
      and `targets` as a long tensor holding the class id.
    """
    text = str(self.text[item])
    # Use the tokenizer given to this instance, not a notebook-level global,
    # so the dataset works with whichever tokenizer it was constructed with.
    encoder = self.tokenizer.encode_plus(
        text,
        max_length = self.max_len,
        add_special_tokens = True,
        # `padding='max_length'` replaces the deprecated `pad_to_max_length`;
        # explicit truncation keeps every item at exactly `max_len` tokens and
        # silences the "Truncation was not explicitly activated" warning.
        padding = 'max_length',
        truncation = True,
        return_attention_mask = True,
        return_token_type_ids = False,
        return_tensors = 'pt'
    )
    return {
        'text': text,
        # The tokenizer returns (1, max_len) tensors; flatten to (max_len,).
        'input_ids': encoder['input_ids'].flatten(),
        'attention_mask': encoder['attention_mask'].flatten(),
        'targets': torch.tensor(self.target[item], dtype = torch.long)
    }

In [None]:
# Build a dataset over all loaded sentences for a quick sanity check.
# max_len=40 leaves headroom over the longest tokenized sentence reported
# above (26 tokens).
ds = TicketSuggestionDataset(
      text = sentences,
      target = labels,
      tokenizer = tokenizer,
      max_len = 40
      )

In [None]:
ds[0]

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


{'attention_mask': tensor([1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 'input_ids': tensor([    4,  2931, 12873,     5,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
             1,     1,     1,     1,     1,     1,     1,     1,     1,     1]),
 'targets': tensor(0),
 'text': 'Buenos dias'}

In [None]:
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

In [None]:
def create_dataloader(df,tokenizer,max_len,batch_size):
  """Wrap a frame's `spanish_text` / `label` columns in a randomly sampled DataLoader.

  Args:
    df: frame providing `spanish_text` and `label` columns.
    tokenizer: tokenizer forwarded to `TicketSuggestionDataset`.
    max_len: sequence length forwarded to `TicketSuggestionDataset`.
    batch_size: number of examples per batch.

  Returns:
    A `DataLoader` that draws batches through a `RandomSampler`.
  """
  texts = df.spanish_text.to_numpy()
  targets = df.label.to_numpy()
  dataset = TicketSuggestionDataset(
      text = texts,
      target = targets,
      tokenizer = tokenizer,
      max_len = max_len
  )
  sampler = RandomSampler(dataset)
  return data.DataLoader(dataset, batch_size = batch_size, sampler = sampler)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for validation. The split is stratified on `label`
# and uses a fixed `random_state` so it is repeatable.
df_train, df_val = train_test_split(
  df,
  test_size=0.2,
  random_state=42,
  stratify = df['label']
)

In [None]:
# Positional arguments are (df, tokenizer, max_len, batch_size): both loaders
# use max_len=40; training uses batches of 16 and validation batches of 8.
train_dataloader = create_dataloader(df_train,tokenizer,40,16)
val_dataloader = create_dataloader(df_val, tokenizer, 40, 8)
# NOTE(review): the batch-size notes below do not match the values passed
# above (16 / 8) — confirm which setting is intended.
#batch_size = 40 for billing
#batch_size = 32 for technology

In [None]:
from transformers import BertForSequenceClassification, AdamW, BertConfig

# Load BertForSequenceClassification, the pretrained BERT model with a single
# linear classification layer on top.
model = BertForSequenceClassification.from_pretrained(
    "dccuchile/bert-base-spanish-wwm-uncased",
    num_labels = 2, # The number of output labels--2 for binary classification.
                    # You can increase this for multi-class tasks.
    output_attentions = False, # Whether the model returns attentions weights.
    output_hidden_states = False, # Whether the model returns all hidden-states.
)

# Move the model to the device chosen at the top of the notebook (GPU when
# available, otherwise CPU). A bare `model.cuda()` raises on CPU-only runtimes,
# and the training/eval functions already move every batch to `device`.
model.to(device)


Some weights of the model checkpoint at dccuchile/bert-base-spanish-wwm-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dccuc

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(31002, 768, padding_idx=1)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [None]:
# Note: this AdamW is the class imported from the transformers library
# (as opposed to the one shipped with pytorch).
# NOTE(review): the 'W' presumably stands for the "weight decay fix" — confirm.
optimizer = AdamW(model.parameters(),
                  lr = 5e-5, # args.learning_rate - default is 5e-5, our notebook had 3e-5
                  eps = 1e-8 # args.adam_epsilon  - default is 1e-8.
                )


In [None]:
from transformers import get_linear_schedule_with_warmup

# Number of training epochs. The BERT authors recommend between 2 and 4. 
# We chose to run for 4, but we'll see later that this may be over-fitting the
# training data.
epochs = 4

# Total number of training steps is [number of batches] x [number of epochs]. 
# (Note that this is not the same as the number of training samples).
total_steps = len(train_dataloader) * epochs

# Create the learning rate scheduler.
scheduler = get_linear_schedule_with_warmup(optimizer, 
                                            num_warmup_steps = 0, # Default value in run_glue.py
                                            num_training_steps = total_steps)


In [None]:
import numpy as np

# Function to calculate the accuracy of our predictions vs labels
def flat_accuracy(preds, labels):
    """Return the fraction of rows whose arg-max class equals the label.

    Args:
        preds: 2-D array of per-class scores, one row per example.
        labels: array of integer class ids (flattened before comparison).
    """
    predicted = np.argmax(preds, axis=1).flatten()
    expected = labels.flatten()
    hits = predicted == expected
    return np.sum(hits) / len(expected)


In [None]:
import time
import datetime

def format_time(elapsed):
    '''
    Render a duration given in seconds as an h:mm:ss string.

    The value is rounded to the nearest whole second first.
    '''
    whole_seconds = int(round(elapsed))
    duration = datetime.timedelta(seconds=whole_seconds)
    return str(duration)



In [None]:
def train(model,dataloader,device,optimizer,scheduler):
  """Run one training epoch over `dataloader`.

  Args:
    model: called as `model(input_ids, token_type_ids=None,
      attention_mask=..., labels=...)`; element 0 of its output is used as
      the loss.
    dataloader: sized iterable of dict batches holding `input_ids`,
      `attention_mask` and `targets` tensors.
    device: device every batch tensor is moved to before the forward pass.
    optimizer: stepped once per batch.
    scheduler: learning-rate scheduler, stepped once per batch.

  Returns:
    Tuple `(avg_train_loss, training_time)`: the loss averaged over the
    batches of `dataloader`, and the elapsed time as formatted by `format_time`.
  """
  t0 = time.time()

  # Put the model into training mode. The call to `train` only changes the
  # *mode* (dropout behaves differently); it does not perform any training.
  model.train()

  # Running sum of the per-batch losses for this epoch.
  total_train_loss = 0

  for step, batch in enumerate(dataloader):

    # Progress update every 5 batches (skipping the very first one).
    if step % 5 == 0 and not step == 0:
      elapsed = format_time(time.time() - t0)
      print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(dataloader), elapsed))

    # `batch` is a dict with keys text / input_ids / attention_mask / targets;
    # copy the tensors the model needs onto the target device.
    b_input_ids = batch['input_ids'].to(device)
    b_input_mask = batch['attention_mask'].to(device)
    b_labels = batch['targets'].to(device)

    # Gradients accumulate by default, so clear them before each backward pass.
    model.zero_grad()

    # Forward pass. Because labels are supplied, the output carries the loss
    # at index 0 (followed by the logits).
    loss_logits = model(b_input_ids,
                        token_type_ids=None,
                        attention_mask=b_input_mask,
                        labels=b_labels)
    loss = loss_logits[0]

    # `.item()` extracts the Python number from the single-value loss tensor.
    total_train_loss += loss.item()

    # Backward pass to compute the gradients.
    loss.backward()

    # Clip the gradient norm to 1.0 to guard against exploding gradients.
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

    # Apply the parameter update, then advance the learning-rate schedule.
    optimizer.step()
    scheduler.step()

  # Average over the batches of the loader that was actually passed in rather
  # than a notebook-level global; `max(..., 1)` avoids dividing by zero when
  # the loader is empty.
  avg_train_loss = total_train_loss / max(len(dataloader), 1)

  # Measure how long this epoch took.
  training_time = format_time(time.time() - t0)

  print("")
  print("  Average training loss: {0:.2f}".format(avg_train_loss))
  print("  Training epcoh took: {:}".format(training_time))

  return avg_train_loss,training_time

In [None]:
def eval_model(model,dataloader,device):
  """Evaluate `model` on every batch of `dataloader` without updating it.

  Accuracy and loss are averaged per example (each batch weighted by its
  size), so a shorter final batch does not skew the result.

  Args:
    model: called as `model(input_ids, token_type_ids=None,
      attention_mask=..., labels=...)`; output element 0 is used as the loss
      and element 1 as the logits.
    dataloader: iterable of dict batches holding `input_ids`,
      `attention_mask` and `targets` tensors.
    device: device every batch tensor is moved to before the forward pass.

  Returns:
    Tuple `(avg_val_accuracy, avg_val_loss, validation_time)`; the time is
    formatted by `format_time`. Both averages are 0 for an empty loader.
  """
  print("")
  print("Running Validation...")

  t0 = time.time()

  # Evaluation mode: dropout layers behave differently than during training.
  model.eval()

  # Example-weighted running totals.
  total_correct = 0.0
  total_loss = 0.0
  total_examples = 0

  for batch in dataloader:

    # `batch` is a dict with keys text / input_ids / attention_mask / targets;
    # copy the tensors the model needs onto the target device.
    b_input_ids = batch['input_ids'].to(device)
    b_input_mask = batch['attention_mask'].to(device)
    b_labels = batch['targets'].to(device)

    # No compute graph is needed because there is no backward pass here.
    with torch.no_grad():
      loss_logits_val = model(b_input_ids,
                              token_type_ids=None,
                              attention_mask=b_input_mask,
                              labels=b_labels)

    batch_size = b_labels.size(0)

    # Weight the batch loss by the batch size.
    # NOTE(review): assumes the model returns a batch-mean loss — confirm
    # against the model's loss reduction.
    total_loss += loss_logits_val[0].item() * batch_size

    # Move logits and labels to CPU for the NumPy accuracy helper.
    logits = loss_logits_val[1].detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()

    # `flat_accuracy` is a per-batch fraction; convert it back to a count of
    # correct examples before accumulating.
    total_correct += flat_accuracy(logits, label_ids) * batch_size
    total_examples += batch_size

  # Guard against an empty loader.
  denominator = max(total_examples, 1)

  # Report the final accuracy for this validation run.
  avg_val_accuracy = total_correct / denominator
  print("  Accuracy: {0:.2f}".format(avg_val_accuracy))

  # Per-example average loss.
  avg_val_loss = total_loss / denominator

  # Measure how long the validation run took.
  validation_time = format_time(time.time() - t0)

  print("  Validation Loss: {0:.2f}".format(avg_val_loss))
  print("  Validation took: {:}".format(validation_time))
  return avg_val_accuracy,avg_val_loss,validation_time


In [None]:
training_stats = []

# Iterate `epochs` times: that is the same setting the scheduler's
# `total_steps` was computed from, so the loop length and the schedule stay in
# sync. A separate loop variable keeps the `epochs` setting from being
# overwritten by the loop.
for epoch in range(epochs):
  print("")
  print('======== Epoch {:} / {:} ========'.format(epoch + 1, epochs))
  print('Training...')
  train_loss, training_time = train(
      model,
      train_dataloader,
      device,
      optimizer,
      scheduler
  )
  val_acc, val_loss, validation_time = eval_model(
      model,
      val_dataloader,
      device
  )

  # Record all statistics from this epoch.
  training_stats.append(
        {
            'epoch': epoch + 1,
            'Training Loss': train_loss,
            'Valid. Loss': val_loss,
            'Valid. Accur.': val_acc,
            'Training Time': training_time,
            'Validation Time': validation_time
        }
    )



Training...




  Batch     5  of     15.    Elapsed: 0:00:01.
  Batch    10  of     15.    Elapsed: 0:00:03.

  Average training loss: 0.45
  Training epcoh took: 0:00:04

Running Validation...
  Accuracy: 0.95
  Validation Loss: 0.16
  Validation took: 0:00:00

Training...
  Batch     5  of     15.    Elapsed: 0:00:01.
  Batch    10  of     15.    Elapsed: 0:00:03.

  Average training loss: 0.09
  Training epcoh took: 0:00:04

Running Validation...
  Accuracy: 0.98
  Validation Loss: 0.06
  Validation took: 0:00:00

Training...
  Batch     5  of     15.    Elapsed: 0:00:01.
  Batch    10  of     15.    Elapsed: 0:00:03.

  Average training loss: 0.01
  Training epcoh took: 0:00:04

Running Validation...
  Accuracy: 0.97
  Validation Loss: 0.09
  Validation took: 0:00:00

Training...
  Batch     5  of     15.    Elapsed: 0:00:01.
  Batch    10  of     15.    Elapsed: 0:00:03.

  Average training loss: 0.00
  Training epcoh took: 0:00:04

Running Validation...
  Accuracy: 0.97
  Validation Loss: 0.10


In [None]:
import pandas as pd


In [None]:
# Load the dataset into a pandas dataframe.
df_test = pd.read_csv("/content/drive/MyDrive/ticket_suggestion_spanish/testcases_ticketsuggestion _tech_spanish_reduced.csv")

# Report the number of sentences.
print('Number of test sentences: {:,}\n'.format(df_test.shape[0]))
df_test['label']= df_test.actual_labels.apply(to_classes)

# Create sentence and label lists
sentences = df_test.spanish_text.values
labels = df_test.label.values


Number of test sentences: 59



In [None]:
import time

In [None]:
test_dataloader = create_dataloader(df_test,tokenizer,40,8)

In [None]:
test_acc, test_loss, test_time = eval_model(
      model,
      test_dataloader,
      device
  )


Running Validation...




  Accuracy: 0.96
  Validation Loss: 0.04
  Validation took: 0:00:01


In [None]:
def get_predictions(model, data_loader):
  """Collect texts, predicted class ids and true class ids for every example.

  Args:
    model: module called as `model(input_ids=..., token_type_ids=None,
      attention_mask=...)`; element 0 of its output is treated as the logits.
    data_loader: iterable of dict batches with `text`, `input_ids`,
      `attention_mask` and `targets`.

  Returns:
    Tuple `(review_texts, predictions, real_values)`: a list of texts and two
    1-D CPU tensors, all in the order the loader yielded the examples. For an
    empty loader the tensors are empty.
  """
  model = model.eval()

  # Run on the device the model's weights live on instead of relying on a
  # notebook-level `device` variable; fall back to CPU for a parameterless model.
  first_param = next(model.parameters(), None)
  target_device = first_param.device if first_param is not None else torch.device("cpu")

  review_texts = []
  batch_predictions = []
  batch_targets = []
  with torch.no_grad():
    for d in data_loader:
      input_ids = d["input_ids"].to(target_device)
      attention_mask = d["attention_mask"].to(target_device)
      outputs = model(
        input_ids=input_ids,
        token_type_ids=None,
        attention_mask=attention_mask
      )
      # Index of the highest logit per row is the predicted class.
      _, preds = torch.max(outputs[0], dim=1)
      review_texts.extend(d["text"])
      batch_predictions.append(preds.cpu())
      batch_targets.append(d["targets"].cpu())

  # Nothing to concatenate for an empty loader.
  if not batch_predictions:
    return review_texts, torch.empty(0, dtype=torch.long), torch.empty(0, dtype=torch.long)

  predictions = torch.cat(batch_predictions)
  real_values = torch.cat(batch_targets)
  return review_texts, predictions, real_values


In [None]:
text,prediction,actual = get_predictions(
  model,
  test_dataloader
)


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


In [None]:
def create_dataframe(text, pred, actual):
    """Assemble texts, predictions and true values into one DataFrame.

    The resulting columns are `text`, `Predictions` and `Actual`, in that order.
    """
    return pd.DataFrame({'text': text, 'Predictions': pred, 'Actual': actual})


def get_classes(label):
    """Map a class id back to its label string.

    0 becomes "ticket_no"; any other value becomes "ticket_yes".
    """
    return "ticket_no" if label == 0 else "ticket_yes"

def convert_label_class(df):
    """Replace class ids with label strings in `Predictions` and `Actual`.

    The frame is modified in place and also returned.
    """
    for column in ('Predictions', 'Actual'):
        df[column] = df[column].apply(get_classes)
    return df


In [None]:
# Build the result frame through the shared helper so the column is named
# `Predictions`, the name that `convert_label_class` and the mismatch filter
# both look up.
result = create_dataframe(text, prediction, actual)

In [None]:
# Build the labelled result frame in this cell so it does not depend on a cell
# further down having been run first, then show only the misclassified rows.
final_result = convert_label_class(create_dataframe(text, prediction, actual))
final_result[final_result['Predictions']!=final_result['Actual']]

Unnamed: 0,text,Predictions,Actual
36,¿Cómo sé si me estoy quedando sin RAM?,ticket_no,ticket_yes


In [None]:
result = create_dataframe(text,prediction,actual)

final_result = convert_label_class(result)

print(final_result)

                                                 text Predictions      Actual
0                  ¿Cómo configuro VPN en mi sistema?  ticket_yes  ticket_yes
1                                 Hola cómo estás hoy   ticket_no   ticket_no
2              ¿Cuál es la cantidad mínima de pedido?   ticket_no   ticket_no
3   Mi computadora funciona lento, creo que tengo ...  ticket_yes  ticket_yes
4   ¿Cuánto tiempo puede durar un cable HDMI y seg...  ticket_yes  ticket_yes
5                    ¿Cómo puedo arreglar mi teclado?  ticket_yes  ticket_yes
6                      mi computadora no se enciende.  ticket_yes  ticket_yes
7                   ¿Qué es un televisor inteligente?  ticket_yes  ticket_yes
8                                 como hago una pizza   ticket_no   ticket_no
9   La pantalla de mi computadora portátil no se e...  ticket_yes  ticket_yes
10                 mi enrutador y módem no funcionan.  ticket_yes  ticket_yes
11                            la tv no recibe entrada  ticket_ye

In [None]:
import os

# Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()

output_dir = '/content/drive/MyDrive/ticket_suggestion_spanish/model_finetune_BETO_uncased/technology'

# Create the output directory if needed. `exist_ok=True` makes this a single
# call with no gap between checking for the directory and creating it.
os.makedirs(output_dir, exist_ok=True)

print("Saving model to %s" % output_dir)

# Save a trained model, configuration and tokenizer using `save_pretrained()`.
# They can then be reloaded using `from_pretrained()`
model_to_save = model.module if hasattr(model, 'module') else model  # Take care of distributed/parallel training
model_to_save.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

Saving model to /content/drive/MyDrive/ticket_suggestion_spanish/model_finetune_BETO_uncased/technology


('/content/drive/MyDrive/ticket_suggestion_spanish/model_finetune_BETO_uncased/technology/tokenizer_config.json',
 '/content/drive/MyDrive/ticket_suggestion_spanish/model_finetune_BETO_uncased/technology/special_tokens_map.json',
 '/content/drive/MyDrive/ticket_suggestion_spanish/model_finetune_BETO_uncased/technology/vocab.txt',
 '/content/drive/MyDrive/ticket_suggestion_spanish/model_finetune_BETO_uncased/technology/added_tokens.json')

### Inference on raw text

In [None]:
class_names= ['ticket_no','ticket_yes']

In [None]:
review_text = 'Quiero mi efectivo en paypal'

In [None]:
# Encode the single raw sentence to a fixed length of 32 tokens.
# `padding='max_length'` replaces the deprecated `pad_to_max_length=True`, and
# truncation is requested explicitly so longer inputs are cut to `max_length`
# instead of triggering the "Truncation was not explicitly activated" warning.
encoded_review = tokenizer.encode_plus(
  review_text,
  max_length=32,
  add_special_tokens=True,
  return_token_type_ids=False,
  padding='max_length',
  truncation=True,
  return_attention_mask=True,
  return_tensors='pt',
)




In [None]:
input_ids = encoded_review['input_ids'].to(device)
attention_mask = encoded_review['attention_mask'].to(device)

# Inference only: make sure dropout is disabled and skip building the autograd
# graph. Keyword arguments keep the mask from depending on parameter order.
model.eval()
with torch.no_grad():
    output = model(input_ids=input_ids, attention_mask=attention_mask)

# Use a dedicated name so the test-set `prediction` tensor returned by
# `get_predictions` is not overwritten when this cell runs.
_, predicted_class = torch.max(output[0], dim=1)
print(f'Review text: {review_text}')
print(f'Sentiment  : {class_names[predicted_class.item()]}')

Review text: Quiero mi efectivo en paypal
Sentiment  : ticket_yes


In [None]:
from transformers import BertTokenizer, BertForSequenceClassification

In [None]:
# Reload a saved tokenizer from disk.
# NOTE(review): this path ends in `model_finetune_BETO_cased/technology`, while
# the save cell earlier in the notebook writes to
# `model_finetune_BETO_uncased/technology` — confirm which directory is intended.
tokenizer = BertTokenizer.from_pretrained("/content/drive/MyDrive/ticket_suggestion_spanish/model_finetune_BETO_cased/technology")

In [None]:
model = BertForSequenceClassification.from_pretrained("/content/drive/MyDrive/ticket_suggestion_spanish/model_finetune_BETO_cased/technology")

In [None]:
# Move the reloaded model to the device chosen at the top of the notebook;
# a bare `model.cuda()` raises on a CPU-only runtime.
model.to(device)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(31002, 768, padding_idx=1)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element