Modelo para anlisis de sentmientos para luego precedir si tiene sintomas de depresion.

In [1]:
from string import punctuation
import pandas as pd
import nltk
import re

In [2]:
# read the data from tweets_public.csv and create a dataframe
df = pd.read_csv('data/tweets_public.csv', encoding='utf-8')
df.head()

Unnamed: 0,airline_sentiment,is_reply,reply_count,retweet_count,text,tweet_coord,tweet_created,tweet_id,tweet_location,user_timezone
0,neutral,False,0,0,Trabajar en #Ryanair como #TMA: https://t.co/r...,,Fri Nov 03 12:05:12 +0000 2017,926419989107798016,,Madrid
1,neutral,True,0,0,@Iberia @FIONAFERRER Cuando gusten en Cancún s...,,Sun Nov 26 18:40:28 +0000 2017,934854385577943041,,Mexico City
2,negative,False,0,0,Sabiais que @Iberia te trata muy bien en santi...,,Mon Dec 25 15:40:45 +0000 2017,945318406441635840,,Madrid
3,negative,False,0,0,NUNCA NUNCA NUNCA pidáis el café de Ryanair.\n...,,Mon Nov 06 14:18:35 +0000 2017,927540721296568320,,Pacific Time (US & Canada)
4,positive,True,0,0,@cris_tortu @dakar @Iberia @Mitsubishi_ES @BFG...,,Mon Jan 01 23:00:57 +0000 2018,947965901332197376,,Buenos Aires


In [3]:
df.shape

(7867, 10)

# Preprocesamiento
Limpieza de datos

In [4]:
# create a new dataframe with only the text and airline_sentiment columns and tweet id with the name df_sentiment
df_sentiment = df[['text', 'airline_sentiment', 'tweet_id']]

In [5]:
# transform the text letters to lowercase
df_sentiment['text'] = df_sentiment['text'].str.lower()
df_sentiment.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sentiment['text'] = df_sentiment['text'].str.lower()


Unnamed: 0,text,airline_sentiment,tweet_id
0,trabajar en #ryanair como #tma: https://t.co/r...,neutral,926419989107798016
1,@iberia @fionaferrer cuando gusten en cancún s...,neutral,934854385577943041
2,sabiais que @iberia te trata muy bien en santi...,negative,945318406441635840
3,nunca nunca nunca pidáis el café de ryanair.\n...,negative,927540721296568320
4,@cris_tortu @dakar @iberia @mitsubishi_es @bfg...,positive,947965901332197376


In [6]:
# Referencia: https://stackoverflow.com/questions/6718633/python-regular-expression-again-match-url
# remove the urls from the text but keep all the text after the url
df_sentiment.loc[:, 'text'] = df_sentiment['text'].apply(lambda x: re.split('http[s]*\S+', str(x))[0])
df_sentiment.head()

Unnamed: 0,text,airline_sentiment,tweet_id
0,trabajar en #ryanair como #tma:,neutral,926419989107798016
1,@iberia @fionaferrer cuando gusten en cancún s...,neutral,934854385577943041
2,sabiais que @iberia te trata muy bien en santi...,negative,945318406441635840
3,nunca nunca nunca pidáis el café de ryanair.\n...,negative,927540721296568320
4,@cris_tortu @dakar @iberia @mitsubishi_es @bfg...,positive,947965901332197376


In [7]:
# remove the punctuation from the text
df_sentiment.loc[:, 'text'] = df_sentiment['text'].apply(lambda x: ''.join(c for c in x if c not in punctuation))
df_sentiment.head()

Unnamed: 0,text,airline_sentiment,tweet_id
0,trabajar en ryanair como tma,neutral,926419989107798016
1,iberia fionaferrer cuando gusten en cancún se ...,neutral,934854385577943041
2,sabiais que iberia te trata muy bien en santia...,negative,945318406441635840
3,nunca nunca nunca pidáis el café de ryanair\nb...,negative,927540721296568320
4,cristortu dakar iberia mitsubishies bfgoodrich...,positive,947965901332197376


In [8]:
# change the \n to a space
df_sentiment.loc[:, 'text'] = df_sentiment['text'].apply(lambda x: x.replace('\n', ' '))
df_sentiment.head()

Unnamed: 0,text,airline_sentiment,tweet_id
0,trabajar en ryanair como tma,neutral,926419989107798016
1,iberia fionaferrer cuando gusten en cancún se ...,neutral,934854385577943041
2,sabiais que iberia te trata muy bien en santia...,negative,945318406441635840
3,nunca nunca nunca pidáis el café de ryanair bu...,negative,927540721296568320
4,cristortu dakar iberia mitsubishies bfgoodrich...,positive,947965901332197376


In [9]:
# remove the stopwrods from the text
nltk.download('stopwords')
from nltk.corpus import stopwords

stop_words = set(stopwords.words('spanish'))
df_sentiment.loc[:, 'text'] = df_sentiment['text'].apply(lambda x: ' '.join([word for word in x.split() if word.lower() not in stop_words]))
df_sentiment.head()

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\raula\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Unnamed: 0,text,airline_sentiment,tweet_id
0,trabajar ryanair tma,neutral,926419989107798016
1,iberia fionaferrer gusten cancún viaja disfrut...,neutral,934854385577943041
2,sabiais iberia trata bien santiago chilete cam...,negative,945318406441635840
3,nunca nunca nunca pidáis café ryanair bueno ve...,negative,927540721296568320
4,cristortu dakar iberia mitsubishies bfgoodrich...,positive,947965901332197376


In [10]:
# remove the emojis from the text

# regular expression pattern to remove emojis from text
emoji_pattern = re.compile("["
        u"\U0001F600-\U0001F64F"  # faces
        u"\U0001F300-\U0001F5FF"  # simbols & pictographs
        u"\U0001F680-\U0001F6FF"  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF"  # flags
        "]+", flags=re.UNICODE)

# use lambda function to remove the emojis from the text
df_sentiment.loc[:, 'text'] = df_sentiment['text'].apply(lambda x: emoji_pattern.sub(r'', x))
df_sentiment.head()

Unnamed: 0,text,airline_sentiment,tweet_id
0,trabajar ryanair tma,neutral,926419989107798016
1,iberia fionaferrer gusten cancún viaja disfrut...,neutral,934854385577943041
2,sabiais iberia trata bien santiago chilete cam...,negative,945318406441635840
3,nunca nunca nunca pidáis café ryanair bueno ve...,negative,927540721296568320
4,cristortu dakar iberia mitsubishies bfgoodrich...,positive,947965901332197376


In [11]:
# remove the numbers from the text
df_sentiment.loc[:, 'text'] = df_sentiment['text'].apply(lambda x: re.sub(r'\d+', '', x))
df_sentiment.head()

Unnamed: 0,text,airline_sentiment,tweet_id
0,trabajar ryanair tma,neutral,926419989107798016
1,iberia fionaferrer gusten cancún viaja disfrut...,neutral,934854385577943041
2,sabiais iberia trata bien santiago chilete cam...,negative,945318406441635840
3,nunca nunca nunca pidáis café ryanair bueno ve...,negative,927540721296568320
4,cristortu dakar iberia mitsubishies bfgoodrich...,positive,947965901332197376


In [12]:
df_sentiment = df_sentiment[['text', 'airline_sentiment']]
df_sentiment['label'] = df_sentiment['airline_sentiment'].apply(lambda x: 0 if x == 'negative' else 1)
df_sentiment = df_sentiment[['text', 'label']]
df_sentiment

Unnamed: 0,text,label
0,trabajar ryanair tma,1
1,iberia fionaferrer gusten cancún viaja disfrut...,1
2,sabiais iberia trata bien santiago chilete cam...,0
3,nunca nunca nunca pidáis café ryanair bueno ve...,0
4,cristortu dakar iberia mitsubishies bfgoodrich...,1
...,...,...
7862,iberia iberiaexpress especialistas dejarte tir...,0
7863,iberia destino solo click ¡deseadme suerte hol...,1
7864,iberia bien muchas gracias,1
7865,volar ryanair peor irte chingar madre culpa pobre,0


# Hugging Face

In [13]:
from transformers import pipeline
sentiment_pipeline = pipeline("sentiment-analysis")
data = ["I love you", "I hate you"]
sentiment_pipeline(data)

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


[{'label': 'POSITIVE', 'score': 0.9998656511306763},
 {'label': 'NEGATIVE', 'score': 0.9991129040718079}]

In [14]:
sentiment_pipeline('no estoy normal')

[{'label': 'NEGATIVE', 'score': 0.9994358420372009}]

In [15]:
specific_model = pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment")
specific_model(data)

[{'label': '5 stars', 'score': 0.8546808362007141},
 {'label': '1 star', 'score': 0.63460773229599}]

In [16]:
specific_model('estoy normal')

[{'label': '3 stars', 'score': 0.3511374890804291}]

In [17]:
specific_model('estoy triste')

[{'label': '1 star', 'score': 0.5364329814910889}]

## Fine tuning

In [18]:
# import torch
# torch.cuda.is_available()

In [19]:
# torch.cuda.empty_cache()

In [20]:
# split df_sentiment in 70% train and 30% test
# from sklearn.model_selection import train_test_split

# train, test = train_test_split(df_sentiment, test_size=0.3, random_state=42)
# train.shape, test.shape
from datasets import load_dataset
imdb = load_dataset("imdb")

Found cached dataset imdb (C:/Users/raula/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0)


  0%|          | 0/3 [00:00<?, ?it/s]

In [35]:
small_train_dataset = imdb["train"].shuffle(seed=42).select([i for i in list(range(3000))])
small_test_dataset = imdb["test"].shuffle(seed=42).select([i for i in list(range(300))])

Loading cached shuffled indices for dataset at C:\Users\raula\.cache\huggingface\datasets\imdb\plain_text\1.0.0\d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0\cache-9c48ce5d173413c7.arrow
Loading cached shuffled indices for dataset at C:\Users\raula\.cache\huggingface\datasets\imdb\plain_text\1.0.0\d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0\cache-c1eaa46e94dfbfd3.arrow


In [36]:
small_test_dataset

Dataset({
    features: ['text', 'label'],
    num_rows: 300
})

In [22]:
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

In [23]:
def preprocess_function(examples):
   return tokenizer(examples["text"], truncation=True)

# user the function preprocess_function to tokenize the train and test data
# tokenized_train = train['text'].apply(lambda x: tokenizer(x, truncation=True))
# tokenized_test = test['text'].apply(lambda x: tokenizer(x, truncation=True))
tokenized_train = small_train_dataset.map(preprocess_function, batched=True)
tokenized_test = small_test_dataset.map(preprocess_function, batched=True)

Loading cached processed dataset at C:\Users\raula\.cache\huggingface\datasets\imdb\plain_text\1.0.0\d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0\cache-78a6dc99249c23ee.arrow
Loading cached processed dataset at C:\Users\raula\.cache\huggingface\datasets\imdb\plain_text\1.0.0\d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0\cache-63d9f578dfbf99d0.arrow


In [24]:
from transformers import DataCollatorWithPadding
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

## train the model

In [25]:
from transformers import AutoModelForSequenceClassification
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_transform.bias', 'vocab_projector.weight', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'pre_classifier.weight', 'pre_classi

In [26]:
import numpy as np
from datasets import load_metric
 
def compute_metrics(eval_pred):
   load_accuracy = load_metric("accuracy")
   load_f1 = load_metric("f1")
  
   logits, labels = eval_pred
   predictions = np.argmax(logits, axis=-1)
   accuracy = load_accuracy.compute(predictions=predictions, references=labels)["accuracy"]
   f1 = load_f1.compute(predictions=predictions, references=labels)["f1"]
   return {"accuracy": accuracy, "f1": f1}

In [27]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [28]:
from transformers import TrainingArguments, Trainer
 
repo_name = "raulangelj/huggingface_sentiment_analysis"
 
training_args = TrainingArguments(
   output_dir=repo_name,
   learning_rate=2e-5,
   per_device_train_batch_size=2,
   per_device_eval_batch_size=2,
   num_train_epochs=2,
   weight_decay=0.01,
   save_strategy="epoch",
   push_to_hub=True,
)
 
trainer = Trainer(
   model=model,
   args=training_args,
   train_dataset=tokenized_train,
   eval_dataset=tokenized_test,
   tokenizer=tokenizer,
   data_collator=data_collator,
   compute_metrics=compute_metrics,
)

c:\Users\raula\Documents\RAUL_ANGEL\UVG_COMPU\NOVENO_SEMESTRE\DISENO-E-INOVACION\analisis_sentimientos_depresion\raulangelj/huggingface_sentiment_analysis is already a clone of https://huggingface.co/raulangelj/huggingface_sentiment_analysis. Make sure you pull the latest changes with `repo.git_pull()`.


In [29]:
trainer.train()



  0%|          | 0/3000 [00:00<?, ?it/s]

You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'loss': 0.6156, 'learning_rate': 1.6666666666666667e-05, 'epoch': 0.33}
{'loss': 0.5693, 'learning_rate': 1.3333333333333333e-05, 'epoch': 0.67}
{'loss': 0.5232, 'learning_rate': 1e-05, 'epoch': 1.0}
{'loss': 0.2543, 'learning_rate': 6.666666666666667e-06, 'epoch': 1.33}
{'loss': 0.2716, 'learning_rate': 3.3333333333333333e-06, 'epoch': 1.67}
{'loss': 0.2624, 'learning_rate': 0.0, 'epoch': 2.0}
{'train_runtime': 2973.2941, 'train_samples_per_second': 2.018, 'train_steps_per_second': 1.009, 'train_loss': 0.41606748962402346, 'epoch': 2.0}


TrainOutput(global_step=3000, training_loss=0.41606748962402346, metrics={'train_runtime': 2973.2941, 'train_samples_per_second': 2.018, 'train_steps_per_second': 1.009, 'train_loss': 0.41606748962402346, 'epoch': 2.0})

In [30]:
trainer.evaluate()

  0%|          | 0/150 [00:00<?, ?it/s]

  load_accuracy = load_metric("accuracy")


Downloading builder script:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

{'eval_loss': 0.6564839482307434,
 'eval_accuracy': 0.86,
 'eval_f1': 0.8618421052631579,
 'eval_runtime': 38.6619,
 'eval_samples_per_second': 7.76,
 'eval_steps_per_second': 3.88,
 'epoch': 2.0}

In [32]:
trainer.push_to_hub()

# BERT

In [None]:
from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup
import torch
import numpy as np
from sklearn.model_selection import train_test_split
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from textwrap import wrap

In [None]:
# Inicialización
RANDOM_SEED = 42
MAX_LEN = 200
BATCH_SIZE = 16
# DATASET_PATH = '/content/drive/My Drive/videos/2020-07-20/BERT_sentiment_IMDB_Dataset.csv'
NCLASSES = 3

np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
# # Cargar dataset
# from google.colab import drive
# drive.mount('/content/drive')

# df_bert = pd.read_csv(DATASET_PATH)
# df_bert = df[0:10000]

df_bert = df_sentiment[['text', 'airline_sentiment']]

In [None]:
print(df.head())
print(df.shape)
print("\n".join(wrap(df['text'][200])))

In [None]:
# Reajustar dataset
df_bert['label'] = df_bert['airline_sentiment'].apply(lambda x: 1 if x == 'positive' else 0 if x == 'neutral' else -1)
df_bert = df_bert.drop(['airline_sentiment'], axis=1)
df_bert.head()

In [None]:
# TOKENIZACIÓN
PRE_TRAINED_MODEL_NAME = 'bert-base-multilingual-cased'
tokenizer = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)

In [None]:
# Ejemplo tokenización
sample_txt = 'Me gusto mucho esa pelicula!'
tokens = tokenizer.tokenize(sample_txt)
token_ids = tokenizer.convert_tokens_to_ids(tokens)
print('Frase: ', sample_txt)
print('Tokens: ', tokens)
print('Tokens numéricos: ', token_ids)

In [None]:
# Codificación para introducir a BERT
encoding = tokenizer.encode_plus(
    sample_txt,
    max_length = 10,
    truncation = True,
    add_special_tokens = True,
    return_token_type_ids = False,
    pad_to_max_length = True,
    return_attention_mask = True,
    return_tensors = 'pt'
)

In [None]:
encoding.keys()

In [None]:
print(tokenizer.convert_ids_to_tokens(encoding['input_ids'][0]))
print(encoding['input_ids'][0])
print(encoding['attention_mask'][0])

In [None]:
# CREACIÓN DATASET

class TEXTDataset(Dataset):

  def __init__(self,texts,labels,tokenizer,max_len):
    self.texts = texts
    self.labels = labels
    self.tokenizer = tokenizer
    self.max_len = max_len

  def __len__(self):
      return len(self.texts)
    
  def __getitem__(self, item):
    text = str(self.texts[item])
    label = self.labels[item]
    encoding = tokenizer.encode_plus(
        text,
        max_length = self.max_len,
        truncation = True,
        add_special_tokens = True,
        return_token_type_ids = False,
        pad_to_max_length = True,
        return_attention_mask = True,
        return_tensors = 'pt'
        )
    

    return {
          'text': text,
          'input_ids': encoding['input_ids'].flatten(),
          'attention_mask': encoding['attention_mask'].flatten(),
          'label': torch.tensor(label, dtype=torch.long)
      } 

In [None]:
# Data loader:

def data_loader(df, tokenizer, max_len, batch_size):
  dataset = TEXTDataset(
      texts = df.text.to_numpy(),
      labels = df.label.to_numpy(),
      tokenizer = tokenizer,
      max_len = MAX_LEN
  )

  return DataLoader(dataset, batch_size = BATCH_SIZE, num_workers = 4)

In [None]:
df_train_bert, df_test_bert = train_test_split(df_bert, test_size = 0.2, random_state=RANDOM_SEED)

train_data_loader = data_loader(df_train_bert, tokenizer, MAX_LEN, BATCH_SIZE)
test_data_loader = data_loader(df_test_bert, tokenizer, MAX_LEN, BATCH_SIZE)

In [None]:
# Bert Model
class BERTSentimentClassifier(nn.Module):

  def __init__(self, n_classes):
    super(BERTSentimentClassifier, self).__init__()
    self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
    self.drop = nn.Dropout(p=0.3)
    self.linear = nn.Linear(self.bert.config.hidden_size, n_classes)

  def forward(self, input_ids, attention_mask):
    _, cls_output = self.bert(
        input_ids = input_ids,
        attention_mask = attention_mask
    )
    drop_output = self.drop(cls_output)
    return self.linear(drop_output)


In [None]:
model = BERTSentimentClassifier(NCLASSES)
model = model.to(device)

In [None]:
# print(model)

In [None]:
# Training
EPOCHS = 5
optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False)
total_steps = len(train_data_loader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps = 0,
    num_training_steps = total_steps
)
loss_fn = nn.CrossEntropyLoss().to(device)

In [None]:
# Iteración entrenamiento
def train_model(model, data_loader, loss_fn, optimizer, device, scheduler, n_examples):
  model = model.train()
  losses = []
  correct_predictions = 0
  for batch in data_loader:
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    labels = batch['label'].to(device)
    outputs = model(input_ids = input_ids, attention_mask = attention_mask)
    _, preds = torch.max(outputs, dim=1)
    loss = loss_fn(outputs, labels)
    correct_predictions += torch.sum(preds == labels)
    losses.append(loss.item())
    loss.backward()
    nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step()
    scheduler.step()
    optimizer.zero_grad()
  return correct_predictions.double()/n_examples, np.mean(losses)

def eval_model(model, data_loader, loss_fn, device, n_examples):
  model = model.eval()
  losses = []
  correct_predictions = 0
  with torch.no_grad():
    for batch in data_loader:
      input_ids = batch['input_ids'].to(device)
      attention_mask = batch['attention_mask'].to(device)
      labels = batch['label'].to(device)
      outputs = model(input_ids = input_ids, attention_mask = attention_mask)
      _, preds = torch.max(outputs, dim=1)
      loss = loss_fn(outputs, labels)
      correct_predictions += torch.sum(preds == labels)
      losses.append(loss.item())
  return correct_predictions.double()/n_examples, np.mean(losses)

In [None]:
# Entrenamiento!!!

for epoch in range(EPOCHS):
  print(f'Epoch {epoch + 1} de {EPOCHS}')
  print('------------------')
  train_acc, train_loss = train_model(
      model, train_data_loader, loss_fn, optimizer, device, scheduler, len(df_train_bert)
  )
  test_acc, test_loss = eval_model(
      model, test_data_loader, loss_fn, device, len(df_test_bert)
  )
  print(f'Entrenamiento: Loss: {train_loss}, accuracy: {train_acc}')
  print(f'Validación: Loss: {test_loss}, accuracy: {test_acc}')
  print('')

In [None]:
def classifySentiment(text):
  encoding_review = tokenizer.encode_plus(
      text,
      max_length = MAX_LEN,
      truncation = True,
      add_special_tokens = True,
      return_token_type_ids = False,
      pad_to_max_length = True,
      return_attention_mask = True,
      return_tensors = 'pt'
      )
  
  input_ids = encoding_review['input_ids'].to(device)
  attention_mask = encoding_review['attention_mask'].to(device)
  output = model(input_ids, attention_mask)
  _, prediction = torch.max(output, dim=1)
  print('predicción = ', prediction)
  print("\n".join(wrap(text)))

