In [2]:
from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup
import torch
import numpy as np
from sklearn.model_selection import train_test_split
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from textwrap import wrap

In [3]:
tokenizer = BertTokenizer.from_pretrained("pytorch/")

In [4]:
frase = 'Esta pelicula me hace llorar, demasiado'
tokens = tokenizer.tokenize(frase)
tokens_ids = tokenizer.convert_tokens_to_ids(tokens)
print(frase)
print(tokens)
print(tokens_ids)

Esta pelicula me hace llorar, demasiado
['est', '##a', 'pe', '##lic', '##ula', 'me', 'ha', '##ce', 'll', '##ora', '##r', ',', 'dem', '##asia', '##do']
[9765, 2050, 21877, 10415, 7068, 2033, 5292, 3401, 2222, 6525, 2099, 1010, 17183, 15396, 3527]


In [5]:
encoding = tokenizer.encode_plus(frase, max_length=20, 
                                 truncation = True, add_special_tokens=True, 
                                 return_token_type_ids=False, pad_to_max_length=True, 
                                 return_attention_mask=True, return_tensors='pt')



In [6]:
encoding.keys()

dict_keys(['input_ids', 'attention_mask'])

In [7]:
print(tokenizer.convert_ids_to_tokens(encoding['input_ids'][0]))
print(encoding['input_ids'][0])
print(encoding['attention_mask'][0])

['[CLS]', 'est', '##a', 'pe', '##lic', '##ula', 'me', 'ha', '##ce', 'll', '##ora', '##r', ',', 'dem', '##asia', '##do', '[SEP]', '[PAD]', '[PAD]', '[PAD]']
tensor([  101,  9765,  2050, 21877, 10415,  7068,  2033,  5292,  3401,  2222,
         6525,  2099,  1010, 17183, 15396,  3527,   102,     0,     0,     0])
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0])


In [8]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [9]:
analyzer = SentimentIntensityAnalyzer()
vs = analyzer.polarity_scores("Esta mesa no me gusta nada")
print(vs)

{'neg': 0.306, 'neu': 0.694, 'pos': 0.0, 'compound': -0.296}


In [10]:
from textblob import TextBlob

In [11]:
analysis = TextBlob("Esta mesa no me gusta nada")
print(analysis.sentiment)
print(analysis.tags)
print(dir(analysis))

Sentiment(polarity=0.0, subjectivity=0.0)
[('Esta', 'NNP'), ('mesa', 'VBZ'), ('no', 'DT'), ('me', 'PRP'), ('gusta', 'VBZ'), ('nada', 'NNS')]
['__add__', '__class__', '__contains__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_cmpkey', '_compare', '_create_sentence_objects', '_strkey', 'analyzer', 'classifier', 'classify', 'correct', 'detect_language', 'ends_with', 'endswith', 'find', 'format', 'index', 'join', 'json', 'lower', 'ngrams', 'noun_phrases', 'np_counts', 'np_extractor', 'parse', 'parser', 'polarity', 'pos_tagger', 'pos_tags', 'raw', 'raw_sentences', 'replace', 'rfind', 'rindex', 'sentences', 'sentiment', 'sentiment_assessments', 'serialized', 'split', 's

In [3]:
import tensorflow as tf
from transformers import pipeline
import h5py
from tensorflow.python.keras.saving import hdf5_format
from tensorflow.python.keras.saving.hdf5_format import save_attributes_to_hdf5_group

In [4]:
classifier = pipeline("sentiment-analysis")

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Downloading:   0%|          | 0.00/268M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

In [7]:
classifier("Estoy triste por no poder correr")

[{'label': 'POSITIVE', 'score': 0.5894179344177246}]